# itunes.py
# (Page header captured with the source: "Last active 1 day ago" / "Raw".)
import json
import os
import re
from xml.etree import ElementTree

try:  # Python 3
    from urllib.parse import unquote, urlparse
except ImportError:  # Python 2
    from urllib import unquote
    from urlparse import urlparse
5
def get_track_attr(track, attr_name):
    """Return the text of the value stored under *attr_name* in a track.

    *track* is expected to be a plist-style ``<dict>`` element whose children
    alternate between ``<key>`` elements and the value elements that follow
    them.

    Args:
        track: The element whose children are searched.
        attr_name: Text of the ``<key>`` element to look for.

    Returns:
        The ``text`` of the element immediately after the first matching
        ``<key>``, or ``None`` when there is no such key or the key is the
        last child and therefore has no value after it.
    """
    children = list(track)
    # Pair each child with its successor.  zip() stops one short of the end,
    # so a trailing key without a value can never index past the list.
    for key_el, value_el in zip(children, children[1:]):
        # Only <key> elements name attributes; a *value* whose text happens to
        # equal attr_name (e.g. a song called "Artist") must not match.
        if key_el.tag == 'key' and key_el.text == attr_name:
            return value_el.text
    return None
14
def set_track_attr(track, attr_name, value):
    """Overwrite the text of the value stored under *attr_name* in a track.

    *track* is expected to be a plist-style ``<dict>`` element whose children
    alternate between ``<key>`` elements and the value elements that follow
    them.  Only the first ``<key>`` whose text equals *attr_name* is
    considered; if there is none, or it has no following value element, the
    track is left untouched.

    Args:
        track: The element whose children are searched.
        attr_name: Text of the ``<key>`` element to look for.
        value: New ``text`` for the element that follows the key.

    Returns:
        None. The element is modified in place.
    """
    children = list(track)
    # zip() pairs each child with its successor and stops one short of the
    # end, so a trailing key without a value cannot index past the list.
    for key_el, value_el in zip(children, children[1:]):
        # Match <key> elements only, so a value whose text equals attr_name
        # is never treated as a key.
        if key_el.tag == 'key' and key_el.text == attr_name:
            value_el.text = value
            # Stop here: scanning on after the write could re-match the text
            # just written when value == attr_name.
            return
23
def are_tracks_duplicate(track, track2):
    """Tell whether two track summaries describe the same song.

    Both arguments are mappings read with ``.get``; the keys consulted are
    ``'track_id'``, ``'name'`` and ``'artist'``.

    Returns:
        True when the track ids differ while name and artist are both equal,
        otherwise False.
    """
    # Equal ids mean the two summaries refer to the same library entry, which
    # is not a duplicate of itself.
    if track.get('track_id') == track2.get('track_id'):
        return False
    return all(
        track.get(field) == track2.get(field) for field in ('name', 'artist')
    )
26
# Parse the library XML file into an ElementTree object.
# The file is opened read-only in binary mode: 'rw' is not a valid mode
# string, and binary input lets the XML parser apply the encoding declared
# in the document itself.
# TODO: make this a real path to your iTunes Media Library.xml
with open('somepath/itunes_library.xml', 'rb') as f:
    tree = ElementTree.parse(f)

root = tree.getroot()
# The first <dict> nested inside the root's first <dict> is used as the
# container of track entries.
tracks_container = root.find('dict').find('dict')
# Working copy of the container's children; the duplicate scan edits this
# list rather than the tree.
tracks = list(tracks_container)

# init our lists so that we can separate modifying our tree from analyzing
nodes_to_remove = []  # track ids flagged for deletion
nodes_to_update = []  # {'track_id': ..., 'new_track_id': ...} renumberings
39
def _track_summary(element):
    """Collect the fields of a track ``<dict>`` used for duplicate detection."""
    return {
        'track_id': get_track_attr(element, 'Track ID'),
        'filesize': get_track_attr(element, 'Size'),
        'artist': get_track_attr(element, 'Artist'),
        'name': get_track_attr(element, 'Name'),
    }


# Iterate over track list in reverse (because we'll be removing items).
# At most one entry is removed per outer iteration, so every remaining
# (smaller) index is still within bounds afterwards.
for t in range(len(tracks) - 1, -1, -1):
    # only look at the dictionaries
    if tracks[t].tag != 'dict':
        continue

    track = _track_summary(tracks[t])
    print(track.get('track_id'))

    # make sure this track hasn't already been flagged for removal
    if track.get('track_id') in nodes_to_remove:
        continue
    print('this track has not already been flagged for removal')

    # look for another entry with a different track id but the same name
    # and artist
    for t2 in range(len(tracks)):
        # only look at the dictionaries
        if tracks[t2].tag != 'dict':
            continue

        # Every field is read from tracks[t2]; reading the artist from
        # tracks[t] would make the artist comparison always succeed.
        track2 = _track_summary(tracks[t2])

        if not are_tracks_duplicate(track, track2):
            continue
        print('found duplicate!')

        track_id = track.get('track_id')
        track2_id = track2.get('track_id')
        size = track.get('filesize')
        size2 = track2.get('filesize')

        # Without both ids and both sizes there is no basis for deciding
        # which copy to drop (and int(None) would raise), so leave the pair
        # alone and keep scanning.
        if None in (track_id, track2_id, size, size2):
            print('skipping duplicate with missing id or size')
            continue

        # then compare sizes
        if int(size) <= int(size2):
            # `track` is not larger than `track2`: flag `track2` for removal
            nodes_to_remove.append(track2_id)

            # track id inheritance: the surviving track takes the lower id
            if int(track_id) > int(track2_id):
                nodes_to_update.append(
                    {'track_id': track_id, 'new_track_id': track2_id}
                )

            # remove from list
            tracks.remove(tracks[t2])
        else:
            # `track` is the larger one: flag it for removal
            nodes_to_remove.append(track_id)

            # track id inheritance: the surviving track takes the lower id
            if int(track_id) < int(track2_id):
                nodes_to_update.append(
                    {'track_id': track2_id, 'new_track_id': track_id}
                )

            # remove from list
            tracks.remove(tracks[t])

        print(len(nodes_to_remove))
        # one duplicate handled per pass over the list
        break
97
# Optional checkpoint (disabled): uncomment to save the analysis results to
# disk and load them back instead of re-running the scan.
#
# # save our list of track ids to delete
# with open('somepath/tracks_to_delete.json', 'wb') as outfile:
#     json.dump(nodes_to_remove, outfile)
#
# # save our list of track dictionaries for swapping
# with open('somepath/tracks_to_update.json', 'wb') as outfile:
#     json.dump(nodes_to_update, outfile)


# # load our track ids
# with open('somepath/tracks_to_delete.json', 'r') as outfile:
#     nodes_to_remove = json.load(outfile)
#
# with open('somepath/tracks_to_update.json', 'r') as outfile:
#     nodes_to_update = json.load(outfile)
113
# delete the old tracks

# we modified tracks previously, let's start fresh
tracks = list(tracks_container)

# Build the lookup once; set membership does not slow down as more ids are
# flagged.
ids_to_remove = set(nodes_to_remove)

# Walk the list backwards by index so that deleting the current entry never
# shifts an entry that is still to be visited.
for t in range(len(tracks) - 1, -1, -1):
    track = tracks[t]

    if track.tag == 'key':
        # remove the track id key object
        if track.text in ids_to_remove:
            print('removing track key')
            print(track.text)
            del tracks[t]
        continue

    if track.tag != 'dict':
        continue

    # remove the dict
    if get_track_attr(track, 'Track ID') not in ids_to_remove:
        continue

    print('removing from xml tree')
    del tracks[t]

    location = get_track_attr(track, 'Location')
    if location is None:
        # no Location entry, so there is no file to delete
        print('track has no Location, nothing to delete on disk')
        continue

    # Location is a URL.  Take its path component (this drops both the
    # "file://" scheme and any host such as "localhost") and decode every
    # percent-escape, not only %20.
    parsed = urlparse(location)
    if parsed.scheme != 'file':
        # never map a remote URL's path onto the local filesystem
        print('not a local file, skipping: ' + location)
        continue
    filepath = unquote(parsed.path)

    print('deleting file:')
    print(filepath)
    try:
        os.remove(filepath)
    except (OSError, IOError) as e:
        # best effort: report the failure and carry on with the other tracks
        print('could not delete file: %s' % e)
151
# update the nodes: give each surviving track the id recorded for it in
# nodes_to_update
for n in nodes_to_update:
    print('updating nodes')
    track_id = n.get('track_id')
    new_track_id = n.get('new_track_id')

    for track in tracks:
        if track.tag == 'key':
            if track.text == track_id:
                print('found key match')
                track.text = new_track_id
        elif track.tag == 'dict' and get_track_attr(track, 'Track ID') == track_id:
            print('found dict match')
            set_track_attr(track, 'Track ID', new_track_id)
            # Stop at the first matching dict; presumably its <key> sibling
            # sits earlier in the list and has already been renumbered.
            break
168
# update our xml tree with the list
# Slice assignment swaps the container's children for the edited list while
# leaving its own text, tail and attributes alone; Element.clear() would
# reset those as well.
tracks_container[:] = tracks

# save the changes we've made
# TODO: fill in a file path for the modified library file
# tree.write('somepath/itunes_library-modified.xml')
176