"""Find and remove duplicate tracks in an iTunes library XML export.

Two tracks are duplicates when they have different Track IDs but the same
Name and Artist.  Within each set of duplicates the smallest file is kept,
the other tracks are dropped from the XML tree and their media files are
deleted from disk, and the kept track takes over the lowest Track ID of the
set.

The code runs on both Python 2 and Python 3.
"""
import json
import os
import re
from collections import OrderedDict
from xml.etree import ElementTree

try:  # Python 3
    from urllib.parse import urlparse
    from urllib.request import url2pathname
except ImportError:  # Python 2
    from urllib import url2pathname
    from urlparse import urlparse

# TODO: make this a real path to your iTunes Media Library.xml
LIBRARY_PATH = 'somepath/itunes_library.xml'

# TODO: fill in a file path for the modified library file, e.g.
# 'somepath/itunes_library-modified.xml'.  While this is None the modified
# tree is not written anywhere.
OUTPUT_PATH = None


def _key_value_pairs(track):
    """Yield (key_element, value_element) for every <key> child of track."""
    children = list(track)
    for key, value in zip(children, children[1:]):
        # Only <key> elements name an attribute.  Matching on text alone
        # would also hit values, e.g. a track whose Name is "Size".
        if key.tag == 'key':
            yield key, value


def _to_int(text):
    """Return int(text), or None when text is missing or not a number."""
    try:
        return int(text)
    except (TypeError, ValueError):
        return None


def get_track_attr(track, attr_name):
    """Return the text of the value stored under attr_name in a track dict.

    track is a plist <dict> element whose children alternate between <key>
    elements and value elements.  Returns None when there is no such key.
    """
    for key, value in _key_value_pairs(track):
        if key.text == attr_name:
            return value.text
    return None


def set_track_attr(track, attr_name, value):
    """Set the text of the value stored under attr_name in a track dict.

    Every <key> child whose text equals attr_name has its following sibling
    updated.  Nothing happens when the key is absent.
    """
    for key, value_element in _key_value_pairs(track):
        if key.text == attr_name:
            value_element.text = value


def are_tracks_duplicate(track, track2):
    """Return True when two track summaries describe the same song.

    Both arguments are dicts with 'track_id', 'name' and 'artist' entries.
    They are duplicates when the ids differ but name and artist match.
    """
    return (
        track.get('track_id') != track2.get('track_id')
        and track.get('name') == track2.get('name')
        and track.get('artist') == track2.get('artist')
    )


def find_duplicates(tracks):
    """Work out which tracks to drop and which ids the survivors inherit.

    tracks is the list of children of the library's Tracks dict (alternating
    <key> and <dict> elements); it is not modified.

    Tracks are grouped by (name, artist).  In each group with more than one
    track the smallest file is kept (on equal sizes the one appearing last
    in the list wins) and the rest are flagged for removal.  If the kept
    track does not already have the lowest Track ID of its group, an update
    record tells it to take that id over.

    Tracks whose Track ID or Size is missing or not numeric are left alone,
    because they cannot be compared.

    Returns (nodes_to_remove, nodes_to_update): a list of Track ID strings
    and a list of {'track_id': ..., 'new_track_id': ...} dicts.
    """
    groups = OrderedDict()
    seen = set()
    for element in tracks:
        # only look at the dictionaries
        if element.tag != 'dict':
            continue
        info = {
            'track_id': get_track_attr(element, 'Track ID'),
            'filesize': get_track_attr(element, 'Size'),
            'artist': get_track_attr(element, 'Artist'),
            'name': get_track_attr(element, 'Name'),
        }
        print(info.get('track_id'))
        id_number = _to_int(info['track_id'])
        size_number = _to_int(info['filesize'])
        if id_number is None or size_number is None:
            continue
        group_key = (info['name'], info['artist'])
        # A repeated id inside a group is the same track, not a duplicate.
        if (group_key, info['track_id']) in seen:
            continue
        seen.add((group_key, info['track_id']))
        groups.setdefault(group_key, []).append(
            (id_number, size_number, info))

    nodes_to_remove = []
    nodes_to_update = []
    for group in groups.values():
        if len(group) < 2:
            continue
        keeper = group[0]
        for candidate in group[1:]:
            # "<=" so that on equal sizes the later track is the one kept.
            if candidate[1] <= keeper[1]:
                keeper = candidate
        lowest = min(group, key=lambda entry: entry[0])
        for entry in group:
            if entry is keeper:
                continue
            print('found duplicate!')
            nodes_to_remove.append(entry[2]['track_id'])
            print(len(nodes_to_remove))
        # track id inheritance
        if lowest is not keeper:
            nodes_to_update.append({
                'track_id': keeper[2]['track_id'],
                'new_track_id': lowest[2]['track_id'],
            })
    return nodes_to_remove, nodes_to_update


def remove_flagged_tracks(tracks, nodes_to_remove):
    """Split tracks into the elements to keep and the dropped locations.

    A <key> element is dropped when its text is a flagged Track ID, and a
    <dict> element when its 'Track ID' attribute is.  tracks itself is not
    modified.

    Returns (kept, locations): the surviving elements in their original
    order, and the 'Location' value of every dropped <dict> (None where a
    track has no Location).
    """
    flagged = set(nodes_to_remove)
    kept = []
    locations = []
    for element in tracks:
        if element.tag == 'key' and element.text in flagged:
            print('removing track key')
            print(element.text)
            continue
        if (element.tag == 'dict'
                and get_track_attr(element, 'Track ID') in flagged):
            print('removing from xml tree')
            locations.append(get_track_attr(element, 'Location'))
            continue
        kept.append(element)
    return kept, locations


def location_to_path(location):
    """Convert a 'file://' Location URL into a local filesystem path.

    Percent-escapes are decoded and any host part (such as 'localhost') is
    ignored.  Returns None when location is empty or is not a file URL.
    """
    if not location:
        return None
    parsed = urlparse(location)
    if parsed.scheme != 'file':
        return None
    return url2pathname(parsed.path)


def delete_track_file(location):
    """Delete the media file a Location URL points at.

    Returns True when the file was removed.  A location that is not a local
    file, or a file that cannot be removed, is reported and skipped so one
    bad entry does not stop the run.
    """
    filepath = location_to_path(location)
    if filepath is None:
        print('no local file for location: %s' % location)
        return False
    print('deleting file:')
    print(filepath)
    try:
        os.remove(filepath)
    except (OSError, IOError) as error:
        print('could not delete %s: %s' % (filepath, error))
        return False
    return True


def apply_track_id_updates(tracks, nodes_to_update):
    """Rewrite Track IDs in place according to the update records.

    For every record, <key> elements whose text is the old id and <dict>
    elements whose 'Track ID' is the old id are changed to the new id.
    """
    new_ids = {}
    for update in nodes_to_update:
        print('updating nodes')
        new_ids[update.get('track_id')] = update.get('new_track_id')
    if not new_ids:
        return
    for element in tracks:
        if element.tag == 'key':
            if element.text in new_ids:
                print('found key match')
                element.text = new_ids[element.text]
        elif element.tag == 'dict':
            current_id = get_track_attr(element, 'Track ID')
            if current_id in new_ids:
                print('found dict match')
                set_track_attr(element, 'Track ID', new_ids[current_id])


def main(library_path=LIBRARY_PATH, output_path=OUTPUT_PATH):
    """Deduplicate the library at library_path.

    Deletes the media files of the dropped tracks from disk.  The modified
    XML is written only when output_path is set.
    """
    # dump the xml file into an ElementTree object
    tree = ElementTree.parse(library_path)
    root = tree.getroot()
    top_dict = root.find('dict')
    tracks_container = top_dict.find('dict') if top_dict is not None else None
    if tracks_container is None:
        raise ValueError('no tracks dict found in %s' % library_path)
    tracks = list(tracks_container)

    # analyse first so that modifying the tree is a separate step
    nodes_to_remove, nodes_to_update = find_duplicates(tracks)

    # Optional checkpoint: the two lists can be saved and reloaded so the
    # analysis does not have to be repeated.
    # with open('somepath/tracks_to_delete.json', 'w') as outfile:
    #     json.dump(nodes_to_remove, outfile)
    # with open('somepath/tracks_to_update.json', 'w') as outfile:
    #     json.dump(nodes_to_update, outfile)
    # with open('somepath/tracks_to_delete.json', 'r') as infile:
    #     nodes_to_remove = json.load(infile)
    # with open('somepath/tracks_to_update.json', 'r') as infile:
    #     nodes_to_update = json.load(infile)

    # delete the old tracks
    kept, locations = remove_flagged_tracks(tracks, nodes_to_remove)
    for location in locations:
        delete_track_file(location)

    # update the nodes
    apply_track_id_updates(kept, nodes_to_update)

    # Update our xml tree with the list.  Slice assignment swaps the
    # children only; Element.clear() would also reset the container's
    # attributes, text and tail.
    tracks_container[:] = kept

    # save the changes we've made
    if output_path:
        tree.write(output_path)


if __name__ == '__main__':
    main()