itunes.py
· 6.3 KiB · Python
Raw
import json
import os
import re
from xml.etree import ElementTree
def get_track_attr(track, attr_name):
    """Return the text of the value stored under *attr_name* in a track element.

    The children of *track* are scanned as (element, next sibling) pairs.
    The first ``<key>`` element whose text equals *attr_name* selects its
    next sibling as the value, and that sibling's text is returned.

    Only ``<key>`` elements are considered, so a value whose text happens to
    equal *attr_name* (for example a track named "Artist") is never mistaken
    for the key itself.

    Args:
        track: an ElementTree element whose children are key/value siblings.
        attr_name: the key text to look for, e.g. ``'Track ID'``.

    Returns:
        The text of the value element, or ``None`` when the key is absent or
        is the last child and therefore has no value after it.
    """
    attributes = list(track)
    # Pairing each child with its successor means a trailing key without a
    # value simply yields no pair instead of indexing past the end.
    for key_node, value_node in zip(attributes, attributes[1:]):
        if key_node.tag == 'key' and key_node.text == attr_name:
            return value_node.text
    return None
def set_track_attr(track, attr_name, value):
    """Overwrite the text of the value stored under *attr_name* in a track element.

    The children of *track* are scanned as (element, next sibling) pairs.
    For every ``<key>`` element whose text equals *attr_name*, the text of
    its next sibling is replaced with *value*.

    Only ``<key>`` elements are considered, so a value whose text happens to
    equal *attr_name* cannot cause the following key to be overwritten.

    Args:
        track: an ElementTree element whose children are key/value siblings.
        attr_name: the key text to look for, e.g. ``'Track ID'``.
        value: the new text for the value element.

    Returns:
        None. The element is modified in place; nothing happens when the key
        is absent or has no value element after it.
    """
    attributes = list(track)
    # Pairing each child with its successor means a trailing key without a
    # value simply yields no pair instead of indexing past the end.
    for key_node, value_node in zip(attributes, attributes[1:]):
        if key_node.tag == 'key' and key_node.text == attr_name:
            value_node.text = value
def are_tracks_duplicate(track, track2):
    """Tell whether two track summaries describe the same song stored twice.

    Both arguments are mappings read through ``.get`` for the keys
    ``'track_id'``, ``'name'`` and ``'artist'``.

    Returns:
        True when the track ids differ while name and artist both match,
        False otherwise.
    """
    # An entry compared with itself is never a duplicate.
    if track.get('track_id') == track2.get('track_id'):
        return False
    if track.get('name') != track2.get('name'):
        return False
    return track.get('artist') == track2.get('artist')
# The URL helpers live in different modules on Python 2 and Python 3.
try:
    from urllib.parse import urlparse
    from urllib.request import url2pathname
except ImportError:
    from urllib import url2pathname
    from urlparse import urlparse


def _track_summary(node):
    """Return the fields used for duplicate detection, or None if unusable.

    A node is unusable when it is not a <dict> element or when it has no
    'Track ID' or 'Size' value, because both are converted with int() when
    two duplicates are compared.
    """
    if node.tag != 'dict':
        return None
    summary = {
        'track_id': get_track_attr(node, 'Track ID'),
        'filesize': get_track_attr(node, 'Size'),
        'artist': get_track_attr(node, 'Artist'),
        'name': get_track_attr(node, 'Name'),
    }
    if summary['track_id'] is None or summary['filesize'] is None:
        return None
    return summary


# load the xml file into an ElementTree object
# TODO: make this a real path to your iTunes Media Library.xml
# 'rb' because the file is only read here ('rw' is not a valid mode) and the
# XML parser decodes the bytes itself.
with open('somepath/itunes_library.xml', 'rb') as f:
    tree = ElementTree.parse(f)

root = tree.getroot()
# First <dict> inside the root's first <dict>; the loops below treat its
# children as <key> elements (holding a track id) and <dict> track records.
tracks_container = root.find('dict').find('dict')
tracks = list(tracks_container)

# init our lists so that we can separate modifying our tree from analyzing
nodes_to_remove = []
nodes_to_update = []

# Summarise every usable track once up front instead of re-reading the XML
# for every pair that gets compared.
candidates = [
    summary
    for summary in (_track_summary(node) for node in tracks)
    if summary is not None
]

# Iterate over the candidates in reverse (because we'll be removing items).
# At most one item is removed per outer step, so index t is always valid.
for t in reversed(range(len(candidates))):
    track = candidates[t]

    print(track.get('track_id'))

    # make sure this track hasn't already been flagged for removal
    if track.get('track_id') in nodes_to_remove:
        continue
    print('this track has not already been flagged for removal')

    # look for another entry with a different id but the same name and artist
    for t2 in range(len(candidates)):
        track2 = candidates[t2]
        if not are_tracks_duplicate(track, track2):
            continue

        print('found duplicate!')
        # then compare sizes: the entry with the smaller file is kept
        # (on a tie, `track` is kept)
        if int(track.get('filesize')) <= int(track2.get('filesize')):
            nodes_to_remove.append(track2.get('track_id'))

            # track id inheritance: the survivor takes over the lower id
            if int(track.get('track_id')) > int(track2.get('track_id')):
                nodes_to_update.append({'track_id': track.get('track_id'), 'new_track_id': track2.get('track_id')})

            # remove from list
            del candidates[t2]
        else:
            nodes_to_remove.append(track.get('track_id'))

            # track id inheritance: the survivor takes over the lower id
            if int(track.get('track_id')) < int(track2.get('track_id')):
                nodes_to_update.append({'track_id': track2.get('track_id'), 'new_track_id': track.get('track_id')})

            # remove from list
            del candidates[t]

        print(len(nodes_to_remove))
        # the list just changed under the indices, so stop this inner scan
        break

# # save our list of track ids to delete
# with open('somepath/tracks_to_delete.json', 'wb') as outfile:
# json.dump(nodes_to_remove, outfile)
#
# # save our list of track dictionaries for swapping
# with open('somepath/tracks_to_update.json', 'wb') as outfile:
# json.dump(nodes_to_update, outfile)
# # load our track ids
# with open('somepath/tracks_to_delete.json', 'r') as outfile:
# nodes_to_remove = json.load(outfile)
#
# with open('somepath/tracks_to_update.json', 'r') as outfile:
# nodes_to_update = json.load(outfile)

# delete the old tracks
# start again from the container's children, this time including the <key>
# elements, and walk backwards so deleting by index never skips an element
tracks = list(tracks_container)
# set for constant-time membership tests in the loop below
ids_to_remove = set(nodes_to_remove)

for t in reversed(range(len(tracks))):
    node = tracks[t]

    # remove the track id key object
    if node.tag == 'key' and node.text in ids_to_remove:
        print('removing track key')
        print(node.text)
        # remove from xml
        del tracks[t]
    # remove the dict
    elif node.tag == 'dict':
        if get_track_attr(node, 'Track ID') not in ids_to_remove:
            continue

        print('removing from xml tree')
        # remove from xml
        del tracks[t]

        location = get_track_attr(node, 'Location')
        if location is None:
            # nothing on disk is referenced by this entry
            print('track has no location, no file to delete')
            continue

        # Turn the Location URL into a local path: drop the scheme and host
        # and decode every percent-escape, not just %20.
        filepath = url2pathname(urlparse(location).path)

        print('deleting file:')
        print(filepath)
        try:
            os.remove(filepath)
        except (OSError, IOError) as e:
            # best effort: report the failure and carry on with the rest
            print('could not delete file: %s' % e)

# update the nodes
for n in nodes_to_update:
    print('updating nodes')
    track_id = n.get('track_id')
    new_track_id = n.get('new_track_id')

    for node in tracks:
        if node.tag == 'key' and node.text == track_id:
            print('found key match')
            node.text = new_track_id
        elif node.tag == 'dict' and get_track_attr(node, 'Track ID') == track_id:
            print('found dict match')
            set_track_attr(node, 'Track ID', new_track_id)
            # stop scanning for this id once its dict has been rewritten
            break

# update our xml tree with the list
# slice assignment swaps the children only; Element.clear() would also wipe
# the container's own text, tail and attributes
tracks_container[:] = tracks

# save the changes we've made
# TODO: fill in a file path for the modified library file
# tree.write('somepath/itunes_library-modified.xml')
| 1 | import json |
| 2 | import os |
| 3 | import re |
| 4 | from xml.etree import ElementTree |
| 5 | |
| 6 | def get_track_attr (track, attr_name): |
| 7 | # iterate over each attr to find the filename |
| 8 | attributes = list(track) |
| 9 | for a in range(len(attributes)): |
| 10 | attr = attributes[a] |
| 11 | |
| 12 | if attr.text == attr_name: |
| 13 | return attributes[a+1].text |
| 14 | |
| 15 | def set_track_attr (track, attr_name, value): |
| 16 | # iterate over each attr to find the filename |
| 17 | attributes = list(track) |
| 18 | for a in range(len(attributes)): |
| 19 | attr = attributes[a] |
| 20 | |
| 21 | if attr.text == attr_name: |
| 22 | attributes[a+1].text = value |
| 23 | |
| 24 | def are_tracks_duplicate (track, track2): |
| 25 | return track.get('track_id') != track2.get('track_id') and track.get('name') == track2.get('name') and track.get('artist') == track2.get('artist') |
| 26 | |
| 27 | # dump the xml file into an ElementTree object |
| 28 | # TODO: make this a real path to your iTunes Media Library.xml |
| 29 | with open('somepath/itunes_library.xml', 'rw') as f: |
| 30 | tree = ElementTree.parse(f) |
| 31 | |
| 32 | root = tree.getroot() |
| 33 | tracks_container = root.find('dict').find('dict') |
| 34 | tracks = list(tracks_container) |
| 35 | |
| 36 | # init our lists so that we can separate modifying our tree from analyzing |
| 37 | nodes_to_remove = [] |
| 38 | nodes_to_update = [] |
| 39 | |
| 40 | # Iterate over track list in reverse (because we'll be removing items) |
| 41 | for t in reversed(xrange(len(tracks))): |
| 42 | # only look at the dictionaries |
| 43 | if (tracks[t].tag == 'dict'): |
| 44 | track = { |
| 45 | 'track_id': get_track_attr(tracks[t], 'Track ID'), |
| 46 | 'filesize': get_track_attr(tracks[t], 'Size'), |
| 47 | 'artist': get_track_attr(tracks[t], 'Artist'), |
| 48 | 'name': get_track_attr(tracks[t], 'Name'), |
| 49 | } |
| 50 | |
| 51 | print track.get('track_id') |
| 52 | |
| 53 | # make sure this track hasn't already been flagged for removal |
| 54 | if track.get('track_id') not in nodes_to_remove: |
| 55 | print 'this track has not already been flagged for removal' |
| 56 | |
| 57 | # check if track_id is different and name and total time match |
| 58 | for t2 in range(len(tracks)): |
| 59 | # only look at the dictionaries |
| 60 | if (tracks[t2].tag == 'dict'): |
| 61 | track2 = { |
| 62 | 'track_id': get_track_attr(tracks[t2], 'Track ID'), |
| 63 | 'filesize': get_track_attr(tracks[t2], 'Size'), |
| 64 | 'artist': get_track_attr(tracks[t], 'Artist'), |
| 65 | 'name': get_track_attr(tracks[t2], 'Name'), |
| 66 | } |
| 67 | |
| 68 | # find duplicates |
| 69 | if (are_tracks_duplicate(track, track2)): |
| 70 | print 'found duplicate!' |
| 71 | # then compare sizes |
| 72 | if int(track.get('filesize')) <= int(track2.get('filesize')): |
| 73 | # the new one is smaller, remove the old one |
| 74 | nodes_to_remove.append(track2.get('track_id')) |
| 75 | |
| 76 | # track id inheritance |
| 77 | if int(track.get('track_id')) > int(track2.get('track_id')): |
| 78 | # mark the new track to update id |
| 79 | nodes_to_update.append({'track_id': track.get('track_id'), 'new_track_id': track2.get('track_id')}) |
| 80 | |
| 81 | # remove from list |
| 82 | tracks.remove(tracks[t2]) |
| 83 | else: |
| 84 | # mark the old track for removal |
| 85 | nodes_to_remove.append(track.get('track_id')) |
| 86 | |
| 87 | # track id inheritance |
| 88 | if int(track.get('track_id')) < int(track2.get('track_id')): |
| 89 | # mark the new track to update id |
| 90 | nodes_to_update.append({'track_id': track2.get('track_id'), 'new_track_id': track.get('track_id')}) |
| 91 | |
| 92 | # remove from list |
| 93 | tracks.remove(tracks[t]) |
| 94 | |
| 95 | print len(nodes_to_remove) |
| 96 | break |
| 97 | |
| 98 | # # save our list of track ids to delete |
| 99 | # with open('somepath/tracks_to_delete.json', 'wb') as outfile: |
| 100 | # json.dump(nodes_to_remove, outfile) |
| 101 | # |
| 102 | # # save our list of track dictionaries for swapping |
| 103 | # with open('somepath/tracks_to_update.json', 'wb') as outfile: |
| 104 | # json.dump(nodes_to_update, outfile) |
| 105 | |
| 106 | |
| 107 | # # load our track ids |
| 108 | # with open('somepath/tracks_to_delete.json', 'r') as outfile: |
| 109 | # nodes_to_remove = json.load(outfile) |
| 110 | # |
| 111 | # with open('somepath/tracks_to_update.json', 'r') as outfile: |
| 112 | # nodes_to_update = json.load(outfile) |
| 113 | |
| 114 | # delete the old tracks |
| 115 | # reverse the list so we don't run into index out of bounds problems |
| 116 | |
| 117 | # we modified tracks previously, let's start fresh |
| 118 | tracks = list(tracks_container) |
| 119 | |
| 120 | for t in reversed(xrange(len(tracks))): |
| 121 | track = tracks[t] |
| 122 | |
| 123 | # remove the track id key object |
| 124 | if track.tag == 'key' and track.text in nodes_to_remove: |
| 125 | print 'removing track key' |
| 126 | print track.text |
| 127 | # remove from xml |
| 128 | tracks.remove(track) |
| 129 | # remove the dict |
| 130 | elif track.tag == 'dict': |
| 131 | track_dict = { |
| 132 | 'track_id': get_track_attr(track, 'Track ID'), |
| 133 | 'location': get_track_attr(track, 'Location'), |
| 134 | } |
| 135 | |
| 136 | if track_dict.get('track_id') in nodes_to_remove: |
| 137 | print 'removing from xml tree' |
| 138 | # remove from xml |
| 139 | tracks.remove(track) |
| 140 | |
| 141 | # get the filenames |
| 142 | filepath = track_dict.get('location').replace('file://', '').replace('%20', ' ') |
| 143 | |
| 144 | print 'deleting file:' |
| 145 | print filepath |
| 146 | try: |
| 147 | os.remove(filepath) |
| 148 | except (OSError, IOError) as e: |
| 149 | # ignore errors |
| 150 | pass |
| 151 | |
| 152 | # update the nodes |
| 153 | for n in nodes_to_update: |
| 154 | print 'updating nodes' |
| 155 | track_id = n.get('track_id') |
| 156 | new_track_id = n.get('new_track_id') |
| 157 | |
| 158 | for t in range(len(tracks)): |
| 159 | track = tracks[t] |
| 160 | |
| 161 | if track.tag == 'key' and track.text == track_id: |
| 162 | print 'found key match' |
| 163 | track.text = new_track_id |
| 164 | elif track.tag == 'dict' and get_track_attr(track, 'Track ID') == track_id: |
| 165 | print 'found dict match' |
| 166 | set_track_attr(track, 'Track ID', new_track_id) |
| 167 | break |
| 168 | |
| 169 | # update our xml tree with the list |
| 170 | tracks_container.clear() |
| 171 | tracks_container.extend(tracks) |
| 172 | |
| 173 | # save the changes we've made |
| 174 | # TODO: fill in a file path for the modified library file |
| 175 | # tree.write('somepath/itunes_library-modified.xml') |
| 176 |