gist:c7393693654848eb834af46b09f55f04

itunes.py · 6.3 KiB · Python Raw

import json import os import re from xml.etree import ElementTree def get_track_attr (track, attr_name): # iterate over each attr to find the filename attributes = list(track) for a in range(len(attributes)): attr = attributes[a] if attr.text == attr_name: return attributes[a+1].text def set_track_attr (track, attr_name, value): # iterate over each attr to find the filename attributes = list(track) for a in range(len(attributes)): attr = attributes[a] if attr.text == attr_name: attributes[a+1].text = value def are_tracks_duplicate (track, track2): return track.get('track_id') != track2.get('track_id') and track.get('name') == track2.get('name') and track.get('artist') == track2.get('artist') # dump the xml file into an ElementTree object # TODO: make this a real path to your iTunes Media Library.xml with open('somepath/itunes_library.xml', 'rw') as f: tree = ElementTree.parse(f) root = tree.getroot() tracks_container = root.find('dict').find('dict') tracks = list(tracks_container) # init our lists so that we can separate modifying our tree from analyzing nodes_to_remove = [] nodes_to_update = [] # Iterate over track list in reverse (because we'll be removing items) for t in reversed(xrange(len(tracks))): # only look at the dictionaries if (tracks[t].tag == 'dict'): track = { 'track_id': get_track_attr(tracks[t], 'Track ID'), 'filesize': get_track_attr(tracks[t], 'Size'), 'artist': get_track_attr(tracks[t], 'Artist'), 'name': get_track_attr(tracks[t], 'Name'), } print track.get('track_id') # make sure this track hasn't already been flagged for removal if track.get('track_id') not in nodes_to_remove: print 'this track has not already been flagged for removal' # check if track_id is different and name and total time match for t2 in range(len(tracks)): # only look at the dictionaries if (tracks[t2].tag == 'dict'): track2 = { 'track_id': get_track_attr(tracks[t2], 'Track ID'), 'filesize': get_track_attr(tracks[t2], 'Size'), 'artist': get_track_attr(tracks[t], 'Artist'), 
'name': get_track_attr(tracks[t2], 'Name'), } # find duplicates if (are_tracks_duplicate(track, track2)): print 'found duplicate!' # then compare sizes if int(track.get('filesize')) <= int(track2.get('filesize')): # the new one is smaller, remove the old one nodes_to_remove.append(track2.get('track_id')) # track id inheritance if int(track.get('track_id')) > int(track2.get('track_id')): # mark the new track to update id nodes_to_update.append({'track_id': track.get('track_id'), 'new_track_id': track2.get('track_id')}) # remove from list tracks.remove(tracks[t2]) else: # mark the old track for removal nodes_to_remove.append(track.get('track_id')) # track id inheritance if int(track.get('track_id')) < int(track2.get('track_id')): # mark the new track to update id nodes_to_update.append({'track_id': track2.get('track_id'), 'new_track_id': track.get('track_id')}) # remove from list tracks.remove(tracks[t]) print len(nodes_to_remove) break # # save our list of track ids to delete # with open('somepath/tracks_to_delete.json', 'wb') as outfile: # json.dump(nodes_to_remove, outfile) # # # save our list of track dictionaries for swapping # with open('somepath/tracks_to_update.json', 'wb') as outfile: # json.dump(nodes_to_update, outfile) # # load our track ids # with open('somepath/tracks_to_delete.json', 'r') as outfile: # nodes_to_remove = json.load(outfile) # # with open('somepath/tracks_to_update.json', 'r') as outfile: # nodes_to_update = json.load(outfile) # delete the old tracks # reverse the list so we don't run into index out of bounds problems # we modified tracks previously, let's start fresh tracks = list(tracks_container) for t in reversed(xrange(len(tracks))): track = tracks[t] # remove the track id key object if track.tag == 'key' and track.text in nodes_to_remove: print 'removing track key' print track.text # remove from xml tracks.remove(track) # remove the dict elif track.tag == 'dict': track_dict = { 'track_id': get_track_attr(track, 'Track ID'), 
'location': get_track_attr(track, 'Location'), } if track_dict.get('track_id') in nodes_to_remove: print 'removing from xml tree' # remove from xml tracks.remove(track) # get the filenames filepath = track_dict.get('location').replace('file://', '').replace('%20', ' ') print 'deleting file:' print filepath try: os.remove(filepath) except (OSError, IOError) as e: # ignore errors pass # update the nodes for n in nodes_to_update: print 'updating nodes' track_id = n.get('track_id') new_track_id = n.get('new_track_id') for t in range(len(tracks)): track = tracks[t] if track.tag == 'key' and track.text == track_id: print 'found key match' track.text = new_track_id elif track.tag == 'dict' and get_track_attr(track, 'Track ID') == track_id: print 'found dict match' set_track_attr(track, 'Track ID', new_track_id) break # update our xml tree with the list tracks_container.clear() tracks_container.extend(tracks) # save the changes we've made # TODO: fill in a file path for the modified library file # tree.write('somepath/itunes_library-modified.xml')

1	import json
2	import os
3	import re
4	from xml.etree import ElementTree
5
def get_track_attr(track, attr_name):
    """Return the text of the value stored under ``attr_name`` in ``track``.

    ``track`` is an ElementTree element whose children alternate between a
    ``<key>`` element and the element holding that key's value.

    Only ``<key>`` children are compared against ``attr_name``; a *value*
    whose text happens to equal an attribute name (e.g. a song called
    "Size") is never mistaken for a key.

    Returns ``None`` when the key is absent or has no following value element.
    """
    children = list(track)
    # Pair every child with its successor; a trailing key with no value
    # simply produces no pair instead of indexing past the end.
    for key_node, value_node in zip(children, children[1:]):
        if key_node.tag == 'key' and key_node.text == attr_name:
            return value_node.text
    return None
14
def set_track_attr(track, attr_name, value):
    """Set the text of the value stored under ``attr_name`` in ``track``.

    ``track`` is an ElementTree element whose children alternate between a
    ``<key>`` element and the element holding that key's value.

    Every ``<key>`` child whose text equals ``attr_name`` has the text of the
    element that follows it replaced with ``value``. Value elements are never
    treated as keys, so a value whose text equals ``attr_name`` cannot cause
    the next ``<key>`` to be overwritten. Nothing happens when the key is
    absent or has no following value element.
    """
    children = list(track)
    # Pairing each child with its successor keeps a trailing key from
    # indexing past the end of the list.
    for key_node, value_node in zip(children, children[1:]):
        if key_node.tag == 'key' and key_node.text == attr_name:
            value_node.text = value
23
def are_tracks_duplicate(track, track2):
    """Tell whether two track summaries describe the same song.

    Both arguments are mappings read through ``.get`` with the keys
    ``'track_id'``, ``'name'`` and ``'artist'``. The tracks count as
    duplicates when their ids differ while name and artist are both equal.
    """
    # An entry is never a duplicate of itself.
    if track.get('track_id') == track2.get('track_id'):
        return False
    if track.get('name') != track2.get('name'):
        return False
    return track.get('artist') == track2.get('artist')
26
# Parse the library XML file into an ElementTree object.
# TODO: make this a real path to your iTunes Media Library.xml
# The file is only read here, so open it read-only. 'rw' is not a valid
# open() mode (Python 3 rejects it outright); binary mode lets the XML
# parser honour the encoding declared in the file itself.
with open('somepath/itunes_library.xml', 'rb') as f:
    tree = ElementTree.parse(f)

root = tree.getroot()
# First <dict> nested inside the root's first <dict>; the rest of the script
# treats its children as alternating <key>/<dict> track entries.
tracks_container = root.find('dict').find('dict')
tracks = list(tracks_container)

# init our lists so that we can separate modifying our tree from analyzing
nodes_to_remove = []  # track ids (strings) flagged for deletion
nodes_to_update = []  # {'track_id': ..., 'new_track_id': ...} id swaps
39
# Find duplicate tracks and record which ids to remove / re-number.
#
# Iterate over the track list in reverse because entries are removed from
# ``tracks`` while scanning; only indices below the current one can shift.
for t in reversed(range(len(tracks))):
    # only look at the dictionaries (the sibling <key> elements are skipped)
    if tracks[t].tag != 'dict':
        continue

    track = {
        'track_id': get_track_attr(tracks[t], 'Track ID'),
        'filesize': get_track_attr(tracks[t], 'Size'),
        'artist': get_track_attr(tracks[t], 'Artist'),
        'name': get_track_attr(tracks[t], 'Name'),
    }

    print(track.get('track_id'))

    # make sure this track hasn't already been flagged for removal
    if track.get('track_id') in nodes_to_remove:
        continue
    print('this track has not already been flagged for removal')

    # look for another entry with a different track id but the same name
    # and artist
    for t2 in range(len(tracks)):
        # only look at the dictionaries
        if tracks[t2].tag != 'dict':
            continue

        track2 = {
            'track_id': get_track_attr(tracks[t2], 'Track ID'),
            'filesize': get_track_attr(tracks[t2], 'Size'),
            # Read the artist from the candidate itself; taking it from
            # tracks[t] made the artist comparison always succeed.
            'artist': get_track_attr(tracks[t2], 'Artist'),
            'name': get_track_attr(tracks[t2], 'Name'),
        }

        if not are_tracks_duplicate(track, track2):
            continue

        # Without a Size on both entries there is nothing to compare (and
        # int(None) would raise), so leave this pair alone.
        if track.get('filesize') is None or track2.get('filesize') is None:
            continue

        print('found duplicate!')
        # then compare sizes: the entry with the larger Size is flagged for
        # removal (on a tie, track2 is flagged)
        if int(track.get('filesize')) <= int(track2.get('filesize')):
            nodes_to_remove.append(track2.get('track_id'))

            # track id inheritance: the surviving entry takes over the lower
            # of the two ids
            if int(track.get('track_id')) > int(track2.get('track_id')):
                nodes_to_update.append({'track_id': track.get('track_id'), 'new_track_id': track2.get('track_id')})

            # remove from list so it is not considered again
            tracks.remove(tracks[t2])
        else:
            nodes_to_remove.append(track.get('track_id'))

            # track id inheritance: the surviving entry takes over the lower
            # of the two ids
            if int(track.get('track_id')) < int(track2.get('track_id')):
                nodes_to_update.append({'track_id': track2.get('track_id'), 'new_track_id': track.get('track_id')})

            # remove from list so it is not considered again
            tracks.remove(tracks[t])

        print(len(nodes_to_remove))
        # the list just changed under the index loop; stop scanning for
        # this track
        break
97
98	# # save our list of track ids to delete
99	# with open('somepath/tracks_to_delete.json', 'wb') as outfile:
100	# json.dump(nodes_to_remove, outfile)
101	#
102	# # save our list of track dictionaries for swapping
103	# with open('somepath/tracks_to_update.json', 'wb') as outfile:
104	# json.dump(nodes_to_update, outfile)
105
106
107	# # load our track ids
108	# with open('somepath/tracks_to_delete.json', 'r') as outfile:
109	# nodes_to_remove = json.load(outfile)
110	#
111	# with open('somepath/tracks_to_update.json', 'r') as outfile:
112	# nodes_to_update = json.load(outfile)
113
# Delete the flagged tracks: drop their <key>/<dict> entries from the list
# and remove the media files they point at.

# URL helpers live in different modules on Python 2 and Python 3.
try:
    from urllib.parse import urlparse
    from urllib.request import url2pathname
except ImportError:
    from urllib import url2pathname
    from urlparse import urlparse

# we modified tracks previously, let's start fresh
tracks = list(tracks_container)

# set membership is O(1); nodes_to_remove itself stays a list
ids_to_remove = set(nodes_to_remove)

# walk backwards so deleting by index never shifts an entry we still have to
# visit
for t in reversed(range(len(tracks))):
    track = tracks[t]

    # remove the track id key object
    if track.tag == 'key' and track.text in ids_to_remove:
        print('removing track key')
        print(track.text)
        # remove from xml
        del tracks[t]
    # remove the dict
    elif track.tag == 'dict':
        track_dict = {
            'track_id': get_track_attr(track, 'Track ID'),
            'location': get_track_attr(track, 'Location'),
        }

        if track_dict.get('track_id') not in ids_to_remove:
            continue

        print('removing from xml tree')
        # remove from xml
        del tracks[t]

        # get the filename: Location is a percent-encoded file:// URL, so
        # parse it rather than patching the string (handles a "localhost"
        # host part and every %XX escape, not just %20)
        location = track_dict.get('location')
        parsed = urlparse(location) if location else None
        if parsed is None or parsed.scheme != 'file':
            print('no local file to delete for track:')
            print(track_dict.get('track_id'))
            continue
        filepath = url2pathname(parsed.path)

        print('deleting file:')
        print(filepath)
        try:
            os.remove(filepath)
        except (OSError, IOError) as e:
            # best effort: report the failure and carry on with the rest
            print('could not delete file: %s' % e)
151
# Apply the recorded id swaps to the remaining entries.
for update in nodes_to_update:
    print('updating nodes')
    old_id = update.get('track_id')
    replacement_id = update.get('new_track_id')

    for node in tracks:
        if node.tag == 'key':
            # the <key> element carries the track id as its text
            if node.text == old_id:
                print('found key match')
                node.text = replacement_id
        elif node.tag == 'dict':
            if get_track_attr(node, 'Track ID') == old_id:
                print('found dict match')
                set_track_attr(node, 'Track ID', replacement_id)
                # stop scanning once the matching dict has been rewritten
                break
168
# Update our xml tree with the list. Slice assignment swaps only the child
# elements; Element.clear() would also reset the container's own text, tail
# and attributes.
tracks_container[:] = tracks

# save the changes we've made
# TODO: fill in a file path for the modified library file
# tree.write('somepath/itunes_library-modified.xml')
176