Winston Hoy revised this Gist 9 years ago.
1 file changed, 175 insertions
itunes.py (file created)
@@ -0,0 +1,175 @@
import json
import os
import re
from xml.etree import ElementTree

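# The library file is an Apple plist: inside each <dict>, keys and values are
# stored as alternating sibling elements (<key>...</key> followed by its value
# element), so a value sits at the index right after its key.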
def get_track_attr (track, attr_name):
    # iterate over the alternating key/value children to find attr_name
    attributes = list(track)
    for a in range(len(attributes)):
        attr = attributes[a]

        if attr.text == attr_name:
            return attributes[a+1].text

def set_track_attr (track, attr_name, value):
    # iterate over the alternating key/value children to find attr_name
    attributes = list(track)
    for a in range(len(attributes)):
        attr = attributes[a]

        if attr.text == attr_name:
            attributes[a+1].text = value

def are_tracks_duplicate (track, track2):
    return (track.get('track_id') != track2.get('track_id')
            and track.get('name') == track2.get('name')
            and track.get('artist') == track2.get('artist'))

# dump the xml file into an ElementTree object
# TODO: make this a real path to your iTunes Media Library.xml
with open('somepath/itunes_library.xml', 'r') as f:
    tree = ElementTree.parse(f)

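# the root <dict> holds the library metadata; its nested <dict> (the value of
# the 'Tracks' key) holds one <key>/<dict> pair per track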
root = tree.getroot()
tracks_container = root.find('dict').find('dict')
tracks = list(tracks_container)

# init our lists so that we can separate modifying our tree from analyzing
nodes_to_remove = []
nodes_to_update = []

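# for each duplicate pair, flag the larger file for removal; the surviving
# track also inherits the lower Track ID of the pair (via nodes_to_update)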
# Iterate over track list in reverse (because we'll be removing items)
for t in reversed(xrange(len(tracks))):
    # only look at the dictionaries
    if (tracks[t].tag == 'dict'):
        track = {
            'track_id': get_track_attr(tracks[t], 'Track ID'),
            'filesize': get_track_attr(tracks[t], 'Size'),
            'artist': get_track_attr(tracks[t], 'Artist'),
            'name': get_track_attr(tracks[t], 'Name'),
        }

        print track.get('track_id')

        # make sure this track hasn't already been flagged for removal
        if track.get('track_id') not in nodes_to_remove:
            print 'this track has not already been flagged for removal'

            # check if track_id is different and name and artist match
            for t2 in range(len(tracks)):
                # only look at the dictionaries
                if (tracks[t2].tag == 'dict'):
                    track2 = {
                        'track_id': get_track_attr(tracks[t2], 'Track ID'),
                        'filesize': get_track_attr(tracks[t2], 'Size'),
                        'artist': get_track_attr(tracks[t2], 'Artist'),
                        'name': get_track_attr(tracks[t2], 'Name'),
                    }

                    # find duplicates
                    if (are_tracks_duplicate(track, track2)):
                        print 'found duplicate!'
                        # then compare sizes
                        if int(track.get('filesize')) <= int(track2.get('filesize')):
                            # the new one is smaller, remove the old one
                            nodes_to_remove.append(track2.get('track_id'))

                            # track id inheritance
                            if int(track.get('track_id')) > int(track2.get('track_id')):
                                # mark the new track to update id
                                nodes_to_update.append({
                                    'track_id': track.get('track_id'),
                                    'new_track_id': track2.get('track_id'),
                                })

                            # remove from list
                            tracks.remove(tracks[t2])
                        else:
                            # mark the old track for removal
                            nodes_to_remove.append(track.get('track_id'))

                            # track id inheritance
                            if int(track.get('track_id')) < int(track2.get('track_id')):
                                # mark the new track to update id
                                nodes_to_update.append({
                                    'track_id': track2.get('track_id'),
                                    'new_track_id': track.get('track_id'),
                                })

                            # remove from list
                            tracks.remove(tracks[t])

                        print len(nodes_to_remove)
                        break

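# at this point nodes_to_remove holds the Track IDs of the duplicates to drop,
# and nodes_to_update maps each survivor's current Track ID to the lower ID it
# inherits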
# # save our list of track ids to delete
# with open('somepath/tracks_to_delete.json', 'wb') as outfile:
#     json.dump(nodes_to_remove, outfile)
#
# # save our list of track dictionaries for swapping
# with open('somepath/tracks_to_update.json', 'wb') as outfile:
#     json.dump(nodes_to_update, outfile)

# # load our track ids
# with open('somepath/tracks_to_delete.json', 'r') as outfile:
#     nodes_to_remove = json.load(outfile)
#
# with open('somepath/tracks_to_update.json', 'r') as outfile:
#     nodes_to_update = json.load(outfile)

# delete the old tracks
# reverse the list so we don't run into index out of bounds problems

# we modified tracks previously, let's start fresh
tracks = list(tracks_container)

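# each flagged track appears in the container as a <key> element (its Track ID)
# followed by its <dict>; both get removed, and the underlying file is deleted
# from disk as well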
for t in reversed(xrange(len(tracks))):
    track = tracks[t]

    # remove the track id key object
    if track.tag == 'key' and track.text in nodes_to_remove:
        print 'removing track key'
        print track.text
        # remove from xml
        tracks.remove(track)
    # remove the dict
    elif track.tag == 'dict':
        track_dict = {
            'track_id': get_track_attr(track, 'Track ID'),
            'location': get_track_attr(track, 'Location'),
        }

        if track_dict.get('track_id') in nodes_to_remove:
            print 'removing from xml tree'
            # remove from xml
            tracks.remove(track)

            # Location is a file:// URL; strip the scheme and unescape spaces
            # (other percent-escapes are left as-is)
            filepath = track_dict.get('location').replace('file://', '').replace('%20', ' ')

            print 'deleting file:'
            print filepath
            try:
                os.remove(filepath)
            except (OSError, IOError) as e:
                # ignore errors
                pass

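# apply the Track ID inheritance recorded earlier: rewrite both the <key> text
# and the 'Track ID' value inside the matching <dict>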
# update the nodes
for n in nodes_to_update:
    print 'updating nodes'
    track_id = n.get('track_id')
    new_track_id = n.get('new_track_id')

    for t in range(len(tracks)):
        track = tracks[t]

        if track.tag == 'key' and track.text == track_id:
            print 'found key match'
            track.text = new_track_id
        elif track.tag == 'dict' and get_track_attr(track, 'Track ID') == track_id:
            print 'found dict match'
            set_track_attr(track, 'Track ID', new_track_id)
            break

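# the removals above only changed the Python list copy; clearing the container
# and re-adding the surviving elements applies them to the tree itself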
# update our xml tree with the list
tracks_container.clear()
tracks_container.extend(tracks)

# save the changes we've made
# TODO: fill in a file path for the modified library file
# tree.write('somepath/itunes_library-modified.xml')
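# note: xml.etree does not preserve the library's original <!DOCTYPE plist ...>
# declaration when writing, so the saved file will be missing that header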