-
Notifications
You must be signed in to change notification settings - Fork 0
/
Call_Sync_Plays.py
401 lines (342 loc) · 15.6 KB
/
Call_Sync_Plays.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
# SYNCS PLAY COUNTS BETWEEN ITUNES AND WMP
# IF AN ITUNES PL IS PROVIDED, WILL SEARCH WMP ONLY FOR THOSE SAME FILES (NOT ENTIRE LIBRARY)
# ADDED LOGIC TO LOCATE THE MISSING FILES IN PLAYLIST "DEAD"
# IF Srch_dead IS TRUE, WILL TRY TO LOCATE DEAD TRACKS BASED ON A FUZZY MATCH
# THE PURPOSE IS TO UPDATE THE PLAY COUNT FOR TRACKS THAT MATCH A DEAD TRACK
# THE LAST OPTION SHOULD NO LONGER BE NEEDED SINCE I CAN USE THE ITUNES XML FILE
from os.path import exists
from unidecode import unidecode
from Tags import similar_ratio
from Tags import Genre_is_live
import Read_PL
import WMP_Read_PL as WMP
import pandas as pd
import Files
# COLUMNS REQUESTED FROM THE ITUNES AND WMP READERS (PASSED TO Read_PL.Read_PL, WMP.Read_WMP_PL AND WMP.Read_WMP_MC)
col_names = ["Arq", "Plays", "ID"]
# ONLY USED TO DEBUG, NOT BEING USED ANYMORE
def Save(df,output="iTunes_vs_WMP.xlsx"):
    """Debug helper: write ``df`` to an Excel workbook in the iTunes Excel folder.

    When ``df`` has an "Arq" column, a "File" column is added to ``df`` in place,
    holding ``Files.file_w_ext`` applied to each "Arq" value (presumably the file
    name without its path -- confirm against ``Files``).

    ``output`` is the workbook file name appended to the fixed target folder.
    """
    has_path_col = "Arq" in df.columns
    if has_path_col:
        # DERIVED COLUMN IS WRITTEN ON THE CALLER'S DATAFRAME
        df.loc[:, "File"] = df["Arq"].apply(Files.file_w_ext)
    # FIXED OUTPUT FOLDER + REQUESTED WORKBOOK NAME
    target_path = "D:\\iTunes\\Excel\\" + output
    df.to_excel(target_path, index=False)
# SEARCHES FOR FILES IN THE D:\MP3 FOLDER USING ART+TITLE
# RATIO IS THE MATCH RATIO
def find_files(file_list, substring):
    """Return, in their original order, the entries of ``file_list`` that contain ``substring``."""
    hits = []
    for candidate in file_list:
        if substring in candidate:
            hits.append(candidate)
    return hits
# GIVEN A DF, SEARCHES MISSING FILES IN THE MP3 DIRECTORY
# THE OUTPUT IS iTUNES PL TO BE READ
def Search_missing(App, PLs, df, thres):
    """Fuzzy-search the MP3 directory for files matching the missing tracks in ``df``.

    Parameters:
        App:   object exposing ``GetITObjectByID`` (used to fetch each missing track).
        PLs:   passed through unchanged to ``Read_PL.Add_file_to_PL``.
        df:    DataFrame with the columns "Miss_Art", "Miss_Title" and "Miss_ID";
               each "Miss_ID" entry is unpacked (``*``) into ``GetITObjectByID``.
        thres: threshold handed to ``similar_ratio`` and compared with the ratio
               previously stored in the track's ``comment``.

    Returns:
        DataFrame with the columns "Arq" (lower-cased), "Ratio", "Miss_Art" and
        "Miss_Title", keeping for each (Miss_Art, Miss_Title) only the row with
        the highest ratio.

    Side effects:
        Calls ``Read_PL.Cria_PL("Find_dead", recria="Y")``, stores the best ratio
        seen in each searched track's ``comment`` and adds every deduped hit to
        the "Find_dead" playlist through ``Read_PL.Add_file_to_PL``.
    """
    Art = list(df["Miss_Art"])
    Title = list(df["Miss_Title"])
    Miss_ID = list(df["Miss_ID"])
    nbr_files = len(Art)
    dir_path = "D:\\MP3"
    print("\nBuilding list of mp3 files in",dir_path,"(this may take a while...)")
    # EACH ENTRY IS UNPACKED BELOW AS (FILE, NORMALIZED FILE NAME)
    filelist = Files.get_Win_files(dir_path, ".mp3")
    # CREATES A PLAYLIST WITH THE RESULTS (RETURN VALUE IS NOT NEEDED)
    PL_nm = "Find_dead"
    Read_PL.Cria_PL(PL_nm,recria="Y")
    res = {"Arq": [], "Ratio": [], "Miss_Art": [], "Miss_Title": []}
    # FILES ALREADY ASSIGNED TO SOME MISSING TRACK (SET MIRROR OF res["Arq"] FOR O(1) LOOKUPS)
    found_paths = set()
    print()
    for i in range(nbr_files):
        # MISS TRACK METADATA
        miss_track = App.GetITObjectByID(*Miss_ID[i])
        try:
            max_ratio_tag = float(miss_track.comment)
        except (TypeError, ValueError):
            # EMPTY / NON NUMERIC / MISSING COMMENT: TREAT AS "NEVER SEARCHED"
            max_ratio_tag = 0
        # ONLY SEARCH TRACKS NEVER SEARCHED (0) OR WHOSE STORED BEST RATIO BEATS THE THRESHOLD
        if not (max_ratio_tag > thres or max_ratio_tag == 0):
            continue
        file_to_srch = unidecode(Art[i].lower()+" - "+Title[i].lower())
        print("\nSearching",i+1,"of ",nbr_files,":",file_to_srch)
        # WORDS LONGER THAN 1 CHARACTER
        file_to_srch_set = set(word for word in file_to_srch.split(" ") if len(word)>1)
        word_count = len(file_to_srch_set)
        # PRE-FILTER: KEEP FILES SHARING AT LEAST HALF OF THE SEARCH WORDS
        sublist = [tup for tup in filelist if sum(word in file_to_srch_set for word in set(tup[1].split(" ")))>=word_count/2]
        print("List len:",len(sublist))
        max_ratio = 0
        # MATCH DOESN'T NEED TO BE EXACT. ONE PASS IS ENOUGH: THE ORIGINAL RESTARTED THE
        # SCAN AFTER EVERY HIT, WHICH YIELDS THE SAME HITS IN THE SAME ORDER
        for file, normal_file in sublist:
            sim = similar_ratio(file_to_srch,normal_file,thres = thres)
            ratio = sim["ratio"]
            max_ratio = max(max_ratio, ratio)
            if sim["match"] and file not in found_paths:
                found_paths.add(file)
                print("\tFound:",file,"-- Ratio",ratio)
                res["Arq"].append(file)
                res["Ratio"].append(ratio)
                res["Miss_Art"].append(Art[i])
                res["Miss_Title"].append(Title[i])
        # UPDT MAX_RATIO TAG
        miss_track.comment = max_ratio
    df_res = pd.DataFrame(res)
    # BEST RATIO FIRST WITHIN EACH (ARTIST, TITLE) SO THAT keep='first' RETAINS IT
    df_res = df_res.sort_values(by=['Miss_Art', 'Miss_Title', 'Ratio'], ascending=[True, True, False])
    # MANAGE DUPES
    start_rows = df_res.shape[0]
    df_res = df_res.drop_duplicates(subset=['Miss_Art', 'Miss_Title'], keep='first')
    end_rows = df_res.shape[0]
    print("\nThe missing match df has",start_rows,"tracks before deduping (",end_rows,"after)")
    # CREATE RESULTING PL ONLY WITH DEDUPED FILES
    Arq = list(df_res["Arq"])
    for found in Arq:
        Read_PL.Add_file_to_PL(PLs,PL_nm,found)
    # MAKES ARQ LOWER CASE (AFTER THE PLAYLIST RECEIVED THE ORIGINAL-CASE PATHS)
    if len(Arq)>0:
        df_res["Arq"] = df_res["Arq"].str.lower()
    return df_res
# THIS FUNCTION SETS ARQ TO LOWER CASE FOR THE MERGE
# IT ALSO DROPS THE DUPLICATE RECORDS
def df_dedupe(source,df):
    """Lower-case "Arq", add the per-file maximum play count and drop duplicate files.

    ``df`` is modified in place: its "Arq" column is lower-cased and a column
    named ``"max_Plays_" + source`` receives the maximum "Plays" of each "Arq".

    Returns a dict with "start_rows" (row count before deduping), "end_rows"
    (row count after) and "DF" (``df`` with only the first row of each "Arq").
    """
    # LOWERCASE THE FILE NAME SO THAT PATHS DIFFERING ONLY BY CASE COLLAPSE
    df["Arq"] = df["Arq"].str.lower()
    max_col = "max_Plays_" + source
    df[max_col] = df.groupby("Arq")["Plays"].transform("max")
    rows_before = df.shape[0]
    unique_df = df.drop_duplicates(subset="Arq", keep="first")
    rows_after = unique_df.shape[0]
    print("\nThe",source,"df has",rows_before,"tracks before deduping (",rows_after,"after)")
    return {"start_rows": rows_before, "end_rows": rows_after, "DF": unique_df}
# ABSOLUTE DIFFERENCE BETWEEN 2 LENGTHS
def Diff_secs(t1,t2):
    """Return the absolute difference between ``t1`` and ``t2`` after converting each with ``Read_PL.time_to_sec``."""
    secs_1 = Read_PL.time_to_sec(t1)
    secs_2 = Read_PL.time_to_sec(t2)
    return abs(secs_1 - secs_2)
# MAIN CODE
# Srch_dead IS USED FOR DEAD TRACKS (NEEDS TO SCAN WHOLE LIBRARY TO BE ABLE TO DELETE TRACKS)
def Sync_plays(PL_name=None,PL_nbr=None,Do_lib=False,rows=None,Srch_dead=0,thres=0.75):
    """Bring the iTunes and WMP play counts of the same files to their common maximum.

    Parameters:
        PL_name, PL_nbr: playlist selectors forwarded to ``Read_PL.Read_PL`` and
            ``WMP.Read_WMP_PL``.
        Do_lib: when true the WMP side is read with ``WMP.Read_WMP_PL``; otherwise
            only the iTunes files that exist on disk are looked up with
            ``WMP.Read_WMP_MC``. Forced to True when ``Srch_dead`` is set.
        rows: forwarded to the readers.
        Srch_dead: when true, missing ("dead") tracks are read with
            ``Read_PL.Read_lib_miss``, matched to real files by ``Search_missing``,
            their play count takes part in the maximum, and the dead track is
            removed once the matching track has been updated.
        thres: similarity threshold forwarded to ``Search_missing`` and
            ``similar_ratio``.

    Returns None; progress and a final summary are printed. In dead-track mode
    the user may be prompted (``input``) when a match is not clear-cut.
    """
    if Srch_dead:
        # DEAD-TRACK MODE ALWAYS READS THE WMP SIDE THROUGH Read_WMP_PL
        Do_lib = True
        iTu_lib_dict = Read_PL.Read_lib_miss(rows=rows)
        iTu_App = iTu_lib_dict["App"]
        iTu_lib_df = iTu_lib_dict["DF"]
        PLs = iTu_lib_dict["PLs"]
        print("\nNumber of missing found:",iTu_lib_df.shape[0])
        # SEARCHES THE ACTUAL LOCATION OF THE MISSING FILES FIRST
        miss_df = Search_missing(iTu_App, PLs, iTu_lib_df, thres)
        # ONLY IN THIS CASE THE TRACK LENGTH IS NEEDED, TO VERIFY ACCURACY
        iTu_col_names = col_names[:]
        iTu_col_names.append("Len")
        # NOTE(review): Search_missing fills the playlist "Find_dead" but this reads
        # "Find_missing" -- confirm which playlist name is intended.
        iTu_dict = Read_PL.Read_PL(iTu_col_names,PL_name="Find_missing",rows=None,Modify_cols=False,Len_type="char")
    else:
        iTu_dict = Read_PL.Read_PL(col_names,PL_name=PL_name,PL_nbr=PL_nbr,Do_lib=Do_lib,rows=rows,Modify_cols=False)
    # ASSIGNS VARS
    iTu_App = iTu_dict["App"]
    iTu_df = iTu_dict["DF"]
    # KEEPS THE ORIGINAL-CASE PATH IN "Location"; "Arq" BECOMES THE LOWER-CASE MERGE KEY
    iTu_df["Location"] = iTu_df["Arq"].copy()
    iTu_df["Arq"] = iTu_df["Arq"].str.lower()
    # LEFT JOINS RESULTS WITH DEAD TRACKS (IT SHOULD NOT MISS ANY RECORDS)
    if Srch_dead and iTu_df.shape[0]>0:
        iTu_df = iTu_df.merge(miss_df, on="Arq", how="left").merge(iTu_lib_df, on=["Miss_Art", "Miss_Title"], how="left")
    # DROPS DUPES
    iTu_dedupe = df_dedupe("iTunes", iTu_df)
    iTu_df = iTu_dedupe["DF"]
    iTu_start_rows = iTu_dedupe["start_rows"]
    iTu_end_rows = iTu_dedupe["end_rows"]
    # WMP
    miss_files = []
    if Do_lib:
        print("\nReading the WMP playlist")
        wmp_dict = WMP.Read_WMP_PL(col_names,PL_name=PL_name,PL_nbr=PL_nbr,Do_lib=Do_lib,rows=rows,Modify_cols=False)
    else:
        Arq = [x for x in iTu_df["Location"] if exists(x)]
        print("\nReading the tracks from the iTunes playlist in WMP")
        wmp_dict = WMP.Read_WMP_MC(col_names,Arq,Modify_cols=True)
        miss_files = wmp_dict["Missing"]
    # WMP ASSIGNING
    wmp_df = wmp_dict["DF"]
    # USED IF INPUT IS THE WHOLE LIBRARY
    WMP_lib = wmp_dict["Lib"]
    # USED IF INPUT IS A PLAYLIST (BASED ON iTunes)
    WMP_player = wmp_dict["WMP"]
    # CHANGE DF-ELIMINATE DUPES
    wmp_dedupe = df_dedupe("WMP",wmp_df)
    wmp_df = wmp_dedupe["DF"]
    wmp_start_rows = wmp_dedupe["start_rows"]
    wmp_end_rows = wmp_dedupe["end_rows"]
    wmp_df = wmp_df.rename(columns={"Pos": "WMP_Pos"})
    # JOIN THE DATAFRAMES ON THE LOWER-CASE PATH; ONLY FILES PRESENT ON BOTH SIDES REMAIN
    df = iTu_df.merge(wmp_df[["Arq", "max_Plays_WMP", "WMP_Pos"]], on="Arq", how="inner")
    # DEAD-TRACK COLUMNS ONLY EXIST WHEN THE DEAD-TRACK MERGE ABOVE HAPPENED
    if Srch_dead and df.shape[0]>0:
        Miss_ID = [x for x in df["Miss_ID"]]
        Miss_plays = [x for x in df["Miss_Plays"]]
        Miss_Len = [x for x in df["Miss_Len"]]
        Len = [x for x in df["Len"]]
        Ratio = [x for x in df["Ratio"]]
    merged_rows = df.shape[0]
    print("\nThe merged df has",df.shape[0],"tracks")
    # SELECT ONLY RELEVANT ROWS (IN CASE IT'S NOT THE DEAD TRACKS PL)
    if not Srch_dead:
        df = df[df["max_Plays_iTunes"] != df["max_Plays_WMP"]]
    diff_plays = df.shape[0]
    if not Srch_dead:
        print("\nThe merged df has",df.shape[0],"tracks where the WMP and iTunes play counts differ")
    # POPULATES LISTS
    Arq = [x for x in df["Location"]]
    ID = [x for x in df["ID"]]
    WMP_Pos = [x for x in df["WMP_Pos"]]
    iTunes_plays = [x for x in df["max_Plays_iTunes"]]
    WMP_plays = [x for x in df["max_Plays_WMP"]]
    nbr_files = len(Arq)
    print()
    wmp_cnt = 0
    iTu_cnt = 0
    del_cnt = 0
    cols = ["Art","Title","Genre"]
    # COMPARE AND CHANGE THE FILES
    for i in range(nbr_files):
        # TRACK METADATA
        iTu_track = iTu_App.GetITObjectByID(*ID[i])
        track_dict = Read_PL.iTunes_tag_dict(iTu_track, cols)
        track_meta = track_dict["Art"] +" - "+ track_dict["Title"]
        # BOTH ITUNES AND WMP CAN BE UPDATED AT THE SAME TIME NOW (NO LONGER XOR)
        if Srch_dead:
            # MISS TRACK METADATA
            miss_track = iTu_App.GetITObjectByID(*Miss_ID[i])
            miss_dict = Read_PL.iTunes_tag_dict(miss_track,cols)
            miss_meta = miss_dict["Art"] +" - "+ miss_dict["Title"]
            # BOTH TRACKS MUST AGREE ON BEING LIVE OR NOT
            Live_match = Genre_is_live(track_dict["Genre"])==Genre_is_live(miss_dict["Genre"])
            # CHECK SIMILARITY AGAIN (BASED ON TAGS INSTEAD OF FILE NAME)
            sim = similar_ratio(track_meta,miss_meta,thres = thres)
            ratio = sim["ratio"]
            match = sim["match"]
            # SHOWS THE RECOMPUTED RATIO (THE ORIGINAL MESSAGE PRINTED THE OLD VALUE TWICE)
            prt1 = "(match ratio "+ str(ratio) + ")"
            if Ratio[i] != ratio:
                prt1 = prt1 + "(Updated from "+ str(Ratio[i])+")"
                Ratio[i] = ratio
            prt2 = "(miss count: "+ str(Miss_plays[i]) + ")"
        else:
            Updt = True
            prt1 = ""
            prt2 = ""
        # MAXIMUM OF THE 3 POSSIBLE SOURCES
        max_plays = max(iTunes_plays[i], WMP_plays[i], Miss_plays[i] if Srch_dead else 0)
        # MESSAGES
        print("\nChecking file",i+1,"of",nbr_files,prt1)
        print("Current:",track_meta,("--Length: "+Len[i]) if Srch_dead else "")
        if Srch_dead:
            print("Missing:",miss_meta,"--Length:",Miss_Len[i])
        print("WMP plays:",WMP_plays[i],"// iTunes plays:",iTunes_plays[i],prt2)
        # UPDATE OR NOT?
        if Srch_dead:
            if not (Live_match and match):
                print("Not updating...")
                user_inp = "N"
            elif Len[i] == Miss_Len[i]:
                print("Lengths match, updating...")
                user_inp = "Y"
            elif Ratio[i]>0.849:
                print("\nMatch ratio is high, updating...")
                user_inp = "Y"
            else:
                user_inp = input("\nPress any key to update/delete (N to not): ")
            # WILL UPDATE?
            Updt = (user_inp.upper() != "N")
        iTu_updt_cnt = Updt and iTunes_plays[i]<max_plays
        WMP_updt_cnt = Updt and WMP_plays[i]<max_plays
        if iTu_updt_cnt or WMP_updt_cnt:
            print("Updating counts:","iTunes" if iTu_updt_cnt else "","WMP" if WMP_updt_cnt else "")
        else:
            print("Not updating counts")
        # ITUNES COUNTS
        if 0 <= iTunes_plays[i] < max_plays and Updt:
            print("Doublechecking iTunes plays:",iTu_track.PlayedCount,"// Changing...")
            iTu_track.PlayedCount = max_plays
            iTu_cnt = iTu_cnt+1
        # WMP COUNTS
        if 0 <= WMP_plays[i] < max_plays and Updt:
            if Do_lib:
                WMP_track = WMP_lib.Item(WMP_Pos[i])
            else:
                WMP_track = WMP_player.mediaCollection.getByAttribute("SourceURL", Arq[i]).Item(0)
            # TRIES TO RETRIEVE PLAYS
            plays = WMP_track.getiteminfo("UserPlayCount")
            # ENSURES THAT THE FILE IS THE SAME
            if WMP_track.getiteminfo("SourceURL").lower() == Arq[i].lower():
                print("Doublechecking WMP plays:",plays,"// Changing...", end=" ")
                WMP_track.setItemInfo("UserPlayCount", str(max_plays))
                print("(doublecheck WMP count:",WMP_track.getiteminfo("UserPlayCount"),")")
                # COUNTED ONLY WHEN THE VALUE WAS REALLY WRITTEN
                wmp_cnt = wmp_cnt+1
            else:
                print("WMP item does not match the file, not changing:",Arq[i])
        # REMOVE DEAD TRACKS AFTER COUNT UPDT
        if Srch_dead:
            if Updt:
                print("\nDeleting dead track")
                try:
                    miss_track = iTu_App.GetITObjectByID(*Miss_ID[i])
                    Read_PL.Remove_track_from_PL(miss_track)
                except Exception as exc:
                    # STILL BEST-EFFORT, BUT NO LONGER SWALLOWS CTRL-C AND SHOWS THE REAL CAUSE
                    print("Missing track has been deleted already")
                    print("\tDetails:",exc)
                else:
                    print("Dead track has been deleted!")
                    del_cnt = del_cnt+1
            else:
                print("Not deleting...")
    # FINAL SUMMARY
    print("\nUpdated",wmp_cnt,"WMP plays")
    print("Updated",iTu_cnt,"iTunes plays")
    print("Deleted",del_cnt,"dead iTunes tracks")
    print("\nThe iTunes df has",iTu_start_rows,"tracks before deduping (",iTu_end_rows,"after)")
    print("\nThe WMP df has",wmp_start_rows,"tracks before deduping (",wmp_end_rows,"after)")
    print("\nThe merged df has",merged_rows,"tracks")
    if not Srch_dead:
        print("\nThe merged df has",diff_plays,"tracks where the WMP and iTunes play counts differ")
    # MISSING
    miss_nbr = len(miss_files)
    print("\nFiles not found in WMP:",miss_nbr)
    for i in range(miss_nbr):
        print("Missing",i+1,"of",miss_nbr,":",miss_files[i])
    print()
# CALLS PROGRAM: MODULE-LEVEL CALL WITH NO __main__ GUARD, SO IT RUNS WHENEVER THIS FILE IS EXECUTED OR IMPORTED
Sync_plays(PL_name="AAA", Do_lib=1, Srch_dead=0, thres=0.85, rows=None)