From 8cb79526bb3667e5c9ebd23cd45ab8b39c3db8d4 Mon Sep 17 00:00:00 2001 From: Chono N <49493993+Gyarbij@users.noreply.github.com> Date: Sat, 31 Aug 2024 15:13:38 +0200 Subject: [PATCH 01/18] Update requirements.txt --- requirements.txt | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/requirements.txt b/requirements.txt index ce40165..74a88cf 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,6 +1,5 @@ -spotipy>=2.23 -plexapi>=4.15.4 -deezer-python>=6.1.0 +spotipy>=2.24 +plexapi>=4.15.16 +deezer-python>=7.0.0 urllib3>=2.2.2 -zipp>=3.19.1 # not directly required, pinned by Snyk to avoid a vulnerability - +zipp>=3.20.1 # not directly required, pinned to avoid a vulnerability From 7c5d1103a7af70b29aa51babbe990c01e2290311 Mon Sep 17 00:00:00 2001 From: Chono N <49493993+Gyarbij@users.noreply.github.com> Date: Sat, 31 Aug 2024 15:50:40 +0200 Subject: [PATCH 02/18] Update plex.py Use the DB_PATH environment variable, with a default fallback --- plexist/modules/plex.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/plexist/modules/plex.py b/plexist/modules/plex.py index f38e5ba..475abef 100644 --- a/plexist/modules/plex.py +++ b/plexist/modules/plex.py @@ -1,3 +1,4 @@ +import os import sqlite3 import logging import pathlib @@ -12,7 +13,7 @@ logging.basicConfig(stream=sys.stdout, level=logging.INFO) -conn = sqlite3.connect('plexist.db') +DB_PATH = os.getenv('DB_PATH', 'plexist.db') def initialize_db(): conn = sqlite3.connect('plexist.db') @@ -195,4 +196,4 @@ def update_or_create_plex_playlist( ) def end_session(): - conn.close() \ No newline at end of file + conn.close() From e92b944e5d0e684021dbbc99140e3000f76a324e Mon Sep 17 00:00:00 2001 From: "Gyarbij (Chono N)" <49493993+Gyarbij@users.noreply.github.com> Date: Sat, 31 Aug 2024 16:09:45 +0200 Subject: [PATCH 03/18] Update plex.py: Use the DB_PATH environment variable, with a default fallback. 
New match function that starts with strict matching criteria, including title, artist, and album. If no match is found, it relaxes the album requirement and searches again. If there is still no match, it uses only the first two words of the title (if available) along with the artist. It uses a scoring system that weights title, artist, and album differently. It also checks for version information in parentheses and factors that into the score. The thresholds for accepting a match are different at each stage, becoming more lenient as we progress. It provides more detailed logging about why a match was accepted at a particular stage. Changed the initial check from `if tracks is None` to `if not tracks`. This will handle both None and empty lists. Moved the `plex.playlist(playlist.name)` call inside the try block. This ensures we're not trying to access a playlist that doesn't exist. Improved error handling by replacing bare `except:` clauses with `except Exception as e` and logging the error messages. Added more detailed logging, including success messages for updating description and poster. Changed some `logging.info` calls to `logging.error` or `logging.warning` where appropriate. Simplified the logic for handling missing-tracks CSV files. Ensured consistent use of string formatting in logging calls. 
--- plexist/modules/plex.py | 195 ++++++++++++++++++++++------------------ 1 file changed, 108 insertions(+), 87 deletions(-) diff --git a/plexist/modules/plex.py b/plexist/modules/plex.py index 475abef..734e4dd 100644 --- a/plexist/modules/plex.py +++ b/plexist/modules/plex.py @@ -10,6 +10,7 @@ from plexapi.exceptions import BadRequest, NotFound from plexapi.server import PlexServer from .helperClasses import Playlist, Track, UserInputs +from concurrent.futures import ThreadPoolExecutor logging.basicConfig(stream=sys.stdout, level=logging.INFO) @@ -30,6 +31,74 @@ def initialize_db(): ''') conn.commit() conn.close() + +MATCH_THRESHOLD = 0.6 + +def _match_single_track(plex, track, year=None, genre=None): + plex_id = get_matched_song(track.title, track.artist, track.album) + if plex_id: + return plex.fetchItem(plex_id), None + + def similarity(a, b): + return SequenceMatcher(None, a.lower(), b.lower()).ratio() + + def search_and_score(query, threshold): + try: + search = plex.search(query, mediatype="track", limit=10) + except BadRequest: + logging.info(f"Failed to search {query} on Plex") + return None, 0 + + best_match = None + best_score = 0 + + for s in search: + score = 0 + score += similarity(s.title, track.title) * 0.4 + score += similarity(s.artist().title, track.artist) * 0.3 + score += similarity(s.album().title, track.album) * 0.2 + + # Check for version in parentheses + if '(' in track.title and '(' in s.title: + version_similarity = similarity( + track.title.split('(')[1].split(')')[0], + s.title.split('(')[1].split(')')[0] + ) + score += version_similarity * 0.1 + + if score > best_score: + best_score = score + best_match = s + + return (best_match, best_score) if best_score >= threshold else (None, 0) + + # Stage 1: Strict matching + query = f"{track.title} {track.artist} {track.album}" + match, score = search_and_score(query, 0.8) + if match: + insert_matched_song(track.title, track.artist, track.album, match.ratingKey) + return match, None + + # 
Stage 2: Relax album requirement + query = f"{track.title} {track.artist}" + match, score = search_and_score(query, 0.7) + if match: + logging.info(f"Matched '{track.title}' by '{track.artist}' with relaxed album criteria. Score: {score}") + insert_matched_song(track.title, track.artist, track.album, match.ratingKey) + return match, None + + # Stage 3: Further relaxation + words = track.title.split() + if len(words) > 1: + query = f"{' '.join(words[:2])} {track.artist}" + match, score = search_and_score(query, 0.6) + if match: + logging.info(f"Matched '{track.title}' by '{track.artist}' with partial title. Score: {score}") + insert_matched_song(track.title, track.artist, track.album, match.ratingKey) + return match, None + + logging.info(f"No match found for track {track.title} by {track.artist}.") + return None, track def insert_matched_song(title, artist, album, plex_id): conn = sqlite3.connect('plexist.db') @@ -70,9 +139,6 @@ def _delete_csv(name: str, path: str = "/data") -> None: file = data_folder / f"{name}.csv" file.unlink() - -from concurrent.futures import ThreadPoolExecutor - def _get_available_plex_tracks(plex: PlexServer, tracks: List[Track]) -> List: with ThreadPoolExecutor() as executor: results = list(executor.map(lambda track: _match_single_track(plex, track), tracks)) @@ -80,41 +146,6 @@ def _get_available_plex_tracks(plex: PlexServer, tracks: List[Track]) -> List: missing_tracks = [result[1] for result in results if result[1]] return plex_tracks, missing_tracks -MATCH_THRESHOLD = 0.6 - -def _match_single_track(plex, track, year=None, genre=None): - # Check in local DB first - plex_id = get_matched_song(track.title, track.artist, track.album) - if plex_id: - return plex.fetchItem(plex_id), None - - search = [] - try: - # Combine track title and primary artist for a more refined search - primary_artist = track.artist.split("&")[0].split("ft.")[0].strip() # Get the primary artist - search_query = f"{track.title} {primary_artist}" - search = 
plex.search(search_query, mediatype="track", limit=5) - except BadRequest: - logging.info("Failed to search %s on Plex", track.title) - best_match = None - best_score = 0 - for s in search: - artist_similarity = SequenceMatcher(None, s.artist().title.lower(), primary_artist.lower()).quick_ratio() - title_similarity = SequenceMatcher(None, s.title.lower(), track.title.lower()).quick_ratio() - album_similarity = SequenceMatcher(None, s.album().title.lower(), track.album.lower()).quick_ratio() - year_similarity = 1 if year and s.year == year else 0 - genre_similarity = SequenceMatcher(None, s.genre.lower(), genre.lower()).quick_ratio() if genre else 0 - combined_score = (artist_similarity * 0.4) + (title_similarity * 0.3) + (album_similarity * 0.2) + (year_similarity * 0.05) + (genre_similarity * 0.05) - if combined_score > best_score: - best_score = combined_score - best_match = s - if best_match and best_score >= MATCH_THRESHOLD: - insert_matched_song(track.title, track.artist, track.album, best_match.ratingKey) - return best_match, None - else: - logging.info(f"No match found for track {track.title} by {track.artist} with a score of {best_score}.") - return None, track - def _update_plex_playlist( plex: PlexServer, available_tracks: List, @@ -128,18 +159,21 @@ def _update_plex_playlist( return plex_playlist -def update_or_create_plex_playlist( - plex: PlexServer, - playlist: Playlist, - tracks: List[Track], - userInputs: UserInputs, -) -> None: - if tracks is None: - logging.error("No tracks provided for playlist %s", playlist.name) - return - available_tracks, missing_tracks = _get_available_plex_tracks(plex, tracks) +def update_or_create_plex_playlist( + plex: PlexServer, + playlist: Playlist, + tracks: List[Track], + userInputs: UserInputs, +) -> None: + if not tracks: # Changed from 'is None' to handle empty lists as well + logging.error("No tracks provided for playlist %s", playlist.name) + return + + available_tracks, missing_tracks = 
_get_available_plex_tracks(plex, tracks) + if available_tracks: try: + plex_playlist = plex.playlist(playlist.name) plex_playlist = _update_plex_playlist( plex=plex, available_tracks=available_tracks, @@ -148,52 +182,39 @@ def update_or_create_plex_playlist( ) logging.info("Updated playlist %s", playlist.name) except NotFound: - plex.createPlaylist(title=playlist.name, items=available_tracks) + plex_playlist = plex.createPlaylist(title=playlist.name, items=available_tracks) logging.info("Created playlist %s", playlist.name) - plex_playlist = plex.playlist(playlist.name) + if playlist.description and userInputs.add_playlist_description: try: plex_playlist.edit(summary=playlist.description) - except: - logging.info( - "Failed to update description for playlist %s", - playlist.name, - ) + logging.info("Updated description for playlist %s", playlist.name) + except Exception as e: + logging.error("Failed to update description for playlist %s: %s", playlist.name, str(e)) + if playlist.poster and userInputs.add_playlist_poster: try: plex_playlist.uploadPoster(url=playlist.poster) - except: - logging.info( - "Failed to update poster for playlist %s", playlist.name - ) - logging.info( - "Updated playlist %s with summary and poster", playlist.name - ) + logging.info("Updated poster for playlist %s", playlist.name) + except Exception as e: + logging.error("Failed to update poster for playlist %s: %s", playlist.name, str(e)) else: - logging.info( - "No songs for playlist %s were found on plex, skipping the" - " playlist creation", - playlist.name, - ) - if missing_tracks and userInputs.write_missing_as_csv: - try: - _write_csv(missing_tracks, playlist.name) - logging.info("Missing tracks written to %s.csv", playlist.name) - except: - logging.info( - "Failed to write missing tracks for %s, likely permission" - " issue", - playlist.name, - ) - if (not missing_tracks) and userInputs.write_missing_as_csv: - try: - _delete_csv(playlist.name) - logging.info("Deleted old %s.csv", 
playlist.name) - except: - logging.info( - "Failed to delete %s.csv, likely permission issue", - playlist.name, - ) + logging.warning("No songs for playlist %s were found on Plex, skipping the playlist creation", playlist.name) + + if userInputs.write_missing_as_csv: + if missing_tracks: + try: + _write_csv(missing_tracks, playlist.name) + logging.info("Missing tracks written to %s.csv", playlist.name) + except Exception as e: + logging.error("Failed to write missing tracks for %s: %s", playlist.name, str(e)) + else: + try: + _delete_csv(playlist.name) + logging.info("Deleted old %s.csv as no missing tracks found", playlist.name) + except Exception as e: + logging.error("Failed to delete %s.csv: %s", playlist.name, str(e)) def end_session(): - conn.close() + if 'conn' in locals() or 'conn' in globals(): + conn.close() From c0557e4b7c4ecee73c360f88eaaf4cf75ac8f71f Mon Sep 17 00:00:00 2001 From: "Gyarbij (Chono N)" <49493993+Gyarbij@users.noreply.github.com> Date: Sat, 31 Aug 2024 16:32:10 +0200 Subject: [PATCH 04/18] refine match scoring --- plexist/modules/plex.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/plexist/modules/plex.py b/plexist/modules/plex.py index 734e4dd..022491c 100644 --- a/plexist/modules/plex.py +++ b/plexist/modules/plex.py @@ -91,7 +91,7 @@ def search_and_score(query, threshold): words = track.title.split() if len(words) > 1: query = f"{' '.join(words[:2])} {track.artist}" - match, score = search_and_score(query, 0.6) + match, score = search_and_score(query, 0.4) if match: logging.info(f"Matched '{track.title}' by '{track.artist}' with partial title. 
Score: {score}") insert_matched_song(track.title, track.artist, track.album, match.ratingKey) From 9ac4c970cf8f7c866bc81c50275692cfb1c47c02 Mon Sep 17 00:00:00 2001 From: "Gyarbij (Chono N)" <49493993+Gyarbij@users.noreply.github.com> Date: Sat, 31 Aug 2024 16:34:00 +0200 Subject: [PATCH 05/18] Update docker-compose.yml filename to compose.yaml, update file contents and update readme. --- README.md | 4 ++-- assets/{docker-compose.yml => compose.yaml} | 3 ++- 2 files changed, 4 insertions(+), 3 deletions(-) rename assets/{docker-compose.yml => compose.yaml} (93%) diff --git a/README.md b/README.md index e2ea232..4b6fb41 100644 --- a/README.md +++ b/README.md @@ -50,7 +50,7 @@ python3 plexist.py ## Docker Deployment -You can run the image via docker run or docker-compose, choice is yours. Multi-Platform mages are available on [Docker Hub](https://hub.docker.com/r/gyarbij/plexist/). +You can run the image via docker run or docker compose, choice is yours. Multi-Platform mages are available on [Docker Hub](https://hub.docker.com/r/gyarbij/plexist/). Configure the parameters as required. Plex URL and TOKEN are mandatory and the options for your respective streaming service. @@ -83,7 +83,7 @@ docker run -d \ docker-compose.yml should be configured per the below, if you don't user Spotify you can remove the Spotify variables and vice versa for Deezer. 
-A template is Here: [docker-compose.yml](https://github.com/gyarbij/plexist/blob/main/assets/docker-compose.yml) +A template is Here: [docker-compose.yml](https://github.com/gyarbij/plexist/blob/main/assets/compose.yaml) ``` version: '3.8' diff --git a/assets/docker-compose.yml b/assets/compose.yaml similarity index 93% rename from assets/docker-compose.yml rename to assets/compose.yaml index 3a45305..395d9c4 100644 --- a/assets/docker-compose.yml +++ b/assets/compose.yaml @@ -1,4 +1,3 @@ -version: '3.9' services: plexist: container_name: plexist @@ -16,4 +15,6 @@ services: - SPOTIFY_USER_ID= - DEEZER_USER_ID= - DEEZER_PLAYLIST_ID=https://www.deezer.com/en/playlist/10484834882 + volumes: + - :/app/data restart: unless-stopped \ No newline at end of file From 8d23d1034b2d75b1e81bacdc71f609e6f26b8150 Mon Sep 17 00:00:00 2001 From: "Gyarbij (Chono N)" <49493993+Gyarbij@users.noreply.github.com> Date: Sat, 31 Aug 2024 16:39:54 +0200 Subject: [PATCH 06/18] Update SECURITY.md to include supported versions and reporting instructions --- SECURITY.md | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/SECURITY.md b/SECURITY.md index bc1de66..5d1d043 100644 --- a/SECURITY.md +++ b/SECURITY.md @@ -2,11 +2,15 @@ ## Supported Versions -The only version supported is the current tag:latest build located at [Docker Hub](https://hub.docker.com/r/gyarbij/wireui/tags) +The only versions supported are tags: +- latest +- dev + +located at [Docker Hub](https://hub.docker.com/r/gyarbij/plexist/tags) ## Reporting a Vulnerability To report a vulnerability, email gyarbij@pm.me -If it's something you can fix, please feel free to open a pull request. +If it's something you can fix, please feel free to open a pull request. 
\ No newline at end of file From 02bbd4aa029ad67d824511eeb9e11871a9d78eea Mon Sep 17 00:00:00 2001 From: "Gyarbij (Chono N)" <49493993+Gyarbij@users.noreply.github.com> Date: Sat, 31 Aug 2024 16:43:43 +0200 Subject: [PATCH 07/18] remove: global match threshold in favor of type match threshold --- plexist/modules/plex.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/plexist/modules/plex.py b/plexist/modules/plex.py index 022491c..ea5bc80 100644 --- a/plexist/modules/plex.py +++ b/plexist/modules/plex.py @@ -32,8 +32,6 @@ def initialize_db(): conn.commit() conn.close() -MATCH_THRESHOLD = 0.6 - def _match_single_track(plex, track, year=None, genre=None): plex_id = get_matched_song(track.title, track.artist, track.album) if plex_id: From 0c0cac880e75cf494ffa80bfc1b8523306992bad Mon Sep 17 00:00:00 2001 From: "Gyarbij (Chono N)" <49493993+Gyarbij@users.noreply.github.com> Date: Sat, 31 Aug 2024 16:54:38 +0200 Subject: [PATCH 08/18] chore: Refactor plex.py for improved matching and logging --- plexist/modules/plex.py | 138 +++++++++++++++++++++++----------------- 1 file changed, 80 insertions(+), 58 deletions(-) diff --git a/plexist/modules/plex.py b/plexist/modules/plex.py index ea5bc80..1248d2f 100644 --- a/plexist/modules/plex.py +++ b/plexist/modules/plex.py @@ -16,21 +16,28 @@ DB_PATH = os.getenv('DB_PATH', 'plexist.db') -def initialize_db(): - conn = sqlite3.connect('plexist.db') - cursor = conn.cursor() - cursor.execute(''' - CREATE TABLE IF NOT EXISTS plexist ( - title TEXT, - artist TEXT, - album TEXT, - year INTEGER, - genre TEXT, - plex_id INTEGER - ) - ''') - conn.commit() - conn.close() +def initialize_db(): + conn = sqlite3.connect('plexist.db') + cursor = conn.cursor() + cursor.execute(''' + CREATE TABLE IF NOT EXISTS plexist ( + title TEXT, + artist TEXT, + album TEXT, + year INTEGER, + genre TEXT, + plex_id INTEGER + ) + ''') + conn.commit() + conn.close() + +def _get_available_plex_tracks(plex: PlexServer, tracks: List[Track]) -> List: + with 
ThreadPoolExecutor() as executor: + results = list(executor.map(lambda track: _match_single_track(plex, track), tracks)) + plex_tracks = [result[0] for result in results if result[0]] + missing_tracks = [result[1] for result in results if result[1]] + return plex_tracks, missing_tracks def _match_single_track(plex, track, year=None, genre=None): plex_id = get_matched_song(track.title, track.artist, track.album) @@ -40,9 +47,9 @@ def _match_single_track(plex, track, year=None, genre=None): def similarity(a, b): return SequenceMatcher(None, a.lower(), b.lower()).ratio() - def search_and_score(query, threshold): + def search_and_score(query, threshold, year_weight=0, genre_weight=0): try: - search = plex.search(query, mediatype="track", limit=10) + search = plex.search(query, mediatype="track", limit=20) # Increased limit except BadRequest: logging.info(f"Failed to search {query} on Plex") return None, 0 @@ -55,7 +62,7 @@ def search_and_score(query, threshold): score += similarity(s.title, track.title) * 0.4 score += similarity(s.artist().title, track.artist) * 0.3 score += similarity(s.album().title, track.album) * 0.2 - + # Check for version in parentheses if '(' in track.title and '(' in s.title: version_similarity = similarity( @@ -64,50 +71,63 @@ def search_and_score(query, threshold): ) score += version_similarity * 0.1 + # Year and Genre Matching (if available) + if year and s.year: + score += (s.year == year) * year_weight + if genre and s.genres: + genre_matches = any(similarity(g.tag, genre) > 0.8 for g in s.genres) + score += genre_matches * genre_weight + if score > best_score: best_score = score best_match = s return (best_match, best_score) if best_score >= threshold else (None, 0) - # Stage 1: Strict matching + # Stage 1: Strict matching (including year and genre if available) query = f"{track.title} {track.artist} {track.album}" - match, score = search_and_score(query, 0.8) + match, score = search_and_score(query, 0.85, year_weight=0.2, 
genre_weight=0.1) # Adjusted weights if match: insert_matched_song(track.title, track.artist, track.album, match.ratingKey) return match, None # Stage 2: Relax album requirement query = f"{track.title} {track.artist}" - match, score = search_and_score(query, 0.7) + match, score = search_and_score(query, 0.75) # Slightly higher threshold if match: logging.info(f"Matched '{track.title}' by '{track.artist}' with relaxed album criteria. Score: {score}") insert_matched_song(track.title, track.artist, track.album, match.ratingKey) return match, None - # Stage 3: Further relaxation + # Stage 3: Further relaxation (partial title) words = track.title.split() if len(words) > 1: query = f"{' '.join(words[:2])} {track.artist}" - match, score = search_and_score(query, 0.4) + match, score = search_and_score(query, 0.6) # Increased threshold if match: logging.info(f"Matched '{track.title}' by '{track.artist}' with partial title. Score: {score}") insert_matched_song(track.title, track.artist, track.album, match.ratingKey) return match, None + # Stage 4: Artist Only Match (for compilations, soundtracks, etc.) + query = f"{track.artist}" + match, score = search_and_score(query, 0.65) + if match: + logging.info(f"Matched '{track.title}' by '{track.artist}' with artist only. Score: {score}") + insert_matched_song(track.title, track.artist, track.album, match.ratingKey) + return match, None + + # Stage 5: Title Only Match (last resort) + query = f"{track.title}" + match, score = search_and_score(query, 0.55) + if match: + logging.info(f"Matched '{track.title}' by '{track.artist}' with title only. 
Score: {score}") + insert_matched_song(track.title, track.artist, track.album, match.ratingKey) + return match, None + logging.info(f"No match found for track {track.title} by {track.artist}.") return None, track - -def insert_matched_song(title, artist, album, plex_id): - conn = sqlite3.connect('plexist.db') - cursor = conn.cursor() - cursor.execute(''' - INSERT INTO plexist (title, artist, album, plex_id) - VALUES (?, ?, ?, ?) - ''', (title, artist, album, plex_id)) - conn.commit() - conn.close() - + def get_matched_song(title, artist, album): conn = sqlite3.connect('plexist.db') cursor = conn.cursor() @@ -119,30 +139,15 @@ def get_matched_song(title, artist, album): conn.close() return result[0] if result else None - -def _write_csv(tracks: List[Track], name: str, path: str = "/data") -> None: - data_folder = pathlib.Path(path) - data_folder.mkdir(parents=True, exist_ok=True) - file = data_folder / f"{name}.csv" - with open(file, "w", encoding="utf-8") as csvfile: - writer = csv.writer(csvfile) - writer.writerow(Track.__annotations__.keys()) - for track in tracks: - writer.writerow( - [track.title, track.artist, track.album, track.url] - ) - -def _delete_csv(name: str, path: str = "/data") -> None: - data_folder = pathlib.Path(path) - file = data_folder / f"{name}.csv" - file.unlink() - -def _get_available_plex_tracks(plex: PlexServer, tracks: List[Track]) -> List: - with ThreadPoolExecutor() as executor: - results = list(executor.map(lambda track: _match_single_track(plex, track), tracks)) - plex_tracks = [result[0] for result in results if result[0]] - missing_tracks = [result[1] for result in results if result[1]] - return plex_tracks, missing_tracks +def insert_matched_song(title, artist, album, plex_id): + conn = sqlite3.connect('plexist.db') + cursor = conn.cursor() + cursor.execute(''' + INSERT INTO plexist (title, artist, album, plex_id) + VALUES (?, ?, ?, ?) 
+ ''', (title, artist, album, plex_id)) + conn.commit() + conn.close() def _update_plex_playlist( plex: PlexServer, @@ -213,6 +218,23 @@ def update_or_create_plex_playlist( except Exception as e: logging.error("Failed to delete %s.csv: %s", playlist.name, str(e)) +def _write_csv(tracks: List[Track], name: str, path: str = "/data") -> None: + data_folder = pathlib.Path(path) + data_folder.mkdir(parents=True, exist_ok=True) + file = data_folder / f"{name}.csv" + with open(file, "w", encoding="utf-8") as csvfile: + writer = csv.writer(csvfile) + writer.writerow(Track.__annotations__.keys()) + for track in tracks: + writer.writerow( + [track.title, track.artist, track.album, track.url] + ) + +def _delete_csv(name: str, path: str = "/data") -> None: + data_folder = pathlib.Path(path) + file = data_folder / f"{name}.csv" + file.unlink() + def end_session(): if 'conn' in locals() or 'conn' in globals(): conn.close() From 4975b247919a9549715507a8b384a1f530423b4e Mon Sep 17 00:00:00 2001 From: "Gyarbij (Chono N)" <49493993+Gyarbij@users.noreply.github.com> Date: Sat, 31 Aug 2024 18:58:20 +0200 Subject: [PATCH 09/18] Update requirements.txt to include tenacity library --- requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/requirements.txt b/requirements.txt index 74a88cf..86bcbfb 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,4 +2,5 @@ spotipy>=2.24 plexapi>=4.15.16 deezer-python>=7.0.0 urllib3>=2.2.2 +tenacity>=9.0.0 zipp>=3.20.1 # not directly required, pinned to avoid a vulnerability From ef5ef7da98a30285b61ae42562927963c71d54bf Mon Sep 17 00:00:00 2001 From: "Gyarbij (Chono N)" <49493993+Gyarbij@users.noreply.github.com> Date: Sat, 31 Aug 2024 19:04:40 +0200 Subject: [PATCH 10/18] - Added tenacity for implementing retry logic. - Implemented a caching mechanism using a global dictionary plex_tracks_cache to store all Plex tracks. 
- Added function fetch_all_plex_tracks that fetches all tracks from Plex and caches them, function uses @retry decorator to handle temporary network issues. - Modified _get_available_plex_tracks to use the cached tracks instead of performing multiple searches. - Updated _match_single_track to work with the cached tracks instead of performing Plex searches. - Added retry mechanisms to _update_plex_playlist and update_or_create_plex_playlist functions. - Added a clear_cache function to clear the cache when needed (e.g., between runs or when you want to force a refresh). --- plexist/modules/plex.py | 62 ++++++++++++++++++++++++++--------------- plexist/plexist.py | 19 +++++++------ 2 files changed, 49 insertions(+), 32 deletions(-) diff --git a/plexist/modules/plex.py b/plexist/modules/plex.py index 1248d2f..eed5a99 100644 --- a/plexist/modules/plex.py +++ b/plexist/modules/plex.py @@ -4,18 +4,21 @@ import pathlib import sys from difflib import SequenceMatcher -from typing import List +from typing import List, Dict from concurrent.futures import ThreadPoolExecutor import plexapi from plexapi.exceptions import BadRequest, NotFound from plexapi.server import PlexServer from .helperClasses import Playlist, Track, UserInputs -from concurrent.futures import ThreadPoolExecutor +from tenacity import retry, stop_after_attempt, wait_exponential logging.basicConfig(stream=sys.stdout, level=logging.INFO) DB_PATH = os.getenv('DB_PATH', 'plexist.db') +# Global cache for Plex tracks +plex_tracks_cache = {} + def initialize_db(): conn = sqlite3.connect('plexist.db') cursor = conn.cursor() @@ -32,38 +35,47 @@ def initialize_db(): conn.commit() conn.close() +@retry(stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=4, max=10)) +def fetch_all_plex_tracks(plex: PlexServer) -> Dict[str, plexapi.audio.Track]: + global plex_tracks_cache + if not plex_tracks_cache: + logging.info("Fetching all Plex tracks...") + all_tracks = plex.library.search(libtype="track") + 
plex_tracks_cache = {f"{track.title}|{track.artist().title}|{track.album().title}": track for track in all_tracks} + logging.info(f"Fetched {len(plex_tracks_cache)} tracks from Plex") + return plex_tracks_cache + def _get_available_plex_tracks(plex: PlexServer, tracks: List[Track]) -> List: + plex_tracks = fetch_all_plex_tracks(plex) + + def match_track(track): + return _match_single_track(plex_tracks, track) + with ThreadPoolExecutor() as executor: - results = list(executor.map(lambda track: _match_single_track(plex, track), tracks)) + results = list(executor.map(match_track, tracks)) + plex_tracks = [result[0] for result in results if result[0]] missing_tracks = [result[1] for result in results if result[1]] return plex_tracks, missing_tracks -def _match_single_track(plex, track, year=None, genre=None): +def _match_single_track(plex_tracks: Dict[str, plexapi.audio.Track], track: Track, year=None, genre=None): plex_id = get_matched_song(track.title, track.artist, track.album) if plex_id: - return plex.fetchItem(plex_id), None + return plex_tracks.get(f"{track.title}|{track.artist}|{track.album}"), None def similarity(a, b): return SequenceMatcher(None, a.lower(), b.lower()).ratio() def search_and_score(query, threshold, year_weight=0, genre_weight=0): - try: - search = plex.search(query, mediatype="track", limit=20) # Increased limit - except BadRequest: - logging.info(f"Failed to search {query} on Plex") - return None, 0 - best_match = None best_score = 0 - for s in search: + for key, s in plex_tracks.items(): score = 0 score += similarity(s.title, track.title) * 0.4 score += similarity(s.artist().title, track.artist) * 0.3 score += similarity(s.album().title, track.album) * 0.2 - # Check for version in parentheses if '(' in track.title and '(' in s.title: version_similarity = similarity( track.title.split('(')[1].split(')')[0], @@ -71,7 +83,6 @@ def search_and_score(query, threshold, year_weight=0, genre_weight=0): ) score += version_similarity * 0.1 - # Year 
and Genre Matching (if available) if year and s.year: score += (s.year == year) * year_weight if genre and s.genres: @@ -84,16 +95,16 @@ def search_and_score(query, threshold, year_weight=0, genre_weight=0): return (best_match, best_score) if best_score >= threshold else (None, 0) - # Stage 1: Strict matching (including year and genre if available) + # Stage 1: Strict matching query = f"{track.title} {track.artist} {track.album}" - match, score = search_and_score(query, 0.85, year_weight=0.2, genre_weight=0.1) # Adjusted weights + match, score = search_and_score(query, 0.85, year_weight=0.2, genre_weight=0.1) if match: insert_matched_song(track.title, track.artist, track.album, match.ratingKey) return match, None # Stage 2: Relax album requirement query = f"{track.title} {track.artist}" - match, score = search_and_score(query, 0.75) # Slightly higher threshold + match, score = search_and_score(query, 0.75) if match: logging.info(f"Matched '{track.title}' by '{track.artist}' with relaxed album criteria. Score: {score}") insert_matched_song(track.title, track.artist, track.album, match.ratingKey) @@ -103,13 +114,13 @@ def search_and_score(query, threshold, year_weight=0, genre_weight=0): words = track.title.split() if len(words) > 1: query = f"{' '.join(words[:2])} {track.artist}" - match, score = search_and_score(query, 0.6) # Increased threshold + match, score = search_and_score(query, 0.6) if match: logging.info(f"Matched '{track.title}' by '{track.artist}' with partial title. Score: {score}") insert_matched_song(track.title, track.artist, track.album, match.ratingKey) return match, None - # Stage 4: Artist Only Match (for compilations, soundtracks, etc.) 
+ # Stage 4: Artist Only Match query = f"{track.artist}" match, score = search_and_score(query, 0.65) if match: @@ -117,7 +128,7 @@ def search_and_score(query, threshold, year_weight=0, genre_weight=0): insert_matched_song(track.title, track.artist, track.album, match.ratingKey) return match, None - # Stage 5: Title Only Match (last resort) + # Stage 5: Title Only Match query = f"{track.title}" match, score = search_and_score(query, 0.55) if match: @@ -143,12 +154,13 @@ def insert_matched_song(title, artist, album, plex_id): conn = sqlite3.connect('plexist.db') cursor = conn.cursor() cursor.execute(''' - INSERT INTO plexist (title, artist, album, plex_id) + INSERT OR REPLACE INTO plexist (title, artist, album, plex_id) VALUES (?, ?, ?, ?) ''', (title, artist, album, plex_id)) conn.commit() conn.close() +@retry(stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=4, max=10)) def _update_plex_playlist( plex: PlexServer, available_tracks: List, @@ -161,14 +173,14 @@ def _update_plex_playlist( plex_playlist.addItems(available_tracks) return plex_playlist - +@retry(stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=4, max=10)) def update_or_create_plex_playlist( plex: PlexServer, playlist: Playlist, tracks: List[Track], userInputs: UserInputs, ) -> None: - if not tracks: # Changed from 'is None' to handle empty lists as well + if not tracks: logging.error("No tracks provided for playlist %s", playlist.name) return @@ -238,3 +250,7 @@ def _delete_csv(name: str, path: str = "/data") -> None: def end_session(): if 'conn' in locals() or 'conn' in globals(): conn.close() + +def clear_cache(): + global plex_tracks_cache + plex_tracks_cache.clear() \ No newline at end of file diff --git a/plexist/plexist.py b/plexist/plexist.py index 3bda830..07993ee 100644 --- a/plexist/plexist.py +++ b/plexist/plexist.py @@ -3,17 +3,17 @@ import logging import os import time - import deezer import spotipy from plexapi.server import PlexServer from 
spotipy.oauth2 import SpotifyClientCredentials - from modules.deezer import deezer_playlist_sync from modules.helperClasses import UserInputs from modules.spotify import spotify_playlist_sync -from modules.plex import initialize_db # Importing the database initialization function +from modules.plex import initialize_db, clear_cache # Import clear_cache function +from tenacity import retry, stop_after_attempt, wait_exponential +logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s') def read_environment_variables(): return UserInputs( @@ -31,19 +31,19 @@ def read_environment_variables(): deezer_playlist_ids=os.getenv("DEEZER_PLAYLIST_ID"), ) - +@retry(stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=4, max=10)) def initialize_plex_server(user_inputs): if user_inputs.plex_url and user_inputs.plex_token: try: return PlexServer(user_inputs.plex_url, user_inputs.plex_token) except Exception as e: logging.error(f"Plex Authorization error: {e}") - return None + raise # Re-raise the exception to trigger retry else: logging.error("Missing Plex Authorization Variables") return None - +@retry(stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=4, max=10)) def initialize_spotify_client(user_inputs): if ( user_inputs.spotipy_client_id @@ -59,12 +59,11 @@ def initialize_spotify_client(user_inputs): ) except Exception as e: logging.error(f"Spotify Authorization error: {e}") - return None + raise # Re-raise the exception to trigger retry else: logging.error("Missing one or more Spotify Authorization Variables") return None - def main(): initialize_db() # Initialize the database at the start of the main function @@ -76,6 +75,9 @@ def main(): while True: logging.info("Starting playlist sync") + + # Clear the cache at the beginning of each run + clear_cache() # Spotify sync logging.info("Starting Spotify playlist sync") @@ -97,6 +99,5 @@ def main(): time.sleep(user_inputs.wait_seconds) - if __name__ == "__main__": 
main() \ No newline at end of file From 8f0ee6b554329ff9c52141651663433b2dd4a265 Mon Sep 17 00:00:00 2001 From: "Gyarbij (Chono N)" <49493993+Gyarbij@users.noreply.github.com> Date: Sat, 31 Aug 2024 21:09:41 +0200 Subject: [PATCH 11/18] chore: Refactor plex.py to improve caching --- plexist/modules/plex.py | 73 ++++++++++++++++++++++++++++++++++++----- 1 file changed, 65 insertions(+), 8 deletions(-) diff --git a/plexist/modules/plex.py b/plexist/modules/plex.py index eed5a99..fa8a534 100644 --- a/plexist/modules/plex.py +++ b/plexist/modules/plex.py @@ -11,6 +11,8 @@ from plexapi.server import PlexServer from .helperClasses import Playlist, Track, UserInputs from tenacity import retry, stop_after_attempt, wait_exponential +import threading +import time logging.basicConfig(stream=sys.stdout, level=logging.INFO) @@ -18,6 +20,8 @@ # Global cache for Plex tracks plex_tracks_cache = {} +cache_lock = threading.Lock() +cache_building = False def initialize_db(): conn = sqlite3.connect('plexist.db') @@ -36,14 +40,63 @@ def initialize_db(): conn.close() @retry(stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=4, max=10)) -def fetch_all_plex_tracks(plex: PlexServer) -> Dict[str, plexapi.audio.Track]: - global plex_tracks_cache - if not plex_tracks_cache: - logging.info("Fetching all Plex tracks...") - all_tracks = plex.library.search(libtype="track") - plex_tracks_cache = {f"{track.title}|{track.artist().title}|{track.album().title}": track for track in all_tracks} - logging.info(f"Fetched {len(plex_tracks_cache)} tracks from Plex") - return plex_tracks_cache +def fetch_plex_tracks(plex: PlexServer, offset: int = 0, limit: int = 100) -> List[plexapi.audio.Track]: + return plex.library.search(libtype="track", container_start=offset, container_size=limit) + +def update_cache_in_background(plex: PlexServer): + global cache_building + if cache_building: + return + + cache_building = True + threading.Thread(target=_update_cache, args=(plex,), 
daemon=True).start() + +def _update_cache(plex: PlexServer): + global cache_building + offset = 0 + limit = 100 + total_tracks = 0 + last_update_time = get_last_update_time() + + while True: + try: + tracks = fetch_plex_tracks(plex, offset, limit) + if not tracks: + break + + with cache_lock: + for track in tracks: + if track.addedAt > last_update_time or track.updatedAt > last_update_time: + key = f"{track.title}|{track.artist().title}|{track.album().title}" + plex_tracks_cache[key] = track + _update_db_cache(track) + total_tracks += 1 + + offset += limit + logging.info(f"Updated {total_tracks} tracks in cache so far...") + time.sleep(1) # Add a small delay to avoid hitting rate limits + except Exception as e: + logging.error(f"Error while updating cache: {str(e)}") + break + + set_last_update_time() + cache_building = False + logging.info(f"Finished updating cache. Total tracks updated: {total_tracks}") + +def get_last_update_time(): + conn = sqlite3.connect('plexist.db') + cursor = conn.cursor() + cursor.execute('SELECT value FROM metadata WHERE key = "last_update_time"') + result = cursor.fetchone() + conn.close() + return float(result[0]) if result else 0 + +def set_last_update_time(): + conn = sqlite3.connect('plexist.db') + cursor = conn.cursor() + cursor.execute('INSERT OR REPLACE INTO metadata (key, value) VALUES ("last_update_time", ?)', (time.time(),)) + conn.commit() + conn.close() def _get_available_plex_tracks(plex: PlexServer, tracks: List[Track]) -> List: plex_tracks = fetch_all_plex_tracks(plex) @@ -251,6 +304,10 @@ def end_session(): if 'conn' in locals() or 'conn' in globals(): conn.close() +def initialize_cache(plex: PlexServer): + load_cache_from_db() + update_cache_in_background(plex) + def clear_cache(): global plex_tracks_cache plex_tracks_cache.clear() \ No newline at end of file From 103039a4bd75cd9e6c796b38aed1edb84d702976 Mon Sep 17 00:00:00 2001 From: "Gyarbij (Chono N)" <49493993+Gyarbij@users.noreply.github.com> Date: Sat, 31 Aug 2024 
21:12:02 +0200 Subject: [PATCH 12/18] chore: Refactor plexist.py to persist cache --- plexist/plexist.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/plexist/plexist.py b/plexist/plexist.py index 07993ee..9dc1969 100644 --- a/plexist/plexist.py +++ b/plexist/plexist.py @@ -10,7 +10,7 @@ from modules.deezer import deezer_playlist_sync from modules.helperClasses import UserInputs from modules.spotify import spotify_playlist_sync -from modules.plex import initialize_db, clear_cache # Import clear_cache function +from modules.plex import initialize_db, initialize_cache from tenacity import retry, stop_after_attempt, wait_exponential logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s') @@ -65,19 +65,21 @@ def initialize_spotify_client(user_inputs): return None def main(): - initialize_db() # Initialize the database at the start of the main function - + initialize_db() user_inputs = read_environment_variables() plex = initialize_plex_server(user_inputs) if plex is None: return + # Initialize the cache + initialize_cache(plex) + while True: logging.info("Starting playlist sync") - # Clear the cache at the beginning of each run - clear_cache() + # Update the cache + initialize_cache(plex) # Spotify sync logging.info("Starting Spotify playlist sync") From 22801efed8f63f14ba7eeaf9f36210404fe98a99 Mon Sep 17 00:00:00 2001 From: "Gyarbij (Chono N)" <49493993+Gyarbij@users.noreply.github.com> Date: Sat, 31 Aug 2024 21:20:22 +0200 Subject: [PATCH 13/18] chore: Refactor plex.py to fic init cache --- plexist/modules/plex.py | 79 +++++++++++++++++++++++++++++++++++++---- 1 file changed, 73 insertions(+), 6 deletions(-) diff --git a/plexist/modules/plex.py b/plexist/modules/plex.py index fa8a534..8faf1a9 100644 --- a/plexist/modules/plex.py +++ b/plexist/modules/plex.py @@ -24,7 +24,7 @@ cache_building = False def initialize_db(): - conn = sqlite3.connect('plexist.db') + conn = sqlite3.connect(DB_PATH) cursor = 
conn.cursor() cursor.execute(''' CREATE TABLE IF NOT EXISTS plexist ( @@ -36,6 +36,23 @@ def initialize_db(): plex_id INTEGER ) ''') + cursor.execute(''' + CREATE TABLE IF NOT EXISTS plex_cache ( + key TEXT PRIMARY KEY, + title TEXT, + artist TEXT, + album TEXT, + year INTEGER, + genre TEXT, + plex_id INTEGER + ) + ''') + cursor.execute(''' + CREATE TABLE IF NOT EXISTS metadata ( + key TEXT PRIMARY KEY, + value TEXT + ) + ''') conn.commit() conn.close() @@ -84,7 +101,7 @@ def _update_cache(plex: PlexServer): logging.info(f"Finished updating cache. Total tracks updated: {total_tracks}") def get_last_update_time(): - conn = sqlite3.connect('plexist.db') + conn = sqlite3.connect(DB_PATH) cursor = conn.cursor() cursor.execute('SELECT value FROM metadata WHERE key = "last_update_time"') result = cursor.fetchone() @@ -92,12 +109,53 @@ def get_last_update_time(): return float(result[0]) if result else 0 def set_last_update_time(): - conn = sqlite3.connect('plexist.db') + conn = sqlite3.connect(DB_PATH) + cursor = conn.cursor() + cursor.execute('INSERT OR REPLACE INTO metadata (key, value) VALUES ("last_update_time", ?)', (str(time.time()),)) + conn.commit() + conn.close() + +def _update_db_cache(track): + conn = sqlite3.connect(DB_PATH) cursor = conn.cursor() - cursor.execute('INSERT OR REPLACE INTO metadata (key, value) VALUES ("last_update_time", ?)', (time.time(),)) + cursor.execute(''' + INSERT OR REPLACE INTO plex_cache (key, title, artist, album, year, genre, plex_id) + VALUES (?, ?, ?, ?, ?, ?, ?) 
+ ''', ( + f"{track.title}|{track.artist().title}|{track.album().title}", + track.title, + track.artist().title, + track.album().title, + track.year, + ','.join(g.tag for g in track.genres) if track.genres else '', + track.ratingKey + )) conn.commit() conn.close() +def load_cache_from_db(): + global plex_tracks_cache + conn = sqlite3.connect(DB_PATH) + cursor = conn.cursor() + cursor.execute('SELECT key, title, artist, album, year, genre, plex_id FROM plex_cache') + rows = cursor.fetchall() + conn.close() + + with cache_lock: + plex_tracks_cache.clear() + for row in rows: + key, title, artist, album, year, genre, plex_id = row + plex_tracks_cache[key] = plexapi.audio.Track(None, { + 'title': title, + 'parentTitle': artist, + 'grandparentTitle': album, + 'year': year, + 'genre': [{'tag': g} for g in genre.split(',')] if genre else [], + 'ratingKey': plex_id + }) + + logging.info(f"Loaded {len(plex_tracks_cache)} tracks from the database cache") + def _get_available_plex_tracks(plex: PlexServer, tracks: List[Track]) -> List: plex_tracks = fetch_all_plex_tracks(plex) @@ -307,7 +365,16 @@ def end_session(): def initialize_cache(plex: PlexServer): load_cache_from_db() update_cache_in_background(plex) - + def clear_cache(): global plex_tracks_cache - plex_tracks_cache.clear() \ No newline at end of file + with cache_lock: + plex_tracks_cache.clear() + + conn = sqlite3.connect(DB_PATH) + cursor = conn.cursor() + cursor.execute('DELETE FROM plex_cache') + conn.commit() + conn.close() + + logging.info("Cache cleared") \ No newline at end of file From 61b28ca1a35415ec703360a9c2d5d4310fe815c5 Mon Sep 17 00:00:00 2001 From: "Gyarbij (Chono N)" <49493993+Gyarbij@users.noreply.github.com> Date: Sat, 31 Aug 2024 21:26:37 +0200 Subject: [PATCH 14/18] chore: Refactor plex.py to fix cache initialization --- plexist/modules/plex.py | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/plexist/modules/plex.py b/plexist/modules/plex.py index 
8faf1a9..8d1b3dd 100644 --- a/plexist/modules/plex.py +++ b/plexist/modules/plex.py @@ -60,6 +60,15 @@ def initialize_db(): def fetch_plex_tracks(plex: PlexServer, offset: int = 0, limit: int = 100) -> List[plexapi.audio.Track]: return plex.library.search(libtype="track", container_start=offset, container_size=limit) +def fetch_all_plex_tracks(plex: PlexServer) -> Dict[str, plexapi.audio.Track]: + global plex_tracks_cache + if not plex_tracks_cache: + load_cache_from_db() + if not plex_tracks_cache: + logging.info("Cache is empty. Fetching all Plex tracks...") + update_cache_in_background(plex) + return plex_tracks_cache + def update_cache_in_background(plex: PlexServer): global cache_building if cache_building: @@ -69,7 +78,7 @@ def update_cache_in_background(plex: PlexServer): threading.Thread(target=_update_cache, args=(plex,), daemon=True).start() def _update_cache(plex: PlexServer): - global cache_building + global cache_building, plex_tracks_cache offset = 0 limit = 100 total_tracks = 0 @@ -84,7 +93,7 @@ def _update_cache(plex: PlexServer): with cache_lock: for track in tracks: if track.addedAt > last_update_time or track.updatedAt > last_update_time: - key = f"{track.title}|{track.artist().title}|{track.album().title}" + key = f"{track.title}|{track.parentTitle}|{track.grandparentTitle}" plex_tracks_cache[key] = track _update_db_cache(track) total_tracks += 1 @@ -122,10 +131,10 @@ def _update_db_cache(track): INSERT OR REPLACE INTO plex_cache (key, title, artist, album, year, genre, plex_id) VALUES (?, ?, ?, ?, ?, ?, ?) 
''', ( - f"{track.title}|{track.artist().title}|{track.album().title}", + f"{track.title}|{track.parentTitle}|{track.grandparentTitle}", track.title, - track.artist().title, - track.album().title, + track.parentTitle, + track.grandparentTitle, track.year, ','.join(g.tag for g in track.genres) if track.genres else '', track.ratingKey From 86f0ec25cb2279f975c46985863165c231c61cd1 Mon Sep 17 00:00:00 2001 From: "Gyarbij (Chono N)" <49493993+Gyarbij@users.noreply.github.com> Date: Sat, 31 Aug 2024 21:51:03 +0200 Subject: [PATCH 15/18] Refactor plex.py to fix cache initialization revx1 then back --- plexist/modules/plex.py | 172 +++++++++++++++------------------------- 1 file changed, 62 insertions(+), 110 deletions(-) diff --git a/plexist/modules/plex.py b/plexist/modules/plex.py index 8d1b3dd..dbd0e3a 100644 --- a/plexist/modules/plex.py +++ b/plexist/modules/plex.py @@ -12,7 +12,7 @@ from .helperClasses import Playlist, Track, UserInputs from tenacity import retry, stop_after_attempt, wait_exponential import threading -import time +#import time logging.basicConfig(stream=sys.stdout, level=logging.INFO) @@ -47,12 +47,6 @@ def initialize_db(): plex_id INTEGER ) ''') - cursor.execute(''' - CREATE TABLE IF NOT EXISTS metadata ( - key TEXT PRIMARY KEY, - value TEXT - ) - ''') conn.commit() conn.close() @@ -66,81 +60,33 @@ def fetch_all_plex_tracks(plex: PlexServer) -> Dict[str, plexapi.audio.Track]: load_cache_from_db() if not plex_tracks_cache: logging.info("Cache is empty. 
Fetching all Plex tracks...") - update_cache_in_background(plex) - return plex_tracks_cache - -def update_cache_in_background(plex: PlexServer): - global cache_building - if cache_building: - return - - cache_building = True - threading.Thread(target=_update_cache, args=(plex,), daemon=True).start() - -def _update_cache(plex: PlexServer): - global cache_building, plex_tracks_cache - offset = 0 - limit = 100 - total_tracks = 0 - last_update_time = get_last_update_time() - - while True: - try: + offset = 0 + limit = 100 + while True: tracks = fetch_plex_tracks(plex, offset, limit) if not tracks: break - - with cache_lock: - for track in tracks: - if track.addedAt > last_update_time or track.updatedAt > last_update_time: - key = f"{track.title}|{track.parentTitle}|{track.grandparentTitle}" - plex_tracks_cache[key] = track - _update_db_cache(track) - total_tracks += 1 - + for track in tracks: + key = f"{track.title}|{track.artist().title}|{track.album().title}" + plex_tracks_cache[key] = track offset += limit - logging.info(f"Updated {total_tracks} tracks in cache so far...") - time.sleep(1) # Add a small delay to avoid hitting rate limits - except Exception as e: - logging.error(f"Error while updating cache: {str(e)}") - break - - set_last_update_time() - cache_building = False - logging.info(f"Finished updating cache. Total tracks updated: {total_tracks}") - -def get_last_update_time(): - conn = sqlite3.connect(DB_PATH) - cursor = conn.cursor() - cursor.execute('SELECT value FROM metadata WHERE key = "last_update_time"') - result = cursor.fetchone() - conn.close() - return float(result[0]) if result else 0 - -def set_last_update_time(): - conn = sqlite3.connect(DB_PATH) - cursor = conn.cursor() - cursor.execute('INSERT OR REPLACE INTO metadata (key, value) VALUES ("last_update_time", ?)', (str(time.time()),)) - conn.commit() - conn.close() + logging.info(f"Fetched {len(plex_tracks_cache)} tracks so far...") + logging.info(f"Finished fetching all tracks. 
Total tracks: {len(plex_tracks_cache)}") + _update_db_cache_bulk(plex_tracks_cache) + return plex_tracks_cache -def _update_db_cache(track): +def _update_db_cache_bulk(tracks_cache): conn = sqlite3.connect(DB_PATH) cursor = conn.cursor() - cursor.execute(''' + cursor.executemany(''' INSERT OR REPLACE INTO plex_cache (key, title, artist, album, year, genre, plex_id) VALUES (?, ?, ?, ?, ?, ?, ?) - ''', ( - f"{track.title}|{track.parentTitle}|{track.grandparentTitle}", - track.title, - track.parentTitle, - track.grandparentTitle, - track.year, - ','.join(g.tag for g in track.genres) if track.genres else '', - track.ratingKey - )) + ''', [(key, track.title, track.artist().title, track.album().title, track.year, + ','.join(g.tag for g in track.genres) if track.genres else '', track.ratingKey) + for key, track in tracks_cache.items()]) conn.commit() conn.close() + logging.info(f"Bulk updated {len(tracks_cache)} tracks in the database cache") def load_cache_from_db(): global plex_tracks_cache @@ -151,17 +97,16 @@ def load_cache_from_db(): conn.close() with cache_lock: - plex_tracks_cache.clear() - for row in rows: - key, title, artist, album, year, genre, plex_id = row - plex_tracks_cache[key] = plexapi.audio.Track(None, { - 'title': title, - 'parentTitle': artist, - 'grandparentTitle': album, - 'year': year, - 'genre': [{'tag': g} for g in genre.split(',')] if genre else [], - 'ratingKey': plex_id - }) + plex_tracks_cache = { + row[0]: plexapi.audio.Track(None, { + 'title': row[1], + 'parentTitle': row[2], + 'grandparentTitle': row[3], + 'year': row[4], + 'genre': [{'tag': g} for g in row[5].split(',')] if row[5] else [], + 'ratingKey': row[6] + }) for row in rows + } logging.info(f"Loaded {len(plex_tracks_cache)} tracks from the database cache") @@ -169,7 +114,7 @@ def _get_available_plex_tracks(plex: PlexServer, tracks: List[Track]) -> List: plex_tracks = fetch_all_plex_tracks(plex) def match_track(track): - return _match_single_track(plex_tracks, track) + return 
_match_single_track(plex, plex_tracks, track) with ThreadPoolExecutor() as executor: results = list(executor.map(match_track, tracks)) @@ -178,24 +123,27 @@ def match_track(track): missing_tracks = [result[1] for result in results if result[1]] return plex_tracks, missing_tracks -def _match_single_track(plex_tracks: Dict[str, plexapi.audio.Track], track: Track, year=None, genre=None): - plex_id = get_matched_song(track.title, track.artist, track.album) - if plex_id: - return plex_tracks.get(f"{track.title}|{track.artist}|{track.album}"), None - +def _match_single_track(plex: PlexServer, plex_tracks: Dict[str, plexapi.audio.Track], track: Track): def similarity(a, b): return SequenceMatcher(None, a.lower(), b.lower()).ratio() - def search_and_score(query, threshold, year_weight=0, genre_weight=0): + def search_and_score(query, threshold): + try: + search = plex.search(query, mediatype="track", limit=20) + except BadRequest: + logging.info(f"Failed to search {query} on Plex") + return None, 0 + best_match = None best_score = 0 - for key, s in plex_tracks.items(): + for s in search: score = 0 score += similarity(s.title, track.title) * 0.4 score += similarity(s.artist().title, track.artist) * 0.3 score += similarity(s.album().title, track.album) * 0.2 + # Check for version in parentheses if '(' in track.title and '(' in s.title: version_similarity = similarity( track.title.split('(')[1].split(')')[0], @@ -203,11 +151,12 @@ def search_and_score(query, threshold, year_weight=0, genre_weight=0): ) score += version_similarity * 0.1 - if year and s.year: - score += (s.year == year) * year_weight - if genre and s.genres: - genre_matches = any(similarity(g.tag, genre) > 0.8 for g in s.genres) - score += genre_matches * genre_weight + # Year and Genre Matching (if available) + if track.year and s.year: + score += (int(track.year) == s.year) * 0.1 + if track.genre and s.genres: + genre_matches = any(similarity(g.tag, track.genre) > 0.8 for g in s.genres) + score += 
genre_matches * 0.1 if score > best_score: best_score = score @@ -215,52 +164,54 @@ def search_and_score(query, threshold, year_weight=0, genre_weight=0): return (best_match, best_score) if best_score >= threshold else (None, 0) - # Stage 1: Strict matching + # Stage 1: Exact match from cache + key = f"{track.title}|{track.artist}|{track.album}" + if key in plex_tracks: + logging.info(f"Exact match found in cache for '{track.title}' by '{track.artist}'") + return plex_tracks[key], None + + # Stage 2: Strict matching query = f"{track.title} {track.artist} {track.album}" - match, score = search_and_score(query, 0.85, year_weight=0.2, genre_weight=0.1) + match, score = search_and_score(query, 0.85) if match: - insert_matched_song(track.title, track.artist, track.album, match.ratingKey) + logging.info(f"Strict match found for '{track.title}' by '{track.artist}'. Score: {score}") return match, None - # Stage 2: Relax album requirement + # Stage 3: Relax album requirement query = f"{track.title} {track.artist}" match, score = search_and_score(query, 0.75) if match: logging.info(f"Matched '{track.title}' by '{track.artist}' with relaxed album criteria. Score: {score}") - insert_matched_song(track.title, track.artist, track.album, match.ratingKey) return match, None - # Stage 3: Further relaxation (partial title) + # Stage 4: Further relaxation (partial title) words = track.title.split() if len(words) > 1: query = f"{' '.join(words[:2])} {track.artist}" match, score = search_and_score(query, 0.6) if match: logging.info(f"Matched '{track.title}' by '{track.artist}' with partial title. Score: {score}") - insert_matched_song(track.title, track.artist, track.album, match.ratingKey) return match, None - # Stage 4: Artist Only Match + # Stage 5: Artist Only Match query = f"{track.artist}" match, score = search_and_score(query, 0.65) if match: logging.info(f"Matched '{track.title}' by '{track.artist}' with artist only. 
Score: {score}") - insert_matched_song(track.title, track.artist, track.album, match.ratingKey) return match, None - # Stage 5: Title Only Match + # Stage 6: Title Only Match query = f"{track.title}" - match, score = search_and_score(query, 0.55) + match, score = search_and_score(query, 0.55) if match: logging.info(f"Matched '{track.title}' by '{track.artist}' with title only. Score: {score}") - insert_matched_song(track.title, track.artist, track.album, match.ratingKey) return match, None logging.info(f"No match found for track {track.title} by {track.artist}.") return None, track def get_matched_song(title, artist, album): - conn = sqlite3.connect('plexist.db') + conn = sqlite3.connect(DB_PATH) cursor = conn.cursor() cursor.execute(''' SELECT plex_id FROM plexist @@ -271,7 +222,7 @@ def get_matched_song(title, artist, album): return result[0] if result else None def insert_matched_song(title, artist, album, plex_id): - conn = sqlite3.connect('plexist.db') + conn = sqlite3.connect(DB_PATH) cursor = conn.cursor() cursor.execute(''' INSERT OR REPLACE INTO plexist (title, artist, album, plex_id) @@ -373,7 +324,8 @@ def end_session(): def initialize_cache(plex: PlexServer): load_cache_from_db() - update_cache_in_background(plex) + if not plex_tracks_cache: + fetch_all_plex_tracks(plex) def clear_cache(): global plex_tracks_cache From 7de8ca519041e411120a07df37d6fab8e40c100b Mon Sep 17 00:00:00 2001 From: "Gyarbij (Chono N)" <49493993+Gyarbij@users.noreply.github.com> Date: Sat, 31 Aug 2024 22:02:03 +0200 Subject: [PATCH 16/18] Refactor plex.py to fix cache initialization --- plexist/modules/plex.py | 148 +++++++++++++++++++++++----------------- plexist/plexist.py | 2 +- 2 files changed, 88 insertions(+), 62 deletions(-) diff --git a/plexist/modules/plex.py b/plexist/modules/plex.py index dbd0e3a..38c22bf 100644 --- a/plexist/modules/plex.py +++ b/plexist/modules/plex.py @@ -12,7 +12,7 @@ from .helperClasses import Playlist, Track, UserInputs from tenacity import 
retry, stop_after_attempt, wait_exponential import threading -#import time +import time logging.basicConfig(stream=sys.stdout, level=logging.INFO) @@ -54,26 +54,34 @@ def initialize_db(): def fetch_plex_tracks(plex: PlexServer, offset: int = 0, limit: int = 100) -> List[plexapi.audio.Track]: return plex.library.search(libtype="track", container_start=offset, container_size=limit) -def fetch_all_plex_tracks(plex: PlexServer) -> Dict[str, plexapi.audio.Track]: - global plex_tracks_cache - if not plex_tracks_cache: - load_cache_from_db() - if not plex_tracks_cache: - logging.info("Cache is empty. Fetching all Plex tracks...") - offset = 0 - limit = 100 +def fetch_and_cache_tracks(plex: PlexServer): + global plex_tracks_cache, cache_building + if cache_building: + return + + cache_building = True + offset = 0 + limit = 100 + + def background_fetch(): + nonlocal offset while True: tracks = fetch_plex_tracks(plex, offset, limit) if not tracks: break - for track in tracks: - key = f"{track.title}|{track.artist().title}|{track.album().title}" - plex_tracks_cache[key] = track + with cache_lock: + for track in tracks: + key = f"{track.title}|{track.artist().title}|{track.album().title}" + plex_tracks_cache[key] = track offset += limit - logging.info(f"Fetched {len(plex_tracks_cache)} tracks so far...") - logging.info(f"Finished fetching all tracks. Total tracks: {len(plex_tracks_cache)}") - _update_db_cache_bulk(plex_tracks_cache) - return plex_tracks_cache + _update_db_cache_bulk(dict(list(plex_tracks_cache.items())[-len(tracks):])) + logging.info(f"Fetched and cached {len(plex_tracks_cache)} tracks so far...") + + global cache_building + cache_building = False + logging.info(f"Finished fetching all tracks. 
Total tracks in cache: {len(plex_tracks_cache)}") + + threading.Thread(target=background_fetch, daemon=True).start() def _update_db_cache_bulk(tracks_cache): conn = sqlite3.connect(DB_PATH) @@ -86,7 +94,6 @@ def _update_db_cache_bulk(tracks_cache): for key, track in tracks_cache.items()]) conn.commit() conn.close() - logging.info(f"Bulk updated {len(tracks_cache)} tracks in the database cache") def load_cache_from_db(): global plex_tracks_cache @@ -111,10 +118,8 @@ def load_cache_from_db(): logging.info(f"Loaded {len(plex_tracks_cache)} tracks from the database cache") def _get_available_plex_tracks(plex: PlexServer, tracks: List[Track]) -> List: - plex_tracks = fetch_all_plex_tracks(plex) - def match_track(track): - return _match_single_track(plex, plex_tracks, track) + return _match_single_track(plex, track) with ThreadPoolExecutor() as executor: results = list(executor.map(match_track, tracks)) @@ -123,52 +128,76 @@ def match_track(track): missing_tracks = [result[1] for result in results if result[1]] return plex_tracks, missing_tracks -def _match_single_track(plex: PlexServer, plex_tracks: Dict[str, plexapi.audio.Track], track: Track): +def _match_single_track(plex: PlexServer, track: Track): def similarity(a, b): return SequenceMatcher(None, a.lower(), b.lower()).ratio() def search_and_score(query, threshold): - try: - search = plex.search(query, mediatype="track", limit=20) - except BadRequest: - logging.info(f"Failed to search {query} on Plex") - return None, 0 - best_match = None best_score = 0 - for s in search: - score = 0 - score += similarity(s.title, track.title) * 0.4 - score += similarity(s.artist().title, track.artist) * 0.3 - score += similarity(s.album().title, track.album) * 0.2 - - # Check for version in parentheses - if '(' in track.title and '(' in s.title: - version_similarity = similarity( - track.title.split('(')[1].split(')')[0], - s.title.split('(')[1].split(')')[0] - ) - score += version_similarity * 0.1 - - # Year and Genre Matching 
(if available) - if track.year and s.year: - score += (int(track.year) == s.year) * 0.1 - if track.genre and s.genres: - genre_matches = any(similarity(g.tag, track.genre) > 0.8 for g in s.genres) - score += genre_matches * 0.1 - - if score > best_score: - best_score = score - best_match = s + # First, search in the cache + with cache_lock: + for key, s in plex_tracks_cache.items(): + score = 0 + score += similarity(s.title, track.title) * 0.4 + score += similarity(s.artist().title, track.artist) * 0.3 + score += similarity(s.album().title, track.album) * 0.2 + + if '(' in track.title and '(' in s.title: + version_similarity = similarity( + track.title.split('(')[1].split(')')[0], + s.title.split('(')[1].split(')')[0] + ) + score += version_similarity * 0.1 + + if track.year and s.year: + score += (int(track.year) == s.year) * 0.1 + if track.genre and s.genres: + genre_matches = any(similarity(g.tag, track.genre) > 0.8 for g in s.genres) + score += genre_matches * 0.1 + + if score > best_score: + best_score = score + best_match = s + + # If no good match in cache, search Plex directly + if best_score < threshold: + try: + search = plex.search(query, mediatype="track", limit=20) + for s in search: + score = 0 + score += similarity(s.title, track.title) * 0.4 + score += similarity(s.artist().title, track.artist) * 0.3 + score += similarity(s.album().title, track.album) * 0.2 + + if '(' in track.title and '(' in s.title: + version_similarity = similarity( + track.title.split('(')[1].split(')')[0], + s.title.split('(')[1].split(')')[0] + ) + score += version_similarity * 0.1 + + if track.year and s.year: + score += (int(track.year) == s.year) * 0.1 + if track.genre and s.genres: + genre_matches = any(similarity(g.tag, track.genre) > 0.8 for g in s.genres) + score += genre_matches * 0.1 + + if score > best_score: + best_score = score + best_match = s + except BadRequest: + logging.info(f"Failed to search {query} on Plex") return (best_match, best_score) if best_score >= 
threshold else (None, 0) # Stage 1: Exact match from cache key = f"{track.title}|{track.artist}|{track.album}" - if key in plex_tracks: - logging.info(f"Exact match found in cache for '{track.title}' by '{track.artist}'") - return plex_tracks[key], None + with cache_lock: + if key in plex_tracks_cache: + logging.info(f"Exact match found in cache for '{track.title}' by '{track.artist}'") + return plex_tracks_cache[key], None # Stage 2: Strict matching query = f"{track.title} {track.artist} {track.album}" @@ -177,13 +206,6 @@ def search_and_score(query, threshold): logging.info(f"Strict match found for '{track.title}' by '{track.artist}'. Score: {score}") return match, None - # Stage 3: Relax album requirement - query = f"{track.title} {track.artist}" - match, score = search_and_score(query, 0.75) - if match: - logging.info(f"Matched '{track.title}' by '{track.artist}' with relaxed album criteria. Score: {score}") - return match, None - # Stage 4: Further relaxation (partial title) words = track.title.split() if len(words) > 1: @@ -210,6 +232,10 @@ def search_and_score(query, threshold): logging.info(f"No match found for track {track.title} by {track.artist}.") return None, track +def initialize_cache(plex: PlexServer): + load_cache_from_db() + fetch_and_cache_tracks(plex) + def get_matched_song(title, artist, album): conn = sqlite3.connect(DB_PATH) cursor = conn.cursor() diff --git a/plexist/plexist.py b/plexist/plexist.py index 9dc1969..48720b4 100644 --- a/plexist/plexist.py +++ b/plexist/plexist.py @@ -79,7 +79,7 @@ def main(): logging.info("Starting playlist sync") # Update the cache - initialize_cache(plex) + #initialize_cache(plex) # Spotify sync logging.info("Starting Spotify playlist sync") From 90b065f9d5a8b10a70dbeaa4c645a16306199331 Mon Sep 17 00:00:00 2001 From: "Gyarbij (Chono N)" <49493993+Gyarbij@users.noreply.github.com> Date: Sat, 31 Aug 2024 22:08:53 +0200 Subject: [PATCH 17/18] Refactor plex.py to fix cache initialization --- 
plexist/modules/plex.py | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/plexist/modules/plex.py b/plexist/modules/plex.py index 38c22bf..54667e8 100644 --- a/plexist/modules/plex.py +++ b/plexist/modules/plex.py @@ -234,8 +234,10 @@ def search_and_score(query, threshold): def initialize_cache(plex: PlexServer): load_cache_from_db() - fetch_and_cache_tracks(plex) - + load_cache_from_db() + if not plex_tracks_cache: + fetch_and_cache_tracks(plex) + def get_matched_song(title, artist, album): conn = sqlite3.connect(DB_PATH) cursor = conn.cursor() @@ -348,11 +350,6 @@ def end_session(): if 'conn' in locals() or 'conn' in globals(): conn.close() -def initialize_cache(plex: PlexServer): - load_cache_from_db() - if not plex_tracks_cache: - fetch_all_plex_tracks(plex) - def clear_cache(): global plex_tracks_cache with cache_lock: From aa079a706473067dbbda7a759da14f242dd879e3 Mon Sep 17 00:00:00 2001 From: "Gyarbij (Chono N)" <49493993+Gyarbij@users.noreply.github.com> Date: Sat, 31 Aug 2024 22:09:35 +0200 Subject: [PATCH 18/18] Refactor plex.py to fix cache initialization fix dupe fetch --- plexist/modules/plex.py | 1 - 1 file changed, 1 deletion(-) diff --git a/plexist/modules/plex.py b/plexist/modules/plex.py index 54667e8..6d3f311 100644 --- a/plexist/modules/plex.py +++ b/plexist/modules/plex.py @@ -233,7 +233,6 @@ def search_and_score(query, threshold): return None, track def initialize_cache(plex: PlexServer): - load_cache_from_db() load_cache_from_db() if not plex_tracks_cache: fetch_and_cache_tracks(plex)