diff --git a/README.md b/README.md index e2ea232..4b6fb41 100644 --- a/README.md +++ b/README.md @@ -50,7 +50,7 @@ python3 plexist.py ## Docker Deployment -You can run the image via docker run or docker-compose, choice is yours. Multi-Platform mages are available on [Docker Hub](https://hub.docker.com/r/gyarbij/plexist/). +You can run the image via docker run or docker compose, choice is yours. Multi-Platform images are available on [Docker Hub](https://hub.docker.com/r/gyarbij/plexist/). Configure the parameters as required. Plex URL and TOKEN are mandatory and the options for your respective streaming service. @@ -83,7 +83,7 @@ docker run -d \ docker-compose.yml should be configured per the below, if you don't user Spotify you can remove the Spotify variables and vice versa for Deezer. -A template is Here: [docker-compose.yml](https://github.com/gyarbij/plexist/blob/main/assets/docker-compose.yml) +A template is here: [compose.yaml](https://github.com/gyarbij/plexist/blob/main/assets/compose.yaml) ``` version: '3.8' diff --git a/SECURITY.md b/SECURITY.md index bc1de66..5d1d043 100644 --- a/SECURITY.md +++ b/SECURITY.md @@ -2,11 +2,15 @@ ## Supported Versions -The only version supported is the current tag:latest build located at [Docker Hub](https://hub.docker.com/r/gyarbij/wireui/tags) +The only versions supported are tags: +- latest +- dev + +located at [Docker Hub](https://hub.docker.com/r/gyarbij/plexist/tags) ## Reporting a Vulnerability To report a vulnerability, email gyarbij@pm.me -If it's something you can fix, please feel free to open a pull request. +If it's something you can fix, please feel free to open a pull request. 
\ No newline at end of file diff --git a/assets/docker-compose.yml b/assets/compose.yaml similarity index 93% rename from assets/docker-compose.yml rename to assets/compose.yaml index 3a45305..395d9c4 100644 --- a/assets/docker-compose.yml +++ b/assets/compose.yaml @@ -1,4 +1,3 @@ -version: '3.9' services: plexist: container_name: plexist @@ -16,4 +15,6 @@ services: - SPOTIFY_USER_ID= - DEEZER_USER_ID= - DEEZER_PLAYLIST_ID=https://www.deezer.com/en/playlist/10484834882 + volumes: + - :/app/data restart: unless-stopped \ No newline at end of file diff --git a/plexist/modules/plex.py b/plexist/modules/plex.py index f38e5ba..6d3f311 100644 --- a/plexist/modules/plex.py +++ b/plexist/modules/plex.py @@ -1,119 +1,264 @@ +import os import sqlite3 import logging import pathlib import sys from difflib import SequenceMatcher -from typing import List +from typing import List, Dict from concurrent.futures import ThreadPoolExecutor import plexapi from plexapi.exceptions import BadRequest, NotFound from plexapi.server import PlexServer from .helperClasses import Playlist, Track, UserInputs +from tenacity import retry, stop_after_attempt, wait_exponential +import threading +import time logging.basicConfig(stream=sys.stdout, level=logging.INFO) -conn = sqlite3.connect('plexist.db') +DB_PATH = os.getenv('DB_PATH', 'plexist.db') -def initialize_db(): - conn = sqlite3.connect('plexist.db') - cursor = conn.cursor() - cursor.execute(''' - CREATE TABLE IF NOT EXISTS plexist ( - title TEXT, - artist TEXT, - album TEXT, - year INTEGER, - genre TEXT, - plex_id INTEGER - ) - ''') - conn.commit() - conn.close() - -def insert_matched_song(title, artist, album, plex_id): - conn = sqlite3.connect('plexist.db') - cursor = conn.cursor() - cursor.execute(''' - INSERT INTO plexist (title, artist, album, plex_id) - VALUES (?, ?, ?, ?) 
- ''', (title, artist, album, plex_id)) - conn.commit() - conn.close() - -def get_matched_song(title, artist, album): - conn = sqlite3.connect('plexist.db') - cursor = conn.cursor() - cursor.execute(''' - SELECT plex_id FROM plexist - WHERE title = ? AND artist = ? AND album = ? - ''', (title, artist, album)) - result = cursor.fetchone() - conn.close() - return result[0] if result else None +# Global cache for Plex tracks +plex_tracks_cache = {} +cache_lock = threading.Lock() +cache_building = False +def initialize_db(): + conn = sqlite3.connect(DB_PATH) + cursor = conn.cursor() + cursor.execute(''' + CREATE TABLE IF NOT EXISTS plexist ( + title TEXT, + artist TEXT, + album TEXT, + year INTEGER, + genre TEXT, + plex_id INTEGER + ) + ''') + cursor.execute(''' + CREATE TABLE IF NOT EXISTS plex_cache ( + key TEXT PRIMARY KEY, + title TEXT, + artist TEXT, + album TEXT, + year INTEGER, + genre TEXT, + plex_id INTEGER + ) + ''') + conn.commit() + conn.close() -def _write_csv(tracks: List[Track], name: str, path: str = "/data") -> None: - data_folder = pathlib.Path(path) - data_folder.mkdir(parents=True, exist_ok=True) - file = data_folder / f"{name}.csv" - with open(file, "w", encoding="utf-8") as csvfile: - writer = csv.writer(csvfile) - writer.writerow(Track.__annotations__.keys()) - for track in tracks: - writer.writerow( - [track.title, track.artist, track.album, track.url] - ) +@retry(stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=4, max=10)) +def fetch_plex_tracks(plex: PlexServer, offset: int = 0, limit: int = 100) -> List[plexapi.audio.Track]: + return plex.library.search(libtype="track", container_start=offset, container_size=limit) -def _delete_csv(name: str, path: str = "/data") -> None: - data_folder = pathlib.Path(path) - file = data_folder / f"{name}.csv" - file.unlink() +def fetch_and_cache_tracks(plex: PlexServer): + global plex_tracks_cache, cache_building + if cache_building: + return + cache_building = True + offset = 0 + limit = 
100 -from concurrent.futures import ThreadPoolExecutor + def background_fetch(): + nonlocal offset + while True: + tracks = fetch_plex_tracks(plex, offset, limit) + if not tracks: + break + with cache_lock: + for track in tracks: + key = f"{track.title}|{track.artist().title}|{track.album().title}" + plex_tracks_cache[key] = track + offset += limit + _update_db_cache_bulk(dict(list(plex_tracks_cache.items())[-len(tracks):])) + logging.info(f"Fetched and cached {len(plex_tracks_cache)} tracks so far...") + + global cache_building + cache_building = False + logging.info(f"Finished fetching all tracks. Total tracks in cache: {len(plex_tracks_cache)}") + + threading.Thread(target=background_fetch, daemon=True).start() + +def _update_db_cache_bulk(tracks_cache): + conn = sqlite3.connect(DB_PATH) + cursor = conn.cursor() + cursor.executemany(''' + INSERT OR REPLACE INTO plex_cache (key, title, artist, album, year, genre, plex_id) + VALUES (?, ?, ?, ?, ?, ?, ?) + ''', [(key, track.title, track.artist().title, track.album().title, track.year, + ','.join(g.tag for g in track.genres) if track.genres else '', track.ratingKey) + for key, track in tracks_cache.items()]) + conn.commit() + conn.close() + +def load_cache_from_db(): + global plex_tracks_cache + conn = sqlite3.connect(DB_PATH) + cursor = conn.cursor() + cursor.execute('SELECT key, title, artist, album, year, genre, plex_id FROM plex_cache') + rows = cursor.fetchall() + conn.close() + + with cache_lock: + plex_tracks_cache = { + row[0]: plexapi.audio.Track(None, { + 'title': row[1], + 'parentTitle': row[2], + 'grandparentTitle': row[3], + 'year': row[4], + 'genre': [{'tag': g} for g in row[5].split(',')] if row[5] else [], + 'ratingKey': row[6] + }) for row in rows + } + + logging.info(f"Loaded {len(plex_tracks_cache)} tracks from the database cache") def _get_available_plex_tracks(plex: PlexServer, tracks: List[Track]) -> List: + def match_track(track): + return _match_single_track(plex, track) + with 
ThreadPoolExecutor() as executor: - results = list(executor.map(lambda track: _match_single_track(plex, track), tracks)) + results = list(executor.map(match_track, tracks)) + plex_tracks = [result[0] for result in results if result[0]] missing_tracks = [result[1] for result in results if result[1]] return plex_tracks, missing_tracks -MATCH_THRESHOLD = 0.6 - -def _match_single_track(plex, track, year=None, genre=None): - # Check in local DB first - plex_id = get_matched_song(track.title, track.artist, track.album) - if plex_id: - return plex.fetchItem(plex_id), None - - search = [] - try: - # Combine track title and primary artist for a more refined search - primary_artist = track.artist.split("&")[0].split("ft.")[0].strip() # Get the primary artist - search_query = f"{track.title} {primary_artist}" - search = plex.search(search_query, mediatype="track", limit=5) - except BadRequest: - logging.info("Failed to search %s on Plex", track.title) - best_match = None - best_score = 0 - for s in search: - artist_similarity = SequenceMatcher(None, s.artist().title.lower(), primary_artist.lower()).quick_ratio() - title_similarity = SequenceMatcher(None, s.title.lower(), track.title.lower()).quick_ratio() - album_similarity = SequenceMatcher(None, s.album().title.lower(), track.album.lower()).quick_ratio() - year_similarity = 1 if year and s.year == year else 0 - genre_similarity = SequenceMatcher(None, s.genre.lower(), genre.lower()).quick_ratio() if genre else 0 - combined_score = (artist_similarity * 0.4) + (title_similarity * 0.3) + (album_similarity * 0.2) + (year_similarity * 0.05) + (genre_similarity * 0.05) - if combined_score > best_score: - best_score = combined_score - best_match = s - if best_match and best_score >= MATCH_THRESHOLD: - insert_matched_song(track.title, track.artist, track.album, best_match.ratingKey) - return best_match, None - else: - logging.info(f"No match found for track {track.title} by {track.artist} with a score of {best_score}.") - return 
None, track +def _match_single_track(plex: PlexServer, track: Track): + def similarity(a, b): + return SequenceMatcher(None, a.lower(), b.lower()).ratio() + + def search_and_score(query, threshold): + best_match = None + best_score = 0 + + # First, search in the cache + with cache_lock: + for key, s in plex_tracks_cache.items(): + score = 0 + score += similarity(s.title, track.title) * 0.4 + score += similarity(s.artist().title, track.artist) * 0.3 + score += similarity(s.album().title, track.album) * 0.2 + + if '(' in track.title and '(' in s.title: + version_similarity = similarity( + track.title.split('(')[1].split(')')[0], + s.title.split('(')[1].split(')')[0] + ) + score += version_similarity * 0.1 + + if track.year and s.year: + score += (int(track.year) == s.year) * 0.1 + if track.genre and s.genres: + genre_matches = any(similarity(g.tag, track.genre) > 0.8 for g in s.genres) + score += genre_matches * 0.1 + + if score > best_score: + best_score = score + best_match = s + + # If no good match in cache, search Plex directly + if best_score < threshold: + try: + search = plex.search(query, mediatype="track", limit=20) + for s in search: + score = 0 + score += similarity(s.title, track.title) * 0.4 + score += similarity(s.artist().title, track.artist) * 0.3 + score += similarity(s.album().title, track.album) * 0.2 + + if '(' in track.title and '(' in s.title: + version_similarity = similarity( + track.title.split('(')[1].split(')')[0], + s.title.split('(')[1].split(')')[0] + ) + score += version_similarity * 0.1 + + if track.year and s.year: + score += (int(track.year) == s.year) * 0.1 + if track.genre and s.genres: + genre_matches = any(similarity(g.tag, track.genre) > 0.8 for g in s.genres) + score += genre_matches * 0.1 + + if score > best_score: + best_score = score + best_match = s + except BadRequest: + logging.info(f"Failed to search {query} on Plex") + + return (best_match, best_score) if best_score >= threshold else (None, 0) + + # Stage 1: Exact 
match from cache + key = f"{track.title}|{track.artist}|{track.album}" + with cache_lock: + if key in plex_tracks_cache: + logging.info(f"Exact match found in cache for '{track.title}' by '{track.artist}'") + return plex_tracks_cache[key], None + + # Stage 2: Strict matching + query = f"{track.title} {track.artist} {track.album}" + match, score = search_and_score(query, 0.85) + if match: + logging.info(f"Strict match found for '{track.title}' by '{track.artist}'. Score: {score}") + return match, None + # Stage 4: Further relaxation (partial title) + words = track.title.split() + if len(words) > 1: + query = f"{' '.join(words[:2])} {track.artist}" + match, score = search_and_score(query, 0.6) + if match: + logging.info(f"Matched '{track.title}' by '{track.artist}' with partial title. Score: {score}") + return match, None + + # Stage 5: Artist Only Match + query = f"{track.artist}" + match, score = search_and_score(query, 0.65) + if match: + logging.info(f"Matched '{track.title}' by '{track.artist}' with artist only. Score: {score}") + return match, None + + # Stage 6: Title Only Match + query = f"{track.title}" + match, score = search_and_score(query, 0.55) + if match: + logging.info(f"Matched '{track.title}' by '{track.artist}' with title only. Score: {score}") + return match, None + + logging.info(f"No match found for track {track.title} by {track.artist}.") + return None, track + +def initialize_cache(plex: PlexServer): + load_cache_from_db() + if not plex_tracks_cache: + fetch_and_cache_tracks(plex) + +def get_matched_song(title, artist, album): + conn = sqlite3.connect(DB_PATH) + cursor = conn.cursor() + cursor.execute(''' + SELECT plex_id FROM plexist + WHERE title = ? AND artist = ? AND album = ? 
+ ''', (title, artist, album)) + result = cursor.fetchone() + conn.close() + return result[0] if result else None + +def insert_matched_song(title, artist, album, plex_id): + conn = sqlite3.connect(DB_PATH) + cursor = conn.cursor() + cursor.execute(''' + INSERT OR REPLACE INTO plexist (title, artist, album, plex_id) + VALUES (?, ?, ?, ?) + ''', (title, artist, album, plex_id)) + conn.commit() + conn.close() + +@retry(stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=4, max=10)) def _update_plex_playlist( plex: PlexServer, available_tracks: List, @@ -126,19 +271,22 @@ def _update_plex_playlist( plex_playlist.addItems(available_tracks) return plex_playlist +@retry(stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=4, max=10)) +def update_or_create_plex_playlist( + plex: PlexServer, + playlist: Playlist, + tracks: List[Track], + userInputs: UserInputs, +) -> None: + if not tracks: + logging.error("No tracks provided for playlist %s", playlist.name) + return + + available_tracks, missing_tracks = _get_available_plex_tracks(plex, tracks) -def update_or_create_plex_playlist( - plex: PlexServer, - playlist: Playlist, - tracks: List[Track], - userInputs: UserInputs, -) -> None: - if tracks is None: - logging.error("No tracks provided for playlist %s", playlist.name) - return - available_tracks, missing_tracks = _get_available_plex_tracks(plex, tracks) if available_tracks: try: + plex_playlist = plex.playlist(playlist.name) plex_playlist = _update_plex_playlist( plex=plex, available_tracks=available_tracks, @@ -147,52 +295,69 @@ def update_or_create_plex_playlist( ) logging.info("Updated playlist %s", playlist.name) except NotFound: - plex.createPlaylist(title=playlist.name, items=available_tracks) + plex_playlist = plex.createPlaylist(title=playlist.name, items=available_tracks) logging.info("Created playlist %s", playlist.name) - plex_playlist = plex.playlist(playlist.name) + if playlist.description and 
userInputs.add_playlist_description: try: plex_playlist.edit(summary=playlist.description) - except: - logging.info( - "Failed to update description for playlist %s", - playlist.name, - ) + logging.info("Updated description for playlist %s", playlist.name) + except Exception as e: + logging.error("Failed to update description for playlist %s: %s", playlist.name, str(e)) + if playlist.poster and userInputs.add_playlist_poster: try: plex_playlist.uploadPoster(url=playlist.poster) - except: - logging.info( - "Failed to update poster for playlist %s", playlist.name - ) - logging.info( - "Updated playlist %s with summary and poster", playlist.name - ) + logging.info("Updated poster for playlist %s", playlist.name) + except Exception as e: + logging.error("Failed to update poster for playlist %s: %s", playlist.name, str(e)) else: - logging.info( - "No songs for playlist %s were found on plex, skipping the" - " playlist creation", - playlist.name, - ) - if missing_tracks and userInputs.write_missing_as_csv: - try: - _write_csv(missing_tracks, playlist.name) - logging.info("Missing tracks written to %s.csv", playlist.name) - except: - logging.info( - "Failed to write missing tracks for %s, likely permission" - " issue", - playlist.name, - ) - if (not missing_tracks) and userInputs.write_missing_as_csv: - try: - _delete_csv(playlist.name) - logging.info("Deleted old %s.csv", playlist.name) - except: - logging.info( - "Failed to delete %s.csv, likely permission issue", - playlist.name, + logging.warning("No songs for playlist %s were found on Plex, skipping the playlist creation", playlist.name) + + if userInputs.write_missing_as_csv: + if missing_tracks: + try: + _write_csv(missing_tracks, playlist.name) + logging.info("Missing tracks written to %s.csv", playlist.name) + except Exception as e: + logging.error("Failed to write missing tracks for %s: %s", playlist.name, str(e)) + else: + try: + _delete_csv(playlist.name) + logging.info("Deleted old %s.csv as no missing tracks 
found", playlist.name) + except Exception as e: + logging.error("Failed to delete %s.csv: %s", playlist.name, str(e)) + +def _write_csv(tracks: List[Track], name: str, path: str = "/data") -> None: + data_folder = pathlib.Path(path) + data_folder.mkdir(parents=True, exist_ok=True) + file = data_folder / f"{name}.csv" + with open(file, "w", encoding="utf-8") as csvfile: + writer = csv.writer(csvfile) + writer.writerow(Track.__annotations__.keys()) + for track in tracks: + writer.writerow( + [track.title, track.artist, track.album, track.url] ) +def _delete_csv(name: str, path: str = "/data") -> None: + data_folder = pathlib.Path(path) + file = data_folder / f"{name}.csv" + file.unlink() + def end_session(): - conn.close() \ No newline at end of file + if 'conn' in locals() or 'conn' in globals(): + conn.close() + +def clear_cache(): + global plex_tracks_cache + with cache_lock: + plex_tracks_cache.clear() + + conn = sqlite3.connect(DB_PATH) + cursor = conn.cursor() + cursor.execute('DELETE FROM plex_cache') + conn.commit() + conn.close() + + logging.info("Cache cleared") \ No newline at end of file diff --git a/plexist/plexist.py b/plexist/plexist.py index 3bda830..48720b4 100644 --- a/plexist/plexist.py +++ b/plexist/plexist.py @@ -3,17 +3,17 @@ import logging import os import time - import deezer import spotipy from plexapi.server import PlexServer from spotipy.oauth2 import SpotifyClientCredentials - from modules.deezer import deezer_playlist_sync from modules.helperClasses import UserInputs from modules.spotify import spotify_playlist_sync -from modules.plex import initialize_db # Importing the database initialization function +from modules.plex import initialize_db, initialize_cache +from tenacity import retry, stop_after_attempt, wait_exponential +logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s') def read_environment_variables(): return UserInputs( @@ -31,19 +31,19 @@ def read_environment_variables(): 
deezer_playlist_ids=os.getenv("DEEZER_PLAYLIST_ID"), ) - +@retry(stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=4, max=10)) def initialize_plex_server(user_inputs): if user_inputs.plex_url and user_inputs.plex_token: try: return PlexServer(user_inputs.plex_url, user_inputs.plex_token) except Exception as e: logging.error(f"Plex Authorization error: {e}") - return None + raise # Re-raise the exception to trigger retry else: logging.error("Missing Plex Authorization Variables") return None - +@retry(stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=4, max=10)) def initialize_spotify_client(user_inputs): if ( user_inputs.spotipy_client_id @@ -59,23 +59,27 @@ def initialize_spotify_client(user_inputs): ) except Exception as e: logging.error(f"Spotify Authorization error: {e}") - return None + raise # Re-raise the exception to trigger retry else: logging.error("Missing one or more Spotify Authorization Variables") return None - def main(): - initialize_db() # Initialize the database at the start of the main function - + initialize_db() user_inputs = read_environment_variables() plex = initialize_plex_server(user_inputs) if plex is None: return + # Initialize the cache + initialize_cache(plex) + while True: logging.info("Starting playlist sync") + + # Update the cache + #initialize_cache(plex) # Spotify sync logging.info("Starting Spotify playlist sync") @@ -97,6 +101,5 @@ def main(): time.sleep(user_inputs.wait_seconds) - if __name__ == "__main__": main() \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index ce40165..86bcbfb 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,6 +1,6 @@ -spotipy>=2.23 -plexapi>=4.15.4 -deezer-python>=6.1.0 +spotipy>=2.24 +plexapi>=4.15.16 +deezer-python>=7.0.0 urllib3>=2.2.2 -zipp>=3.19.1 # not directly required, pinned by Snyk to avoid a vulnerability - +tenacity>=9.0.0 +zipp>=3.20.1 # not directly required, pinned to avoid a vulnerability