Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

A few major fixes and improvements #3

Open
wants to merge 4 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -127,3 +127,11 @@ dmypy.json

# Pyre type checker
.pyre/

# PyCharm / JetBrains IDEs
/.idea

# pa_to_ap specific stuff
/podcast_addict_extracted
/*.db
/*.backup
29 changes: 20 additions & 9 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,18 @@ Migrate data from Podcast Addict's to AntennaPod's database

This does not use any IDs for matching feeds and episodes from one db to another, as those tend to be very unreliable. (They're supposed to stay the same, but often they don't.) Instead, we match them by their name and, in some cases, other attributes. This will work even if the name changed. For example, when using the script one episode's name changed from something like `123. Great Title` to just `Great Title`, but they were still matched.

## Configuration
There are a few optional configuration settings you can change by editing the variables at the top of `pa_to_ap.py` before running the script.

MATCH_ON_EPISODE_URL_IF_COULD_NOT_FIND_A_MATCH_OTHERWISE = True
* `TRANSFER_DOWNLOADED_EPISODES` controls if existing downloads in Podcast Addict are copied to AntennaPod.
Additional steps are required, see steps below.
* Default: `True` (downloads are transferred.)
* `EPISODES_DIR_PATH` controls the directory path for transferred episodes (to which you have to manually copy/move the files).
* Default: `/storage/emulated/0/Android/data/de.danoeh.antennapod/files/media/from_podcast_addict`
* `MATCH_ON_EPISODE_URL_IF_COULD_NOT_FIND_A_MATCH_OTHERWISE` controls whether, if no name match is found for a given episode, we should try to match on the episode media URL instead.
* Default: `True` (URL match is used as a fallback.)

## Steps

0. Install Python 3.8 or later
Expand All @@ -20,18 +32,17 @@ This does not use any IDs for matching feeds and episodes from one db to another
5. Run the [`pa_to_ap.py`](pa_to_ap.py) script (AntennaPod db file will be **modified**!) in a terminal
6. Confirm that matches are correct (if they aren't you may need to increase `min_similarity`)
7. Copy the modified db file back to your phone
8. Create `/storage/emulated/0/Android/data/de.danoeh.antennapod/files/media/from_podcast_addict`
9. Manually move (or copy) the folders **inside**
`/storage/emulated/0/Android/data/com.bambuna.podcastaddict/files/podcast/`
**to**
`/storage/emulated/0/Android/data/de.danoeh.antennapod/files/media/from_podcast_addict`
as AntennaPod cannot access the files under the other app's directory (Consider making a backup of these files.)
10. Import the modified db in AntennaPod
8. If you chose to enable `TRANSFER_DOWNLOADED_EPISODES` (this is on by default):
1. Create `/storage/emulated/0/Android/data/de.danoeh.antennapod/files/media/from_podcast_addict`
2. Manually move (or copy) the folders **inside**
`/storage/emulated/0/Android/data/com.bambuna.podcastaddict/files/podcast/`
**to**
`/storage/emulated/0/Android/data/de.danoeh.antennapod/files/media/from_podcast_addict`
as AntennaPod cannot access the files under the other app's directory (Consider making a backup of these files.)
9. Import the modified db in AntennaPod

Enjoy!

Of course, you can change the location (to which you have to manually copy/move the files to) by modifying the `EPISODES_DIR_PATH` before running the script.

## Warning
Note that this is somewhat rough and will likely not handle a lot of edge cases.

Expand Down
160 changes: 107 additions & 53 deletions pa_to_ap.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,8 @@
import sys
import zipfile
import sqlite3
from dataclasses import dataclass
import functools
from dataclasses import dataclass, field
from operator import itemgetter
from pathlib import Path
from sqlite3 import Cursor
Expand All @@ -14,9 +15,11 @@

CUR_PATH = Path()

TRANSFER_DOWNLOADED_EPISODES = True
EPISODES_DIR_PATH = '/storage/emulated/0/Android/data/de.danoeh.antennapod/files/media/from_podcast_addict'
MATCH_ON_EPISODE_URL_IF_COULD_NOT_FIND_A_MATCH_OTHERWISE = True

AP_TAG_SEPARATOR = "\u001e" # Separator character for AP tag column blob

@dataclass
class Feed:
Expand All @@ -25,8 +28,32 @@ class Feed:
description: str
author: str
keep_updated: int
folder_name: str = ''
feed_url: str

@dataclass
class PAFeed(Feed):
    # Podcast Addict feed row produced by a LEFT JOIN against tag_relation,
    # so one podcast may arrive as several PAFeed instances (one per tag).
    tag: int  # tag_id of this single JOIN row; may be None for untagged podcasts
    # All tag ids collected after the JOIN rows are merged per podcast.
    # init=False keeps this out of __init__ so positional row unpacking still works.
    tags: list[int] = field(default_factory=list, init=False)  # For merged PAFeed rows this will contain all tags
    # Download folder name under Podcast Addict's media directory.
    folder_name: str

    def tag_names(self, pa_tags: dict[int, str]) -> list[str]:
        """Resolve this feed's tag ids to their display names via *pa_tags*."""
        return [pa_tags[x] for x in self.tags]

@dataclass
class APFeed(Feed):
    # Raw value of AntennaPod's Feeds.tags column: tag names joined with
    # AP_TAG_SEPARATOR, or None when the feed has no tags.
    _tags: str

    @property
    def tags_str(self) -> str:
        """Serialized tag string exactly as stored in the AP database column."""
        return self._tags

    @property
    def tags(self) -> list[str]:
        """Tag names as a list; empty when the column value is NULL."""
        return self._tags.split(AP_TAG_SEPARATOR) if self._tags is not None else list()

    @tags.setter
    def tags(self, value: list[str]):
        # Re-serialize the list back into the single-column separated format.
        self._tags = AP_TAG_SEPARATOR.join(value)

def error(msg):
print("ERROR:", msg)
Expand Down Expand Up @@ -73,46 +100,49 @@ def get_antenna_pod_and_podcast_addict_backup_path():


def transfer(podcast_addict_cur: Cursor, antenna_pod_cur: Cursor):
# first find match for all feeds in pa
pa_feeds = [Feed(*a) for a in podcast_addict_cur.execute(
'select _id, name, description, author, '
'automaticRefresh, folderName from podcasts '
'where subscribed_status = 1 and is_virtual = 0')]
# first find match for all feeds in pa, left join on tags relation table (so there may be multiple rows for each podcast)
pa_feeds_one_to_many_tags = [PAFeed(*a) for a in podcast_addict_cur.execute(
'SELECT podcasts._id, podcasts.name, description, author, '
'automaticRefresh, feed_url, tag_relation.tag_id, folderName FROM podcasts '
'LEFT JOIN tag_relation ON tag_relation.podcast_id = podcasts._id '
'WHERE subscribed_status = 1 AND is_virtual = 0 AND initialized_status = 1')]

# Collate multiple JOIN rows for each podcast if they had multiple tags
def reduce_by_tag(feeds: dict[str, PAFeed], current_feed: PAFeed):
if current_feed.id not in feeds:
if current_feed.tag is not None:
current_feed.tags.append(current_feed.tag)
feeds[current_feed.id] = current_feed
elif current_feed.tag is not None:
existing_feed: PAFeed = feeds[current_feed.id]
existing_feed.tags.append(current_feed.tag)
return feeds

pa_feeds_dict = functools.reduce(reduce_by_tag, pa_feeds_one_to_many_tags, dict())
pa_feeds = pa_feeds_dict.values()

pa_tags: dict[int, str] = dict(podcast_addict_cur.execute('SELECT _id, name FROM tags'))

print("# Podcast addict feeds:")
for feed in pa_feeds:
print(feed.name)
print()
print()

ap_feeds = [Feed(*a) for a in antenna_pod_cur.execute(
'select id, title, description, author, keep_updated from Feeds '
'where downloaded = 1')]
print("\n\n")

feed_attr_to_weight = { #
(lambda f: f.name): 0.85, #
(lambda f: f.author): 0.15, #
}
matcher = ObjectListMatcher(feed_attr_to_weight)

# should never be larger than the largest weight (otherwise is
# slightly unpredictable, as not every weight will be evaluated)
# value in range [0, 1]
matcher.minimum_similarity = 0.78
ap_feeds = {a[5]: APFeed(*a) for a in antenna_pod_cur.execute(
'select id, title, description, author, keep_updated, download_url, tags from Feeds '
)}

pa_to_ap = []

ap_indices = matcher.get_indices(pa_feeds, ap_feeds)
for n, pa in enumerate(pa_feeds):
ap_idx = ap_indices[n]

ap_name = '!!! NO MATCH !!!'
if ap_idx >= 0:
ap = ap_feeds[ap_idx]
pa_name = pa.name if pa.name else pa.feed_url
if pa.feed_url in ap_feeds:
ap = ap_feeds[pa.feed_url]
ap_name = ap.name
pa_to_ap.append((pa, ap))

print(pa.name, ap_name, sep=" -> ")
print(pa_name, ap_name, sep=" -> ")
print()

if not confirmed("Is this correct? Can we continue?"):
Expand All @@ -125,13 +155,13 @@ def transfer(podcast_addict_cur: Cursor, antenna_pod_cur: Cursor):
# # FIXME: make it work if premium and non-premium share same name
# if ap.name == "Name of same podcast but premium version":
# transfer_from_feed_to_feed(podcast_addict_cur,
# antenna_pod_cur, pa, ap)
# antenna_pod_cur, pa, ap, pa_tags)
# break
# break


for pa, ap in pa_to_ap:
transfer_from_feed_to_feed(podcast_addict_cur, antenna_pod_cur, pa, ap)
transfer_from_feed_to_feed(podcast_addict_cur, antenna_pod_cur, pa, ap, pa_tags)
print() # break


Expand All @@ -142,8 +172,9 @@ def transfer(podcast_addict_cur: Cursor, antenna_pod_cur: Cursor):

def transfer_from_feed_to_feed(podcast_addict_cur: Cursor, #
antenna_pod_cur: Cursor, #
pa: Feed, #
ap: Feed):
pa: PAFeed, #
ap: APFeed,
pa_tags: dict[int, str]):
print(f'# Feed: {ap.name}')
antenna_pod_cur.execute("UPDATE Feeds "
"SET keep_updated = ? "
Expand All @@ -153,27 +184,32 @@ def transfer_from_feed_to_feed(podcast_addict_cur: Cursor, #
pa_episodes = list(podcast_addict_cur.execute( #
# 0 1 n2 n3 n4
'select _id, name, seen_status, favorite, local_file_name, '
# n5 n6 n7 n8
'playbackDate, duration_ms, chapters_extracted, download_url '
# n5 n6 n7 n8 n9
'playbackDate, duration_ms, chapters_extracted, download_url, position_to_resume '
'from episodes where podcast_id = ? and '
'(seen_status = 1 or '
'(seen_status = 1 or position_to_resume < 0 or '
'(local_file_name != "" and local_file_name IS NOT NULL))',
(pa.id,)))

ap_episodes = list(antenna_pod_cur.execute( #
'select fi.id, fi.title, fm.download_url '
'from FeedItems fi '
'LEFT JOIN FeedMedia fm ON fi.id = fm.feeditem '
'where fi.feed = ? and fi.read = 0 ', (ap.id,)))
'where fi.feed = ? and fi.read = 0 '
, (ap.id,)))

print()
combinations = len(pa_episodes) * len(ap_episodes)
print(f"Rough estimate: {combinations / 4000:.2f} seconds")
print()
print()
print(f"\nRough estimate: {combinations / 4000:.2f} seconds\n\n")
pa_indices = ITEM_MATCHER.get_indices(ap_episodes, pa_episodes)
seen_match_count = 0

# Transfer tags, merge any existing tags with PA tags
ap.tags = list(set(ap.tags).union(pa.tag_names(pa_tags)))
antenna_pod_cur.execute("UPDATE Feeds "
"SET tags = ? "
"WHERE id = ?", #
(ap.tags_str, ap.id,))


for ap_ep, pa_idx in zip(ap_episodes, pa_indices):
if pa_idx < 0:
Expand All @@ -184,13 +220,15 @@ def transfer_from_feed_to_feed(podcast_addict_cur: Cursor, #
if MATCH_ON_EPISODE_URL_IF_COULD_NOT_FIND_A_MATCH_OTHERWISE and ap_url is not None:
ap_url = ap_url.strip()
if len(ap_url) > 9:
for pa_idx, pa_ep in enumerate(pa_episodes):
for pa_idx_urlmatch, pa_ep in enumerate(pa_episodes):
if not pa_ep[8]:
continue

pa_url = pa_ep[8].strip()
if pa_url and pa_url == ap_url:
print(f"! Fallback to URL match for: {ap_ep[1]}")
found = True
pa_idx = pa_idx_urlmatch
break

if not found:
Expand All @@ -202,13 +240,17 @@ def transfer_from_feed_to_feed(podcast_addict_cur: Cursor, #
if pa_ep[2]:
transfer_from_seen_ep_to_ep(antenna_pod_cur, podcast_addict_cur, #
pa_ep, ap_ep)
else:
transfer_progress_ep_to_ep(antenna_pod_cur, podcast_addict_cur, #
pa_ep, ap_ep)


if pa_ep[3]:
antenna_pod_cur.execute(
"INSERT INTO Favorites (feeditem, feed) VALUES "
"(?, ?)", (ap_ep[0], ap.id))

if pa_ep[4]:
if pa_ep[4] and TRANSFER_DOWNLOADED_EPISODES:
transfer_from_dld_ep_to_ep(antenna_pod_cur, podcast_addict_cur, #
pa_ep, ap_ep, pa.folder_name)

Expand All @@ -226,18 +268,17 @@ def transfer_chapters(antenna_pod_cur: Cursor, #
for title, start in podcast_addict_cur.execute( #
"select name, start from chapters "
"where podcastId = ? and episodeId = ?", (pa_feed_id, pa_ep[0])):
# we use chapter type 2 (id3) simply because it seems most likely
antenna_pod_cur.execute("INSERT INTO SimpleChapters "
"(title, start, feeditem, type) VALUES "
"(?, ?, ?, 2)", (title, start, ap_ep[0],))
"(title, start, feeditem) VALUES "
"(?, ?, ?)", (title, start, ap_ep[0]))


def transfer_from_dld_ep_to_ep(antenna_pod_cur: Cursor, #
podcast_addict_cur: Cursor, #
pa_ep: tuple, #
ap_ep: tuple, #
pa_folder_name: str):
pa_ep_id, _, _, _, pa_local_file_name, _, _, _, _ = pa_ep
pa_ep_id, _, _, _, pa_local_file_name, _, _, _, _, _ = pa_ep

dir_path = EPISODES_DIR_PATH.rstrip("/") + "/" + pa_folder_name
file_path = dir_path + "/" + pa_local_file_name
Expand All @@ -252,8 +293,8 @@ def transfer_from_seen_ep_to_ep(antenna_pod_cur: Cursor, #
podcast_addict_cur: Cursor, #
pa_ep: tuple, #
ap_ep: tuple):
print(ap_ep[1], " <<matched to>> ", pa_ep[1])
pa_ep_id, _, _, _, _, pa_playbackDate, pa_duration_ms, _, _ = pa_ep
print(ap_ep[1], " <<matched to seen>> ", pa_ep[1])
pa_ep_id, _, _, _, _, pa_playbackDate, pa_duration_ms, _, _, _ = pa_ep
antenna_pod_cur.execute("UPDATE FeedItems SET read = 1 WHERE id = ?",
(ap_ep[0],))

Expand All @@ -265,13 +306,26 @@ def transfer_from_seen_ep_to_ep(antenna_pod_cur: Cursor, #
(pa_playbackDate, pa_playbackDate, pa_duration_ms,
ap_ep[0],))

def transfer_progress_ep_to_ep(antenna_pod_cur: Cursor,
                               podcast_addict_cur: Cursor,
                               pa_ep: tuple,
                               ap_ep: tuple):
    """Copy the playback position of an in-progress (unfinished) episode.

    Writes the Podcast Addict resume position and last-played timestamp into
    the matched AntennaPod FeedMedia row.

    NOTE(review): podcast_addict_cur is unused here; presumably kept for
    signature symmetry with the other transfer_* helpers — confirm.
    """
    print(ap_ep[1], " <<matched to in-progress>> ", pa_ep[1])
    # Tuple layout follows the episode SELECT in transfer_from_feed_to_feed;
    # position_to_resume is the last field.
    pa_ep_id, _, _, _, _, pa_playbackDate, pa_duration_ms, _, _, pa_position = pa_ep

    # played_duration is approximated by the resume position — TODO confirm
    # this matches AntennaPod's interpretation of played_duration.
    antenna_pod_cur.execute("UPDATE FeedMedia "
                            "SET last_played_time = ?, "
                            "position = ?, "
                            "played_duration = ? "
                            "WHERE feeditem = ?",
                            (pa_playbackDate, pa_position, pa_position,
                             ap_ep[0],))


ap_db, pa_db = get_antenna_pod_and_podcast_addict_backup_path()
print()
print("AntennaPod .db file found:", ap_db)
print("\nAntennaPod .db file found:", ap_db)
print("Podcast Addict .db file found:", pa_db)
print()
print()
print("\n")

podcast_addict_con = None
antenna_pod_con = None
Expand Down