Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

A few major fixes and improvements #3

Open
wants to merge 4 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -127,3 +127,11 @@ dmypy.json

# Pyre type checker
.pyre/

# PyCharm / JetBrains IDEs
/.idea

# pa_to_ap specific stuff
/podcast_addict_extracted
/*.db
/*.backup
29 changes: 20 additions & 9 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,18 @@ Migrate data from Podcast Addict's to AntennaPod's database

This does not use any IDs for matching feeds and episodes from one db to another, as those tend to be very unreliable. (They're supposed to stay the same, but often they don't.) Instead, we match them by their name and, in some cases, other attributes. This will work even if the name changed. For example, when using the script one episode's name changed from something like `123. Great Title` to just `Great Title`, but they were still matched.

## Configuration
There are a few optional configuration settings you can change by editing the variables at the top of `pa_to_ap.py` before running the script.

MATCH_ON_EPISODE_URL_IF_COULD_NOT_FIND_A_MATCH_OTHERWISE = True
* `TRANSFER_DOWNLOADED_EPISODES` controls if existing downloads in Podcast Addict are copied to AntennaPod.
Additional steps are required, see steps below.
* Default: `True` (downloads are transferred.)
* `EPISODES_DIR_PATH` controls the directory path for transferred episodes (to which you have to manually copy/move the files).
* Default: `/storage/emulated/0/Android/data/de.danoeh.antennapod/files/media/from_podcast_addict`
* `MATCH_ON_EPISODE_URL_IF_COULD_NOT_FIND_A_MATCH_OTHERWISE` controls whether, if no name match is found for a given episode, we should try to match on the episode media URL instead.
* Default: `True` (URL match is used as a fallback.)

## Steps

0. Install Python 3.8 or later
Expand All @@ -20,18 +32,17 @@ This does not use any IDs for matching feeds and episodes from one db to another
5. Run the [`pa_to_ap.py`](pa_to_ap.py) script (AntennaPod db file will be **modified**!) in a terminal
6. Confirm that matches are correct (if they aren't you may need to increase `min_similarity`)
7. Copy the modified db file back to your phone
8. Create `/storage/emulated/0/Android/data/de.danoeh.antennapod/files/media/from_podcast_addict`
9. Manually move (or copy) the folders **inside**
`/storage/emulated/0/Android/data/com.bambuna.podcastaddict/files/podcast/`
**to**
`/storage/emulated/0/Android/data/de.danoeh.antennapod/files/media/from_podcast_addict`
as AntennaPod cannot access the files under the other app's directory (Consider making a backup of these files.)
10. Import the modified db in AntennaPod
8. If you chose to enable `TRANSFER_DOWNLOADED_EPISODES` (this is on by default):
1. Create `/storage/emulated/0/Android/data/de.danoeh.antennapod/files/media/from_podcast_addict`
2. Manually move (or copy) the folders **inside**
`/storage/emulated/0/Android/data/com.bambuna.podcastaddict/files/podcast/`
**to**
`/storage/emulated/0/Android/data/de.danoeh.antennapod/files/media/from_podcast_addict`
as AntennaPod cannot access the files under the other app's directory (Consider making a backup of these files.)
9. Import the modified db in AntennaPod

Enjoy!

Of course, you can change the location (to which you have to manually copy/move the files to) by modifying the `EPISODES_DIR_PATH` before running the script.

## Warning
Note that this is somewhat rough and will likely not handle a lot of edge cases.

Expand Down
160 changes: 107 additions & 53 deletions pa_to_ap.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,8 @@
import sys
import zipfile
import sqlite3
from dataclasses import dataclass
import functools
from dataclasses import dataclass, field
from operator import itemgetter
from pathlib import Path
from sqlite3 import Cursor
Expand All @@ -14,9 +15,11 @@

CUR_PATH = Path()

TRANSFER_DOWNLOADED_EPISODES = True
EPISODES_DIR_PATH = '/storage/emulated/0/Android/data/de.danoeh.antennapod/files/media/from_podcast_addict'
MATCH_ON_EPISODE_URL_IF_COULD_NOT_FIND_A_MATCH_OTHERWISE = True

AP_TAG_SEPARATOR = "\u001e" # Separator character for AP tag column blob

@dataclass
class Feed:
Expand All @@ -25,8 +28,32 @@ class Feed:
description: str
author: str
keep_updated: int
folder_name: str = ''
feed_url: str

@dataclass
class PAFeed(Feed):
    # Podcast Addict feed row produced by a LEFT JOIN against tag_relation,
    # so one podcast may arrive as several PAFeed instances (one per tag).
    tag: int  # tag_id of this single JOIN row; may be None for untagged podcasts
    # All tag ids collected after the JOIN rows are merged per podcast.
    # init=False keeps this out of __init__ so positional row unpacking still works.
    tags: list[int] = field(default_factory=list, init=False)  # For merged PAFeed rows this will contain all tags
    # Download folder name under Podcast Addict's media directory.
    folder_name: str

    def tag_names(self, pa_tags: dict[int, str]) -> list[str]:
        """Resolve this feed's tag ids to their display names via *pa_tags*."""
        return [pa_tags[x] for x in self.tags]

@dataclass
class APFeed(Feed):
    # Raw value of AntennaPod's Feeds.tags column: tag names joined with
    # AP_TAG_SEPARATOR, or None when the feed has no tags.
    _tags: str

    @property
    def tags_str(self) -> str:
        """Serialized tag string exactly as stored in the AP database column."""
        return self._tags

    @property
    def tags(self) -> list[str]:
        """Tag names as a list; empty when the column value is NULL."""
        return self._tags.split(AP_TAG_SEPARATOR) if self._tags is not None else list()

    @tags.setter
    def tags(self, value: list[str]):
        # Re-serialize the list back into the single-column separated format.
        self._tags = AP_TAG_SEPARATOR.join(value)

def error(msg):
print("ERROR:", msg)
Expand Down Expand Up @@ -73,46 +100,49 @@ def get_antenna_pod_and_podcast_addict_backup_path():


def transfer(podcast_addict_cur: Cursor, antenna_pod_cur: Cursor):
# first find match for all feeds in pa
pa_feeds = [Feed(*a) for a in podcast_addict_cur.execute(
'select _id, name, description, author, '
'automaticRefresh, folderName from podcasts '
'where subscribed_status = 1 and is_virtual = 0')]
# first find match for all feeds in pa, left join on tags relation table (so there may be multiple rows for each podcast)
pa_feeds_one_to_many_tags = [PAFeed(*a) for a in podcast_addict_cur.execute(
'SELECT podcasts._id, podcasts.name, description, author, '
'automaticRefresh, feed_url, tag_relation.tag_id, folderName FROM podcasts '
'LEFT JOIN tag_relation ON tag_relation.podcast_id = podcasts._id '
'WHERE subscribed_status = 1 AND is_virtual = 0 AND initialized_status = 1')]

# Collate multiple JOIN rows for each podcast if they had multiple tags
def reduce_by_tag(feeds: dict[str, PAFeed], current_feed: PAFeed):
if current_feed.id not in feeds:
if current_feed.tag is not None:
current_feed.tags.append(current_feed.tag)
feeds[current_feed.id] = current_feed
elif current_feed.tag is not None:
existing_feed: PAFeed = feeds[current_feed.id]
existing_feed.tags.append(current_feed.tag)
return feeds

pa_feeds_dict = functools.reduce(reduce_by_tag, pa_feeds_one_to_many_tags, dict())
pa_feeds = pa_feeds_dict.values()

pa_tags: dict[int, str] = dict(podcast_addict_cur.execute('SELECT _id, name FROM tags'))

print("# Podcast addict feeds:")
for feed in pa_feeds:
print(feed.name)
print()
print()

ap_feeds = [Feed(*a) for a in antenna_pod_cur.execute(
'select id, title, description, author, keep_updated from Feeds '
'where downloaded = 1')]
print("\n\n")

feed_attr_to_weight = { #
(lambda f: f.name): 0.85, #
(lambda f: f.author): 0.15, #
}
matcher = ObjectListMatcher(feed_attr_to_weight)

# should never be larger than the largest weight (otherwise is
# slightly unpredictable, as not every weight will be evaluated)
# value in range [0, 1]
matcher.minimum_similarity = 0.78
ap_feeds = {a[5]: APFeed(*a) for a in antenna_pod_cur.execute(
'select id, title, description, author, keep_updated, download_url, tags from Feeds '
)}

pa_to_ap = []

ap_indices = matcher.get_indices(pa_feeds, ap_feeds)
for n, pa in enumerate(pa_feeds):
ap_idx = ap_indices[n]

ap_name = '!!! NO MATCH !!!'
if ap_idx >= 0:
ap = ap_feeds[ap_idx]
pa_name = pa.name if pa.name else pa.feed_url
if pa.feed_url in ap_feeds:
ap = ap_feeds[pa.feed_url]
ap_name = ap.name
pa_to_ap.append((pa, ap))

print(pa.name, ap_name, sep=" -> ")
print(pa_name, ap_name, sep=" -> ")
print()

if not confirmed("Is this correct? Can we continue?"):
Expand All @@ -125,13 +155,13 @@ def transfer(podcast_addict_cur: Cursor, antenna_pod_cur: Cursor):
# # FIXME: make it work if premium and non-premium share same name
# if ap.name == "Name of same podcast but premium version":
# transfer_from_feed_to_feed(podcast_addict_cur,
# antenna_pod_cur, pa, ap)
# antenna_pod_cur, pa, ap, pa_tags)
# break
# break


for pa, ap in pa_to_ap:
transfer_from_feed_to_feed(podcast_addict_cur, antenna_pod_cur, pa, ap)
transfer_from_feed_to_feed(podcast_addict_cur, antenna_pod_cur, pa, ap, pa_tags)
print() # break


Expand All @@ -142,8 +172,9 @@ def transfer(podcast_addict_cur: Cursor, antenna_pod_cur: Cursor):

def transfer_from_feed_to_feed(podcast_addict_cur: Cursor, #
antenna_pod_cur: Cursor, #
pa: Feed, #
ap: Feed):
pa: PAFeed, #
ap: APFeed,
pa_tags: dict[int, str]):
print(f'# Feed: {ap.name}')
antenna_pod_cur.execute("UPDATE Feeds "
"SET keep_updated = ? "
Expand All @@ -153,27 +184,32 @@ def transfer_from_feed_to_feed(podcast_addict_cur: Cursor, #
pa_episodes = list(podcast_addict_cur.execute( #
# 0 1 n2 n3 n4
'select _id, name, seen_status, favorite, local_file_name, '
# n5 n6 n7 n8
'playbackDate, duration_ms, chapters_extracted, download_url '
# n5 n6 n7 n8 n9
'playbackDate, duration_ms, chapters_extracted, download_url, position_to_resume '
'from episodes where podcast_id = ? and '
'(seen_status = 1 or '
'(seen_status = 1 or position_to_resume < 0 or '
'(local_file_name != "" and local_file_name IS NOT NULL))',
(pa.id,)))

ap_episodes = list(antenna_pod_cur.execute( #
'select fi.id, fi.title, fm.download_url '
'from FeedItems fi '
'LEFT JOIN FeedMedia fm ON fi.id = fm.feeditem '
'where fi.feed = ? and fi.read = 0 ', (ap.id,)))
'where fi.feed = ? and fi.read = 0 '
, (ap.id,)))

print()
combinations = len(pa_episodes) * len(ap_episodes)
print(f"Rough estimate: {combinations / 4000:.2f} seconds")
print()
print()
print(f"\nRough estimate: {combinations / 4000:.2f} seconds\n\n")
pa_indices = ITEM_MATCHER.get_indices(ap_episodes, pa_episodes)
seen_match_count = 0

# Transfer tags, merge any existing tags with PA tags
ap.tags = list(set(ap.tags).union(pa.tag_names(pa_tags)))
antenna_pod_cur.execute("UPDATE Feeds "
"SET tags = ? "
"WHERE id = ?", #
(ap.tags_str, ap.id,))


for ap_ep, pa_idx in zip(ap_episodes, pa_indices):
if pa_idx < 0:
Expand All @@ -184,13 +220,15 @@ def transfer_from_feed_to_feed(podcast_addict_cur: Cursor, #
if MATCH_ON_EPISODE_URL_IF_COULD_NOT_FIND_A_MATCH_OTHERWISE and ap_url is not None:
ap_url = ap_url.strip()
if len(ap_url) > 9:
for pa_idx, pa_ep in enumerate(pa_episodes):
for pa_idx_urlmatch, pa_ep in enumerate(pa_episodes):
if not pa_ep[8]:
continue

pa_url = pa_ep[8].strip()
if pa_url and pa_url == ap_url:
print(f"! Fallback to URL match for: {ap_ep[1]}")
found = True
pa_idx = pa_idx_urlmatch
break

if not found:
Expand All @@ -202,13 +240,17 @@ def transfer_from_feed_to_feed(podcast_addict_cur: Cursor, #
if pa_ep[2]:
transfer_from_seen_ep_to_ep(antenna_pod_cur, podcast_addict_cur, #
pa_ep, ap_ep)
else:
transfer_progress_ep_to_ep(antenna_pod_cur, podcast_addict_cur, #
pa_ep, ap_ep)


if pa_ep[3]:
antenna_pod_cur.execute(
"INSERT INTO Favorites (feeditem, feed) VALUES "
"(?, ?)", (ap_ep[0], ap.id))

if pa_ep[4]:
if pa_ep[4] and TRANSFER_DOWNLOADED_EPISODES:
transfer_from_dld_ep_to_ep(antenna_pod_cur, podcast_addict_cur, #
pa_ep, ap_ep, pa.folder_name)

Expand All @@ -226,18 +268,17 @@ def transfer_chapters(antenna_pod_cur: Cursor, #
for title, start in podcast_addict_cur.execute( #
"select name, start from chapters "
"where podcastId = ? and episodeId = ?", (pa_feed_id, pa_ep[0])):
# we use chapter type 2 (id3) simply because it seems most likely
antenna_pod_cur.execute("INSERT INTO SimpleChapters "
"(title, start, feeditem, type) VALUES "
"(?, ?, ?, 2)", (title, start, ap_ep[0],))
"(title, start, feeditem) VALUES "
"(?, ?, ?)", (title, start, ap_ep[0]))


def transfer_from_dld_ep_to_ep(antenna_pod_cur: Cursor, #
podcast_addict_cur: Cursor, #
pa_ep: tuple, #
ap_ep: tuple, #
pa_folder_name: str):
pa_ep_id, _, _, _, pa_local_file_name, _, _, _, _ = pa_ep
pa_ep_id, _, _, _, pa_local_file_name, _, _, _, _, _ = pa_ep

dir_path = EPISODES_DIR_PATH.rstrip("/") + "/" + pa_folder_name
file_path = dir_path + "/" + pa_local_file_name
Expand All @@ -252,8 +293,8 @@ def transfer_from_seen_ep_to_ep(antenna_pod_cur: Cursor, #
podcast_addict_cur: Cursor, #
pa_ep: tuple, #
ap_ep: tuple):
print(ap_ep[1], " <<matched to>> ", pa_ep[1])
pa_ep_id, _, _, _, _, pa_playbackDate, pa_duration_ms, _, _ = pa_ep
print(ap_ep[1], " <<matched to seen>> ", pa_ep[1])
pa_ep_id, _, _, _, _, pa_playbackDate, pa_duration_ms, _, _, _ = pa_ep
antenna_pod_cur.execute("UPDATE FeedItems SET read = 1 WHERE id = ?",
(ap_ep[0],))

Expand All @@ -265,13 +306,26 @@ def transfer_from_seen_ep_to_ep(antenna_pod_cur: Cursor, #
(pa_playbackDate, pa_playbackDate, pa_duration_ms,
ap_ep[0],))

def transfer_progress_ep_to_ep(antenna_pod_cur: Cursor,
                               podcast_addict_cur: Cursor,
                               pa_ep: tuple,
                               ap_ep: tuple):
    """Copy the playback position of an in-progress (unfinished) episode.

    Writes the Podcast Addict resume position and last-played timestamp into
    the matched AntennaPod FeedMedia row.

    NOTE(review): podcast_addict_cur is unused here; presumably kept for
    signature symmetry with the other transfer_* helpers — confirm.
    """
    print(ap_ep[1], " <<matched to in-progress>> ", pa_ep[1])
    # Tuple layout follows the episode SELECT in transfer_from_feed_to_feed;
    # position_to_resume is the last field.
    pa_ep_id, _, _, _, _, pa_playbackDate, pa_duration_ms, _, _, pa_position = pa_ep

    # played_duration is approximated by the resume position — TODO confirm
    # this matches AntennaPod's interpretation of played_duration.
    antenna_pod_cur.execute("UPDATE FeedMedia "
                            "SET last_played_time = ?, "
                            "position = ?, "
                            "played_duration = ? "
                            "WHERE feeditem = ?",
                            (pa_playbackDate, pa_position, pa_position,
                             ap_ep[0],))


ap_db, pa_db = get_antenna_pod_and_podcast_addict_backup_path()
print()
print("AntennaPod .db file found:", ap_db)
print("\nAntennaPod .db file found:", ap_db)
print("Podcast Addict .db file found:", pa_db)
print()
print()
print("\n")

podcast_addict_con = None
antenna_pod_con = None
Expand Down