Skip to content

Commit

Permalink
Allow selection of JSON files & updated error message for wrong files
Browse files Browse the repository at this point in the history
  • Loading branch information
vloothuis committed Oct 8, 2023
1 parent f909931 commit 9cf778c
Show file tree
Hide file tree
Showing 6 changed files with 291 additions and 187 deletions.
Binary file modified public/port-0.0.0-py3-none-any.whl
Binary file not shown.
Binary file modified src/framework/processing/py/dist/port-0.0.0-py3-none-any.whl
Binary file not shown.
265 changes: 132 additions & 133 deletions src/framework/processing/py/poetry.lock

Large diffs are not rendered by default.

32 changes: 22 additions & 10 deletions src/framework/processing/py/port/script.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,7 @@ def load_tiktok_data(json_file):
return data


def get_json_data(zip_file):
def get_json_data_from_zip(zip_file):
with zipfile.ZipFile(zip_file, "r") as zip:
for name in zip.namelist():
if not name.endswith(".json"):
Expand All @@ -142,6 +142,14 @@ def get_json_data(zip_file):
return [load_tiktok_data(json_file)]
return []

def get_json_data_from_file(file_):
    """Load TikTok data from an export that is plain JSON or a zip archive.

    The file is first read as a JSON text document. If it cannot be decoded
    as text or parsed as JSON, it is handed to ``get_json_data_from_zip``.

    Args:
        file_: Location of the selected export; anything ``open()`` accepts.

    Returns:
        A one-element list with the result of ``load_tiktok_data`` when the
        file is read as JSON, otherwise whatever ``get_json_data_from_zip``
        returns for it.
    """
    # TikTok exports can be a single JSON file or a zipped JSON file
    try:
        # Decode explicitly as UTF-8 instead of the platform's locale
        # encoding, so non-ASCII content is read the same way everywhere.
        with open(file_, encoding="utf-8") as f:
            return [load_tiktok_data(f)]
    except (json.decoder.JSONDecodeError, UnicodeDecodeError):
        # Not readable as JSON text: fall back to treating it as a zip.
        return get_json_data_from_zip(file_)


def filtered_count(data, *key_path):
items = get_list(data, *key_path)
Expand Down Expand Up @@ -400,8 +408,7 @@ def extract_tiktok_data(zip_file):
extract_comment_activity,
extract_videos_liked,
]
for data in get_json_data(zip_file):
print(repr(data))
for data in get_json_data_from_file(zip_file):
return [
table
for table in (extractor(data) for extractor in extractors)
Expand All @@ -413,9 +420,11 @@ def extract_tiktok_data(zip_file):
# Data donation flow #
######################


ExtractionResult = namedtuple("ExtractionResult", ["id", "title", "data_frame"])

class InvalidFileError(Exception):
    """Indicates that the file does not match expectations.

    The donation flow's ``process`` loop catches this error, logs it and
    asks the participant whether to retry the file selection.
    """


class SkipToNextStep(Exception):
pass
Expand All @@ -431,20 +440,23 @@ def __init__(self, platform, mime_types, extractor, session_id):
self.meta_data = []

def process(self):
print("START")
with suppress(SkipToNextStep):
while True:
file_result = yield from self.prompt_file()

self.log(f"extracting file")
try:
print(file_result)
extraction_result = self.extract_data(file_result.value)
except IOError as e:
print("IOERROR")
self.log(f"prompt confirmation to retry file selection")
yield from self.prompt_retry()
return
except InvalidFileError:
self.log(f"invalid file detected, prompting for retry")
if (yield from self.prompt_retry()):
continue
else:
return
else:
if extraction_result is None:
try_again = yield from self.prompt_retry()
Expand Down Expand Up @@ -520,7 +532,7 @@ def __call__(self, session_id):


tik_tok_data_donation = DataDonation(
"TikTok", "application/zip, text/plain", extract_tiktok_data
"TikTok", "application/zip, text/plain, application/json", extract_tiktok_data
)


Expand All @@ -547,8 +559,8 @@ def render_donation_page(platform, body, progress):
def retry_confirmation(platform):
text = props.Translatable(
{
"en": f"Unfortunately, we cannot process your {platform} file. Continue, if you are sure that you selected the right file. Try again to select a different file.",
"nl": f"Helaas, kunnen we uw {platform} bestand niet verwerken. Weet u zeker dat u het juiste bestand heeft gekozen? Ga dan verder. Probeer opnieuw als u een ander bestand wilt kiezen.",
"en": "Unfortunately, we cannot process your data. Please make sure that you selected JSON as a file format when downloading your data from TikTok.",
"nl": "Helaas kunnen we uw gegevens niet verwerken. Zorg ervoor dat u JSON heeft geselecteerd als bestandsformaat bij het downloaden van uw gegevens van TikTok.",
}
)
ok = props.Translatable({"en": "Try again", "nl": "Probeer opnieuw"})
Expand Down
4 changes: 2 additions & 2 deletions src/framework/processing/py/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,8 @@ description = "Port package with Data Donation logic"
authors = ["Emiel van der Veen <[email protected]>"]

[tool.poetry.dependencies]
python = "^3.7"
panda = "^0.3.1"
python = "3.10.2"
pandas = "^2.1.1"

[tool.poetry.group.test.dependencies]
pytest = "^7.4.2"
Expand Down
177 changes: 135 additions & 42 deletions src/framework/processing/py/tests/tiktok_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
import json
import zipfile
import io
import tempfile
from pathlib import Path
from dataclasses import dataclass
from inspect import cleandoc
Expand All @@ -19,6 +20,92 @@
from port import script


# Fixture: a parsed export dictionary with Profile, Direct Messages,
# Activity, Video and Comment sections, used by the end-to-end tests.
complete_contents = {
    # Account profile
    "Profile": {
        "Profile Information": {
            "ProfileMap": {"userName": "jane_doe", "likesReceived": "77"}
        }
    },
    # One chat thread with four messages
    "Direct Messages": {
        "Chat History": {
            "ChatHistory": {
                "Chat History with john_doe:": [
                    {
                        "Date": "2023-01-08 17:38:59",
                        "From": "john_doe",
                        "Content": "https://www.tiktokv.com/share/video/7167866677751860486/",
                    },
                    {
                        "Date": "2023-01-08 17:38:59",
                        "From": "jane_doe",
                        "Content": "👍",
                    },
                    {
                        "Date": "2023-01-08 18:12:45",
                        "From": "john_doe",
                        "Content": "cool",
                    },
                    {
                        "Date": "2023-01-08 18:12:55",
                        "From": "john_doe",
                        "Content": "https://www.tiktokv.com/share/video/7175594838077787434/",
                    },
                ]
            }
        }
    },
    # Followers, following, likes and browsing history
    "Activity": {
        "Follower List": {
            "FansList": [
                {"Date": "2023-01-14 18:01:16"},
            ]
        },
        "Following List": {
            "Following": [
                {"Date": "2023-01-14 18:01:16"},
                {"Date": "2023-01-14 18:02:16"},
            ]
        },
        "Like List": {
            "ItemFavoriteList": [
                {"Date": "2023-01-14 18:01:16"},
                {"Date": "2023-01-14 18:02:16"},
            ]
        },
        "Video Browsing History": {
            "VideoList": [
                {"Date": "2023-01-14 18:01:16"},
                {"Date": "2023-01-14 18:02:16"},
                {"Date": "2023-01-14 18:03:16"},
                {"Date": "2023-01-14 18:04:16"},
            ]
        },
    },
    # Videos posted by the account
    "Video": {
        "Videos": {
            "VideoList": [
                {"Likes": "1", "Date": "2023-01-14 18:01:16"},
                {"Likes": "1", "Date": "2023-01-14 18:02:16"},
                {"Likes": "1", "Date": "2023-01-14 18:03:16"},
            ]
        }
    },
    # Comments written by the account
    "Comment": {
        "Comments": {
            "CommentList": [
                {"Date": "2023-01-14 18:01:16"},
                {"Date": "2023-01-14 18:02:16"},
                {"Date": "2023-01-14 18:03:16"},
            ]
        }
    },
}

def get_test_file(name):
return str(Path(__file__).parent.joinpath(name))

Expand Down Expand Up @@ -96,20 +183,8 @@ def test_wrong_file_type_is_handled():
"ok": {"translations": {"en": "Try again", "nl": "Probeer opnieuw"}},
"text": {
"translations": {
"en": "Unfortunately, we cannot "
"process your TikTok file. "
"Continue, if you are sure "
"that you selected the "
"right file. Try again to "
"select a different file.",
"nl": "Helaas, kunnen we uw "
"TikTok bestand niet "
"verwerken. Weet u zeker "
"dat u het juiste bestand "
"heeft gekozen? Ga dan "
"verder. Probeer opnieuw "
"als u een ander bestand "
"wilt kiezen.",
"en": "Unfortunately, we cannot process your data. Please make sure that you selected JSON as a file format when downloading your data from TikTok.",
"nl": "Helaas kunnen we uw gegevens niet verwerken. Zorg ervoor dat u JSON heeft geselecteerd als bestandsformaat bij het downloaden van uw gegevens van TikTok.",
}
},
},
Expand Down Expand Up @@ -460,12 +535,13 @@ def test_direct_messages_table():
assert "Direct Message Activity" == result.title.translations["en"]

reference = """
Anonymous ID Sent
0 2 2023-01-08 17:38:59
1 1 2023-01-08 17:38:59
2 2 2023-01-08 18:12:45
3 2 2023-01-08 18:12:55
Anonymous ID Sent
0 2 2023-01-08 17:38
1 1 2023-01-08 17:38
2 2 2023-01-08 18:12
3 2 2023-01-08 18:12
"""
print(result.data_frame)
assert_frame_str_equal(reference, result.data_frame)


Expand All @@ -488,12 +564,12 @@ def test_comment_activity_table():
assert "Comment Activity" == result.title.translations["en"]

reference = """
Posted on
0 2023-03-26 15:40:06
1 2023-03-18 12:52:35
2 2023-03-11 15:06:35
3 2023-03-11 15:05:52
4 2023-03-03 14:22:03
Posted on
0 2023-03-26 15:40
1 2023-03-18 12:52
2 2023-03-11 15:06
3 2023-03-11 15:05
4 2023-03-03 14:22
"""
assert_frame_str_equal(reference, result.data_frame)

Expand Down Expand Up @@ -525,45 +601,62 @@ def test_videos_liked_table():
}
result = script.extract_videos_liked(data)
assert "tiktok_videos_liked" == result.id
assert "Comment Activity" == result.title.translations["en"]
assert "Videos liked" == result.title.translations["en"]

reference = """
Liked Link
0 2023-03-26 15:39:28 https://www.tiktokv.com/share/video/7199666315...
1 2023-03-18 12:53:14 https://www.tiktokv.com/share/video/7209355519...
2 2023-03-18 12:53:11 https://www.tiktokv.com/share/video/7209700824...
3 2023-03-11 15:06:37 https://www.tiktokv.com/share/video/7191669641...
Liked Link
0 2023-03-26 15:39 https://www.tiktokv.com/share/video/7199666315...
1 2023-03-18 12:53 https://www.tiktokv.com/share/video/7209355519...
2 2023-03-18 12:53 https://www.tiktokv.com/share/video/7209700824...
3 2023-03-11 15:06 https://www.tiktokv.com/share/video/7191669641...
"""
print(result.data_frame)
assert_frame_str_equal(reference, result.data_frame)


def test_timezone_to_uk():
assert False


def test_get_json_data_with_invalid_json():
def test_get_json_data_from_zip_with_invalid_json():
f = make_zip({"test.json": "testing"})
assert [] == script.get_json_data(f)
assert [] == script.get_json_data_from_zip(f)


def test_get_json_data_with_non_tiktok_json():
def test_get_json_data_from_zip_with_non_tiktok_json():
f = make_zip({"test.json": "{}"})
assert [] == script.get_json_data(f)
assert [] == script.get_json_data_from_zip(f)


def test_get_json_data_with_valid_tiktok_json():
def test_get_json_data_from_zip_with_valid_tiktok_json():
tiktok_data = {
"Profile": {"Profile Information": {"ProfileMap": {"userName": "test"}}}
}
f = make_zip({"test.json": json.dumps(tiktok_data)})
assert [tiktok_data] == script.get_json_data(f)
assert [tiktok_data] == script.get_json_data_from_zip(f)


def test_extract_tiktok_data_works_with_zip_files():
    """extract_tiktok_data returns several tables for a zipped export on disk."""
    # Use a temporary directory instead of reopening a still-open
    # NamedTemporaryFile by name, which is not possible on every platform.
    with tempfile.TemporaryDirectory() as tmp_dir:
        zip_path = Path(tmp_dir) / "export.zip"
        with open(zip_path, "wb") as f:
            make_zip({"test.json": json.dumps(complete_contents)}, f)
        # The archive is fully written and closed before it is read back.
        result = script.extract_tiktok_data(str(zip_path))
        assert len(result) > 1

def test_extract_tiktok_data_works_with_json_files():
    """extract_tiktok_data returns several tables for a plain JSON export."""
    # Use a temporary directory instead of reopening a still-open
    # NamedTemporaryFile by name, which is not possible on every platform.
    with tempfile.TemporaryDirectory() as tmp_dir:
        json_path = Path(tmp_dir) / "export.json"
        json_path.write_text(json.dumps(complete_contents), encoding="utf-8")
        result = script.extract_tiktok_data(str(json_path))
        assert len(result) > 1


def make_zip(contents, out=None):
    """Write ``contents`` into a zip archive and return the rewound stream.

    Args:
        contents: Mapping of archive member name to the data stored under it.
        out: Optional writable, seekable binary file object that receives
            the archive. A fresh ``io.BytesIO`` is used when omitted.

    Returns:
        The stream the archive was written to, positioned at offset 0.
    """
    if out is None:
        out = io.BytesIO()
    # The context manager finalizes the archive even if writestr raises.
    with zipfile.ZipFile(out, mode="w") as archive:
        for filename, data in contents.items():
            archive.writestr(filename, data)
    # Rewind so callers can read the archive straight away.
    out.seek(0)
    return out

0 comments on commit 9cf778c

Please sign in to comment.