Allow selection of JSON files & updated error message for wrong files

eyra · Oct 8, 2023 · 9cf778c · 9cf778c
1 parent f909931
commit 9cf778c
Show file tree

Hide file tree

Showing 6 changed files with 291 additions and 187 deletions.
diff --git a/public/port-0.0.0-py3-none-any.whl b/public/port-0.0.0-py3-none-any.whl
diff --git a/src/framework/processing/py/dist/port-0.0.0-py3-none-any.whl b/src/framework/processing/py/dist/port-0.0.0-py3-none-any.whl
diff --git a/src/framework/processing/py/poetry.lock b/src/framework/processing/py/poetry.lock
diff --git a/src/framework/processing/py/port/script.py b/src/framework/processing/py/port/script.py
@@ -132,7 +132,7 @@ def load_tiktok_data(json_file):
     return data
 
 
-def get_json_data(zip_file):
+def get_json_data_from_zip(zip_file):
     with zipfile.ZipFile(zip_file, "r") as zip:
         for name in zip.namelist():
             if not name.endswith(".json"):
@@ -142,6 +142,14 @@ def get_json_data(zip_file):
                     return [load_tiktok_data(json_file)]
     return []
 
+def get_json_data_from_file(file_):
+    """Load TikTok data from a plain JSON export or from a zipped export.
+
+    The file is first read as JSON text. If decoding the text or the JSON
+    fails, the same path is handed to ``get_json_data_from_zip`` instead.
+
+    Returns a one-element list holding the loaded data for a JSON file,
+    otherwise whatever ``get_json_data_from_zip`` returns.
+    """
+    # TikTok exports can be a single JSON file or a zipped JSON file
+    try:
+        # Decode explicitly as UTF-8: with the locale default (e.g. cp1252)
+        # a valid export containing emoji can raise UnicodeDecodeError and
+        # would then wrongly be treated as a zip archive.
+        with open(file_, encoding="utf-8") as f:
+            return [load_tiktok_data(f)]
+    except (json.decoder.JSONDecodeError, UnicodeDecodeError):
+        # NOTE(review): a file that is neither JSON nor a zip archive
+        # presumably raises zipfile.BadZipFile here, which the IOError and
+        # InvalidFileError handlers in ``process`` do not cover -- confirm
+        # that it is translated somewhere before reaching the retry prompt.
+        return get_json_data_from_zip(file_)
+
 
 def filtered_count(data, *key_path):
     items = get_list(data, *key_path)
@@ -400,8 +408,7 @@ def extract_tiktok_data(zip_file):
         extract_comment_activity,
         extract_videos_liked,
     ]
-    for data in get_json_data(zip_file):
-        print(repr(data))
+    for data in get_json_data_from_file(zip_file):
         return [
             table
             for table in (extractor(data) for extractor in extractors)
@@ -413,9 +420,11 @@ def extract_tiktok_data(zip_file):
 # Data donation flow #
 ######################
 
-
 ExtractionResult = namedtuple("ExtractionResult", ["id", "title", "data_frame"])
 
+class InvalidFileError(Exception):
+    """Indicates that the file does not match expectations.
+
+    ``process`` catches this error and asks the user whether to retry file
+    selection; it restarts the prompt on confirmation and stops otherwise.
+    """
+
 
 class SkipToNextStep(Exception):
     pass
@@ -431,20 +440,23 @@ def __init__(self, platform, mime_types, extractor, session_id):
         self.meta_data = []
 
     def process(self):
-        print("START")
         with suppress(SkipToNextStep):
             while True:
                 file_result = yield from self.prompt_file()
 
                 self.log(f"extracting file")
                 try:
-                    print(file_result)
                     extraction_result = self.extract_data(file_result.value)
                 except IOError as e:
-                    print("IOERROR")
                     self.log(f"prompt confirmation to retry file selection")
                     yield from self.prompt_retry()
                     return
+                except InvalidFileError:
+                    self.log(f"invalid file detected, prompting for retry")
+                    if (yield from self.prompt_retry()):
+                        continue
+                    else:
+                        return
                 else:
                     if extraction_result is None:
                         try_again = yield from self.prompt_retry()
@@ -520,7 +532,7 @@ def __call__(self, session_id):
 
 
 tik_tok_data_donation = DataDonation(
-    "TikTok", "application/zip, text/plain", extract_tiktok_data
+    "TikTok", "application/zip, text/plain, application/json", extract_tiktok_data
 )
 
 
@@ -547,8 +559,8 @@ def render_donation_page(platform, body, progress):
 def retry_confirmation(platform):
     text = props.Translatable(
         {
-            "en": f"Unfortunately, we cannot process your {platform} file. Continue, if you are sure that you selected the right file. Try again to select a different file.",
-            "nl": f"Helaas, kunnen we uw {platform} bestand niet verwerken. Weet u zeker dat u het juiste bestand heeft gekozen? Ga dan verder. Probeer opnieuw als u een ander bestand wilt kiezen.",
+            "en": "Unfortunately, we cannot process your data. Please make sure that you selected JSON as a file format when downloading your data from TikTok.",
+            "nl": "Helaas kunnen we uw gegevens niet verwerken. Zorg ervoor dat u JSON heeft geselecteerd als bestandsformaat bij het downloaden van uw gegevens van TikTok.",
         }
     )
     ok = props.Translatable({"en": "Try again", "nl": "Probeer opnieuw"})

diff --git a/src/framework/processing/py/pyproject.toml b/src/framework/processing/py/pyproject.toml
@@ -5,8 +5,8 @@ description = "Port package with Data Donation logic"
 authors = ["Emiel van der Veen <[email protected]>"]
 
 [tool.poetry.dependencies]
-python = "^3.7"
-panda = "^0.3.1"
+python = "3.10.2"
+pandas = "^2.1.1"
 
 [tool.poetry.group.test.dependencies]
 pytest = "^7.4.2"

diff --git a/src/framework/processing/py/tests/tiktok_test.py b/src/framework/processing/py/tests/tiktok_test.py
@@ -10,6 +10,7 @@
 import json
 import zipfile
 import io
+import tempfile
 from pathlib import Path
 from dataclasses import dataclass
 from inspect import cleandoc
@@ -19,6 +20,92 @@
 from port import script
 
 
+complete_contents = {
+    "Profile": {
+            "Profile Information": {
+                "ProfileMap": {"userName": "jane_doe", "likesReceived": "77"}
+            }
+        },
+        "Direct Messages": {
+            "Chat History": {
+                "ChatHistory": {
+                    "Chat History with john_doe:": [
+                        {
+                            "Date": "2023-01-08 17:38:59",
+                            "From": "john_doe",
+                            "Content": "https://www.tiktokv.com/share/video/7167866677751860486/",
+                        },
+                        {
+                            "Date": "2023-01-08 17:38:59",
+                            "From": "jane_doe",
+                            "Content": "👍",
+                        },
+                        {
+                            "Date": "2023-01-08 18:12:45",
+                            "From": "john_doe",
+                            "Content": "cool",
+                        },
+                        {
+                            "Date": "2023-01-08 18:12:55",
+                            "From": "john_doe",
+                            "Content": "https://www.tiktokv.com/share/video/7175594838077787434/",
+                        },
+                    ]
+                }
+            }
+        },
+        "Activity": {
+            "Follower List": {"FansList": [{"Date": "2023-01-14 18:01:16"}]},
+            "Following List": {
+                "Following": [
+                    {"Date": "2023-01-14 18:01:16"},
+                    {"Date": "2023-01-14 18:02:16"},
+                ]
+            },
+            "Like List": {
+                "ItemFavoriteList": [
+                    {"Date": "2023-01-14 18:01:16"},
+                    {"Date": "2023-01-14 18:02:16"},
+                ]
+            },
+            "Video Browsing History": {
+                "VideoList": [
+                    {"Date": "2023-01-14 18:01:16"},
+                    {"Date": "2023-01-14 18:02:16"},
+                    {"Date": "2023-01-14 18:03:16"},
+                    {"Date": "2023-01-14 18:04:16"},
+                ]
+            },
+        },
+        "Video": {
+            "Videos": {
+                "VideoList": [
+                    {
+                        "Likes": "1",
+
+                        "Date": "2023-01-14 18:01:16"},
+                    {
+                        "Likes": "1",
+
+                        "Date": "2023-01-14 18:02:16"},
+                    {
+                        "Likes": "1",
+
+                        "Date": "2023-01-14 18:03:16"},
+                ]
+            }
+        },
+        "Comment": {
+            "Comments": {
+                "CommentList": [
+                    {"Date": "2023-01-14 18:01:16"},
+                    {"Date": "2023-01-14 18:02:16"},
+                    {"Date": "2023-01-14 18:03:16"},
+                ]
+            }
+        },
+}
+
 def get_test_file(name):
     return str(Path(__file__).parent.joinpath(name))
 
@@ -96,20 +183,8 @@ def test_wrong_file_type_is_handled():
                 "ok": {"translations": {"en": "Try again", "nl": "Probeer opnieuw"}},
                 "text": {
                     "translations": {
-                        "en": "Unfortunately, we cannot "
-                        "process your TikTok file. "
-                        "Continue, if you are sure "
-                        "that you selected the "
-                        "right file. Try again to "
-                        "select a different file.",
-                        "nl": "Helaas, kunnen we uw "
-                        "TikTok bestand niet "
-                        "verwerken. Weet u zeker "
-                        "dat u het juiste bestand "
-                        "heeft gekozen? Ga dan "
-                        "verder. Probeer opnieuw "
-                        "als u een ander bestand "
-                        "wilt kiezen.",
+            "en": "Unfortunately, we cannot process your data. Please make sure that you selected JSON as a file format when downloading your data from TikTok.",
+            "nl": "Helaas kunnen we uw gegevens niet verwerken. Zorg ervoor dat u JSON heeft geselecteerd als bestandsformaat bij het downloaden van uw gegevens van TikTok.",
                     }
                 },
             },
@@ -460,12 +535,13 @@ def test_direct_messages_table():
     assert "Direct Message Activity" == result.title.translations["en"]
 
     reference = """
-       Anonymous ID                Sent
-    0             2 2023-01-08 17:38:59
-    1             1 2023-01-08 17:38:59
-    2             2 2023-01-08 18:12:45
-    3             2 2023-01-08 18:12:55
+       Anonymous ID              Sent
+    0             2  2023-01-08 17:38
+    1             1  2023-01-08 17:38
+    2             2  2023-01-08 18:12
+    3             2  2023-01-08 18:12
     """
+    print(result.data_frame)
     assert_frame_str_equal(reference, result.data_frame)
 
 
@@ -488,12 +564,12 @@ def test_comment_activity_table():
     assert "Comment Activity" == result.title.translations["en"]
 
     reference = """
-                Posted on
-    0 2023-03-26 15:40:06
-    1 2023-03-18 12:52:35
-    2 2023-03-11 15:06:35
-    3 2023-03-11 15:05:52
-    4 2023-03-03 14:22:03
+              Posted on
+    0  2023-03-26 15:40
+    1  2023-03-18 12:52
+    2  2023-03-11 15:06
+    3  2023-03-11 15:05
+    4  2023-03-03 14:22
     """
     assert_frame_str_equal(reference, result.data_frame)
 
@@ -525,45 +601,62 @@ def test_videos_liked_table():
     }
     result = script.extract_videos_liked(data)
     assert "tiktok_videos_liked" == result.id
-    assert "Comment Activity" == result.title.translations["en"]
+    assert "Videos liked" == result.title.translations["en"]
 
     reference = """
-                    Liked                                               Link
-    0 2023-03-26 15:39:28  https://www.tiktokv.com/share/video/7199666315...
-    1 2023-03-18 12:53:14  https://www.tiktokv.com/share/video/7209355519...
-    2 2023-03-18 12:53:11  https://www.tiktokv.com/share/video/7209700824...
-    3 2023-03-11 15:06:37  https://www.tiktokv.com/share/video/7191669641...
+                  Liked                                               Link
+    0  2023-03-26 15:39  https://www.tiktokv.com/share/video/7199666315...
+    1  2023-03-18 12:53  https://www.tiktokv.com/share/video/7209355519...
+    2  2023-03-18 12:53  https://www.tiktokv.com/share/video/7209700824...
+    3  2023-03-11 15:06  https://www.tiktokv.com/share/video/7191669641...
     """
+    print(result.data_frame)
     assert_frame_str_equal(reference, result.data_frame)
 
 
 def test_timezone_to_uk():
     assert False
 
 
-def test_get_json_data_with_invalid_json():
+def test_get_json_data_from_zip_with_invalid_json():
     f = make_zip({"test.json": "testing"})
-    assert [] == script.get_json_data(f)
+    assert [] == script.get_json_data_from_zip(f)
 
 
-def test_get_json_data_with_non_tiktok_json():
+def test_get_json_data_from_zip_with_non_tiktok_json():
     f = make_zip({"test.json": "{}"})
-    assert [] == script.get_json_data(f)
+    assert [] == script.get_json_data_from_zip(f)
 
 
-def test_get_json_data_with_valid_tiktok_json():
+def test_get_json_data_from_zip_with_valid_tiktok_json():
     tiktok_data = {
         "Profile": {"Profile Information": {"ProfileMap": {"userName": "test"}}}
     }
     f = make_zip({"test.json": json.dumps(tiktok_data)})
-    assert [tiktok_data] == script.get_json_data(f)
+    assert [tiktok_data] == script.get_json_data_from_zip(f)
+
+
+def test_extract_tiktok_data_works_with_zip_files():
+    """A zipped export passed by file path yields more than one table."""
+    # Use a path inside a temporary directory instead of reopening a
+    # still-open NamedTemporaryFile by name, which is not portable (it
+    # fails on Windows with the default arguments).
+    with tempfile.TemporaryDirectory() as tmp_dir:
+        path = Path(tmp_dir) / "export.zip"
+        with open(path, "wb") as f:
+            make_zip({"test.json": json.dumps(complete_contents)}, f)
+        result = script.extract_tiktok_data(str(path))
+    assert len(result) > 1
+
+def test_extract_tiktok_data_works_with_json_files():
+    """A plain JSON export passed by file path yields more than one table."""
+    # Use a path inside a temporary directory instead of reopening a
+    # still-open NamedTemporaryFile by name, which is not portable (it
+    # fails on Windows with the default arguments).
+    with tempfile.TemporaryDirectory() as tmp_dir:
+        path = Path(tmp_dir) / "export.json"
+        path.write_text(json.dumps(complete_contents), encoding="utf-8")
+        result = script.extract_tiktok_data(str(path))
+    assert len(result) > 1
 
 
-def make_zip(contents):
-    f = io.BytesIO()
-    z = zipfile.ZipFile(f, mode="w")
+def make_zip(contents, out=None):
+    if out is None:
+        out = io.BytesIO()
+    z = zipfile.ZipFile(out, mode="w")
     for filename, data in contents.items():
         z.writestr(filename, data)
     z.close()
-    f.seek(0)
-    return f
+    out.seek(0)
+    return out