From 2b9c005ea51cd6147bd87c2bcdfe47ccd17632d2 Mon Sep 17 00:00:00 2001 From: Phil Snyder Date: Thu, 3 Oct 2024 14:37:48 -0700 Subject: [PATCH] use raw key prefix to construct raw key --- src/lambda_function/raw_sync/app.py | 10 ++++++---- tests/test_lambda_raw_sync.py | 27 ++++++++++++++++++--------- 2 files changed, 24 insertions(+), 13 deletions(-) diff --git a/src/lambda_function/raw_sync/app.py b/src/lambda_function/raw_sync/app.py index a8679e0..4589a4d 100644 --- a/src/lambda_function/raw_sync/app.py +++ b/src/lambda_function/raw_sync/app.py @@ -483,14 +483,16 @@ def get_data_type_from_path(path: str) -> str: return data_type -def get_expected_raw_key(namespace: str, data_type: str, cohort: str, path: str) -> str: +def get_expected_raw_key( + raw_key_prefix: str, data_type: str, cohort: str, path: str +) -> str: """Get the expected raw S3 key Get the expected raw S3 key of a raw bucket object corresponding to the given input bucket object. Args: - namespace (str): The namespace of the corresponding input object. + raw_key_prefix (str): The namespaced S3 prefix where raw objects are written. data_type (str): The data type of the corresponding input object. cohort (str): The cohort of the corresponding input object. path (str): The path of the file relative to the zip archive (export). @@ -500,7 +502,7 @@ def get_expected_raw_key(namespace: str, data_type: str, cohort: str, path: str) """ file_identifier = os.path.basename(path).split(".")[0] expected_key = ( - f"{namespace}/json/dataset={data_type}" + f"{raw_key_prefix}/dataset={data_type}" f"/cohort={cohort}/{file_identifier}.ndjson.gz" ) return expected_key @@ -543,7 +545,7 @@ def main( ) data_type = get_data_type_from_path(path=filename) expected_raw_key = get_expected_raw_key( - namespace=namespace, + raw_key_prefix=raw_key_prefix, data_type=data_type, cohort=cohort, path=filename, diff --git a/tests/test_lambda_raw_sync.py b/tests/test_lambda_raw_sync.py index 960e62e..4c30d5b 100644 --- a/tests/test_lambda_raw_sync.py +++ b/tests/test_lambda_raw_sync.py @@ -639,27 +639,36 @@ def test_get_data_type_from_path_deleted(): def test_get_expected_raw_key_case1(): - namespace = "test-namespace" + raw_key_prefix = "test-raw_key_prefix/json" data_type = "test-data-type" cohort = "test-cohort" path = "path/to/FitbitIntradayCombined_20241111-20241112.json" - expected_key = f"{namespace}/json/dataset={data_type}/cohort={cohort}/FitbitIntradayCombined_20241111-20241112.ndjson.gz" - assert app.get_expected_raw_key(namespace, data_type, cohort, path) == expected_key + expected_key = f"{raw_key_prefix}/dataset={data_type}/cohort={cohort}/FitbitIntradayCombined_20241111-20241112.ndjson.gz" + assert ( + app.get_expected_raw_key(raw_key_prefix, data_type, cohort, path) + == expected_key + ) def test_get_expected_raw_key_case2(): - namespace = "test-namespace" + raw_key_prefix = "test-raw_key_prefix/json" data_type = "test-data-type" cohort = "test-cohort" path = "path/to/HealthKitV2Samples_AppleStandTime_20241111-20241112.json" - expected_key = f"{namespace}/json/dataset={data_type}/cohort={cohort}/HealthKitV2Samples_AppleStandTime_20241111-20241112.ndjson.gz" - assert app.get_expected_raw_key(namespace, data_type, cohort, path) == expected_key + expected_key = f"{raw_key_prefix}/dataset={data_type}/cohort={cohort}/HealthKitV2Samples_AppleStandTime_20241111-20241112.ndjson.gz" + assert ( + app.get_expected_raw_key(raw_key_prefix, data_type, cohort, path) + == expected_key + ) def test_get_expected_raw_key_case3(): - namespace = "test-namespace" + raw_key_prefix = "test-raw_key_prefix/json" data_type = "test-data-type" cohort = "test-cohort" path = "path/to/HealthKitV2Samples_AppleStandTime_Deleted_20241111-20241112.json" - expected_key = f"{namespace}/json/dataset={data_type}/cohort={cohort}/HealthKitV2Samples_AppleStandTime_Deleted_20241111-20241112.ndjson.gz" - assert app.get_expected_raw_key(namespace, data_type, cohort, path) == expected_key + expected_key = f"{raw_key_prefix}/dataset={data_type}/cohort={cohort}/HealthKitV2Samples_AppleStandTime_Deleted_20241111-20241112.ndjson.gz" + assert ( + app.get_expected_raw_key(raw_key_prefix, data_type, cohort, path) + == expected_key + )