Skip to content

Commit

Permalink
add function for constructing raw key
Browse files Browse the repository at this point in the history
  • Loading branch information
philerooski committed Oct 2, 2024
1 parent ba3e414 commit e45f1cc
Show file tree
Hide file tree
Showing 2 changed files with 61 additions and 7 deletions.
38 changes: 31 additions & 7 deletions src/lambda_function/raw_sync/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -176,7 +176,7 @@ def match_corresponding_raw_object(
data_type: str,
cohort: str,
expected_key: str,
raw_keys: list[dict],
raw_keys: defaultdict,
) -> Optional[str]:
"""
Find a matching raw object for a given export file and filename.
Expand Down Expand Up @@ -483,6 +483,29 @@ def get_data_type_from_path(path: str) -> str:
return data_type


def get_expected_raw_key(namespace: str, data_type: str, cohort: str, path: str) -> str:
    """Get the expected raw S3 key.

    Get the expected raw S3 key of a raw bucket object corresponding to the
    given input bucket object.

    The file identifier is the base name of `path` truncated at its first
    ".", so every extension (and anything else following the first dot) is
    dropped before the ".ndjson.gz" suffix is appended.

    Args:
        namespace (str): The namespace of the corresponding input object.
        data_type (str): The data type of the corresponding input object.
        cohort (str): The cohort of the corresponding input object.
        path (str): The path of the file relative to the zip archive (export).

    Returns:
        str: The expected S3 key of the corresponding raw object.
    """
    # Paths inside a zip archive always use "/" as the separator, so use
    # posixpath rather than the platform-dependent os.path; this keeps the
    # result identical on every OS (os.path is posixpath on Linux/Lambda).
    import posixpath

    file_name = posixpath.basename(path)
    file_identifier = file_name.partition(".")[0]
    return (
        f"{namespace}/json/dataset={data_type}"
        f"/cohort={cohort}/{file_identifier}.ndjson.gz"
    )


def main(
event: dict,
s3_client: boto3.client,
Expand Down Expand Up @@ -519,22 +542,23 @@ def main(
f"from s3://{input_bucket}/{export_key}"
)
data_type = get_data_type_from_path(path=filename)
file_identifier = filename.split(".")[0]
expected_key = (
f"{namespace}/json/dataset={data_type}"
f"/cohort={cohort}/{file_identifier}.ndjson.gz"
expected_raw_key = get_expected_raw_key(
namespace=namespace,
data_type=data_type,
cohort=cohort,
path=filename,
)
corresponding_raw_object = match_corresponding_raw_object(
data_type=data_type,
cohort=cohort,
expected_key=expected_key,
expected_key=expected_raw_key,
raw_keys=raw_keys,
)
if corresponding_raw_object is None:
logger.info(
f"Did not find corresponding raw object for {filename} from "
f"s3://{input_bucket}/{export_key} at "
f"s3://{raw_bucket}/{expected_key}"
f"s3://{raw_bucket}/{expected_raw_key}"
)
publish_to_sns(
bucket=input_bucket,
Expand Down
30 changes: 30 additions & 0 deletions tests/test_lambda_raw_sync.py
Original file line number Diff line number Diff line change
Expand Up @@ -633,3 +633,33 @@ def test_get_data_type_from_path_deleted():
path = "path/to/HealthKitV2Samples_AppleStandTime_Deleted_20241111-20241112.json"
data_type = app.get_data_type_from_path(path=path)
assert data_type == "HealthKitV2Samples_Deleted"


import os


def test_get_expected_raw_key_case1():
    """A Fitbit export path yields the raw key named after the file's stem."""
    ns, dataset, cohort_name = "test-namespace", "test-data-type", "test-cohort"
    export_path = "path/to/FitbitIntradayCombined_20241111-20241112.json"
    wanted = (
        f"{ns}/json/dataset={dataset}/cohort={cohort_name}"
        "/FitbitIntradayCombined_20241111-20241112.ndjson.gz"
    )
    actual = app.get_expected_raw_key(ns, dataset, cohort_name, export_path)
    assert actual == wanted


def test_get_expected_raw_key_case2():
    """A HealthKit sample export path yields the raw key named after its stem."""
    ns, dataset, cohort_name = "test-namespace", "test-data-type", "test-cohort"
    export_path = "path/to/HealthKitV2Samples_AppleStandTime_20241111-20241112.json"
    wanted = (
        f"{ns}/json/dataset={dataset}/cohort={cohort_name}"
        "/HealthKitV2Samples_AppleStandTime_20241111-20241112.ndjson.gz"
    )
    actual = app.get_expected_raw_key(ns, dataset, cohort_name, export_path)
    assert actual == wanted


def test_get_expected_raw_key_case3():
    """A HealthKit "Deleted" export path keeps the full stem in the raw key."""
    ns, dataset, cohort_name = "test-namespace", "test-data-type", "test-cohort"
    export_path = (
        "path/to/HealthKitV2Samples_AppleStandTime_Deleted_20241111-20241112.json"
    )
    wanted = (
        f"{ns}/json/dataset={dataset}/cohort={cohort_name}"
        "/HealthKitV2Samples_AppleStandTime_Deleted_20241111-20241112.ndjson.gz"
    )
    actual = app.get_expected_raw_key(ns, dataset, cohort_name, export_path)
    assert actual == wanted

0 comments on commit e45f1cc

Please sign in to comment.