From b5eaa0727a9c754ed37dd17b53a7dc1610b8b8f8 Mon Sep 17 00:00:00 2001 From: David McKee Date: Mon, 4 Dec 2023 15:41:26 +0000 Subject: [PATCH 1/3] Filter SQS on trigger_enrichment presence --- CHANGELOG.md | 2 ++ terraform/modules/lambda_s3/queue.tf | 20 ++++++++++++-------- 2 files changed, 14 insertions(+), 8 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3809e36ac..8c2f608a2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,5 @@ +Unreleased: Update Terraform to filter only trigger_enrichment messages + 1: Ensure "No Year" does not appear in tags to avoid validation errors. 0.1.0: Before December 2023, versions were not tracked and the version string was "0.1.0" diff --git a/terraform/modules/lambda_s3/queue.tf b/terraform/modules/lambda_s3/queue.tf index d3ab5b64c..084fe897c 100644 --- a/terraform/modules/lambda_s3/queue.tf +++ b/terraform/modules/lambda_s3/queue.tf @@ -463,15 +463,19 @@ resource "aws_lambda_event_source_mapping" "sqs_replacements_fetch_xml_event_sou } resource "aws_sns_topic_subscription" "fetch_xml_queue_subscription" { - count = var.environment != "production" ? 1 : 0 - topic_arn = "arn:aws:sns:eu-west-2:626206937213:caselaw-stg-judgment-updated" - protocol = "sqs" - endpoint = aws_sqs_queue.fetch_xml_queue.arn + count = var.environment != "production" ? 1 : 0 + topic_arn = "arn:aws:sns:eu-west-2:626206937213:caselaw-stg-judgment-updated" + protocol = "sqs" + endpoint = aws_sqs_queue.fetch_xml_queue.arn + filter_policy_scope = "MessageAttributes" + filter_policy = "{\"trigger_enrichment\": [{\"exists\": true}]}" } resource "aws_sns_topic_subscription" "fetch_xml_queue_subscription_prod" { - count = var.environment == "production" ? 1 : 0 - topic_arn = "arn:aws:sns:eu-west-2:276505630421:caselaw-judgment-updated" - protocol = "sqs" - endpoint = aws_sqs_queue.fetch_xml_queue.arn + count = var.environment == "production" ? 1 : 0 + topic_arn = "arn:aws:sns:eu-west-2:276505630421:caselaw-judgment-updated" + protocol = "sqs" + endpoint = aws_sqs_queue.fetch_xml_queue.arn + filter_policy_scope = "MessageAttributes" + filter_policy = "{\"trigger_enrichment\": [{\"exists\": true}]}" } From 0bc18e91b114f6e8551a74607487657d635235d9 Mon Sep 17 00:00:00 2001 From: David McKee Date: Mon, 4 Dec 2023 16:00:02 +0000 Subject: [PATCH 2/3] v2: update CHANGELOG --- CHANGELOG.md | 10 +++++++--- src/lambdas/determine_legislation_provisions/index.py | 2 +- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8c2f608a2..04926f9d3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,9 @@ -Unreleased: Update Terraform to filter only trigger_enrichment messages +== v2 == +Update Terraform to filter only trigger_enrichment messages. +Enrichment lambdas do not attempt to filter based on body JSON. -1: Ensure "No Year" does not appear in tags to avoid validation errors. +== v1 == +Ensure "No Year" does not appear in tags to avoid validation errors. -0.1.0: Before December 2023, versions were not tracked and the version string was "0.1.0" +== v0.1.0 == +Before December 2023, versions were not tracked and the version string was "0.1.0" diff --git a/src/lambdas/determine_legislation_provisions/index.py b/src/lambdas/determine_legislation_provisions/index.py index bac5cff2c..41528935c 100644 --- a/src/lambdas/determine_legislation_provisions/index.py +++ b/src/lambdas/determine_legislation_provisions/index.py @@ -43,7 +43,7 @@ def add_timestamp_and_engine_version(file_data): "uk:tna-enrichment-engine", attrs={"xmlns:uk": "https://caselaw.nationalarchives.gov.uk/akn"}, ) - enrichment_version.string = "1" + enrichment_version.string = "2" soup.proprietary.append(enrichment_version) soup.FRBRManifestation.FRBRdate.insert_after(enriched_date) From 94bfdb9011a8782cff70d26dccdf353fe32468cd Mon Sep 17 00:00:00 2001 From: David McKee Date: Mon, 4 Dec 2023 16:05:30 +0000 Subject: [PATCH 3/3] Remove status=published check in lambda --- src/lambdas/fetch_xml/index.py | 28 ++++++++++++---------------- 1 file changed, 12 insertions(+), 16 deletions(-) diff --git a/src/lambdas/fetch_xml/index.py b/src/lambdas/fetch_xml/index.py index 79c9ee412..4eff3f9df 100644 --- a/src/lambdas/fetch_xml/index.py +++ b/src/lambdas/fetch_xml/index.py @@ -90,25 +90,21 @@ def process_event(sqs_rec, api_endpoint): and upload to destination S3 bucket """ message = json.loads(sqs_rec["body"]) - status, query = read_message(message) + status, query = read_message( + message + ) # query is the URL of the item requested to be enriched print("Judgment status:", status) print("Judgment query:", query) - if status == "published": - print("Judgment:", query) - source_key = query.replace("/", "-") - print("Source key:", source_key) - - # fetch the xml content - xml_content = fetch_judgment_urllib( - api_endpoint, query, API_USERNAME, API_PASSWORD - ) - # print(xml_content) - upload_contents(source_key, xml_content) - lock_judgment_urllib(api_endpoint, query, API_USERNAME, API_PASSWORD) - check_lock_judgment_urllib(api_endpoint, query, API_USERNAME, API_PASSWORD) - else: - print("Judgment not published.") + source_key = query.replace("/", "-") + print("Source key:", source_key) + + # fetch the xml content + xml_content = fetch_judgment_urllib(api_endpoint, query, API_USERNAME, API_PASSWORD) + # print(xml_content) + upload_contents(source_key, xml_content) + lock_judgment_urllib(api_endpoint, query, API_USERNAME, API_PASSWORD) + check_lock_judgment_urllib(api_endpoint, query, API_USERNAME, API_PASSWORD) ############################################