From a3e18170967e755222d7473b709772b6636b6458 Mon Sep 17 00:00:00 2001
From: Dragon Dave McKee
Date: Fri, 19 Jul 2024 15:19:04 +0100
Subject: [PATCH] Do not upload to public if unpublished

---
Notes (this region is ignored by `git am`; it is not part of the commit):

What the patch does
  * Adds CASELAW_BASE and a new method is_published(), which sends an HTTP
    HEAD request to f"{CASELAW_BASE}/{self.tna_id}" (30 s timeout) and
    returns True only when the status code is exactly 200.
  * Before building the copy command for PUBLISHED_BUCKET, the calling code
    now checks doc.is_published(); when that is False it prints a
    "Skipping public upload ..." message and `continue`s.
  * Drops the trailing "/" from ASSETS_BASE. has_docx_in_s3() builds its URL
    as f"{ASSETS_BASE}/{self.target_key()}", so the old value produced "//".

NOTE(review): line breaks, blank context lines and Python indentation in this
  file were re-derived from the hunk line counts (7/8, 6/11, 6/10); verify
  against utilities/spreadsheet_docx/make_script.py before applying.
NOTE(review): the From: header carries no e-mail address -- confirm it was
  not stripped in transit.
NOTE(review): print(response) in is_published() looks like leftover debug
  output -- confirm it is wanted.
NOTE(review): requests.head() does not follow redirects by default, so a
  301/302 from CASELAW_BASE would be reported as "not published" -- confirm
  that is the intended behaviour.

 utilities/spreadsheet_docx/make_script.py | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/utilities/spreadsheet_docx/make_script.py b/utilities/spreadsheet_docx/make_script.py
index f47b6d8..7fa8b1c 100644
--- a/utilities/spreadsheet_docx/make_script.py
+++ b/utilities/spreadsheet_docx/make_script.py
@@ -20,7 +20,8 @@
 DRY_RUN = "--dryrun"
 # DRY_RUN = False
 DALMATIAN_INFRASTRUCTURE = "caselaw"
-ASSETS_BASE = "https://tna-caselaw-assets.s3.amazonaws.com/"
+ASSETS_BASE = "https://tna-caselaw-assets.s3.amazonaws.com"
+CASELAW_BASE = "https://caselaw.nationalarchives.gov.uk"
 SPREADSHEET = "bailii_files.csv"
 
 
@@ -58,6 +59,11 @@ def has_docx_in_s3(self):
         response = requests.head(f"{ASSETS_BASE}/{self.target_key()}", timeout=30)
         return response.status_code == 200
 
+    def is_published(self):
+        response = requests.head(f"{CASELAW_BASE}/{self.tna_id}", timeout=30)
+        print(response)
+        return response.status_code == 200
+
     def copy_command(self, target_bucket):
         if target_bucket == UNPUBLISHED_BUCKET:
             public_bonus = []
@@ -147,6 +153,10 @@ def clean_rows(nice_data):
     if retcode != 0:
         raise RuntimeError
 
+    if not doc.is_published():
+        print(f"Skipping public upload of {doc.target_key()}, not published")
+        continue
+
     command = doc.copy_command(PUBLISHED_BUCKET)
     print(command)
     retcode = subprocess.run(command, check=False).returncode