diff --git a/src/construction/construction.py b/src/construction/construction.py index 484ee4e51..b5f043cde 100644 --- a/src/construction/construction.py +++ b/src/construction/construction.py @@ -4,6 +4,7 @@ from typing import Callable from src.staging.validation import validate_data_with_schema +from src.staging.staging_helpers import postcode_topup from src.outputs.outputs_helpers import create_period_year construction_logger = logging.getLogger(__name__) @@ -87,6 +88,10 @@ def run_construction( updated_snapshot_df = create_period_year(updated_snapshot_df) construction_df = create_period_year(construction_df) + # Set instance=1 so longforms with status 'Form sent out' match correctly + form_sent_condition = (updated_snapshot_df.formtype == "0001") & (updated_snapshot_df.status == "Form sent out") + updated_snapshot_df.loc[form_sent_condition, "instance"] = 1 + # NI data has no instance but needs an instance of 1 if is_northern_ireland: construction_df["instance"] = 1 @@ -115,6 +120,19 @@ def run_construction( {"reference": "Int64", "instance": "Int64", "period_year": "Int64"} ) + # Long form records with a postcode in 601 use this as the postcode + long_form_cond = (~updated_snapshot_df["601"].isnull()) + updated_snapshot_df.loc[long_form_cond, "postcodes_harmonised"] = updated_snapshot_df["601"] + + # Short form records with nothing in 601 use referencepostcode instead + short_form_cond = (updated_snapshot_df["601"].isnull()) & (~updated_snapshot_df["referencepostcode"].isnull()) + updated_snapshot_df.loc[short_form_cond, "postcodes_harmonised"] = updated_snapshot_df["referencepostcode"] + + # Top up all new postcodes so they're all eight characters exactly + postcode_cols = ["601", "referencepostcode", "postcodes_harmonised"] + for col in postcode_cols: + updated_snapshot_df[col] = updated_snapshot_df[col].apply(postcode_topup) + construction_logger.info(f"Construction edited {construction_df.shape[0]} rows.") return updated_snapshot_df diff --git a/src/developer_config.yaml b/src/developer_config.yaml index a771aade6..20ed94430 100644 --- a/src/developer_config.yaml +++ b/src/developer_config.yaml @@ -9,7 +9,7 @@ global: # Staging and validation settings postcode_csv_check: False load_updated_snapshot: False # Whether to load the updated snapshots for amendments and additions - load_ni_data: True + load_ni_data: False load_historic_data: False run_construction: False run_ni_construction: True @@ -37,7 +37,7 @@ global: output_intram_by_itl1: False output_intram_by_civil_defence: False output_intram_by_sic: False - output_status_filtered: False + output_status_filtered: True output_fte_total_qa: False years: current_year: 2022 # TODO: put this in the userconfig diff --git a/src/staging/staging_helpers.py b/src/staging/staging_helpers.py index 0cb87b58e..4d53efa52 100644 --- a/src/staging/staging_helpers.py +++ b/src/staging/staging_helpers.py @@ -33,8 +33,7 @@ def postcode_topup(mystr: str, target_len: int = 8) -> str: spaces and cuts the tail on the right. If there is only one part, keeps the first 8 characters and tops it up with spaces on the right if needed. - Empty input string would have zero parts and will return a string of - eight spaces. + Empty input string would have zero parts and will return an empty string. Args: mystr (str): Input postcode. @@ -69,7 +68,7 @@ def postcode_topup(mystr: str, target_len: int = 8) -> str: return (part1 + part2)[:target_len] else: - return mystr[:target_len].ljust(target_len, " ") + return "" def fix_anon_data(responses_df, config):