Skip to content

Commit

Permalink
Merge branch 'develop' into RDRP-646
Browse files: browse the repository at this point in the history
  • Loading branch information
JenCheshire authored Jan 15, 2024
2 parents f89e860 + 63cdbc3 commit 61048d0
Show file tree
Hide file tree
Showing 16 changed files with 156 additions and 2,105 deletions.
File renamed without changes.
280 changes: 0 additions & 280 deletions config/output_schemas/status_filtered_qa_schema.toml
Original file line number Diff line number Diff line change
Expand Up @@ -646,286 +646,6 @@ Deduced_Data_Type = "object"
old_name = "imp_class"
Deduced_Data_Type = "object"

[202_prev]
old_name = "202_prev"
Deduced_Data_Type = "float64"

[203_prev]
old_name = "203_prev"
Deduced_Data_Type = "float64"

[204_prev]
old_name = "204_prev"
Deduced_Data_Type = "float64"

[205_prev]
old_name = "205_prev"
Deduced_Data_Type = "float64"

[206_prev]
old_name = "206_prev"
Deduced_Data_Type = "float64"

[207_prev]
old_name = "207_prev"
Deduced_Data_Type = "float64"

[209_prev]
old_name = "209_prev"
Deduced_Data_Type = "float64"

[210_prev]
old_name = "210_prev"
Deduced_Data_Type = "float64"

[211_prev]
old_name = "211_prev"
Deduced_Data_Type = "float64"

[212_prev]
old_name = "212_prev"
Deduced_Data_Type = "float64"

[214_prev]
old_name = "214_prev"
Deduced_Data_Type = "float64"

[216_prev]
old_name = "216_prev"
Deduced_Data_Type = "float64"

[218_prev]
old_name = "218_prev"
Deduced_Data_Type = "float64"

[219_prev]
old_name = "219_prev"
Deduced_Data_Type = "float64"

[220_prev]
old_name = "220_prev"
Deduced_Data_Type = "float64"

[221_prev]
old_name = "221_prev"
Deduced_Data_Type = "float64"

[222_prev]
old_name = "222_prev"
Deduced_Data_Type = "float64"

[223_prev]
old_name = "223_prev"
Deduced_Data_Type = "float64"

[225_prev]
old_name = "225_prev"
Deduced_Data_Type = "float64"

[226_prev]
old_name = "226_prev"
Deduced_Data_Type = "float64"

[227_prev]
old_name = "227_prev"
Deduced_Data_Type = "float64"

[228_prev]
old_name = "228_prev"
Deduced_Data_Type = "float64"

[229_prev]
old_name = "229_prev"
Deduced_Data_Type = "float64"

[237_prev]
old_name = "237_prev"
Deduced_Data_Type = "float64"

[242_prev]
old_name = "242_prev"
Deduced_Data_Type = "float64"

[243_prev]
old_name = "243_prev"
Deduced_Data_Type = "float64"

[244_prev]
old_name = "244_prev"
Deduced_Data_Type = "float64"

[245_prev]
old_name = "245_prev"
Deduced_Data_Type = "float64"

[246_prev]
old_name = "246_prev"
Deduced_Data_Type = "float64"

[247_prev]
old_name = "247_prev"
Deduced_Data_Type = "float64"

[248_prev]
old_name = "248_prev"
Deduced_Data_Type = "float64"

[249_prev]
old_name = "249_prev"
Deduced_Data_Type = "float64"

[250_prev]
old_name = "250_prev"
Deduced_Data_Type = "float64"

[302_prev]
old_name = "302_prev"
Deduced_Data_Type = "float64"

[303_prev]
old_name = "303_prev"
Deduced_Data_Type = "float64"

[304_prev]
old_name = "304_prev"
Deduced_Data_Type = "float64"

[305_prev]
old_name = "305_prev"
Deduced_Data_Type = "float64"

[emp_researcher_prev]
old_name = "emp_researcher_prev"
Deduced_Data_Type = "float64"

[emp_technician_prev]
old_name = "emp_technician_prev"
Deduced_Data_Type = "float64"

[emp_other_prev]
old_name = "emp_other_prev"
Deduced_Data_Type = "float64"

[emp_total_prev]
old_name = "emp_total_prev"
Deduced_Data_Type = "float64"

[headcount_res_m_prev]
old_name = "headcount_res_m_prev"
Deduced_Data_Type = "float64"

[headcount_res_f_prev]
old_name = "headcount_res_f_prev"
Deduced_Data_Type = "float64"

[headcount_tec_m_prev]
old_name = "headcount_tec_m_prev"
Deduced_Data_Type = "float64"

[headcount_tec_f_prev]
old_name = "headcount_tec_f_prev"
Deduced_Data_Type = "float64"

[headcount_oth_m_prev]
old_name = "headcount_oth_m_prev"
Deduced_Data_Type = "float64"

[headcount_oth_f_prev]
old_name = "headcount_oth_f_prev"
Deduced_Data_Type = "float64"

[headcount_tot_m_prev]
old_name = "headcount_tot_m_prev"
Deduced_Data_Type = "float64"

[headcount_tot_f_prev]
old_name = "headcount_tot_f_prev"
Deduced_Data_Type = "float64"

[headcount_total_prev]
old_name = "headcount_total_prev"
Deduced_Data_Type = "float64"

[211_link]
old_name = "211_link"
Deduced_Data_Type = "float64"

[305_link]
old_name = "305_link"
Deduced_Data_Type = "float64"

[emp_researcher_link]
old_name = "emp_researcher_link"
Deduced_Data_Type = "float64"

[emp_technician_link]
old_name = "emp_technician_link"
Deduced_Data_Type = "float64"

[emp_other_link]
old_name = "emp_other_link"
Deduced_Data_Type = "float64"

[headcount_res_m_link]
old_name = "headcount_res_m_link"
Deduced_Data_Type = "float64"

[headcount_res_f_link]
old_name = "headcount_res_f_link"
Deduced_Data_Type = "float64"

[headcount_tec_m_link]
old_name = "headcount_tec_m_link"
Deduced_Data_Type = "float64"

[headcount_tec_f_link]
old_name = "headcount_tec_f_link"
Deduced_Data_Type = "float64"

[headcount_oth_m_link]
old_name = "headcount_oth_m_link"
Deduced_Data_Type = "float64"

[headcount_oth_f_link]
old_name = "headcount_oth_f_link"
Deduced_Data_Type = "float64"

[200_original]
old_name = "200_original"
Deduced_Data_Type = "float64"

[pg_sic_class]
old_name = "pg_sic_class"
Deduced_Data_Type = "object"

[empty_pgsic_group]
old_name = "empty_pgsic_group"
Deduced_Data_Type = "object"

[empty_pg_group]
old_name = "empty_pg_group"
Deduced_Data_Type = "object"

[200_imp_marker]
old_name = "200_imp_marker"
Deduced_Data_Type = "object"

[211_trim]
old_name = "211_trim"
Deduced_Data_Type = "object"

[305_trim]
old_name = "305_trim"
Deduced_Data_Type = "object"

[manual_trim]
old_name = "manual_trim"
Deduced_Data_Type = "object"

[sf_expansion_grouping]
old_name = "sf_expansion_grouping"
Deduced_Data_Type = "object"

[auto_outlier]
old_name = "auto_outlier"
Deduced_Data_Type = "bool"
Expand Down
18 changes: 18 additions & 0 deletions src/construction/construction.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from typing import Callable

from src.staging.validation import validate_data_with_schema
from src.staging.staging_helpers import postcode_topup
from src.outputs.outputs_helpers import create_period_year

construction_logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -90,6 +91,10 @@ def run_construction(
updated_snapshot_df = create_period_year(updated_snapshot_df)
construction_df = create_period_year(construction_df)

# Set instance to 1 for long forms (formtype "0001") with status 'Form sent out' so they match correctly
form_sent_condition = (updated_snapshot_df.formtype == "0001") & (updated_snapshot_df.status == "Form sent out")
updated_snapshot_df.loc[form_sent_condition, "instance"] = 1

# NI data has no instance but needs an instance of 1
if is_northern_ireland:
construction_df["instance"] = 1
Expand Down Expand Up @@ -118,6 +123,19 @@ def run_construction(
{"reference": "Int64", "instance": "Int64", "period_year": "Int64"}
)

# Records with a postcode in 601 (long forms) use it as the harmonised postcode
long_form_cond = (~updated_snapshot_df["601"].isnull())
updated_snapshot_df.loc[long_form_cond, "postcodes_harmonised"] = updated_snapshot_df["601"]

# Records with nothing in 601 (e.g. short forms) fall back to referencepostcode when it is present
short_form_cond = (updated_snapshot_df["601"].isnull()) & (~updated_snapshot_df["referencepostcode"].isnull())
updated_snapshot_df.loc[short_form_cond, "postcodes_harmonised"] = updated_snapshot_df["referencepostcode"]

# Top up every postcode column (including the new harmonised one) so they're all eight characters exactly
postcode_cols = ["601", "referencepostcode", "postcodes_harmonised"]
for col in postcode_cols:
updated_snapshot_df[col] = updated_snapshot_df[col].apply(postcode_topup)

updated_snapshot_df = updated_snapshot_df.sort_values(
["reference", "instance"], ascending=[True, True]
).reset_index(drop=True)
Expand Down
11 changes: 5 additions & 6 deletions src/developer_config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,6 @@ global:
output_auto_outliers: False
output_outlier_qa : False
output_estimation_qa: False
output_imputation_qa: False
output_apportionment_qa: False
output_long_form: False
output_short_form: False
Expand All @@ -37,7 +36,7 @@ global:
output_intram_by_itl1: False
output_intram_by_civil_defence: False
output_intram_by_sic: False
output_status_filtered: False
output_status_filtered: True
output_fte_total_qa: False
years:
current_year: 2022 # TODO: put this in the userconfig
Expand Down Expand Up @@ -68,8 +67,8 @@ hdfs_paths:
manual_imp_trim_path: "/ons/rdbe_dev/imputation/manual_trimming"
outliers_path: "/ons/rdbe_dev/outliers"
estimation_path: "/ons/rdbe_dev/estimation"
short_form_schema: "src/outputs/output_schemas/frozen_shortform_schema.toml"
long_form_schema: "src/outputs/output_schemas/frozen_longform_schema.toml"
short_form_schema: "src/outputs/output_schemas/short_form_schema.toml"
long_form_schema: "src/outputs/output_schemas/long_form_schema.toml"
export_path: /ons/rdbe_dev/outgoing_export
feather_path: "/ons/rdbe_dev/staging/feather"
network_paths:
Expand Down Expand Up @@ -113,8 +112,8 @@ network_paths:
civil_defence_detailed_mapper_path: "R:/BERD Results System Development 2023/DAP_emulation/mappers/civil_defence_detailed.csv"
sic_division_detailed_mapper_path: "R:/BERD Results System Development 2023/DAP_emulation/mappers/sic_div_detailed.csv"
schema_paths:
frozen_shortform_schema: "config/output_schemas/frozen_shortform_schema.toml"
frozen_longform_schema: "config/output_schemas/frozen_longform_schema.toml"
short_form_schema: "config/output_schemas/short_form_schema.toml"
long_form_schema: "config/output_schemas/long_form_schema.toml"
tau_schema: "config/output_schemas/tau_schema.toml"
gb_sas_schema: "config/output_schemas/gb_sas_schema.toml"
ni_sas_schema: "config/output_schemas/ni_sas_schema.toml"
Expand Down
Loading

0 comments on commit 61048d0

Please sign in to comment.