# Provenance (taken from the git patch header this module was shipped in):
#   commit:  51e3ff5853dc32ec597405743419f6f5a21558a6
#   author:  Leonardo Schwarz
#   date:    Tue, 12 Nov 2024 15:23:44 +0100
#   subject: [PATCH] new script for standardizing tables
#   path:    src/depiction_targeted_preproc/workflow/panel/standardize_panel.py
"""Command-line script that standardizes a panel table and writes it as CSV."""

from pathlib import Path

import cyclopts
import polars as pl
from rich.pretty import pprint

from depiction_targeted_preproc.panel.schema import PanelMainSchema
from depiction_targeted_preproc.panel.standardize_input_panel import StandardizeConfig, standardize

app = cyclopts.App()


def _standardize(input_panel_path: Path, config_name: str) -> pl.DataFrame:
    """Read the raw panel CSV and run it through ``standardize``.

    Args:
        input_panel_path: CSV file read with ``pl.read_csv``.
        config_name: Name handed to ``StandardizeConfig.load_packaged``.

    Returns:
        The data frame returned by ``standardize``.
    """
    # The CSV is read before the config is loaded, so a bad input path is
    # reported before a bad config name.
    raw_panel = pl.read_csv(input_panel_path)
    standardize_config = StandardizeConfig.load_packaged(config_name)
    return standardize(config=standardize_config, raw_df=raw_panel)


def _validate(output_df: pl.DataFrame) -> None:
    """Run ``PanelMainSchema.validate`` on the frame and pretty-print its result.

    Args:
        output_df: The standardized panel to check.
    """
    # NOTE(review): the validation result is only printed, never inspected.
    # Whether an invalid panel stops the script therefore depends on
    # ``PanelMainSchema.validate`` raising on failure -- confirm.
    pprint(PanelMainSchema.validate(output_df))


@app.default
def standardize_panel(input_panel_path: Path, config_name: str, output_panel_path: Path) -> None:
    """Standardize a panel CSV and write the result to a new CSV file.

    Args:
        input_panel_path: Path of the panel CSV to read.
        config_name: Name of the standardization config to load.
        output_panel_path: Path the standardized panel CSV is written to.
    """
    standardized_panel = _standardize(input_panel_path=input_panel_path, config_name=config_name)
    _validate(output_df=standardized_panel)
    # Written only after validation has run and printed its result.
    standardized_panel.write_csv(output_panel_path)


if __name__ == "__main__":
    app()