Skip to content

Commit

Permalink
docstrings
Browse files (browse the repository at this point in the history)
  • Loading branch information
leoschwarz committed Nov 12, 2024
1 parent 1886991 commit 2399774
Showing 1 changed file with 9 additions and 1 deletion.
10 changes (9 additions & 1 deletion) in src/depiction_targeted_preproc/panel/standardize_input_panel.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@


class StandardizeConfig(BaseModel):
"""Configuration for the input panel standardization."""

column_names: dict[str, set[str]] = {
"mass": {"m/z", "mass", "pc-mt (m+h)+"},
"label": {"marker", "label"},
Expand All @@ -13,6 +15,11 @@ class StandardizeConfig(BaseModel):


def _identify_column_correspondence(config: StandardizeConfig, raw_df: pl.DataFrame) -> dict[str, str]:
"""Identifies the correspondence between the columns in the raw dataframe and the standardized columns,
returning an entry for each match from raw to standardized column name.
If required columns are missing, raises a ValueError.
"""
identified_columns = {}
for column_name in raw_df.columns:
for key, values in config.column_names.items():
Expand All @@ -31,7 +38,8 @@ def _identify_column_correspondence(config: StandardizeConfig, raw_df: pl.DataFr
return {original: target for target, original in identified_columns.items()}


def standardize(config: StandardizeConfig, raw_df: pl.DataFrame):
def standardize(config: StandardizeConfig, raw_df: pl.DataFrame) -> pl.DataFrame:
"""Standardizes the provided raw dataframe, according to the configuration."""
column_correspondence = _identify_column_correspondence(config=config, raw_df=raw_df)
renamed_df = raw_df.select(column_correspondence.keys()).rename(column_correspondence)
full_df = renamed_df.with_columns(
Expand Down

0 comments on commit 2399774

Please sign in to comment.