Skip to content

Commit

Permalink
Nytt utkast til testing
Browse files Browse the repository at this point in the history
  • Loading branch information
MagnusEngh committed Jan 6, 2025
1 parent 1960dbe commit 49c3b07
Showing 1 changed file with 30 additions and 16 deletions.
46 changes: 30 additions & 16 deletions src/altinn/flatten.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
from typing import Any
from zoneinfo import ZoneInfo

import numpy as np
import pandas as pd
import xmltodict
from dapla import FileClient
Expand Down Expand Up @@ -510,27 +511,30 @@ def isee_transform(
error_message = f"File is not a valid XML-file: {file_path}"
raise ValueError(error_message)


def transform_table_in_table(
table_data: pd.DataFrame,
rest_of_data: pd.DataFrame,
table_fields: list[str],
row_fields: list[str],
counter_starting_value: int = 1,
):
"""Function for transforming a "table in table" part of a flattened Altinn form into a shape that ISEE accepts as a dynamic list.
) -> pd.DataFrame:
r"""Function for transforming a 'table in table' part of a flattened Altinn form into a shape that ISEE accepts as a dynamic list.
Args:
table_data (pd.DataFrame): Part of the form representing a specific "table in table" component.
rest_of_data (pd.DataFrame): Part of the data not involved in the specific "table in table" component.
table_fields (list[str]): A list of FELTNAVN values that are valid for several rows in the "table in table".
row_fields (list[str]): A list of FELTNAVN values that are valid only for a siingle row in the "table in table".
table_data (pd.DataFrame): Part of the form representing a specific 'table in table' component.
rest_of_data (pd.DataFrame): Part of the data not involved in the specific 'table in table' component.
table_fields (list[str]): A list of FELTNAVN values that are valid for several rows in the 'table in table'.
row_fields (list[str]): A list of FELTNAVN values that are valid only for a siingle row in the 'table in table'.
counter_starting_value (int): Sets the lopenr to this value for the first part of the table object. If there are multiple table objects that belong in the same dynamic list, adjust this value to avoid duplicate FELTNAVN.
Notes:
A simple way to split data is using .loc to filter out relevant rows to a new dataframe, and then using the index to remove them from the primary dataset. See example below.
table_data = df.loc[df[FELTNAVN].str.startswith("tabell_prefix")]
rest_of_data = df.drop(table_data.index, axis=0)
assert df.shape[0] == table_data.shape[0]+rest_of_data.shape[0]
If you have several 'table in table' components to unpack, but want them in the same dynamic list, you need to manage the counter_starting_value.
A way to do this is using this: max(output.loc[output["FELTNAVN"].str.match(r"your_varname_here_\d{3}")]["FELTNAVN"].str[-3:].astype(int))+1
"""
table_data = table_data[["FELTNAVN", "FELTVERDI"]]
pattern = "|".join(table_fields)
Expand Down Expand Up @@ -570,19 +574,24 @@ def transform_table_in_table(
verdi = df_meta.loc[
(df_meta["FELTNAVN"].str.endswith(lopenr))
& (df_meta["FELTNAVN"].str.contains(felt))
]["FELTVERDI"].item()
test[felt] = verdi
]
try:
verdi = verdi["FELTVERDI"].item()
test[felt] = verdi
except ValueError as e:
logger.debug(e)

current_count = (
max(flattened_table_data["FELTNAVN"].str[-3:].astype(int)) + 1
if "FELTNAVN" in flattened_table_data.columns
else counter_starting_value
) # Finds the current number of rows in the dynamic list so the counting can continue.
logger.debug(
f"current_count: {current_count} - counter_starting_value: {counter_starting_value}"
)
dynamic_list_rows = (
test.reset_index()
.assign(
lopenr=lambda x: range(current_count, current_count + len(test))
) # This needs to be changed to count upwards for each row that exists, so they are not overwritten/become duplicates.
.assign(lopenr=_assign_lopenr(test, current_count))
.drop(columns="index")
.melt(id_vars="lopenr", value_name="FELTVERDI")
.assign(
Expand All @@ -595,11 +604,11 @@ def transform_table_in_table(
flattened_table_data = pd.concat([flattened_table_data, dynamic_list_rows])

# Re-adding form metadata to flattened_table_data
skjema_id = list(rest_of_data["SKJEMA_ID"].unique())[0]
delreg_nr = list(rest_of_data["DELREG_NR"].unique())[0]
ident_nr = list(rest_of_data["IDENT_NR"].unique())[0]
enhets_type = list(rest_of_data["ENHETS_TYPE"].unique())[0]
version_nr = list(rest_of_data["VERSION_NR"].unique())[0]
skjema_id = rest_of_data["SKJEMA_ID"].unique()[0]
delreg_nr = rest_of_data["DELREG_NR"].unique()[0]
ident_nr = rest_of_data["IDENT_NR"].unique()[0]
enhets_type = rest_of_data["ENHETS_TYPE"].unique()[0]
version_nr = rest_of_data["VERSION_NR"].unique()[0]
flattened_table_data = (
flattened_table_data.assign(SKJEMA_ID=skjema_id)
.assign(DELREG_NR=delreg_nr)
Expand All @@ -610,6 +619,11 @@ def transform_table_in_table(

return pd.concat([rest_of_data, flattened_table_data]).reset_index(drop=True)


def _assign_lopenr(df: pd.DataFrame, start: int):
return range(start, start + len(df))


def xml_transform(file_path: str) -> pd.DataFrame:
"""Transforms a XML to a pd.Dataframe using xmltodict.
Expand Down

0 comments on commit 49c3b07

Please sign in to comment.