Nytt utkast til testing

statisticsnorway · Jan 6, 2025 · 49c3b07 · 49c3b07
1 parent 1960dbe
commit 49c3b07
Showing 1 changed file with 30 additions and 16 deletions.
diff --git a/src/altinn/flatten.py b/src/altinn/flatten.py
@@ -17,6 +17,7 @@
 from typing import Any
 from zoneinfo import ZoneInfo
 
+import numpy as np
 import pandas as pd
 import xmltodict
 from dapla import FileClient
@@ -510,27 +511,30 @@ def isee_transform(
         error_message = f"File is not a valid XML-file: {file_path}"
         raise ValueError(error_message)
 
+
 def transform_table_in_table(
     table_data: pd.DataFrame,
     rest_of_data: pd.DataFrame,
     table_fields: list[str],
     row_fields: list[str],
     counter_starting_value: int = 1,
-):
-    """Function for transforming a "table in table" part of a flattened Altinn form into a shape that ISEE accepts as a dynamic list.
+) -> pd.DataFrame:
+    r"""Function for transforming a 'table in table' part of a flattened Altinn form into a shape that ISEE accepts as a dynamic list.
 
     Args:
-        table_data (pd.DataFrame): Part of the form representing a specific "table in table" component.
-        rest_of_data (pd.DataFrame): Part of the data not involved in the specific "table in table" component.
-        table_fields (list[str]): A list of FELTNAVN values that are valid for several rows in the "table in table".
-        row_fields (list[str]): A list of FELTNAVN values that are valid only for a siingle row in the "table in table".
+        table_data (pd.DataFrame): Part of the form representing a specific 'table in table' component.
+        rest_of_data (pd.DataFrame): Part of the data not involved in the specific 'table in table' component.
+        table_fields (list[str]): A list of FELTNAVN values that are valid for several rows in the 'table in table'.
+        row_fields (list[str]): A list of FELTNAVN values that are valid only for a siingle row in the 'table in table'.
         counter_starting_value (int): Sets the lopenr to this value for the first part of the table object. If there are multiple table objects that belong in the same dynamic list, adjust this value to avoid duplicate FELTNAVN.
 
     Notes:
         A simple way to split data is using .loc to filter out relevant rows to a new dataframe, and then using the index to remove them from the primary dataset. See example below.
             table_data = df.loc[df[FELTNAVN].str.startswith("tabell_prefix")]
             rest_of_data = df.drop(table_data.index, axis=0)
             assert df.shape[0] == table_data.shape[0]+rest_of_data.shape[0]
+        If you have several 'table in table' components to unpack, but want them in the same dynamic list, you need to manage the counter_starting_value.
+            A way to do this is using this: max(output.loc[output["FELTNAVN"].str.match(r"your_varname_here_\d{3}")]["FELTNAVN"].str[-3:].astype(int))+1
     """
     table_data = table_data[["FELTNAVN", "FELTVERDI"]]
     pattern = "|".join(table_fields)
@@ -570,19 +574,24 @@ def transform_table_in_table(
             verdi = df_meta.loc[
                 (df_meta["FELTNAVN"].str.endswith(lopenr))
                 & (df_meta["FELTNAVN"].str.contains(felt))
-            ]["FELTVERDI"].item()
-            test[felt] = verdi
+            ]
+            try:
+                verdi = verdi["FELTVERDI"].item()
+                test[felt] = verdi
+            except ValueError as e:
+                logger.debug(e)
 
         current_count = (
             max(flattened_table_data["FELTNAVN"].str[-3:].astype(int)) + 1
             if "FELTNAVN" in flattened_table_data.columns
             else counter_starting_value
         )  # Finds the current number of rows in the dynamic list so the counting can continue.
+        logger.debug(
+            f"current_count: {current_count} - counter_starting_value: {counter_starting_value}"
+        )
         dynamic_list_rows = (
             test.reset_index()
-            .assign(
-                lopenr=lambda x: range(current_count, current_count + len(test))
-            )  # This needs to be changed to count upwards for each row that exists, so they are not overwritten/become duplicates.
+            .assign(lopenr=_assign_lopenr(test, current_count))
             .drop(columns="index")
             .melt(id_vars="lopenr", value_name="FELTVERDI")
             .assign(
@@ -595,11 +604,11 @@ def transform_table_in_table(
         flattened_table_data = pd.concat([flattened_table_data, dynamic_list_rows])
 
     # Re-adding form metadata to flattened_table_data
-    skjema_id = list(rest_of_data["SKJEMA_ID"].unique())[0]
-    delreg_nr = list(rest_of_data["DELREG_NR"].unique())[0]
-    ident_nr = list(rest_of_data["IDENT_NR"].unique())[0]
-    enhets_type = list(rest_of_data["ENHETS_TYPE"].unique())[0]
-    version_nr = list(rest_of_data["VERSION_NR"].unique())[0]
+    skjema_id = rest_of_data["SKJEMA_ID"].unique()[0]
+    delreg_nr = rest_of_data["DELREG_NR"].unique()[0]
+    ident_nr = rest_of_data["IDENT_NR"].unique()[0]
+    enhets_type = rest_of_data["ENHETS_TYPE"].unique()[0]
+    version_nr = rest_of_data["VERSION_NR"].unique()[0]
     flattened_table_data = (
         flattened_table_data.assign(SKJEMA_ID=skjema_id)
         .assign(DELREG_NR=delreg_nr)
@@ -610,6 +619,11 @@ def transform_table_in_table(
 
     return pd.concat([rest_of_data, flattened_table_data]).reset_index(drop=True)
 
+
+def _assign_lopenr(df: pd.DataFrame, start: int):
+    return range(start, start + len(df))
+
+
 def xml_transform(file_path: str) -> pd.DataFrame:
     """Transforms a XML to a pd.Dataframe using xmltodict.