Skip to content

Commit

Permalink
Foreløpig arbeid
Browse files Browse the repository at this point in the history
  • Loading branch information
MagnusEngh committed Dec 20, 2024
1 parent 3cd6848 commit 1960dbe
Show file tree
Hide file tree
Showing 2 changed files with 147 additions and 20 deletions.
99 changes: 99 additions & 0 deletions src/altinn/flatten.py
Original file line number Diff line number Diff line change
Expand Up @@ -510,6 +510,105 @@ def isee_transform(
error_message = f"File is not a valid XML-file: {file_path}"
raise ValueError(error_message)

def transform_table_in_table(
table_data: pd.DataFrame,
rest_of_data: pd.DataFrame,
table_fields: list[str],
row_fields: list[str],
counter_starting_value: int = 1,
):
"""Function for transforming a "table in table" part of a flattened Altinn form into a shape that ISEE accepts as a dynamic list.
Args:
table_data (pd.DataFrame): Part of the form representing a specific "table in table" component.
rest_of_data (pd.DataFrame): Part of the data not involved in the specific "table in table" component.
table_fields (list[str]): A list of FELTNAVN values that are valid for several rows in the "table in table".
row_fields (list[str]): A list of FELTNAVN values that are valid only for a siingle row in the "table in table".
counter_starting_value (int): Sets the lopenr to this value for the first part of the table object. If there are multiple table objects that belong in the same dynamic list, adjust this value to avoid duplicate FELTNAVN.
Notes:
A simple way to split data is using .loc to filter out relevant rows to a new dataframe, and then using the index to remove them from the primary dataset. See example below.
table_data = df.loc[df[FELTNAVN].str.startswith("tabell_prefix")]
rest_of_data = df.drop(table_data.index, axis=0)
assert df.shape[0] == table_data.shape[0]+rest_of_data.shape[0]
"""
table_data = table_data[["FELTNAVN", "FELTVERDI"]]
pattern = "|".join(table_fields)
df_meta = table_data[
table_data["FELTNAVN"].str.contains(pattern, case=False, na=False)
]
pattern = "|".join(row_fields)
df_row = table_data[
table_data["FELTNAVN"].str.contains(pattern, case=False, na=False)
]
relevante_lopenr = list(df_row["FELTNAVN"].str.split("_").str[-1].unique())
df_meta = df_meta.loc[
df_meta["FELTNAVN"].str.split("_").str[-1].isin(relevante_lopenr)
]
flattened_table_data: pd.DataFrame = (
pd.DataFrame()
) # Dataframe to be filled with flattened data.
for lopenr in relevante_lopenr:
row_data: dict = {}
for felt in row_fields:
row_data[felt] = tuple(
df_row.loc[
(df_row["FELTNAVN"].str.endswith(lopenr))
& (df_row["FELTNAVN"].str.contains(felt))
]["FELTVERDI"].values
)
max_length = max(len(values) for values in row_data.values())
test = pd.DataFrame( # TODO: Må kanskje endres
{
key: list(values) + [np.nan] * (max_length - len(values))
for key, values in row_data.items()
}
)
table_data: dict = {}
for felt in table_fields:
logger.debug(f"df_meta for felt {felt}: \n{df_meta}")
verdi = df_meta.loc[
(df_meta["FELTNAVN"].str.endswith(lopenr))
& (df_meta["FELTNAVN"].str.contains(felt))
]["FELTVERDI"].item()
test[felt] = verdi

current_count = (
max(flattened_table_data["FELTNAVN"].str[-3:].astype(int)) + 1
if "FELTNAVN" in flattened_table_data.columns
else counter_starting_value
) # Finds the current number of rows in the dynamic list so the counting can continue.
dynamic_list_rows = (
test.reset_index()
.assign(
lopenr=lambda x: range(current_count, current_count + len(test))
) # This needs to be changed to count upwards for each row that exists, so they are not overwritten/become duplicates.
.drop(columns="index")
.melt(id_vars="lopenr", value_name="FELTVERDI")
.assign(
FELTNAVN=lambda x: x["variable"]
+ "_"
+ x["lopenr"].astype(str).str.zfill(3)
)[["FELTNAVN", "FELTVERDI"]]
)

flattened_table_data = pd.concat([flattened_table_data, dynamic_list_rows])

# Re-adding form metadata to flattened_table_data
skjema_id = list(rest_of_data["SKJEMA_ID"].unique())[0]
delreg_nr = list(rest_of_data["DELREG_NR"].unique())[0]
ident_nr = list(rest_of_data["IDENT_NR"].unique())[0]
enhets_type = list(rest_of_data["ENHETS_TYPE"].unique())[0]
version_nr = list(rest_of_data["VERSION_NR"].unique())[0]
flattened_table_data = (
flattened_table_data.assign(SKJEMA_ID=skjema_id)
.assign(DELREG_NR=delreg_nr)
.assign(IDENT_NR=ident_nr)
.assign(ENHETS_TYPE=enhets_type)
.assign(VERSION_NR=version_nr)
)

return pd.concat([rest_of_data, flattened_table_data]).reset_index(drop=True)

def xml_transform(file_path: str) -> pd.DataFrame:
"""Transforms a XML to a pd.Dataframe using xmltodict.
Expand Down
68 changes: 48 additions & 20 deletions src/altinn/tabell_i_tabell.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,15 +7,15 @@
# extension: .py
# format_name: percent
# format_version: '1.3'
# jupytext_version: 1.16.2
# jupytext_version: 1.15.2
# kernelspec:
# display_name: Python 3
# display_name: egentesting
# language: python
# name: python3
# name: egentesting
# ---

# %%
# %%
print("Test")
import logging

import numpy as np
Expand All @@ -39,21 +39,23 @@ def transform_table_in_table(
table_fields: list[str],
row_fields: list[str],
counter_starting_value: int = 1,
):
"""Function for transforming a 'table in table' part of a flattened Altinn form into a shape that ISEE accepts as a dynamic list.
) -> pd.DataFrame:
r"""Function for transforming a 'table in table' part of a flattened Altinn form into a shape that ISEE accepts as a dynamic list.
Args:
table_data (pd.DataFrame): Part of the form representing a 'table in table' component.
rest_of_data (pd.DataFrame):
table_data (pd.DataFrame): Part of the form representing a specific 'table in table' component.
rest_of_data (pd.DataFrame): Part of the data not involved in the specific 'table in table' component.
table_fields (list[str]): A list of FELTNAVN values that are valid for several rows in the 'table in table'.
row_fields (list[str]): A list of FELTNAVN values that are valid only for a siingle row in the 'table in table'.
counter_starting_value (int): Sets the lopenr to this value for the first part of the table object. If there are multiple table objects that belong in the same dynamic list, adjust this value to avoid duplicate FELTNAVN.
Notes:
A simple way to split data is using .loc to filter out relevant rows to a new dataframe, and then using the index to remove them from the primary dataset.
Example:
A simple way to split data is using .loc to filter out relevant rows to a new dataframe, and then using the index to remove them from the primary dataset. See example below.
table_data = df.loc[df[FELTNAVN].str.startswith("tabell_prefix")]
rest_of_data = df.drop(table_data.index, axis=0)
assert df.shape[0] == table_data.shape[0]+rest_of_data.shape[0]
If you have several 'table in table' components to unpack, but want them in the same dynamic list, you need to manage the counter_starting_value.
A way to do this is using this: max(output.loc[output["FELTNAVN"].str.match(r"your_varname_here_\d{3}")]["FELTNAVN"].str[-3:].astype(int))+1
"""
table_data = table_data[["FELTNAVN", "FELTVERDI"]]
pattern = "|".join(table_fields)
Expand Down Expand Up @@ -93,8 +95,12 @@ def transform_table_in_table(
verdi = df_meta.loc[
(df_meta["FELTNAVN"].str.endswith(lopenr))
& (df_meta["FELTNAVN"].str.contains(felt))
]["FELTVERDI"].item()
test[felt] = verdi
]
try:
verdi = verdi["FELTVERDI"].item()
test[felt] = verdi
except ValueError as e:
logger.debug(e)

current_count = (
max(flattened_table_data["FELTNAVN"].str[-3:].astype(int)) + 1
Expand All @@ -118,11 +124,11 @@ def transform_table_in_table(
flattened_table_data = pd.concat([flattened_table_data, dynamic_list_rows])

# Re-adding form metadata to flattened_table_data
skjema_id = list(rest_of_data["SKJEMA_ID"].unique())[0]
delreg_nr = list(rest_of_data["DELREG_NR"].unique())[0]
ident_nr = list(rest_of_data["IDENT_NR"].unique())[0]
enhets_type = list(rest_of_data["ENHETS_TYPE"].unique())[0]
version_nr = list(rest_of_data["VERSION_NR"].unique())[0]
skjema_id = next(rest_of_data["SKJEMA_ID"].unique())
delreg_nr = next(rest_of_data["DELREG_NR"].unique())
ident_nr = next(rest_of_data["IDENT_NR"].unique())
enhets_type = next(rest_of_data["ENHETS_TYPE"].unique())
version_nr = next(rest_of_data["VERSION_NR"].unique())
flattened_table_data = (
flattened_table_data.assign(SKJEMA_ID=skjema_id)
.assign(DELREG_NR=delreg_nr)
Expand All @@ -140,16 +146,38 @@ def transform_table_in_table(
# %%
df = alt.isee_transform(source_file)

table_data = df.loc[df["FELTNAVN"].str.startswith("AnVekstMineralGjodsGroup_")]
table_data = df.loc[df["FELTNAVN"].str.startswith("NyEngMineralGjodsGroup_")]
rest_of_data = df.drop(table_data.index, axis=0)
assert df.shape[0] == table_data.shape[0] + rest_of_data.shape[0]

output = flatten_table_in_table(
output = transform_table_in_table(
table_data=table_data,
rest_of_data=rest_of_data,
table_fields=["Navn"],
row_fields=["Aarstid", "Areal", "Mengde"],
)

table_data = output.loc[output["FELTNAVN"].str.startswith("NyEngHusdyrGjodsGroup_")]
rest_of_data = output.drop(table_data.index, axis=0)
assert output.shape[0] == table_data.shape[0] + rest_of_data.shape[0]

output = transform_table_in_table(
table_data=table_data,
rest_of_data=rest_of_data,
table_fields=["Navn"],
row_fields=["Aarstid", "Areal", "Mengde"],
counter_starting_value=max(
output.loc[output["FELTNAVN"].str.match(r"Areal_\d{3}")]["FELTNAVN"]
.str[-3:]
.astype(int)
)
+ 1,
)

# %%
output

# %%

# %%
alt.FileInfo(f"{source_file}").pretty_print()

0 comments on commit 1960dbe

Please sign in to comment.