From 4799b5aea91797ee719e9e59e2260fed5d1c9cf0 Mon Sep 17 00:00:00 2001 From: Jan Sebastian Rothe Date: Thu, 31 Oct 2024 09:26:59 +0100 Subject: [PATCH 1/3] update sector groups --- pyproject.toml | 2 +- src/arbmark/functions/files.py | 6 ++++++ src/arbmark/groups/sector.py | 14 ++++---------- tests/test_sector.py | 5 ++--- 4 files changed, 13 insertions(+), 14 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index be9af71..a53a8d2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "ssb-arbmark-fagfunksjoner" -version = "0.0.24" +version = "0.0.25" description = "SSB Arbeidsmarked og lønn Fag-fellesfunksjoner" authors = ["Jan Sebastian Rothe "] license = "MIT" diff --git a/src/arbmark/functions/files.py b/src/arbmark/functions/files.py index 97a9f46..7ac2b7b 100644 --- a/src/arbmark/functions/files.py +++ b/src/arbmark/functions/files.py @@ -1,3 +1,6 @@ +"""This function is outdated use 'latest_version_path' from ssb-fagfunksjoner instead.""" + + # Glob for Unix style pathname pattern expansion. import glob @@ -11,6 +14,8 @@ def read_latest(path: str, name: str, dottype: str = ".parquet") -> str | None: """Finds the latest version of a specified file in a given directory and returns its name. + This function is outdated use 'latest_version_path' from ssb-fagfunksjoner instead. + This function searches for files in the specified path that match the given name and file type, sorts them by modification time, and returns the path of the latest version. If no files are found, it returns None. @@ -23,6 +28,7 @@ def read_latest(path: str, name: str, dottype: str = ".parquet") -> str | None: Returns: Optional[str]: The path of the latest version of the file if found, None otherwise. """ + print("This function is outdated use 'latest_version_path' from ssb-fagfunksjoner instead.") # Inform the user about the file versions being checked print(f"Checking versions of file: {name}") diff --git a/src/arbmark/groups/sector.py b/src/arbmark/groups/sector.py index c2a44cd..704d403 100644 --- a/src/arbmark/groups/sector.py +++ b/src/arbmark/groups/sector.py @@ -9,21 +9,17 @@ import pandas as pd if TYPE_CHECKING: - PdSeriesInt = pd.Series[int] # type: ignore[misc] PdSeriesStr = pd.Series[str] # type: ignore[misc] - NpArrayInt = npt.NDArray[np.int_] # type: ignore[misc] NpArrayStr = npt.NDArray[np.str_] # type: ignore[misc] else: - PdSeriesInt = pd.Series PdSeriesStr = pd.Series - NpArrayInt = npt.NDArray NpArrayStr = npt.NDArray def sektor2_grp( - sektor: PdSeriesStr, undersektor: PdSeriesStr, display: str = "label" + sektor: PdSeriesStr, display: str = "label" ) -> NpArrayStr: - """Categorize a pandas Series of sectors and subsectors into predefined groups. + """Categorize a pandas Series of sectors into predefined groups. Parameters: sektor: A pandas Series containing the sector codes. @@ -32,13 +28,12 @@ def sektor2_grp( for any other string, returns a combination of keys and labels. Returns: - A numpy Array where the original sector and subsectors are replaced by group labels or keys. + A numpy Array where the original sector is replaced by group labels or keys. """ # Define the conditions for each group conditions = [ (sektor == "6100").to_numpy(), - np.logical_and(sektor == "6500", undersektor != "007"), - np.logical_and(sektor == "6500", undersektor == "007"), + (sektor == "6500").to_numpy(), (sektor == "1510").to_numpy(), (sektor == "1520").to_numpy(), ] @@ -46,7 +41,6 @@ def sektor2_grp( groups = { "110": "Statlig forvaltning", "550": "Kommunal forvaltning", - "510": "Fylkeskommunal forvaltning", "660": "Kommunale foretak med ubegrenset ansvar", "680": "Kommunalt eide aksjeselskaper m.v.", } diff --git a/tests/test_sector.py b/tests/test_sector.py index 9231386..40daf0a 100644 --- a/tests/test_sector.py +++ b/tests/test_sector.py @@ -10,20 +10,19 @@ def sample_df() -> pd.DataFrame: return pd.DataFrame( { "sektor": np.random.choice(["6100", "6500", "1510", "1520"], size=100), - "undersektor": np.random.choice(["007", "008", "009"], size=100), } ) def test_sektor2_grp(sample_df): df = sample_df - df["sektor2_grp"] = sektor2_grp(df["sektor"], df["undersektor"]).astype(str) + df["sektor2_grp"] = sektor2_grp(df["sektor"]).astype(str) assert not df["sektor2_grp"].isnull().any(), "Sector 2 group contains null values" def test_sektor2_grp_number(sample_df): df = sample_df df["sektor2_grp"] = sektor2_grp( - df["sektor"], df["undersektor"], display="number" + df["sektor"], display="number" ).astype(str) assert not df["sektor2_grp"].isnull().any(), "Sector 2 group contains null values" From 54c3f4dcda6eab90e21dd59d248aa3591213ad71 Mon Sep 17 00:00:00 2001 From: Jan Sebastian Rothe Date: Thu, 31 Oct 2024 09:30:02 +0100 Subject: [PATCH 2/3] pre-commit changes --- src/arbmark/functions/files.py | 7 ++++--- src/arbmark/groups/sector.py | 4 +--- tests/test_sector.py | 4 +--- 3 files changed, 6 insertions(+), 9 deletions(-) diff --git a/src/arbmark/functions/files.py b/src/arbmark/functions/files.py index 7ac2b7b..eb6b4c7 100644 --- a/src/arbmark/functions/files.py +++ b/src/arbmark/functions/files.py @@ -1,6 +1,5 @@ """This function is outdated use 'latest_version_path' from ssb-fagfunksjoner instead.""" - # Glob for Unix style pathname pattern expansion. import glob @@ -15,7 +14,7 @@ def read_latest(path: str, name: str, dottype: str = ".parquet") -> str | None: """Finds the latest version of a specified file in a given directory and returns its name. This function is outdated use 'latest_version_path' from ssb-fagfunksjoner instead. - + This function searches for files in the specified path that match the given name and file type, sorts them by modification time, and returns the path of the latest version. If no files are found, it returns None. @@ -28,7 +27,9 @@ def read_latest(path: str, name: str, dottype: str = ".parquet") -> str | None: Returns: Optional[str]: The path of the latest version of the file if found, None otherwise. """ - print("This function is outdated use 'latest_version_path' from ssb-fagfunksjoner instead.") + print( + "This function is outdated use 'latest_version_path' from ssb-fagfunksjoner instead." + ) # Inform the user about the file versions being checked print(f"Checking versions of file: {name}") diff --git a/src/arbmark/groups/sector.py b/src/arbmark/groups/sector.py index 704d403..b03f72d 100644 --- a/src/arbmark/groups/sector.py +++ b/src/arbmark/groups/sector.py @@ -16,9 +16,7 @@ NpArrayStr = npt.NDArray -def sektor2_grp( - sektor: PdSeriesStr, display: str = "label" -) -> NpArrayStr: +def sektor2_grp(sektor: PdSeriesStr, display: str = "label") -> NpArrayStr: """Categorize a pandas Series of sectors into predefined groups. Parameters: diff --git a/tests/test_sector.py b/tests/test_sector.py index 40daf0a..dae37d1 100644 --- a/tests/test_sector.py +++ b/tests/test_sector.py @@ -22,7 +22,5 @@ def test_sektor2_grp(sample_df): def test_sektor2_grp_number(sample_df): df = sample_df - df["sektor2_grp"] = sektor2_grp( - df["sektor"], display="number" - ).astype(str) + df["sektor2_grp"] = sektor2_grp(df["sektor"], display="number").astype(str) assert not df["sektor2_grp"].isnull().any(), "Sector 2 group contains null values" From 74f00182741b994cd64ea1e788aa9e26be23e4ee Mon Sep 17 00:00:00 2001 From: Jan Sebastian Rothe Date: Thu, 31 Oct 2024 11:02:49 +0100 Subject: [PATCH 3/3] Forgot to remove a parameter --- src/arbmark/groups/sector.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/arbmark/groups/sector.py b/src/arbmark/groups/sector.py index b03f72d..d7fe78c 100644 --- a/src/arbmark/groups/sector.py +++ b/src/arbmark/groups/sector.py @@ -21,7 +21,6 @@ def sektor2_grp(sektor: PdSeriesStr, display: str = "label") -> NpArrayStr: Parameters: sektor: A pandas Series containing the sector codes. - undersektor: A pandas Series containing the subsector codes. display: If 'label', returns group labels; if 'number', returns keys; for any other string, returns a combination of keys and labels.