Merge pull request #85 from statisticsnorway/sector-update

Sector update
statisticsnorway · Oct 31, 2024 · d2c8708 · d2c8708
2 parents 5ce653d + 74f0018
commit d2c8708
Show file tree

Hide file tree

Showing 4 changed files with 14 additions and 19 deletions.
diff --git a/pyproject.toml b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "ssb-arbmark-fagfunksjoner"
-version = "0.0.24"
+version = "0.0.25"
 description = "SSB Arbeidsmarked og lønn Fag-fellesfunksjoner"
 authors = ["Jan Sebastian Rothe <[email protected]>"]
 license = "MIT"

diff --git a/src/arbmark/functions/files.py b/src/arbmark/functions/files.py
@@ -1,3 +1,5 @@
+"""This function is outdated; use 'latest_version_path' from ssb-fagfunksjoner instead."""
+
 # Glob for Unix style pathname pattern expansion.
 import glob
 
@@ -11,6 +13,8 @@
 def read_latest(path: str, name: str, dottype: str = ".parquet") -> str | None:
     """Finds the latest version of a specified file in a given directory and returns its name.
 
+    This function is outdated; use 'latest_version_path' from ssb-fagfunksjoner instead.
+
     This function searches for files in the specified path that match the given name and file
     type, sorts them by modification time, and returns the path of the latest version. If no
     files are found, it returns None.
@@ -23,6 +27,9 @@ def read_latest(path: str, name: str, dottype: str = ".parquet") -> str | None:
     Returns:
         Optional[str]: The path of the latest version of the file if found, None otherwise.
     """
+    print(
+        "This function is outdated use 'latest_version_path' from ssb-fagfunksjoner instead."
+    )
     # Inform the user about the file versions being checked
     print(f"Checking versions of file: {name}")
 

diff --git a/src/arbmark/groups/sector.py b/src/arbmark/groups/sector.py
@@ -9,44 +9,35 @@
 import pandas as pd
 
 if TYPE_CHECKING:
-    PdSeriesInt = pd.Series[int]  # type: ignore[misc]
     PdSeriesStr = pd.Series[str]  # type: ignore[misc]
-    NpArrayInt = npt.NDArray[np.int_]  # type: ignore[misc]
     NpArrayStr = npt.NDArray[np.str_]  # type: ignore[misc]
 else:
-    PdSeriesInt = pd.Series
     PdSeriesStr = pd.Series
-    NpArrayInt = npt.NDArray
     NpArrayStr = npt.NDArray
 
 
-def sektor2_grp(
-    sektor: PdSeriesStr, undersektor: PdSeriesStr, display: str = "label"
-) -> NpArrayStr:
-    """Categorize a pandas Series of sectors and subsectors into predefined groups.
+def sektor2_grp(sektor: PdSeriesStr, display: str = "label") -> NpArrayStr:
+    """Categorize a pandas Series of sectors into predefined groups.
 
     Parameters:
         sektor: A pandas Series containing the sector codes.
-        undersektor: A pandas Series containing the subsector codes.
         display: If 'label', returns group labels; if 'number', returns keys;
                        for any other string, returns a combination of keys and labels.
 
     Returns:
-        A numpy Array where the original sector and subsectors are replaced by group labels or keys.
+        A numpy Array where the original sector is replaced by group labels or keys.
     """
     # Define the conditions for each group
     conditions = [
         (sektor == "6100").to_numpy(),
-        np.logical_and(sektor == "6500", undersektor != "007"),
-        np.logical_and(sektor == "6500", undersektor == "007"),
+        (sektor == "6500").to_numpy(),
         (sektor == "1510").to_numpy(),
         (sektor == "1520").to_numpy(),
     ]
 
     groups = {
         "110": "Statlig forvaltning",
         "550": "Kommunal forvaltning",
-        "510": "Fylkeskommunal forvaltning",
         "660": "Kommunale foretak med ubegrenset ansvar",
         "680": "Kommunalt eide aksjeselskaper m.v.",
     }

diff --git a/tests/test_sector.py b/tests/test_sector.py
@@ -10,20 +10,17 @@ def sample_df() -> pd.DataFrame:
     return pd.DataFrame(
         {
             "sektor": np.random.choice(["6100", "6500", "1510", "1520"], size=100),
-            "undersektor": np.random.choice(["007", "008", "009"], size=100),
         }
     )
 
 
 def test_sektor2_grp(sample_df):
     df = sample_df
-    df["sektor2_grp"] = sektor2_grp(df["sektor"], df["undersektor"]).astype(str)
+    df["sektor2_grp"] = sektor2_grp(df["sektor"]).astype(str)
     assert not df["sektor2_grp"].isnull().any(), "Sector 2 group contains null values"
 
 
 def test_sektor2_grp_number(sample_df):
     df = sample_df
-    df["sektor2_grp"] = sektor2_grp(
-        df["sektor"], df["undersektor"], display="number"
-    ).astype(str)
+    df["sektor2_grp"] = sektor2_grp(df["sektor"], display="number").astype(str)
     assert not df["sektor2_grp"].isnull().any(), "Sector 2 group contains null values"