Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update #84

Closed
wants to merge 3 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "ssb-arbmark-fagfunksjoner"
version = "0.0.24"
version = "0.0.25"
description = "SSB Arbeidsmarked og lønn Fag-fellesfunksjoner"
authors = ["Jan Sebastian Rothe <[email protected]>"]
license = "MIT"
Expand Down
1 change: 0 additions & 1 deletion src/arbmark/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@

from arbmark.functions.aggregation import proc_sums
from arbmark.functions.categorize_ranges import categorize_ranges
from arbmark.functions.files import read_latest
from arbmark.functions.interval import pinterval
from arbmark.functions.merge import indicate_merge
from arbmark.functions.quarter import first_last_date_quarter
Expand Down
71 changes: 0 additions & 71 deletions src/arbmark/functions/files.py

This file was deleted.

17 changes: 4 additions & 13 deletions src/arbmark/groups/sector.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,44 +9,35 @@
import pandas as pd

if TYPE_CHECKING:
PdSeriesInt = pd.Series[int] # type: ignore[misc]
PdSeriesStr = pd.Series[str] # type: ignore[misc]
NpArrayInt = npt.NDArray[np.int_] # type: ignore[misc]
NpArrayStr = npt.NDArray[np.str_] # type: ignore[misc]
else:
PdSeriesInt = pd.Series
PdSeriesStr = pd.Series
NpArrayInt = npt.NDArray
NpArrayStr = npt.NDArray


def sektor2_grp(
sektor: PdSeriesStr, undersektor: PdSeriesStr, display: str = "label"
) -> NpArrayStr:
"""Categorize a pandas Series of sectors and subsectors into predefined groups.
def sektor2_grp(sektor: PdSeriesStr, display: str = "label") -> NpArrayStr:
"""Categorize a pandas Series of sectors into predefined groups.

Parameters:
sektor: A pandas Series containing the sector codes.
undersektor: A pandas Series containing the subsector codes.
display: If 'label', returns group labels; if 'number', returns keys;
for any other string, returns a combination of keys and labels.

Returns:
A numpy Array where the original sector and subsectors are replaced by group labels or keys.
A numpy Array where the original sector is replaced by group labels or keys.
"""
# Define the conditions for each group
conditions = [
(sektor == "6100").to_numpy(),
np.logical_and(sektor == "6500", undersektor != "007"),
np.logical_and(sektor == "6500", undersektor == "007"),
(sektor == "6500").to_numpy(),
(sektor == "1510").to_numpy(),
(sektor == "1520").to_numpy(),
]

groups = {
"110": "Statlig forvaltning",
"550": "Kommunal forvaltning",
"510": "Fylkeskommunal forvaltning",
"660": "Kommunale foretak med ubegrenset ansvar",
"680": "Kommunalt eide aksjeselskaper m.v.",
}
Expand Down
Empty file removed tests/test_data/dataset_v1.parquet
Empty file.
Empty file removed tests/test_data/dataset_v2.parquet
Empty file.
Empty file removed tests/test_data/dataset_v3.parquet
Empty file.
13 changes: 0 additions & 13 deletions tests/test_files.py

This file was deleted.

7 changes: 2 additions & 5 deletions tests/test_sector.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,20 +10,17 @@ def sample_df() -> pd.DataFrame:
return pd.DataFrame(
{
"sektor": np.random.choice(["6100", "6500", "1510", "1520"], size=100),
"undersektor": np.random.choice(["007", "008", "009"], size=100),
}
)


def test_sektor2_grp(sample_df):
df = sample_df
df["sektor2_grp"] = sektor2_grp(df["sektor"], df["undersektor"]).astype(str)
df["sektor2_grp"] = sektor2_grp(df["sektor"]).astype(str)
assert not df["sektor2_grp"].isnull().any(), "Sector 2 group contains null values"


def test_sektor2_grp_number(sample_df):
df = sample_df
df["sektor2_grp"] = sektor2_grp(
df["sektor"], df["undersektor"], display="number"
).astype(str)
df["sektor2_grp"] = sektor2_grp(df["sektor"], display="number").astype(str)
assert not df["sektor2_grp"].isnull().any(), "Sector 2 group contains null values"