-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #63 from statisticsnorway/statbank-formats
Added functions for statbank formats
- Loading branch information
Showing
4 changed files
with
115 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,6 +1,6 @@ | ||
[tool.poetry] | ||
name = "ssb-arbmark-fagfunksjoner" | ||
version = "0.0.20" | ||
version = "0.0.21" | ||
description = "SSB Arbeidsmarked og lønn Fag-fellesfunksjoner" | ||
authors = ["Jan Sebastian Rothe <[email protected]>"] | ||
license = "MIT" | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,52 @@ | ||
# Type hints | ||
from typing import TYPE_CHECKING | ||
from typing import Any | ||
|
||
# Pandas for table management | ||
import pandas as pd | ||
|
||
# Alias used in the annotations of this module. The subscripted form is only
# evaluated by static type checkers; at runtime the plain ``pd.Series`` class
# is bound instead.
# NOTE(review): presumably because ``pd.Series[Any]`` is not subscriptable at
# runtime in the supported pandas versions - confirm.
if TYPE_CHECKING:
    PdSeriesAny = pd.Series[Any]  # type: ignore[misc]
else:
    PdSeriesAny = pd.Series
|
||
|
||
def sb_integer(number: PdSeriesAny, unit: int = 0) -> PdSeriesAny:
    """Format a pandas Series of numbers as rounded integer strings.

    Args:
        number: A pandas Series containing numeric values.
        unit: The power of 10 to round to. Only the magnitude is used, so
            ``2`` and ``-2`` both round to the nearest hundred.
            Default is 0 (round to whole numbers).

    Returns:
        A pandas Series of strings holding the values rounded to the
        requested unit and converted to integers, with NaNs rendered as
        empty strings. The index of ``number`` is preserved.
    """
    # Remember which entries are missing up front. Encoding NaN as a numeric
    # sentinel and stripping it afterwards by string match would also blank
    # out genuine values that happen to round to the sentinel.
    missing = number.isna()

    formatted = (
        number.fillna(0)  # Filler only; these positions are blanked below
        .round(-abs(unit))  # Round to the nearest 10**abs(unit)
        .astype(int)  # Drop the fractional ".0"
        .astype(str)  # Render as text
    )

    # Blank out exactly the positions that were missing in the input.
    return formatted.mask(missing, "")
|
||
|
||
def sb_percent(fraction: PdSeriesAny, decimals: int = 1) -> PdSeriesAny:
    """Convert a pandas Series of fractions to percentage strings.

    Args:
        fraction: A pandas Series containing fractional values
            (e.g., 0.25 for 25%).
        decimals: Number of decimal places to round the percentage values
            to. Default is 1.

    Returns:
        A pandas Series of strings holding the percentages with a comma as
        the decimal separator. NaNs and both positive and negative
        infinities are rendered as empty strings. The index of ``fraction``
        is preserved.
    """
    percent = fraction.multiply(100).round(decimals)

    # Decide what to blank on the numeric values, before any string
    # conversion, so that "-inf" is caught as well as "inf" and no string
    # has to be written into a float Series.
    blank = percent.isna() | percent.isin([float("inf"), float("-inf")])

    text = (
        percent.astype(str)
        # regex=False: "." must be a literal period, not "any character",
        # regardless of the pandas version's default for ``regex``.
        .str.replace(".", ",", regex=False)
    )

    return text.mask(blank, "")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,57 @@ | ||
import numpy as np | ||
import pandas as pd | ||
import pytest | ||
|
||
from arbmark import sb_integer | ||
from arbmark import sb_percent | ||
|
||
|
||
@pytest.fixture
def sample_df() -> pd.DataFrame:
    """Build the two-column frame shared by the formatter tests.

    Each column starts with two missing entries (``np.nan`` and ``None``)
    followed by four numeric values, all passed through ``pd.to_numeric``.
    """
    count_values = pd.to_numeric([np.nan, None, 0, 1.00001, 975, 25])
    fraction_values = pd.to_numeric([np.nan, None, 0, 0.9999, 0.5, 0.3246588])

    columns = {
        "antall_ansatte": count_values,
        "sykefravaersprosent": fraction_values,
    }
    return pd.DataFrame(columns)
|
||
|
||
def test_sb_integer(sample_df: pd.DataFrame) -> None:
    """Check ``sb_integer`` on the ``antall_ansatte`` column for units 2, 1 and 0."""
    # (unit, expected output) pairs, checked in this order.
    cases = [
        (2, ["", "", "0", "0", "1000", "0"]),
        (1, ["", "", "0", "0", "980", "20"]),
        (0, ["", "", "0", "1", "975", "25"]),
    ]

    for unit, expected in cases:
        result = sb_integer(sample_df["antall_ansatte"], unit=unit).to_list()
        assert result == expected, f"Expected {expected}, but got {result}"
|
||
|
||
def test_sb_percent(sample_df: pd.DataFrame) -> None:
    """Check ``sb_percent`` on the ``sykefravaersprosent`` column for 1 and 2 decimals."""
    # (decimals, expected output) pairs, checked in this order.
    cases = [
        (1, ["", "", "0,0", "100,0", "50,0", "32,5"]),
        (2, ["", "", "0,0", "99,99", "50,0", "32,47"]),
    ]

    for decimals, expected in cases:
        result = sb_percent(
            sample_df["sykefravaersprosent"], decimals=decimals
        ).to_list()
        assert result == expected, f"Expected {expected}, but got {result}"