Skip to content

Commit

Permalink
Also accept dash in SSB format date strings (#51)
Browse files (browse the repository at this point in the history)
* Add test for klargjorte-data format

* Add failing test cases with dash in SSB date formats

* Also accept dash in SSB format date strings
  • Loading branch information
mmwinther authored Oct 1, 2024
1 parent 0cf48da commit 83be553
Show file tree
Hide file tree
Showing 2 changed files with 46 additions and 4 deletions.
20 changes: 16 additions & 4 deletions src/dapla_metadata/datasets/dapla_dataset_path_info.py
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,9 @@ def get_floor(self, period_string: str) -> date | None:
>>> SSB_BIMESTER.get_floor("2003B4")
datetime.date(2003, 7, 1)
>>> SSB_BIMESTER.get_floor("2003-B4")
datetime.date(2003, 7, 1)
"""
try:
year = period_string[:4]
Expand Down Expand Up @@ -170,6 +173,9 @@ def get_ceil(self, period_string: str) -> date | None:
>>> SSB_HALF_YEAR.get_ceil("2024H1")
datetime.date(2024, 6, 30)
>>> SSB_HALF_YEAR.get_ceil("2024-H1")
datetime.date(2024, 6, 30)
"""
try:
year = period_string[:4]
Expand All @@ -182,7 +188,7 @@ def get_ceil(self, period_string: str) -> date | None:

SSB_BIMESTER = SsbDateFormat(
name="SSB_BIMESTER",
regex_pattern=r"^\d{4}[B]\d{1}$",
regex_pattern=r"^\d{4}-?[B]\d{1}$",
arrow_pattern="YYYYMM",
timeframe="month",
ssb_dates={
Expand Down Expand Up @@ -215,7 +221,7 @@ def get_ceil(self, period_string: str) -> date | None:

SSB_QUARTERLY = SsbDateFormat(
name="SSB_QUARTERLY",
regex_pattern=r"^\d{4}[Q]\d{1}$",
regex_pattern=r"^\d{4}-?[Q]\d{1}$",
arrow_pattern="YYYYMM",
timeframe="month",
ssb_dates={
Expand All @@ -240,7 +246,7 @@ def get_ceil(self, period_string: str) -> date | None:

SSB_TRIANNUAL = SsbDateFormat(
name="SSB_TRIANNUAL",
regex_pattern=r"^\d{4}[T]\d{1}$",
regex_pattern=r"^\d{4}-?[T]\d{1}$",
arrow_pattern="YYYYMM",
timeframe="month",
ssb_dates={
Expand All @@ -260,7 +266,7 @@ def get_ceil(self, period_string: str) -> date | None:
)
SSB_HALF_YEAR = SsbDateFormat(
name="SSB_HALF_YEAR",
regex_pattern=r"^\d{4}[H]\d{1}$",
regex_pattern=r"^\d{4}-?[H]\d{1}$",
arrow_pattern="YYYYMM",
timeframe="month",
ssb_dates={
Expand Down Expand Up @@ -413,6 +419,9 @@ def _extract_period_strings(dataset_name_sections: list[str]) -> list[str]:
>>> DaplaDatasetPathInfo._extract_period_strings(['p1990Q1', 'kommune', 'v1'])
['1990Q1']
>>> DaplaDatasetPathInfo._extract_period_strings(['p1990-Q1', 'kommune', 'v1'])
['1990-Q1']
>>> DaplaDatasetPathInfo._extract_period_strings(['varehandel','v1'])
[]
"""
Expand Down Expand Up @@ -586,6 +595,9 @@ def dataset_state(
>>> DaplaDatasetPathInfo('klargjorte_data/person_data_v1.parquet').dataset_state
<DataSetState.PROCESSED_DATA: 'PROCESSED_DATA'>
>>> DaplaDatasetPathInfo('klargjorte-data/person_data_v1.parquet').dataset_state
<DataSetState.PROCESSED_DATA: 'PROCESSED_DATA'>
>>> DaplaDatasetPathInfo('utdata/min_statistikk/person_data_v1.parquet').dataset_state
<DataSetState.OUTPUT_DATA: 'OUTPUT_DATA'>
Expand Down
30 changes: 30 additions & 0 deletions tests/datasets/test_dapla_dataset_path_info.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,31 +69,61 @@ class DatasetPathTestCase:
expected_contains_data_from=datetime.date(2022, 1, 1),
expected_contains_data_until=datetime.date(2022, 6, 30),
),
DatasetPathTestCase(
path="personinntekt_p2022-H1_v1.parquet",
expected_contains_data_from=datetime.date(2022, 1, 1),
expected_contains_data_until=datetime.date(2022, 6, 30),
),
DatasetPathTestCase(
path="nybilreg_p2022T1_v1.parquet",
expected_contains_data_from=datetime.date(2022, 1, 1),
expected_contains_data_until=datetime.date(2022, 4, 30),
),
DatasetPathTestCase(
path="nybilreg_p2022-T1_v1.parquet",
expected_contains_data_from=datetime.date(2022, 1, 1),
expected_contains_data_until=datetime.date(2022, 4, 30),
),
DatasetPathTestCase(
path="varehandel_p2018Q1_p2018Q4_v1.parquet",
expected_contains_data_from=datetime.date(2018, 1, 1),
expected_contains_data_until=datetime.date(2018, 12, 31),
),
DatasetPathTestCase(
path="varehandel_p2018-Q1_p2018-Q4_v1.parquet",
expected_contains_data_from=datetime.date(2018, 1, 1),
expected_contains_data_until=datetime.date(2018, 12, 31),
),
DatasetPathTestCase(
path="pensjon_p2018Q1_v1.parquet",
expected_contains_data_from=datetime.date(2018, 1, 1),
expected_contains_data_until=datetime.date(2018, 3, 31),
),
DatasetPathTestCase(
path="pensjon_p2018-Q1_v1.parquet",
expected_contains_data_from=datetime.date(2018, 1, 1),
expected_contains_data_until=datetime.date(2018, 3, 31),
),
DatasetPathTestCase(
path="skipsanloep_p2021B2_v1.parquet",
expected_contains_data_from=datetime.date(2021, 3, 1),
expected_contains_data_until=datetime.date(2021, 4, 30),
),
DatasetPathTestCase(
path="skipsanloep_p2021-B2_v1.parquet",
expected_contains_data_from=datetime.date(2021, 3, 1),
expected_contains_data_until=datetime.date(2021, 4, 30),
),
DatasetPathTestCase(
path="skipsanloep_p2022B1_v1.parquet",
expected_contains_data_from=datetime.date(2022, 1, 1),
expected_contains_data_until=datetime.date(2022, 2, 28),
),
DatasetPathTestCase(
path="skipsanloep_p2022-B1_v1.parquet",
expected_contains_data_from=datetime.date(2022, 1, 1),
expected_contains_data_until=datetime.date(2022, 2, 28),
),
]


Expand Down

0 comments on commit 83be553

Please sign in to comment.