Skip to content

Commit

Permalink
Add config validation
Browse files Browse the repository at this point in the history
  • Loading branch information
= committed Feb 28, 2024
1 parent 3d9f1db commit f00b71f
Show file tree
Hide file tree
Showing 4 changed files with 38 additions and 2 deletions.
20 changes: 20 additions & 0 deletions nad_ch/application/data_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,26 @@ def read_column_map(self) -> dict[any]:
)
return column_map_config

def validate_column_map(self):
    """Check that no input column name is listed more than once in the map.

    Input names found under ``self.column_map["data_column_mapping"]`` are
    compared case-insensitively (lowercased with ``str.lower``), so
    ``TOTPOP`` and ``totPop`` count as the same input.

    Returns:
        None when every lowercased input name occurs exactly once.

    Raises:
        ValueError: If any lowercased input name occurs more than once. The
            message names each group of destination fields that share an
            input, joined by " & ", with groups separated by ", ".
    """
    column_map = self.column_map["data_column_mapping"]

    # Reverse index: lowercased input name -> destination fields that list it.
    fields_by_input = {}
    for nad_field, input_names in column_map.items():
        # NOTE(review): a destination key with nothing listed under it
        # presumably loads as None; treat that as "no inputs" rather than
        # failing with a TypeError -- confirm against read_column_map.
        for input_name in input_names or ():
            fields_by_input.setdefault(input_name.lower(), []).append(nad_field)

    shared = [fields for fields in fields_by_input.values() if len(fields) > 1]
    if shared:
        duplicate_nad_fields = ", ".join(" & ".join(fields) for fields in shared)
        # ValueError subclasses Exception, so callers that catch Exception
        # keep working while new callers can catch something narrower.
        raise ValueError(
            f"Duplicate inputs found for destination fields: {duplicate_nad_fields}"
        )

def rename_columns(self, gdf: GeoDataFrame) -> GeoDataFrame:
column_map = self.column_map["data_column_mapping"]
original_names = {col.lower(): col for col in gdf.columns}
Expand Down
2 changes: 2 additions & 0 deletions nad_ch/application/nad_column_maps/testprovider1.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ data_column_mapping:
- AREASQMETER
COL_2:
- TRACT
- Pacific
COL_20:
- Shape_Length
COL_21:
Expand All @@ -39,6 +40,7 @@ data_column_mapping:
- TOTPOP
COL_6:
- POPDENS
- totPop
COL_7:
- RACEBASE
COL_8:
Expand Down
14 changes: 14 additions & 0 deletions tests/application/test_data_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
)
import pickle
from pandas.testing import assert_frame_equal
import pytest

TEST_DATA_DIR = "tests/test_data"

Expand Down Expand Up @@ -36,6 +37,19 @@ def test_read_column_map():
)


def test_validate_column_map():
    """validate_column_map rejects duplicated inputs and accepts a clean map."""
    # Build the reader outside the raises-context so that only the
    # validation call itself can satisfy the expected exception; a failure
    # while constructing the reader then surfaces as its own error.
    reader = DataReader("testprovider1")
    with pytest.raises(Exception) as exc:
        reader.validate_column_map()
    # The exact message pins which destination fields are reported and in
    # what order.
    assert (
        str(exc.value)
        == "Duplicate inputs found for destination fields: COL_13 & COL_2, COL_5 & COL_6"
    )

    # The second provider's map is expected to validate without raising.
    reader = DataReader("testprovider2")
    reader.validate_column_map()


def test_read_file_in_batches_shape():
file_path = os.path.join(
TEST_DATA_DIR, "shapefiles/usa-major-cities/usa-major-cities.shp"
Expand Down
4 changes: 2 additions & 2 deletions tests/test_data/config_baselines.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,14 +96,14 @@
"COL_17": ["LASTEDITOR"],
"COL_18": ["AGEMAJOR"],
"COL_19": ["AREASQMETER"],
"COL_2": ["TRACT"],
"COL_2": ["TRACT", "Pacific"],
"COL_20": ["Shape_Length"],
"COL_21": ["Shape_Area"],
"COL_22": ["geometry"],
"COL_3": ["STFID"],
"COL_4": ["BLOCK"],
"COL_5": ["TOTPOP"],
"COL_6": ["POPDENS"],
"COL_6": ["POPDENS", "totPop"],
"COL_7": ["RACEBASE"],
"COL_8": ["WHITE"],
"COL_9": ["BLACK"],
Expand Down

0 comments on commit f00b71f

Please sign in to comment.