diff --git a/.gitignore b/.gitignore
index e20844c..e7cc0d4 100644
--- a/.gitignore
+++ b/.gitignore
@@ -175,4 +175,7 @@ control/
node_modules/
# Local notes
-.notes
\ No newline at end of file
+.notes
+
+# Mac Desktop Services Store
+*.DS_Store
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
new file mode 100644
index 0000000..8539279
--- /dev/null
+++ b/.pre-commit-config.yaml
@@ -0,0 +1,35 @@
+# Configuration for the hooks run by the `pre-commit` framework.
+# Install the hooks locally with `pre-commit install`.
+
+exclude: '^$'
+
+fail_fast: false
+repos:
+  - repo: https://github.com/pre-commit/pre-commit-hooks
+    rev: v3.4.0
+    hooks:
+      - id: trailing-whitespace
+        exclude: '\.(snap|json)$'  # regex: skip files ending in .snap or .json
+      - id: end-of-file-fixer
+      - id: check-ast
+      - id: debug-statements
+
+  - repo: https://github.com/python/black
+    rev: 20.8b1
+    hooks:
+      - id: black
+        language_version: python
+        additional_dependencies: ['click==8.0.4']
+
+  - repo: https://github.com/pre-commit/mirrors-pylint
+    rev: v2.7.2
+    hooks:
+      - id: pylint
+        language: python
+        args: [
+          '--disable=R,wrong-import-position,fixme',
+        ]
+  - repo: https://github.com/pre-commit/mirrors-mypy
+    rev: v0.961
+    hooks:
+      - id: mypy
diff --git a/nad_ch/application/data_reader.py b/nad_ch/application/data_reader.py
new file mode 100644
index 0000000..434ff7e
--- /dev/null
+++ b/nad_ch/application/data_reader.py
@@ -0,0 +1,83 @@
+from geopandas import GeoDataFrame, read_file
+import fiona
+from typing import Any, Iterator, Optional
+import yaml
+import os
+
+
+class DataReader(object):
+    """Read geospatial files in batches, renaming columns to NAD field names.
+
+    The rename rules come from a YAML column map: the provider-specific file
+    named after ``config_name`` when it exists, otherwise the default map.
+    """
+
+    def __init__(self, config_name: Optional[str] = None) -> None:
+        self.config_name = config_name
+        self.default_config_path = "nad_ch/application/nad_column_maps/default.yaml"
+        self.column_map = self.read_column_map()
+
+    def read_column_map(self) -> dict[str, Any]:
+        """Load the column map, falling back to the default configuration.
+
+        ``data_required_fields`` always comes from the default file, even when
+        a provider-specific map supplies ``data_column_mapping``.
+        """
+        custom_config_path = (
+            f"nad_ch/application/nad_column_maps/{self.config_name}.yaml"
+        )
+        with open(self.default_config_path, "r") as file:
+            default_config = yaml.safe_load(file)
+        if self.config_name is None or not os.path.exists(custom_config_path):
+            column_map_config = default_config
+        else:
+            with open(custom_config_path, "r") as file:
+                column_map_config = yaml.safe_load(file)
+            column_map_config["data_required_fields"] = default_config.get(
+                "data_required_fields"
+            )
+        return column_map_config
+
+    def rename_columns(self, gdf: GeoDataFrame) -> GeoDataFrame:
+        """Rename matched columns to their NAD names and drop all others.
+
+        Matching is case-insensitive. A column already using the NAD name is
+        preferred; otherwise the first configured alias found in ``gdf`` wins.
+        """
+        column_map = self.column_map["data_column_mapping"]
+        original_names = {col.lower(): col for col in gdf.columns}
+        valid_renames = {}
+        for nad_column, fields_to_check in column_map.items():
+            # A YAML key with no aliases loads as None rather than an empty list.
+            for candidate in [nad_column, *(fields_to_check or [])]:
+                orig_matched_name = original_names.get(candidate.lower())
+                if orig_matched_name:
+                    valid_renames[orig_matched_name] = nad_column
+                    break
+        gdf = gdf.rename(columns=valid_renames)
+        return gdf[list(valid_renames.values())]
+
+    def read_file_in_batches(
+        self, path: str, table_name: Optional[str] = None, batch_size: int = 100000
+    ) -> Iterator[GeoDataFrame]:
+        """Yield the file as renamed GeoDataFrames of up to ``batch_size`` rows.
+
+        ``table_name`` selects the layer to read; when it is omitted no
+        ``layer`` argument is passed to ``read_file``. Because this is a
+        generator, the missing-table error is raised on first iteration, not
+        when the method is called.
+        """
+        # TODO: Modify to return a joined table; for cases where 1 or more tables
+        # are needed to get all fields from source file.
+        layers = fiona.listlayers(path)
+        if table_name and table_name not in layers:
+            raise ValueError(f"Table name {table_name} does not exist")
+        # Only pass ``layer`` when requested so the default read is unchanged.
+        layer_kwargs = {"layer": table_name} if table_name else {}
+        i = 0
+        while True:
+            gdf = read_file(path, rows=slice(i, i + batch_size), **layer_kwargs)
+            if gdf.shape[0] == 0:
+                break
+            gdf = self.rename_columns(gdf)
+            yield gdf
+            i += batch_size
diff --git a/nad_ch/application/nad_column_maps/default.yaml b/nad_ch/application/nad_column_maps/default.yaml
new file mode 100644
index 0000000..a4a6776
--- /dev/null
+++ b/nad_ch/application/nad_column_maps/default.yaml
@@ -0,0 +1,119 @@
+data_required_fields:
+ - Add_Number
+ - AddNo_Full
+ - St_Name
+ - StNam_Full
+ - County
+ - Inc_Muni
+ - Post_City
+ - State
+ - UUID
+ - AddAuth
+ - Longitude
+ - Latitude
+ - NatGrid
+ - Placement
+ - AddrPoint
+ - DateUpdate
+ - NAD_Source
+ - DataSet_ID
+data_column_mapping:
+ AddNum_Pre:
+ - ANUMBERPRE
+ Add_Number:
+ - ANUMBER
+ AddNum_Suf:
+ - ANUMBERSUF
+ AddNo_Full:
+ - ADR_NUM_COMP
+ St_PreMod:
+ - ST_PRE_MOD
+ St_PreDir:
+ - ST_PRE_DIR
+ St_PreTyp:
+ - ST_PRE_TYP
+ St_PreSep:
+ - ST_PRE_SEP
+ St_Name:
+ St_PosTyp:
+ - ST_POS_TYP
+ St_PosDir:
+ - ST_POS_DIR
+ St_PosMod:
+ - ST_POS_MOD
+ StNam_Full:
+ - ST_FULNAM
+ Building:
+ Floor:
+ Unit:
+ Room:
+ Seat:
+ Addtl_Loc:
+ SubAddress:
+ LandmkName:
+ - LANDMARK
+ County:
+ - CO_NAME
+ Inc_Muni:
+ Post_City:
+ - Post_Comm
+ - POSTCOMM
+ Census_Plc:
+ Uninc_Comm:
+ Nbrhd_Comm:
+ NatAmArea:
+ NatAmSub:
+ Urbnztn_PR:
+ PlaceOther:
+ State:
+ Zip_Code:
+ - Post_Code
+ - ZIP
+ Plus_4:
+ - Post_Code4
+ - ZIP4
+ UUID:
+ - GlobalID
+ AddAuth:
+ - DiscrpAgID
+ - AAUTHORITY
+ AddrRefSys:
+ Longitude:
+ - Long
+ - LONGITUDE
+ Latitude:
+ - Lat
+ - LATITUDE
+ NatGrid:
+ - USNG_CODE
+ Elevation:
+ - Elev
+ Placement:
+ - PLACE_LOC
+ AddrPoint:
+ Related_ID:
+ RelateType:
+ ParcelSrc:
+ Parcel_ID:
+ - STATE_PIN
+ AddrClass:
+ Lifecycle:
+ - STATUS
+ Effective:
+ - EFF_DATE
+ Expire:
+ - RET_DATE
+ DateUpdate:
+ - EDIT_DATE
+ AnomStatus:
+ - VERROR_911
+ LocatnDesc:
+ - LOC_DESC
+ Addr_Type:
+ - Place_Type
+ PlaceNmTyp:
+ DeliverTyp:
+ NAD_Source:
+ DataSet_ID:
+ - Site_NGUID
+ - ADD_ID
diff --git a/nad_ch/application/nad_column_maps/testprovider1.yaml b/nad_ch/application/nad_column_maps/testprovider1.yaml
new file mode 100644
index 0000000..830ce1a
--- /dev/null
+++ b/nad_ch/application/nad_column_maps/testprovider1.yaml
@@ -0,0 +1,47 @@
+data_column_mapping:
+ COL_0:
+ - ID
+ COL_1:
+ - STCOFIPS
+ COL_10:
+ - HISPPOP
+ COL_11:
+ - AMERIND
+ COL_12:
+ - ASIAN
+ COL_13:
+ - PACIFIC
+ COL_14:
+ - RACE2UP
+ COL_15:
+ - OTHRACE
+ COL_16:
+ - LASTUPDATE
+ COL_17:
+ - LASTEDITOR
+ COL_18:
+ - AGEMAJOR
+ COL_19:
+ - AREASQMETER
+ COL_2:
+ - TRACT
+ COL_20:
+ - Shape_Length
+ COL_21:
+ - Shape_Area
+ COL_22:
+ - geometry
+ COL_3:
+ - STFID
+ COL_4:
+ - BLOCK
+ COL_5:
+ - TOTPOP
+ COL_6:
+ - POPDENS
+ COL_7:
+ - RACEBASE
+ COL_8:
+ - WHITE
+ COL_9:
+ - BLACK
diff --git a/nad_ch/application/nad_column_maps/testprovider2.yaml b/nad_ch/application/nad_column_maps/testprovider2.yaml
new file mode 100644
index 0000000..38bfc4c
--- /dev/null
+++ b/nad_ch/application/nad_column_maps/testprovider2.yaml
@@ -0,0 +1,11 @@
+data_column_mapping:
+ COL_0:
+ - NAME
+ COL_1:
+ - ST
+ COL_2:
+ - ZIP
+ COL_3:
+ - RuleID
+ COL_4:
+ - geometry
diff --git a/nad_ch/application/use_cases.py b/nad_ch/application/use_cases.py
index f2e150e..daf24eb 100644
--- a/nad_ch/application/use_cases.py
+++ b/nad_ch/application/use_cases.py
@@ -107,6 +107,9 @@ def validate_data_submission(ctx: ApplicationContext, filename: str):
ctx.logger.error("Data extration error")
return
+ # data_producer = submission.producer
+ # config_name = f"{data_producer.name}_{data_producer.id}"
+    # TODO: Incorporate the producer-specific column-map config (see config_name above).
report = ctx.task_queue.run_load_and_validate(
ctx.submissions, submission.id, download_result.extracted_dir
)
diff --git a/nad_ch/infrastructure/task_queue.py b/nad_ch/infrastructure/task_queue.py
index 8b93649..7bcf5fe 100644
--- a/nad_ch/infrastructure/task_queue.py
+++ b/nad_ch/infrastructure/task_queue.py
@@ -6,6 +6,8 @@
report_to_dict,
report_from_dict,
)
+from typing import Optional
+from nad_ch.application.data_reader import DataReader
from nad_ch.application.interfaces import TaskQueue
from nad_ch.application.validation import get_feature_count, get_feature_details
from nad_ch.config import QUEUE_BROKER_URL, QUEUE_BACKEND_URL
diff --git a/poetry.lock b/poetry.lock
index ddfebe5..6a59df2 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -1230,6 +1230,66 @@ files = [
{file = "pytz-2024.1.tar.gz", hash = "sha256:2a29735ea9c18baf14b448846bde5a48030ed267578472d8955cd0e7443a9812"},
]
+[[package]]
+name = "pyyaml"
+version = "6.0.1"
+description = "YAML parser and emitter for Python"
+optional = false
+python-versions = ">=3.6"
+files = [
+ {file = "PyYAML-6.0.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:d858aa552c999bc8a8d57426ed01e40bef403cd8ccdd0fc5f6f04a00414cac2a"},
+ {file = "PyYAML-6.0.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:fd66fc5d0da6d9815ba2cebeb4205f95818ff4b79c3ebe268e75d961704af52f"},
+ {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:69b023b2b4daa7548bcfbd4aa3da05b3a74b772db9e23b982788168117739938"},
+ {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:81e0b275a9ecc9c0c0c07b4b90ba548307583c125f54d5b6946cfee6360c733d"},
+ {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ba336e390cd8e4d1739f42dfe9bb83a3cc2e80f567d8805e11b46f4a943f5515"},
+ {file = "PyYAML-6.0.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:326c013efe8048858a6d312ddd31d56e468118ad4cdeda36c719bf5bb6192290"},
+ {file = "PyYAML-6.0.1-cp310-cp310-win32.whl", hash = "sha256:bd4af7373a854424dabd882decdc5579653d7868b8fb26dc7d0e99f823aa5924"},
+ {file = "PyYAML-6.0.1-cp310-cp310-win_amd64.whl", hash = "sha256:fd1592b3fdf65fff2ad0004b5e363300ef59ced41c2e6b3a99d4089fa8c5435d"},
+ {file = "PyYAML-6.0.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:6965a7bc3cf88e5a1c3bd2e0b5c22f8d677dc88a455344035f03399034eb3007"},
+ {file = "PyYAML-6.0.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:f003ed9ad21d6a4713f0a9b5a7a0a79e08dd0f221aff4525a2be4c346ee60aab"},
+ {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:42f8152b8dbc4fe7d96729ec2b99c7097d656dc1213a3229ca5383f973a5ed6d"},
+ {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:062582fca9fabdd2c8b54a3ef1c978d786e0f6b3a1510e0ac93ef59e0ddae2bc"},
+ {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d2b04aac4d386b172d5b9692e2d2da8de7bfb6c387fa4f801fbf6fb2e6ba4673"},
+ {file = "PyYAML-6.0.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:e7d73685e87afe9f3b36c799222440d6cf362062f78be1013661b00c5c6f678b"},
+ {file = "PyYAML-6.0.1-cp311-cp311-win32.whl", hash = "sha256:1635fd110e8d85d55237ab316b5b011de701ea0f29d07611174a1b42f1444741"},
+ {file = "PyYAML-6.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:bf07ee2fef7014951eeb99f56f39c9bb4af143d8aa3c21b1677805985307da34"},
+ {file = "PyYAML-6.0.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:855fb52b0dc35af121542a76b9a84f8d1cd886ea97c84703eaa6d88e37a2ad28"},
+ {file = "PyYAML-6.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:40df9b996c2b73138957fe23a16a4f0ba614f4c0efce1e9406a184b6d07fa3a9"},
+ {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a08c6f0fe150303c1c6b71ebcd7213c2858041a7e01975da3a99aed1e7a378ef"},
+ {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c22bec3fbe2524cde73d7ada88f6566758a8f7227bfbf93a408a9d86bcc12a0"},
+ {file = "PyYAML-6.0.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8d4e9c88387b0f5c7d5f281e55304de64cf7f9c0021a3525bd3b1c542da3b0e4"},
+ {file = "PyYAML-6.0.1-cp312-cp312-win32.whl", hash = "sha256:d483d2cdf104e7c9fa60c544d92981f12ad66a457afae824d146093b8c294c54"},
+ {file = "PyYAML-6.0.1-cp312-cp312-win_amd64.whl", hash = "sha256:0d3304d8c0adc42be59c5f8a4d9e3d7379e6955ad754aa9d6ab7a398b59dd1df"},
+ {file = "PyYAML-6.0.1-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:50550eb667afee136e9a77d6dc71ae76a44df8b3e51e41b77f6de2932bfe0f47"},
+ {file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1fe35611261b29bd1de0070f0b2f47cb6ff71fa6595c077e42bd0c419fa27b98"},
+ {file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:704219a11b772aea0d8ecd7058d0082713c3562b4e271b849ad7dc4a5c90c13c"},
+ {file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:afd7e57eddb1a54f0f1a974bc4391af8bcce0b444685d936840f125cf046d5bd"},
+ {file = "PyYAML-6.0.1-cp36-cp36m-win32.whl", hash = "sha256:fca0e3a251908a499833aa292323f32437106001d436eca0e6e7833256674585"},
+ {file = "PyYAML-6.0.1-cp36-cp36m-win_amd64.whl", hash = "sha256:f22ac1c3cac4dbc50079e965eba2c1058622631e526bd9afd45fedd49ba781fa"},
+ {file = "PyYAML-6.0.1-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:b1275ad35a5d18c62a7220633c913e1b42d44b46ee12554e5fd39c70a243d6a3"},
+ {file = "PyYAML-6.0.1-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:18aeb1bf9a78867dc38b259769503436b7c72f7a1f1f4c93ff9a17de54319b27"},
+ {file = "PyYAML-6.0.1-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:596106435fa6ad000c2991a98fa58eeb8656ef2325d7e158344fb33864ed87e3"},
+ {file = "PyYAML-6.0.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:baa90d3f661d43131ca170712d903e6295d1f7a0f595074f151c0aed377c9b9c"},
+ {file = "PyYAML-6.0.1-cp37-cp37m-win32.whl", hash = "sha256:9046c58c4395dff28dd494285c82ba00b546adfc7ef001486fbf0324bc174fba"},
+ {file = "PyYAML-6.0.1-cp37-cp37m-win_amd64.whl", hash = "sha256:4fb147e7a67ef577a588a0e2c17b6db51dda102c71de36f8549b6816a96e1867"},
+ {file = "PyYAML-6.0.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:1d4c7e777c441b20e32f52bd377e0c409713e8bb1386e1099c2415f26e479595"},
+ {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a0cd17c15d3bb3fa06978b4e8958dcdc6e0174ccea823003a106c7d4d7899ac5"},
+ {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:28c119d996beec18c05208a8bd78cbe4007878c6dd15091efb73a30e90539696"},
+ {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7e07cbde391ba96ab58e532ff4803f79c4129397514e1413a7dc761ccd755735"},
+ {file = "PyYAML-6.0.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:49a183be227561de579b4a36efbb21b3eab9651dd81b1858589f796549873dd6"},
+ {file = "PyYAML-6.0.1-cp38-cp38-win32.whl", hash = "sha256:184c5108a2aca3c5b3d3bf9395d50893a7ab82a38004c8f61c258d4428e80206"},
+ {file = "PyYAML-6.0.1-cp38-cp38-win_amd64.whl", hash = "sha256:1e2722cc9fbb45d9b87631ac70924c11d3a401b2d7f410cc0e3bbf249f2dca62"},
+ {file = "PyYAML-6.0.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:9eb6caa9a297fc2c2fb8862bc5370d0303ddba53ba97e71f08023b6cd73d16a8"},
+ {file = "PyYAML-6.0.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:c8098ddcc2a85b61647b2590f825f3db38891662cfc2fc776415143f599bb859"},
+ {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5773183b6446b2c99bb77e77595dd486303b4faab2b086e7b17bc6bef28865f6"},
+ {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b786eecbdf8499b9ca1d697215862083bd6d2a99965554781d0d8d1ad31e13a0"},
+ {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bc1bf2925a1ecd43da378f4db9e4f799775d6367bdb94671027b73b393a7c42c"},
+ {file = "PyYAML-6.0.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:04ac92ad1925b2cff1db0cfebffb6ffc43457495c9b3c39d3fcae417d7125dc5"},
+ {file = "PyYAML-6.0.1-cp39-cp39-win32.whl", hash = "sha256:faca3bdcf85b2fc05d06ff3fbc1f83e1391b3e724afa3feba7d13eeab355484c"},
+ {file = "PyYAML-6.0.1-cp39-cp39-win_amd64.whl", hash = "sha256:510c9deebc5c0225e8c96813043e62b680ba2f9c50a08d3724c7f28a747d1486"},
+ {file = "PyYAML-6.0.1.tar.gz", hash = "sha256:bfdf460b1736c775f2ba9f6a92bca30bc2095067b8a9d77876d1fad6cc3b4a43"},
+]
+
[[package]]
name = "redis"
version = "5.0.1"
@@ -1517,4 +1577,4 @@ watchdog = ["watchdog (>=2.3)"]
[metadata]
lock-version = "2.0"
python-versions = "^3.11"
-content-hash = "7d22081fc1a7de6c76d28410e0c410cb621bcf6032dd4240b38a25075fffd2b1"
+content-hash = "c7423c2683e9c34dafbd4025104fa624ca6f6e72cafdf4bfb4a0767ec7cc69ee"
diff --git a/pyproject.toml b/pyproject.toml
index 923e73a..e8b1a90 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -26,6 +26,7 @@ pandas = "^2.2.0"
geopandas = "^0.14.2"
pyarrow = "^15.0.0"
shapely = "^2.0.2"
+pyyaml = "^6.0.1"
[tool.poetry.group.dev.dependencies]
pytest = "^7.4.2"
diff --git a/tests/application/test_data_reader.py b/tests/application/test_data_reader.py
new file mode 100644
index 0000000..c27c49d
--- /dev/null
+++ b/tests/application/test_data_reader.py
@@ -0,0 +1,80 @@
+import os
+from nad_ch.application.data_reader import DataReader
+from tests.test_data.config_baselines import (
+    EXPECTED_DEFAULT_CONFIG,
+    TESTPROVIDER1_CONFIG,
+)
+import pickle
+from pandas.testing import assert_frame_equal
+
+TEST_DATA_DIR = "tests/test_data"
+
+
+def _check_batches(reader, file_path, batch_size, baseline_pattern):
+    """Compare every batch read from ``file_path`` with its pickled baseline.
+
+    ``baseline_pattern`` is a path relative to ``TEST_DATA_DIR`` containing a
+    ``{}`` placeholder for the zero-based batch index.
+    """
+    batch_count = 0
+    batches = reader.read_file_in_batches(path=file_path, batch_size=batch_size)
+    for i, gdf in enumerate(batches):
+        baseline_path = os.path.join(TEST_DATA_DIR, baseline_pattern.format(i))
+        with open(baseline_path, "rb") as f:
+            # Baselines are repository fixtures, so unpickling them is trusted.
+            gdf_baseline = pickle.load(f)
+        assert_frame_equal(gdf, gdf_baseline)
+        batch_count += 1
+    # Fail instead of passing vacuously when the reader yields no batches.
+    assert batch_count > 0
+
+
+def test_read_column_map():
+    """Default and provider-specific column maps load as expected."""
+    reader = DataReader()
+    assert (
+        reader.column_map["data_required_fields"]
+        == EXPECTED_DEFAULT_CONFIG["data_required_fields"]
+    )
+    assert (
+        reader.column_map["data_column_mapping"]
+        == EXPECTED_DEFAULT_CONFIG["data_column_mapping"]
+    )
+
+    reader = DataReader("testprovider1")
+    assert (
+        reader.column_map["data_required_fields"]
+        == EXPECTED_DEFAULT_CONFIG["data_required_fields"]
+    )
+    assert (
+        reader.column_map["data_column_mapping"]
+        != EXPECTED_DEFAULT_CONFIG["data_column_mapping"]
+    )
+    assert (
+        reader.column_map["data_column_mapping"]
+        == TESTPROVIDER1_CONFIG["data_column_mapping"]
+    )
+
+
+def test_read_file_in_batches_shape():
+    """Shapefile batches match the stored baselines."""
+    file_path = os.path.join(
+        TEST_DATA_DIR, "shapefiles/usa-major-cities/usa-major-cities.shp"
+    )
+    _check_batches(
+        DataReader("testprovider2"),
+        file_path,
+        50,
+        "shapefiles/baselines/usa-major-cities-gdf-{}.pkl",
+    )
+
+
+def test_read_file_in_batches_gdb():
+    """File geodatabase batches match the stored baselines."""
+    file_path = os.path.join(TEST_DATA_DIR, "geodatabases/Naperville.gdb")
+    _check_batches(
+        DataReader("testprovider1"),
+        file_path,
+        2000,
+        "geodatabases/baselines/naperville-gdf-{}.pkl",
+    )
diff --git a/tests/test_data/config_baselines.py b/tests/test_data/config_baselines.py
new file mode 100644
index 0000000..db003ec
--- /dev/null
+++ b/tests/test_data/config_baselines.py
@@ -0,0 +1,111 @@
+EXPECTED_DEFAULT_CONFIG = {
+ "data_required_fields": [
+ "Add_Number",
+ "AddNo_Full",
+ "St_Name",
+ "StNam_Full",
+ "County",
+ "Inc_Muni",
+ "Post_City",
+ "State",
+ "UUID",
+ "AddAuth",
+ "Longitude",
+ "Latitude",
+ "NatGrid",
+ "Placement",
+ "AddrPoint",
+ "DateUpdate",
+ "NAD_Source",
+ "DataSet_ID",
+ ],
+ "data_column_mapping": {
+ "AddNum_Pre": ["ANUMBERPRE"],
+ "Add_Number": ["ANUMBER"],
+ "AddNum_Suf": ["ANUMBERSUF"],
+ "AddNo_Full": ["ADR_NUM_COMP"],
+ "St_PreMod": ["ST_PRE_MOD"],
+ "St_PreDir": ["ST_PRE_DIR"],
+ "St_PreTyp": ["ST_PRE_TYP"],
+ "St_PreSep": ["ST_PRE_SEP"],
+ "St_Name": None,
+ "St_PosTyp": ["ST_POS_TYP"],
+ "St_PosDir": ["ST_POS_DIR"],
+ "St_PosMod": ["ST_POS_MOD"],
+ "StNam_Full": ["ST_FULNAM"],
+ "Building": None,
+ "Floor": None,
+ "Unit": None,
+ "Room": None,
+ "Seat": None,
+ "Addtl_Loc": None,
+ "SubAddress": None,
+ "LandmkName": ["LANDMARK"],
+ "County": ["CO_NAME"],
+ "Inc_Muni": None,
+ "Post_City": ["Post_Comm", "POSTCOMM"],
+ "Census_Plc": None,
+ "Uninc_Comm": None,
+ "Nbrhd_Comm": None,
+ "NatAmArea": None,
+ "NatAmSub": None,
+ "Urbnztn_PR": None,
+ "PlaceOther": None,
+ "State": None,
+ "Zip_Code": ["Post_Code", "ZIP"],
+ "Plus_4": ["Post_Code4", "ZIP4"],
+ "UUID": ["GlobalID"],
+ "AddAuth": ["DiscrpAgID", "AAUTHORITY"],
+ "AddrRefSys": None,
+ "Longitude": ["Long", "LONGITUDE"],
+ "Latitude": ["Lat", "LATITUDE"],
+ "NatGrid": ["USNG_CODE"],
+ "Elevation": ["Elev"],
+ "Placement": ["PLACE_LOC"],
+ "AddrPoint": None,
+ "Related_ID": None,
+ "RelateType": None,
+ "ParcelSrc": None,
+ "Parcel_ID": ["STATE_PIN"],
+ "AddrClass": None,
+ "Lifecycle": ["STATUS"],
+ "Effective": ["EFF_DATE"],
+ "Expire": ["RET_DATE"],
+ "DateUpdate": ["EDIT_DATE"],
+ "AnomStatus": ["VERROR_911"],
+ "LocatnDesc": ["LOC_DESC"],
+ "Addr_Type": ["Place_Type"],
+ "PlaceNmTyp": None,
+ "DeliverTyp": None,
+ "NAD_Source": None,
+ "DataSet_ID": ["Site_NGUID", "ADD_ID"],
+ },
+}
+
+TESTPROVIDER1_CONFIG = {
+ "data_column_mapping": {
+ "COL_0": ["ID"],
+ "COL_1": ["STCOFIPS"],
+ "COL_10": ["HISPPOP"],
+ "COL_11": ["AMERIND"],
+ "COL_12": ["ASIAN"],
+ "COL_13": ["PACIFIC"],
+ "COL_14": ["RACE2UP"],
+ "COL_15": ["OTHRACE"],
+ "COL_16": ["LASTUPDATE"],
+ "COL_17": ["LASTEDITOR"],
+ "COL_18": ["AGEMAJOR"],
+ "COL_19": ["AREASQMETER"],
+ "COL_2": ["TRACT"],
+ "COL_20": ["Shape_Length"],
+ "COL_21": ["Shape_Area"],
+ "COL_22": ["geometry"],
+ "COL_3": ["STFID"],
+ "COL_4": ["BLOCK"],
+ "COL_5": ["TOTPOP"],
+ "COL_6": ["POPDENS"],
+ "COL_7": ["RACEBASE"],
+ "COL_8": ["WHITE"],
+ "COL_9": ["BLACK"],
+ }
+}
diff --git a/tests/test_data/geodatabases/Naperville.gdb/a00000001.TablesByName.atx b/tests/test_data/geodatabases/Naperville.gdb/a00000001.TablesByName.atx
new file mode 100644
index 0000000..5dc4379
Binary files /dev/null and b/tests/test_data/geodatabases/Naperville.gdb/a00000001.TablesByName.atx differ
diff --git a/tests/test_data/geodatabases/Naperville.gdb/a00000001.freelist b/tests/test_data/geodatabases/Naperville.gdb/a00000001.freelist
new file mode 100644
index 0000000..6a5ab8e
Binary files /dev/null and b/tests/test_data/geodatabases/Naperville.gdb/a00000001.freelist differ
diff --git a/tests/test_data/geodatabases/Naperville.gdb/a00000001.gdbindexes b/tests/test_data/geodatabases/Naperville.gdb/a00000001.gdbindexes
new file mode 100644
index 0000000..b02aa75
Binary files /dev/null and b/tests/test_data/geodatabases/Naperville.gdb/a00000001.gdbindexes differ
diff --git a/tests/test_data/geodatabases/Naperville.gdb/a00000001.gdbtable b/tests/test_data/geodatabases/Naperville.gdb/a00000001.gdbtable
new file mode 100644
index 0000000..141a2a7
Binary files /dev/null and b/tests/test_data/geodatabases/Naperville.gdb/a00000001.gdbtable differ
diff --git a/tests/test_data/geodatabases/Naperville.gdb/a00000001.gdbtablx b/tests/test_data/geodatabases/Naperville.gdb/a00000001.gdbtablx
new file mode 100644
index 0000000..b3c26d8
Binary files /dev/null and b/tests/test_data/geodatabases/Naperville.gdb/a00000001.gdbtablx differ
diff --git a/tests/test_data/geodatabases/Naperville.gdb/a00000002.gdbtable b/tests/test_data/geodatabases/Naperville.gdb/a00000002.gdbtable
new file mode 100644
index 0000000..a0af90e
Binary files /dev/null and b/tests/test_data/geodatabases/Naperville.gdb/a00000002.gdbtable differ
diff --git a/tests/test_data/geodatabases/Naperville.gdb/a00000002.gdbtablx b/tests/test_data/geodatabases/Naperville.gdb/a00000002.gdbtablx
new file mode 100644
index 0000000..7c12c56
Binary files /dev/null and b/tests/test_data/geodatabases/Naperville.gdb/a00000002.gdbtablx differ
diff --git a/tests/test_data/geodatabases/Naperville.gdb/a00000003.gdbindexes b/tests/test_data/geodatabases/Naperville.gdb/a00000003.gdbindexes
new file mode 100644
index 0000000..58df68d
Binary files /dev/null and b/tests/test_data/geodatabases/Naperville.gdb/a00000003.gdbindexes differ
diff --git a/tests/test_data/geodatabases/Naperville.gdb/a00000003.gdbtable b/tests/test_data/geodatabases/Naperville.gdb/a00000003.gdbtable
new file mode 100644
index 0000000..4f97dd3
Binary files /dev/null and b/tests/test_data/geodatabases/Naperville.gdb/a00000003.gdbtable differ
diff --git a/tests/test_data/geodatabases/Naperville.gdb/a00000003.gdbtablx b/tests/test_data/geodatabases/Naperville.gdb/a00000003.gdbtablx
new file mode 100644
index 0000000..c8868b6
Binary files /dev/null and b/tests/test_data/geodatabases/Naperville.gdb/a00000003.gdbtablx differ
diff --git a/tests/test_data/geodatabases/Naperville.gdb/a00000004.CatItemsByPhysicalName.atx b/tests/test_data/geodatabases/Naperville.gdb/a00000004.CatItemsByPhysicalName.atx
new file mode 100644
index 0000000..9ebb2b2
Binary files /dev/null and b/tests/test_data/geodatabases/Naperville.gdb/a00000004.CatItemsByPhysicalName.atx differ
diff --git a/tests/test_data/geodatabases/Naperville.gdb/a00000004.CatItemsByType.atx b/tests/test_data/geodatabases/Naperville.gdb/a00000004.CatItemsByType.atx
new file mode 100644
index 0000000..3c4b1db
Binary files /dev/null and b/tests/test_data/geodatabases/Naperville.gdb/a00000004.CatItemsByType.atx differ
diff --git a/tests/test_data/geodatabases/Naperville.gdb/a00000004.FDO_UUID.atx b/tests/test_data/geodatabases/Naperville.gdb/a00000004.FDO_UUID.atx
new file mode 100644
index 0000000..b124269
Binary files /dev/null and b/tests/test_data/geodatabases/Naperville.gdb/a00000004.FDO_UUID.atx differ
diff --git a/tests/test_data/geodatabases/Naperville.gdb/a00000004.freelist b/tests/test_data/geodatabases/Naperville.gdb/a00000004.freelist
new file mode 100644
index 0000000..7256c27
Binary files /dev/null and b/tests/test_data/geodatabases/Naperville.gdb/a00000004.freelist differ
diff --git a/tests/test_data/geodatabases/Naperville.gdb/a00000004.gdbindexes b/tests/test_data/geodatabases/Naperville.gdb/a00000004.gdbindexes
new file mode 100644
index 0000000..a4f334d
Binary files /dev/null and b/tests/test_data/geodatabases/Naperville.gdb/a00000004.gdbindexes differ
diff --git a/tests/test_data/geodatabases/Naperville.gdb/a00000004.gdbtable b/tests/test_data/geodatabases/Naperville.gdb/a00000004.gdbtable
new file mode 100644
index 0000000..383c576
Binary files /dev/null and b/tests/test_data/geodatabases/Naperville.gdb/a00000004.gdbtable differ
diff --git a/tests/test_data/geodatabases/Naperville.gdb/a00000004.gdbtablx b/tests/test_data/geodatabases/Naperville.gdb/a00000004.gdbtablx
new file mode 100644
index 0000000..26641b3
Binary files /dev/null and b/tests/test_data/geodatabases/Naperville.gdb/a00000004.gdbtablx differ
diff --git a/tests/test_data/geodatabases/Naperville.gdb/a00000004.spx b/tests/test_data/geodatabases/Naperville.gdb/a00000004.spx
new file mode 100644
index 0000000..e4498ac
Binary files /dev/null and b/tests/test_data/geodatabases/Naperville.gdb/a00000004.spx differ
diff --git a/tests/test_data/geodatabases/Naperville.gdb/a00000005.CatItemTypesByName.atx b/tests/test_data/geodatabases/Naperville.gdb/a00000005.CatItemTypesByName.atx
new file mode 100644
index 0000000..7624c03
Binary files /dev/null and b/tests/test_data/geodatabases/Naperville.gdb/a00000005.CatItemTypesByName.atx differ
diff --git a/tests/test_data/geodatabases/Naperville.gdb/a00000005.CatItemTypesByParentTypeID.atx b/tests/test_data/geodatabases/Naperville.gdb/a00000005.CatItemTypesByParentTypeID.atx
new file mode 100644
index 0000000..39aabd1
Binary files /dev/null and b/tests/test_data/geodatabases/Naperville.gdb/a00000005.CatItemTypesByParentTypeID.atx differ
diff --git a/tests/test_data/geodatabases/Naperville.gdb/a00000005.CatItemTypesByUUID.atx b/tests/test_data/geodatabases/Naperville.gdb/a00000005.CatItemTypesByUUID.atx
new file mode 100644
index 0000000..08c32a8
Binary files /dev/null and b/tests/test_data/geodatabases/Naperville.gdb/a00000005.CatItemTypesByUUID.atx differ
diff --git a/tests/test_data/geodatabases/Naperville.gdb/a00000005.gdbindexes b/tests/test_data/geodatabases/Naperville.gdb/a00000005.gdbindexes
new file mode 100644
index 0000000..bc88709
Binary files /dev/null and b/tests/test_data/geodatabases/Naperville.gdb/a00000005.gdbindexes differ
diff --git a/tests/test_data/geodatabases/Naperville.gdb/a00000005.gdbtable b/tests/test_data/geodatabases/Naperville.gdb/a00000005.gdbtable
new file mode 100644
index 0000000..be05104
Binary files /dev/null and b/tests/test_data/geodatabases/Naperville.gdb/a00000005.gdbtable differ
diff --git a/tests/test_data/geodatabases/Naperville.gdb/a00000005.gdbtablx b/tests/test_data/geodatabases/Naperville.gdb/a00000005.gdbtablx
new file mode 100644
index 0000000..39bb502
Binary files /dev/null and b/tests/test_data/geodatabases/Naperville.gdb/a00000005.gdbtablx differ
diff --git a/tests/test_data/geodatabases/Naperville.gdb/a00000006.CatRelsByDestinationID.atx b/tests/test_data/geodatabases/Naperville.gdb/a00000006.CatRelsByDestinationID.atx
new file mode 100644
index 0000000..44e0b2d
Binary files /dev/null and b/tests/test_data/geodatabases/Naperville.gdb/a00000006.CatRelsByDestinationID.atx differ
diff --git a/tests/test_data/geodatabases/Naperville.gdb/a00000006.CatRelsByOriginID.atx b/tests/test_data/geodatabases/Naperville.gdb/a00000006.CatRelsByOriginID.atx
new file mode 100644
index 0000000..2d24657
Binary files /dev/null and b/tests/test_data/geodatabases/Naperville.gdb/a00000006.CatRelsByOriginID.atx differ
diff --git a/tests/test_data/geodatabases/Naperville.gdb/a00000006.CatRelsByType.atx b/tests/test_data/geodatabases/Naperville.gdb/a00000006.CatRelsByType.atx
new file mode 100644
index 0000000..650c255
Binary files /dev/null and b/tests/test_data/geodatabases/Naperville.gdb/a00000006.CatRelsByType.atx differ
diff --git a/tests/test_data/geodatabases/Naperville.gdb/a00000006.FDO_UUID.atx b/tests/test_data/geodatabases/Naperville.gdb/a00000006.FDO_UUID.atx
new file mode 100644
index 0000000..bef6804
Binary files /dev/null and b/tests/test_data/geodatabases/Naperville.gdb/a00000006.FDO_UUID.atx differ
diff --git a/tests/test_data/geodatabases/Naperville.gdb/a00000006.freelist b/tests/test_data/geodatabases/Naperville.gdb/a00000006.freelist
new file mode 100644
index 0000000..81a4eeb
Binary files /dev/null and b/tests/test_data/geodatabases/Naperville.gdb/a00000006.freelist differ
diff --git a/tests/test_data/geodatabases/Naperville.gdb/a00000006.gdbindexes b/tests/test_data/geodatabases/Naperville.gdb/a00000006.gdbindexes
new file mode 100644
index 0000000..c608a88
Binary files /dev/null and b/tests/test_data/geodatabases/Naperville.gdb/a00000006.gdbindexes differ
diff --git a/tests/test_data/geodatabases/Naperville.gdb/a00000006.gdbtable b/tests/test_data/geodatabases/Naperville.gdb/a00000006.gdbtable
new file mode 100644
index 0000000..662a1d7
Binary files /dev/null and b/tests/test_data/geodatabases/Naperville.gdb/a00000006.gdbtable differ
diff --git a/tests/test_data/geodatabases/Naperville.gdb/a00000006.gdbtablx b/tests/test_data/geodatabases/Naperville.gdb/a00000006.gdbtablx
new file mode 100644
index 0000000..9ea17c0
Binary files /dev/null and b/tests/test_data/geodatabases/Naperville.gdb/a00000006.gdbtablx differ
diff --git a/tests/test_data/geodatabases/Naperville.gdb/a00000007.CatRelTypesByBackwardLabel.atx b/tests/test_data/geodatabases/Naperville.gdb/a00000007.CatRelTypesByBackwardLabel.atx
new file mode 100644
index 0000000..7b44805
Binary files /dev/null and b/tests/test_data/geodatabases/Naperville.gdb/a00000007.CatRelTypesByBackwardLabel.atx differ
diff --git a/tests/test_data/geodatabases/Naperville.gdb/a00000007.CatRelTypesByDestItemTypeID.atx b/tests/test_data/geodatabases/Naperville.gdb/a00000007.CatRelTypesByDestItemTypeID.atx
new file mode 100644
index 0000000..3f5da5b
Binary files /dev/null and b/tests/test_data/geodatabases/Naperville.gdb/a00000007.CatRelTypesByDestItemTypeID.atx differ
diff --git a/tests/test_data/geodatabases/Naperville.gdb/a00000007.CatRelTypesByForwardLabel.atx b/tests/test_data/geodatabases/Naperville.gdb/a00000007.CatRelTypesByForwardLabel.atx
new file mode 100644
index 0000000..f7311a3
Binary files /dev/null and b/tests/test_data/geodatabases/Naperville.gdb/a00000007.CatRelTypesByForwardLabel.atx differ
diff --git a/tests/test_data/geodatabases/Naperville.gdb/a00000007.CatRelTypesByName.atx b/tests/test_data/geodatabases/Naperville.gdb/a00000007.CatRelTypesByName.atx
new file mode 100644
index 0000000..d9d5c8b
Binary files /dev/null and b/tests/test_data/geodatabases/Naperville.gdb/a00000007.CatRelTypesByName.atx differ
diff --git a/tests/test_data/geodatabases/Naperville.gdb/a00000007.CatRelTypesByOriginItemTypeID.atx b/tests/test_data/geodatabases/Naperville.gdb/a00000007.CatRelTypesByOriginItemTypeID.atx
new file mode 100644
index 0000000..46066e6
Binary files /dev/null and b/tests/test_data/geodatabases/Naperville.gdb/a00000007.CatRelTypesByOriginItemTypeID.atx differ
diff --git a/tests/test_data/geodatabases/Naperville.gdb/a00000007.CatRelTypesByUUID.atx b/tests/test_data/geodatabases/Naperville.gdb/a00000007.CatRelTypesByUUID.atx
new file mode 100644
index 0000000..67b9370
Binary files /dev/null and b/tests/test_data/geodatabases/Naperville.gdb/a00000007.CatRelTypesByUUID.atx differ
diff --git a/tests/test_data/geodatabases/Naperville.gdb/a00000007.gdbindexes b/tests/test_data/geodatabases/Naperville.gdb/a00000007.gdbindexes
new file mode 100644
index 0000000..2a98c93
Binary files /dev/null and b/tests/test_data/geodatabases/Naperville.gdb/a00000007.gdbindexes differ
diff --git a/tests/test_data/geodatabases/Naperville.gdb/a00000007.gdbtable b/tests/test_data/geodatabases/Naperville.gdb/a00000007.gdbtable
new file mode 100644
index 0000000..bf0447a
Binary files /dev/null and b/tests/test_data/geodatabases/Naperville.gdb/a00000007.gdbtable differ
diff --git a/tests/test_data/geodatabases/Naperville.gdb/a00000007.gdbtablx b/tests/test_data/geodatabases/Naperville.gdb/a00000007.gdbtablx
new file mode 100644
index 0000000..9c84c42
Binary files /dev/null and b/tests/test_data/geodatabases/Naperville.gdb/a00000007.gdbtablx differ
diff --git a/tests/test_data/geodatabases/Naperville.gdb/a00000009.gdbindexes b/tests/test_data/geodatabases/Naperville.gdb/a00000009.gdbindexes
new file mode 100644
index 0000000..c9d0caa
Binary files /dev/null and b/tests/test_data/geodatabases/Naperville.gdb/a00000009.gdbindexes differ
diff --git a/tests/test_data/geodatabases/Naperville.gdb/a00000009.gdbtable b/tests/test_data/geodatabases/Naperville.gdb/a00000009.gdbtable
new file mode 100644
index 0000000..3d67b11
Binary files /dev/null and b/tests/test_data/geodatabases/Naperville.gdb/a00000009.gdbtable differ
diff --git a/tests/test_data/geodatabases/Naperville.gdb/a00000009.gdbtablx b/tests/test_data/geodatabases/Naperville.gdb/a00000009.gdbtablx
new file mode 100644
index 0000000..7cd1a47
Binary files /dev/null and b/tests/test_data/geodatabases/Naperville.gdb/a00000009.gdbtablx differ
diff --git a/tests/test_data/geodatabases/Naperville.gdb/a00000009.spx b/tests/test_data/geodatabases/Naperville.gdb/a00000009.spx
new file mode 100644
index 0000000..ea86f2d
Binary files /dev/null and b/tests/test_data/geodatabases/Naperville.gdb/a00000009.spx differ
diff --git a/tests/test_data/geodatabases/Naperville.gdb/a0000000a.gdbindexes b/tests/test_data/geodatabases/Naperville.gdb/a0000000a.gdbindexes
new file mode 100644
index 0000000..cc24e2a
Binary files /dev/null and b/tests/test_data/geodatabases/Naperville.gdb/a0000000a.gdbindexes differ
diff --git a/tests/test_data/geodatabases/Naperville.gdb/a0000000a.gdbtable b/tests/test_data/geodatabases/Naperville.gdb/a0000000a.gdbtable
new file mode 100644
index 0000000..86ac447
Binary files /dev/null and b/tests/test_data/geodatabases/Naperville.gdb/a0000000a.gdbtable differ
diff --git a/tests/test_data/geodatabases/Naperville.gdb/a0000000a.gdbtablx b/tests/test_data/geodatabases/Naperville.gdb/a0000000a.gdbtablx
new file mode 100644
index 0000000..be033c2
Binary files /dev/null and b/tests/test_data/geodatabases/Naperville.gdb/a0000000a.gdbtablx differ
diff --git a/tests/test_data/geodatabases/Naperville.gdb/a0000000a.spx b/tests/test_data/geodatabases/Naperville.gdb/a0000000a.spx
new file mode 100644
index 0000000..f98218c
Binary files /dev/null and b/tests/test_data/geodatabases/Naperville.gdb/a0000000a.spx differ
diff --git a/tests/test_data/geodatabases/Naperville.gdb/a0000000b.FDO_GlobalID.atx b/tests/test_data/geodatabases/Naperville.gdb/a0000000b.FDO_GlobalID.atx
new file mode 100644
index 0000000..de635c1
Binary files /dev/null and b/tests/test_data/geodatabases/Naperville.gdb/a0000000b.FDO_GlobalID.atx differ
diff --git a/tests/test_data/geodatabases/Naperville.gdb/a0000000b.gdbindexes b/tests/test_data/geodatabases/Naperville.gdb/a0000000b.gdbindexes
new file mode 100644
index 0000000..eb57873
Binary files /dev/null and b/tests/test_data/geodatabases/Naperville.gdb/a0000000b.gdbindexes differ
diff --git a/tests/test_data/geodatabases/Naperville.gdb/a0000000b.gdbtable b/tests/test_data/geodatabases/Naperville.gdb/a0000000b.gdbtable
new file mode 100644
index 0000000..bbd1f6c
Binary files /dev/null and b/tests/test_data/geodatabases/Naperville.gdb/a0000000b.gdbtable differ
diff --git a/tests/test_data/geodatabases/Naperville.gdb/a0000000b.gdbtablx b/tests/test_data/geodatabases/Naperville.gdb/a0000000b.gdbtablx
new file mode 100644
index 0000000..7d562f5
Binary files /dev/null and b/tests/test_data/geodatabases/Naperville.gdb/a0000000b.gdbtablx differ
diff --git a/tests/test_data/geodatabases/Naperville.gdb/a0000000b.spx b/tests/test_data/geodatabases/Naperville.gdb/a0000000b.spx
new file mode 100644
index 0000000..41dbd6a
Binary files /dev/null and b/tests/test_data/geodatabases/Naperville.gdb/a0000000b.spx differ
diff --git a/tests/test_data/geodatabases/Naperville.gdb/a0000000c.gdbindexes b/tests/test_data/geodatabases/Naperville.gdb/a0000000c.gdbindexes
new file mode 100644
index 0000000..cc24e2a
Binary files /dev/null and b/tests/test_data/geodatabases/Naperville.gdb/a0000000c.gdbindexes differ
diff --git a/tests/test_data/geodatabases/Naperville.gdb/a0000000c.gdbtable b/tests/test_data/geodatabases/Naperville.gdb/a0000000c.gdbtable
new file mode 100644
index 0000000..0827759
Binary files /dev/null and b/tests/test_data/geodatabases/Naperville.gdb/a0000000c.gdbtable differ
diff --git a/tests/test_data/geodatabases/Naperville.gdb/a0000000c.gdbtablx b/tests/test_data/geodatabases/Naperville.gdb/a0000000c.gdbtablx
new file mode 100644
index 0000000..36c2d2a
Binary files /dev/null and b/tests/test_data/geodatabases/Naperville.gdb/a0000000c.gdbtablx differ
diff --git a/tests/test_data/geodatabases/Naperville.gdb/a0000000c.spx b/tests/test_data/geodatabases/Naperville.gdb/a0000000c.spx
new file mode 100644
index 0000000..e19a4ab
Binary files /dev/null and b/tests/test_data/geodatabases/Naperville.gdb/a0000000c.spx differ
diff --git a/tests/test_data/geodatabases/Naperville.gdb/a0000000e.G15POLLINGID.atx b/tests/test_data/geodatabases/Naperville.gdb/a0000000e.G15POLLINGID.atx
new file mode 100644
index 0000000..955d764
Binary files /dev/null and b/tests/test_data/geodatabases/Naperville.gdb/a0000000e.G15POLLINGID.atx differ
diff --git a/tests/test_data/geodatabases/Naperville.gdb/a0000000e.gdbindexes b/tests/test_data/geodatabases/Naperville.gdb/a0000000e.gdbindexes
new file mode 100644
index 0000000..72e7357
Binary files /dev/null and b/tests/test_data/geodatabases/Naperville.gdb/a0000000e.gdbindexes differ
diff --git a/tests/test_data/geodatabases/Naperville.gdb/a0000000e.gdbtable b/tests/test_data/geodatabases/Naperville.gdb/a0000000e.gdbtable
new file mode 100644
index 0000000..3ee878e
Binary files /dev/null and b/tests/test_data/geodatabases/Naperville.gdb/a0000000e.gdbtable differ
diff --git a/tests/test_data/geodatabases/Naperville.gdb/a0000000e.gdbtablx b/tests/test_data/geodatabases/Naperville.gdb/a0000000e.gdbtablx
new file mode 100644
index 0000000..8624824
Binary files /dev/null and b/tests/test_data/geodatabases/Naperville.gdb/a0000000e.gdbtablx differ
diff --git a/tests/test_data/geodatabases/Naperville.gdb/a0000000e.spx b/tests/test_data/geodatabases/Naperville.gdb/a0000000e.spx
new file mode 100644
index 0000000..3f5f4bb
Binary files /dev/null and b/tests/test_data/geodatabases/Naperville.gdb/a0000000e.spx differ
diff --git a/tests/test_data/geodatabases/Naperville.gdb/a0000000f.gdbindexes b/tests/test_data/geodatabases/Naperville.gdb/a0000000f.gdbindexes
new file mode 100644
index 0000000..cc24e2a
Binary files /dev/null and b/tests/test_data/geodatabases/Naperville.gdb/a0000000f.gdbindexes differ
diff --git a/tests/test_data/geodatabases/Naperville.gdb/a0000000f.gdbtable b/tests/test_data/geodatabases/Naperville.gdb/a0000000f.gdbtable
new file mode 100644
index 0000000..0839460
Binary files /dev/null and b/tests/test_data/geodatabases/Naperville.gdb/a0000000f.gdbtable differ
diff --git a/tests/test_data/geodatabases/Naperville.gdb/a0000000f.gdbtablx b/tests/test_data/geodatabases/Naperville.gdb/a0000000f.gdbtablx
new file mode 100644
index 0000000..740fc4b
Binary files /dev/null and b/tests/test_data/geodatabases/Naperville.gdb/a0000000f.gdbtablx differ
diff --git a/tests/test_data/geodatabases/Naperville.gdb/a0000000f.spx b/tests/test_data/geodatabases/Naperville.gdb/a0000000f.spx
new file mode 100644
index 0000000..7b778c7
Binary files /dev/null and b/tests/test_data/geodatabases/Naperville.gdb/a0000000f.spx differ
diff --git a/tests/test_data/geodatabases/Naperville.gdb/a0000001e.gdbindexes b/tests/test_data/geodatabases/Naperville.gdb/a0000001e.gdbindexes
new file mode 100644
index 0000000..c9d0caa
Binary files /dev/null and b/tests/test_data/geodatabases/Naperville.gdb/a0000001e.gdbindexes differ
diff --git a/tests/test_data/geodatabases/Naperville.gdb/a0000001e.gdbtable b/tests/test_data/geodatabases/Naperville.gdb/a0000001e.gdbtable
new file mode 100644
index 0000000..1e56ea4
Binary files /dev/null and b/tests/test_data/geodatabases/Naperville.gdb/a0000001e.gdbtable differ
diff --git a/tests/test_data/geodatabases/Naperville.gdb/a0000001e.gdbtablx b/tests/test_data/geodatabases/Naperville.gdb/a0000001e.gdbtablx
new file mode 100644
index 0000000..329b9ef
Binary files /dev/null and b/tests/test_data/geodatabases/Naperville.gdb/a0000001e.gdbtablx differ
diff --git a/tests/test_data/geodatabases/Naperville.gdb/a0000001e.spx b/tests/test_data/geodatabases/Naperville.gdb/a0000001e.spx
new file mode 100644
index 0000000..a457ee9
Binary files /dev/null and b/tests/test_data/geodatabases/Naperville.gdb/a0000001e.spx differ
diff --git a/tests/test_data/geodatabases/Naperville.gdb/gdb b/tests/test_data/geodatabases/Naperville.gdb/gdb
new file mode 100644
index 0000000..a786e12
Binary files /dev/null and b/tests/test_data/geodatabases/Naperville.gdb/gdb differ
diff --git a/tests/test_data/geodatabases/Naperville.gdb/timestamps b/tests/test_data/geodatabases/Naperville.gdb/timestamps
new file mode 100644
index 0000000..f1c7f81
Binary files /dev/null and b/tests/test_data/geodatabases/Naperville.gdb/timestamps differ
diff --git a/tests/test_data/geodatabases/baselines/naperville-gdf-0.pkl b/tests/test_data/geodatabases/baselines/naperville-gdf-0.pkl
new file mode 100644
index 0000000..6ddbb98
Binary files /dev/null and b/tests/test_data/geodatabases/baselines/naperville-gdf-0.pkl differ
diff --git a/tests/test_data/geodatabases/baselines/naperville-gdf-1.pkl b/tests/test_data/geodatabases/baselines/naperville-gdf-1.pkl
new file mode 100644
index 0000000..8c2f489
Binary files /dev/null and b/tests/test_data/geodatabases/baselines/naperville-gdf-1.pkl differ
diff --git a/tests/test_data/geodatabases/baselines/naperville-gdf-2.pkl b/tests/test_data/geodatabases/baselines/naperville-gdf-2.pkl
new file mode 100644
index 0000000..a279306
Binary files /dev/null and b/tests/test_data/geodatabases/baselines/naperville-gdf-2.pkl differ
diff --git a/tests/test_data/geodatabases/baselines/naperville-gdf-3.pkl b/tests/test_data/geodatabases/baselines/naperville-gdf-3.pkl
new file mode 100644
index 0000000..aa72e75
Binary files /dev/null and b/tests/test_data/geodatabases/baselines/naperville-gdf-3.pkl differ
diff --git a/tests/test_data/shapefiles/baselines/usa-major-cities-gdf-0.pkl b/tests/test_data/shapefiles/baselines/usa-major-cities-gdf-0.pkl
new file mode 100644
index 0000000..7cc9cd1
Binary files /dev/null and b/tests/test_data/shapefiles/baselines/usa-major-cities-gdf-0.pkl differ
diff --git a/tests/test_data/shapefiles/baselines/usa-major-cities-gdf-1.pkl b/tests/test_data/shapefiles/baselines/usa-major-cities-gdf-1.pkl
new file mode 100644
index 0000000..0ba3298
Binary files /dev/null and b/tests/test_data/shapefiles/baselines/usa-major-cities-gdf-1.pkl differ
diff --git a/tests/test_data/shapefiles/baselines/usa-major-cities-gdf-2.pkl b/tests/test_data/shapefiles/baselines/usa-major-cities-gdf-2.pkl
new file mode 100644
index 0000000..326bbaf
Binary files /dev/null and b/tests/test_data/shapefiles/baselines/usa-major-cities-gdf-2.pkl differ
diff --git a/tests/test_data/shapefiles/usa-major-cities/usa-major-cities.dbf b/tests/test_data/shapefiles/usa-major-cities/usa-major-cities.dbf
new file mode 100644
index 0000000..8cd4759
Binary files /dev/null and b/tests/test_data/shapefiles/usa-major-cities/usa-major-cities.dbf differ
diff --git a/tests/test_data/shapefiles/usa-major-cities/usa-major-cities.prj b/tests/test_data/shapefiles/usa-major-cities/usa-major-cities.prj
new file mode 100644
index 0000000..f45cbad
--- /dev/null
+++ b/tests/test_data/shapefiles/usa-major-cities/usa-major-cities.prj
@@ -0,0 +1 @@
+GEOGCS["GCS_WGS_1984",DATUM["D_WGS_1984",SPHEROID["WGS_1984",6378137.0,298.257223563]],PRIMEM["Greenwich",0.0],UNIT["Degree",0.0174532925199433]]
\ No newline at end of file
diff --git a/tests/test_data/shapefiles/usa-major-cities/usa-major-cities.sbn b/tests/test_data/shapefiles/usa-major-cities/usa-major-cities.sbn
new file mode 100644
index 0000000..8a33f07
Binary files /dev/null and b/tests/test_data/shapefiles/usa-major-cities/usa-major-cities.sbn differ
diff --git a/tests/test_data/shapefiles/usa-major-cities/usa-major-cities.sbx b/tests/test_data/shapefiles/usa-major-cities/usa-major-cities.sbx
new file mode 100644
index 0000000..4f981e1
Binary files /dev/null and b/tests/test_data/shapefiles/usa-major-cities/usa-major-cities.sbx differ
diff --git a/tests/test_data/shapefiles/usa-major-cities/usa-major-cities.shp b/tests/test_data/shapefiles/usa-major-cities/usa-major-cities.shp
new file mode 100644
index 0000000..bb96ae2
Binary files /dev/null and b/tests/test_data/shapefiles/usa-major-cities/usa-major-cities.shp differ
diff --git a/tests/test_data/shapefiles/usa-major-cities/usa-major-cities.shp.xml b/tests/test_data/shapefiles/usa-major-cities/usa-major-cities.shp.xml
new file mode 100644
index 0000000..1907432
--- /dev/null
+++ b/tests/test_data/shapefiles/usa-major-cities/usa-major-cities.shp.xml
@@ -0,0 +1 @@
+{BEE2416A-EC89-428E-AC85-A316F275D0D9}2006042112570600FALSE20130819115922002013081911592200CopyFeatures "C:\arcgis\Samples Net\Data\USZipCodeData\ZipCode_Boundaries_US_Major_Cities.shp" C:\Data\Data\Representations.gdb\ZipCode_Boundaries_US_Major_Cities # 0 0 0US_Major_Cities0020.000file://\\JIBBERJABBER\C$\WORK\GitHub\arcgis-runtime-samples-data\data\shapefiles\US_Major_Cities.shpLocal Area NetworkGeographicGCS_WGS_1984Angular Unit: Degree (0.017453)<GeographicCoordinateSystem xsi:type='typens:GeographicCoordinateSystem' xmlns:xsi='http://www.w3.org/2001/XMLSchema-instance' xmlns:xs='http://www.w3.org/2001/XMLSchema' xmlns:typens='http://www.esri.com/schemas/ArcGIS/10.1'><WKT>GEOGCS["GCS_WGS_1984",DATUM["D_WGS_1984",SPHEROID["WGS_1984",6378137.0,298.257223563]],PRIMEM["Greenwich",0.0],UNIT["Degree",0.0174532925199433],AUTHORITY["EPSG",4326]]</WKT><XOrigin>-400</XOrigin><YOrigin>-400</YOrigin><XYScale>11258999068426.238</XYScale><ZOrigin>-100000</ZOrigin><ZScale>10000</ZScale><MOrigin>-100000</MOrigin><MScale>10000</MScale><XYTolerance>8.983152841195215e-009</XYTolerance><ZTolerance>0.001</ZTolerance><MTolerance>0.001</MTolerance><HighPrecision>true</HighPrecision><LeftLongitude>-180</LeftLongitude><WKID>4326</WKID><LatestWKID>4326</LatestWKID></GeographicCoordinateSystem>Microsoft Windows XP Version 5.1 (Build 2600) Service Pack 2; ESRI ArcCatalog 9.2.0.1170enREQUIRED: A brief narrative summary of the data set.REQUIRED: A summary of the intentions with which the data set was developed.REQUIRED: The name of an organization or individual that developed the data set.REQUIRED: The date when the data set is published or otherwise made available for release.ZipCode_Boundaries_US_Major_CitiesZipCode_Boundaries_US_Major_Citiesvector digital data\\YARO4525\C$\Data\Data\Representations.gdbREQUIRED: The basis on which the time period of content information is determined.REQUIRED: The year (and optionally month, or month and day) for which the data set corresponds to the 
ground.REQUIRED: The frequency with which changes and additions are made to the data set after the initial data set is completed.REQUIRED: Western-most coordinate of the limit of coverage expressed in longitude.REQUIRED: Eastern-most coordinate of the limit of coverage expressed in longitude.REQUIRED: Northern-most coordinate of the limit of coverage expressed in latitude.REQUIRED: Southern-most coordinate of the limit of coverage expressed in latitude.REQUIRED: Reference to a formally registered thesaurus or a similar authoritative source of theme keywords.REQUIRED: Common-use word or phrase used to describe the subject of the data set.REQUIRED: Restrictions and legal prerequisites for accessing the data set.REQUIRED: Restrictions and legal prerequisites for using the data set after access is granted.File Geodatabase Feature Class Version 6.2 (Build 9200) ; Esri ArcGIS 10.2.0.3348US_Major_CitiesenFGDC Content Standards for Digital Geospatial MetadataFGDC-STD-001-1998local timeREQUIRED: The person responsible for the metadata information.REQUIRED: The organization responsible for the metadata information.REQUIRED: The mailing and/or physical address for the organization or individual.REQUIRED: The city of the address.REQUIRED: The state or province of the address.REQUIRED: The ZIP or other postal code of the address.REQUIRED: The telephone number by which individuals can speak to the organization or individual.20060421ISO 19115 Geographic Information - MetadataDIS_ESRI1.0datasetDownloadable Data002file://\\YARO4525\C$\Data\Data\Representations.gdbLocal Area NetworkFile Geodatabase Feature ClassShapefile0.000VectorSimpleFALSE0FALSEFALSEGCS_WGS_1984Decimal degrees0.0000000.000000D_WGS_1984WGS_19846378137.000000298.257224Explicit elevation coordinate included with horizontal coordinates0.000100GCS_WGS_1984EPSG8.1.10US_Major_CitiesFeature Class0FIDFIDOID400Internal feature number.EsriSequential unique whole numbers that are automatically 
generated.ShapeShapeGeometry000Feature geometry.ESRICoordinates defining the features.NAMENAMEString4000STSTString200ZIPZIPString500RuleIDRuleIDInteger99020130819
diff --git a/tests/test_data/shapefiles/usa-major-cities/usa-major-cities.shx b/tests/test_data/shapefiles/usa-major-cities/usa-major-cities.shx
new file mode 100644
index 0000000..2968ef1
Binary files /dev/null and b/tests/test_data/shapefiles/usa-major-cities/usa-major-cities.shx differ