diff --git a/conftest.py b/conftest.py new file mode 100644 index 0000000..7e9c174 --- /dev/null +++ b/conftest.py @@ -0,0 +1,17 @@ +from tests.fixtures import * +from tests.test_data.baselines import * +from nad_ch.config import QUEUE_BACKEND_URL, QUEUE_BROKER_URL + + +pytest_plugins = ("celery.contrib.pytest", ) + +@pytest.fixture(scope="session") +def celery_config(): + return { + "broker_url": QUEUE_BROKER_URL, + "result_backend": QUEUE_BACKEND_URL, + "broker_connection_retry": True, + "broker_connection_retry_delay": 5, + "broker_connection_retry_max": 3, + "broker_connection_retry_on_startup": True + } diff --git a/nad_ch/application/interfaces.py b/nad_ch/application/interfaces.py index 2cdc1c1..87516a0 100644 --- a/nad_ch/application/interfaces.py +++ b/nad_ch/application/interfaces.py @@ -1,9 +1,10 @@ -from typing import Optional, Protocol +from typing import Optional, Protocol, Dict from nad_ch.application.dtos import DownloadResult from nad_ch.domain.repositories import ( DataProducerRepository, DataSubmissionRepository, UserRepository, + ColumnMapRepository, ) @@ -38,7 +39,7 @@ def run_load_and_validate( submissions: DataSubmissionRepository, submission_id: int, path: str, - config_name: str, + column_map: Dict[str, str], ): ... @@ -78,6 +79,10 @@ def submissions(self) -> DataSubmissionRepository: def users(self) -> UserRepository: return self._users + @property + def column_maps(self) -> ColumnMapRepository: + return self._column_maps + @property def logger(self) -> Logger: return self._logger diff --git a/nad_ch/application/use_cases/data_submissions.py b/nad_ch/application/use_cases/data_submissions.py index a73ecda..82229b1 100644 --- a/nad_ch/application/use_cases/data_submissions.py +++ b/nad_ch/application/use_cases/data_submissions.py @@ -70,7 +70,9 @@ def list_data_submissions_by_producer( return get_view_model(submissions) -def validate_data_submission(ctx: ApplicationContext, filename: str, config_name: str): +def validate_data_submission( + ctx: ApplicationContext, filename: str, column_map_name: str +): submission = ctx.submissions.get_by_filename(filename) if not submission: ctx.logger.error("Data submission with that filename does not exist") @@ -81,8 +83,14 @@ def validate_data_submission(ctx: ApplicationContext, filename: str, config_name ctx.logger.error("Data extration error") return + # Using version 1 for column maps for now, may add feature for user to select + # version later + column_map = ctx.column_maps.get_by_name_and_version(column_map_name, 1) report = ctx.task_queue.run_load_and_validate( - ctx.submissions, submission.id, download_result.extracted_dir, config_name + ctx.submissions, + submission.id, + download_result.extracted_dir, + column_map.mapping, ) ctx.logger.info(f"Total number of features: {report.overview.feature_count}") diff --git a/nad_ch/application/validation.py b/nad_ch/application/validation.py index 8c6b984..a4d9112 100644 --- a/nad_ch/application/validation.py +++ b/nad_ch/application/validation.py @@ -23,14 +23,14 @@ def get_features_flagged(features: Dict[str, DataSubmissionReportFeature]) -> in def initialize_overview_details( - gdf: GeoDataFrame, column_maps: Dict[str, str] + gdf: GeoDataFrame, column_map: Dict[str, str] ) -> Tuple[DataSubmissionReportOverview, Dict[str, DataSubmissionReportFeature]]: report_overview = DataSubmissionReportOverview(feature_count=get_feature_count(gdf)) report_features = { nad_name: DataSubmissionReportFeature( provided_feature_name=provided_name, nad_feature_name=nad_name ) - for 
provided_name, nad_name in column_maps.items() + for provided_name, nad_name in column_map.items() } return report_overview, report_features @@ -60,8 +60,8 @@ def update_overview_details( def finalize_overview_details( - features: Dict[str, DataSubmissionReportFeature], overview: DataSubmissionReportOverview, + features: Dict[str, DataSubmissionReportFeature], ) -> DataSubmissionReportOverview: overview.features_flagged += get_features_flagged(features) # TODO: Add logic for etl_update_required & data_update_required diff --git a/nad_ch/config/development_local.py b/nad_ch/config/development_local.py index 4dea222..6092b48 100644 --- a/nad_ch/config/development_local.py +++ b/nad_ch/config/development_local.py @@ -7,6 +7,7 @@ SqlAlchemyDataProducerRepository, SqlAlchemyDataSubmissionRepository, SqlAlchemyUserRepository, + SqlAlchemyColumnMapRepository, ) from nad_ch.infrastructure.auth import AuthenticationImplementation from nad_ch.infrastructure.logger import BasicLogger @@ -46,6 +47,7 @@ def __init__(self): self._producers = self.create_producer_repository() self._submissions = self.create_submission_repository() self._users = self.create_user_repository() + self._column_maps = self.create_column_map_repository() self._logger = self.create_logger() self._storage = self.create_storage() self._task_queue = self.create_task_queue() @@ -60,6 +62,9 @@ def create_submission_repository(self): def create_user_repository(self): return SqlAlchemyUserRepository(self._session_factory) + def create_column_map_repository(self): + return SqlAlchemyColumnMapRepository(self._session_factory) + def create_logger(self): return BasicLogger(__name__, logging.DEBUG) diff --git a/nad_ch/config/development_remote.py b/nad_ch/config/development_remote.py index 73222a2..88e70ac 100644 --- a/nad_ch/config/development_remote.py +++ b/nad_ch/config/development_remote.py @@ -7,6 +7,7 @@ SqlAlchemyDataProducerRepository, SqlAlchemyDataSubmissionRepository, SqlAlchemyUserRepository, + SqlAlchemyColumnMapRepository, ) from nad_ch.infrastructure.auth import AuthenticationImplementation from nad_ch.infrastructure.logger import BasicLogger @@ -45,6 +46,7 @@ def __init__(self): self._producers = self.create_producer_repository() self._submissions = self.create_submission_repository() self._users = self.create_user_repository() + self._column_maps = self.create_column_map_repository() self._logger = self.create_logger() self._storage = self.create_storage() self._task_queue = self.create_task_queue() @@ -59,6 +61,9 @@ def create_submission_repository(self): def create_user_repository(self): return SqlAlchemyUserRepository(self._session_factory) + def create_column_map_repository(self): + return SqlAlchemyColumnMapRepository(self._session_factory) + def create_logger(self): return BasicLogger(__name__) diff --git a/nad_ch/config/test.py b/nad_ch/config/test.py index baa600a..0832cb8 100644 --- a/nad_ch/config/test.py +++ b/nad_ch/config/test.py @@ -8,6 +8,7 @@ FakeDataProducerRepository, FakeDataSubmissionRepository, FakeUserRepository, + FakeColumnMapRepository, FakeStorage, ) @@ -23,6 +24,7 @@ def __init__(self): self._producers = self.create_producer_repository() self._submissions = self.create_submission_repository() self._users = self.create_user_repository() + self._column_maps = self.create_column_map_repository() self._logger = self.create_logger() self._storage = self.create_storage() self._task_queue = self.create_task_queue() @@ -37,6 +39,9 @@ def create_submission_repository(self): def 
create_user_repository(self): return FakeUserRepository() + def create_column_map_repository(self): + return FakeColumnMapRepository() + def create_logger(self): return BasicLogger(__name__, logging.DEBUG) diff --git a/nad_ch/domain/repositories.py b/nad_ch/domain/repositories.py index 71ff660..b649ce7 100644 --- a/nad_ch/domain/repositories.py +++ b/nad_ch/domain/repositories.py @@ -41,6 +41,9 @@ def get_by_email(self, email: str) -> Optional[User]: def get_by_id(self, id: int) -> Optional[User]: ... + def get_all(self) -> Iterable[User]: + ... + class ColumnMapRepository(Protocol): def add(self, column_map: ColumnMap) -> ColumnMap: diff --git a/nad_ch/infrastructure/database.py b/nad_ch/infrastructure/database.py index 5d15d90..b56c0bc 100644 --- a/nad_ch/infrastructure/database.py +++ b/nad_ch/infrastructure/database.py @@ -111,7 +111,7 @@ def from_entity(submission: DataSubmission, producer_id: int, column_map_id: int def to_entity(self): producer = self.data_producer.to_entity() - column_map = self.column_map.to_entity(producer) + column_map = self.column_map.to_entity() entity = DataSubmission( id=self.id, filename=self.filename, @@ -190,13 +190,14 @@ def from_entity(column_map: ColumnMap, producer_id: int): ) return model - def to_entity(self, producer: DataProducer): + def to_entity(self): + producer_entity = self.data_producer.to_entity() entity = ColumnMap( id=self.id, name=self.name, version_id=self.version_id, mapping=self.mapping, - producer=producer, + producer=producer_entity, ) if self.created_at is not None: @@ -290,32 +291,32 @@ def get_by_producer(self, producer: DataProducer) -> List[DataSubmission]: def get_by_filename(self, filename: str) -> Optional[DataSubmission]: with session_scope(self.session_factory) as session: - result = ( - session.query(DataSubmissionModel, DataProducerModel) - .join( - DataProducerModel, - DataProducerModel.id == DataSubmissionModel.data_producer_id, - ) + submission_model = ( + session.query(DataSubmissionModel) .filter(DataSubmissionModel.filename == filename) .first() ) - if result: - submission_model, producer_model = result - return submission_model.to_entity(producer_model.to_entity()) + if submission_model: + return submission_model.to_entity() else: return None def update_report(self, id: int, report) -> None: with session_scope(self.session_factory) as session: - model_instance = ( + submission_model = ( session.query(DataSubmissionModel) .filter(DataSubmissionModel.id == id) .first() ) - if model_instance: - model_instance.report = report + if submission_model: + submission_model.report = report + session.commit() + session.refresh(submission_model) + return submission_model.to_entity() + else: + return None class SqlAlchemyUserRepository(UserRepository): @@ -350,6 +351,12 @@ def get_by_id(self, id: int) -> Optional[User]: else: return None + def get_all(self) -> List[User]: + with session_scope(self.session_factory) as session: + user_models = session.query(UserModel).all() + user_entities = [user.to_entity() for user in user_models] + return user_entities + class SqlAlchemyColumnMapRepository(ColumnMapRepository): def __init__(self, session_factory): @@ -366,8 +373,7 @@ def add(self, column_map: ColumnMap) -> ColumnMap: session.add(column_map_model) session.commit() session.refresh(column_map_model) - producer_model_entity = producer_model.to_entity() - return column_map_model.to_entity(producer_model_entity) + return column_map_model.to_entity() def get_all(self) -> List[ColumnMap]: with session_scope(self.session_factory) as 
session: @@ -387,10 +393,7 @@ def get_by_data_submission( .first() ) if submission_model: - producer_entity = submission_model.producer.to_entity() - column_map_entity = submission_model.column_map.to_entity( - producer_entity - ) + column_map_entity = submission_model.column_map.to_entity() return column_map_entity else: return None @@ -406,8 +409,7 @@ def get_by_name_and_version( ) .first() ) - producer_entity = column_map_model.data_producer.to_entity() if column_map_model: - return column_map_model.to_entity(producer_entity) + return column_map_model.to_entity() else: return None diff --git a/nad_ch/infrastructure/task_queue.py b/nad_ch/infrastructure/task_queue.py index ed54ec0..8aac8b1 100644 --- a/nad_ch/infrastructure/task_queue.py +++ b/nad_ch/infrastructure/task_queue.py @@ -14,10 +14,17 @@ ) from nad_ch.config import QUEUE_BROKER_URL, QUEUE_BACKEND_URL from nad_ch.domain.repositories import DataSubmissionRepository +from typing import Dict celery_app = Celery( - "redis-task-queue", broker=QUEUE_BROKER_URL, backend=QUEUE_BACKEND_URL + "redis-task-queue", + broker=QUEUE_BROKER_URL, + backend=QUEUE_BACKEND_URL, + broker_connection_retry=True, # Enable broker connection retry + broker_connection_retry_delay=5, # Optional: retry delay in seconds + broker_connection_retry_max=3, # Optional: maximum number of retries + broker_connection_retry_on_startup=True, # Enable retry on startup ) @@ -31,13 +38,13 @@ @celery_app.task -def load_and_validate(gdb_file_path: str, config_name: str) -> dict: - data_reader = DataReader(config_name) +def load_and_validate(gdb_file_path: str, column_map: Dict[str, str]) -> dict: + data_reader = DataReader(column_map) first_batch = True for gdf in data_reader.read_file_in_batches(path=gdb_file_path): if first_batch: overview, feature_details = initialize_overview_details( - data_reader.valid_renames + gdf, data_reader.valid_renames ) feature_details = update_feature_details(gdf, feature_details) overview = update_overview_details(gdf, overview) @@ -56,9 +63,9 @@ def run_load_and_validate( submissions: DataSubmissionRepository, submission_id: int, path: str, - config_name: str, + column_map: Dict[str, str], ): - task_result = load_and_validate.apply_async(args=[path, config_name]) + task_result = load_and_validate.apply_async(args=[path, column_map]) report_dict = task_result.get() submissions.update_report(submission_id, report_dict) return report_from_dict(report_dict) diff --git a/scripts/seed.py b/scripts/seed.py index efd2cec..ce88032 100644 --- a/scripts/seed.py +++ b/scripts/seed.py @@ -32,7 +32,7 @@ def main(): ) ctx.users.add(new_user) - new_column_map = ColumnMap(name="New Jersey Mapping v1", producer=saved_producer) + # new_column_map = ColumnMap(name="New Jersey Mapping v1", producer=saved_producer) # TODO save column map once ApplicationContext can provide a repository # saved_column_map = ctx.column_maps.add(new_column_map) diff --git a/tests/application/test_data_reader.py b/tests/application/test_data_reader.py index eaa0d8d..91a9cc7 100644 --- a/tests/application/test_data_reader.py +++ b/tests/application/test_data_reader.py @@ -1,22 +1,19 @@ import os from nad_ch.application.data_reader import DataReader -from tests.test_data.config_baselines import ( +from conftest import ( EXPECTED_DEFAULT_CONFIG, - TESTPROVIDER1_CONFIG, - TESTPROVIDER2_CONFIG, + TESTPRODUCER1_CONFIG, + TESTPRODUCER2_CONFIG, ) import pickle from pandas.testing import assert_frame_equal import pytest -from tests.fixtures import * TEST_DATA_DIR = "tests/test_data" -def 
test_set_column_map(test_provider_column_maps): - column_map_entity = test_provider_column_maps.get_by_name_and_version( - "testprovider1", 1 - ) +def test_set_column_map(producer_column_maps): + column_map_entity = producer_column_maps.get_by_name_and_version("testproducer1", 1) reader = DataReader(column_map_entity.mapping) assert ( @@ -25,16 +22,10 @@ def test_set_column_map(test_provider_column_maps): ) assert ( reader.column_map["data_column_mapping"] - != EXPECTED_DEFAULT_CONFIG["data_column_mapping"] - ) - assert ( - reader.column_map["data_column_mapping"] - == TESTPROVIDER1_CONFIG["data_column_mapping"] + == TESTPRODUCER1_CONFIG["data_column_mapping"] ) - column_map_entity = test_provider_column_maps.get_by_name_and_version( - "testprovider2", 1 - ) + column_map_entity = producer_column_maps.get_by_name_and_version("testproducer2", 1) reader = DataReader(column_map_entity.mapping) assert ( @@ -43,39 +34,29 @@ def test_set_column_map(test_provider_column_maps): ) assert ( reader.column_map["data_column_mapping"] - != EXPECTED_DEFAULT_CONFIG["data_column_mapping"] - ) - assert ( - reader.column_map["data_column_mapping"] - == TESTPROVIDER2_CONFIG["data_column_mapping"] + == TESTPRODUCER2_CONFIG["data_column_mapping"] ) -def test_validate_column_map(test_provider_column_maps): - column_map_entity = test_provider_column_maps.get_by_name_and_version( - "testprovider1", 1 - ) +def test_validate_column_map(producer_column_maps): + column_map_entity = producer_column_maps.get_by_name_and_version("testproducer1", 1) reader = DataReader(column_map_entity.mapping) with pytest.raises(Exception) as exc: reader.validate_column_map() msg = "Duplicate inputs found for destination fields: COL_13 & COL_2, COL_5 & COL_6" assert str(exc.value) == msg - column_map_entity = test_provider_column_maps.get_by_name_and_version( - "testprovider2", 1 - ) + column_map_entity = producer_column_maps.get_by_name_and_version("testproducer2", 1) reader = DataReader(column_map_entity.mapping) # No error raised reader.validate_column_map() -def test_read_file_in_batches_shape(test_provider_column_maps): +def test_read_file_in_batches_shape(producer_column_maps): file_path = os.path.join( TEST_DATA_DIR, "shapefiles/usa-major-cities/usa-major-cities.shp" ) - column_map_entity = test_provider_column_maps.get_by_name_and_version( - "testprovider2", 1 - ) + column_map_entity = producer_column_maps.get_by_name_and_version("testproducer2", 1) reader = DataReader(column_map_entity.mapping) i = 0 for gdf in reader.read_file_in_batches(path=file_path, batch_size=50): @@ -88,11 +69,9 @@ def test_read_file_in_batches_shape(test_provider_column_maps): i += 1 -def test_read_file_in_batches_gdb(test_provider_column_maps): +def test_read_file_in_batches_gdb(producer_column_maps): file_path = os.path.join(TEST_DATA_DIR, "geodatabases/Naperville.gdb") - column_map_entity = test_provider_column_maps.get_by_name_and_version( - "testprovider1", 1 - ) + column_map_entity = producer_column_maps.get_by_name_and_version("testproducer1", 1) reader = DataReader(column_map_entity.mapping) i = 0 for gdf in reader.read_file_in_batches(path=file_path, batch_size=2000): diff --git a/tests/application/test_validation.py b/tests/application/test_validation.py index e0b8dab..76ad40c 100644 --- a/tests/application/test_validation.py +++ b/tests/application/test_validation.py @@ -122,5 +122,5 @@ def test_finalize_overview_details(): overview, features = initialize_overview_details(gdf, column_maps) features["St_Name"].null_count = 2 
features["Floor"].invalid_domain_count = 7 - overview = finalize_overview_details(features, overview) + overview = finalize_overview_details(overview, features) assert overview.features_flagged == 2 diff --git a/tests/application/use_cases/test_data_submissions.py b/tests/application/use_cases/test_data_submissions.py index c055d6f..f2e9f72 100644 --- a/tests/application/use_cases/test_data_submissions.py +++ b/tests/application/use_cases/test_data_submissions.py @@ -12,6 +12,7 @@ ) from nad_ch.config import create_app_context from nad_ch.domain.repositories import DataSubmissionRepository +from typing import Dict @pytest.fixture(scope="function") @@ -44,10 +45,13 @@ def test_list_data_submissions_by_producer(app_context): assert isinstance(result[0], DataSubmissionViewModel) -def test_validate_data_submission(app_context, caplog): +def test_validate_data_submission(app_context, caplog, producer_column_maps): producer_name = "State X" add_data_producer(app_context, producer_name) + column_map = producer_column_maps.get_by_name_and_version("testproducer1") + app_context.column_maps.add(column_map) + filename = "my_cool_file.zip" ingest_data_submission(app_context, filename, producer_name) submission = app_context.submissions.get_by_id(1) @@ -58,13 +62,13 @@ def run_load_and_validate( submissions: DataSubmissionRepository, submission_id: int, path: str, - config_name: str, + column_map: Dict[str, str], ): return DataSubmissionReport( overview=DataSubmissionReportOverview(feature_count=1) ) app_context._task_queue = CustomMockTestTaskQueue() - config_name = "default" - validate_data_submission(app_context, submission.filename, config_name) + column_map_name = "testproducer1" + validate_data_submission(app_context, submission.filename, column_map_name) assert re.search(r"Total number of features: 1", caplog.text) diff --git a/tests/fakes_and_mocks.py b/tests/fakes_and_mocks.py index 0bb4d73..70678a9 100644 --- a/tests/fakes_and_mocks.py +++ b/tests/fakes_and_mocks.py @@ -1,11 +1,12 @@ from datetime import datetime -from typing import Optional +from typing import Optional, Iterable from nad_ch.application.dtos import DownloadResult -from nad_ch.domain.entities import DataProducer, DataSubmission, User +from nad_ch.domain.entities import DataProducer, DataSubmission, User, ColumnMap from nad_ch.domain.repositories import ( DataProducerRepository, DataSubmissionRepository, UserRepository, + ColumnMapRepository, ) import os @@ -73,6 +74,38 @@ def get_by_id(self, id: int) -> Optional[User]: return next((u for u in self._users if u.id == id), None) +class FakeColumnMapRepository(ColumnMapRepository): + def __init__(self) -> None: + self._column_maps = set() + self._next_id = 1 + + def add(self, column_map: ColumnMap) -> ColumnMap: + column_map.id = self._next_id + column_map.set_created_at(datetime.now()) + self._column_maps.add(column_map) + self._next_id += 1 + return column_map + + def get_all(self) -> Iterable[ColumnMap]: + return sorted(list(self._column_maps), key=lambda column_map: column_map.id) + + def get_by_data_submission( + self, data_submission: DataSubmission + ) -> Optional[ColumnMap]: + # Not needed for now but may need to implement later + pass + + def get_by_name_and_version(self, name: str, version: int) -> Optional[ColumnMap]: + return next( + ( + cm + for cm in self._column_maps + if cm.name == name and cm.version_id == version + ), + None, + ) + + class FakeStorage: def __init__(self): self._files = set() diff --git a/tests/fixtures.py b/tests/fixtures.py index 
f7cf5ab..4f5e042 100644 --- a/tests/fixtures.py +++ b/tests/fixtures.py @@ -5,7 +5,7 @@ import pathlib from sqlalchemy import create_engine from sqlalchemy.orm import sessionmaker -from nad_ch.domain.entities import DataProducer, DataSubmission, ColumnMap +from nad_ch.domain.entities import DataProducer, DataSubmission, ColumnMap, User from nad_ch.infrastructure.database import ( ModelBase, SqlAlchemyDataProducerRepository, @@ -16,6 +16,7 @@ from nad_ch.config import DATABASE_URL BASE_PATH = pathlib.Path(__file__).parent.resolve() +TEST_COLUMN_MAPS_PATH = os.path.join(BASE_PATH, "test_data/column_maps") @pytest.fixture(scope="function") @@ -33,12 +34,12 @@ def column_maps(test_database): @pytest.fixture(scope="function") -def test_provider_column_maps(repositories): +def producer_column_maps(repositories): producers, _, column_maps, _ = repositories new_producer = DataProducer("Producer A") saved_producer = producers.add(new_producer) - test_column_maps_path = os.path.join(BASE_PATH, "test_data/column_maps") - for test_column_map_path in glob.glob(f"{test_column_maps_path}/*.yaml"): + + for test_column_map_path in sorted(glob.glob(f"{TEST_COLUMN_MAPS_PATH}/*.yaml")): column_map_name = os.path.splitext(os.path.basename(test_column_map_path))[0] with open(test_column_map_path, "r") as file: mapping = yaml.safe_load(file) @@ -48,12 +49,55 @@ def test_provider_column_maps(repositories): return column_maps +@pytest.fixture(scope="function") +def producer_column_maps_and_submissions(repositories): + producers, submissions, column_maps, _ = repositories + + column_map_entities, producer_entities = [], [] + for i, test_column_map_path in enumerate( + sorted(glob.glob(f"{TEST_COLUMN_MAPS_PATH}/*.yaml")) + ): + new_producer = DataProducer(f"Producer {i + 1}") + producer_entities.append(producers.add(new_producer)) + + column_map_name = os.path.splitext(os.path.basename(test_column_map_path))[0] + with open(test_column_map_path, "r") as file: + mapping = yaml.safe_load(file) + new_column_map = ColumnMap( + column_map_name, producer_entities[i], mapping, 1 + ) + column_map_entities.append(column_maps.add(new_column_map)) + + new_submission1 = DataSubmission( + "testproducer1-submission", producer_entities[0], column_map_entities[0] + ) + new_submission2 = DataSubmission( + "testproducer2-submission", producer_entities[1], column_map_entities[1] + ) + _ = submissions.add(new_submission1) + _ = submissions.add(new_submission2) + return column_maps, submissions + + @pytest.fixture(scope="function") def users(test_database): Session = sessionmaker(bind=test_database) return SqlAlchemyUserRepository(Session) +@pytest.fixture(scope="function") +def users_xyz(users): + user_names = ("X", "Y", "Z") + for name in user_names: + new_user = User( + email=f"user{name}@gmail.com", + login_provider=f"{name}.com", + logout_url=f"dummy logout value for {name}", + ) + users.add(new_user) + return users + + @pytest.fixture(scope="function") def submissions(test_database): Session = sessionmaker(bind=test_database) @@ -66,6 +110,15 @@ def producers(test_database): return SqlAlchemyDataProducerRepository(Session) +@pytest.fixture(scope="function") +def producers_xyz(producers): + producer_names = ("State X", "State Y", "State Z") + for producer_name in producer_names: + new_producer = DataProducer(producer_name) + producers.add(new_producer) + return producers + + @pytest.fixture(scope="function") def repositories(test_database): Session = sessionmaker(bind=test_database) diff --git a/tests/infrastructure/test_database.py 
b/tests/infrastructure/test_database.py index 34d3b98..3c1311e 100644 --- a/tests/infrastructure/test_database.py +++ b/tests/infrastructure/test_database.py @@ -1,40 +1,169 @@ -from nad_ch.domain.entities import DataProducer, DataSubmission, ColumnMap -from tests.fixtures import * +from conftest import TEST_COLUMN_MAPS_PATH +import yaml +from nad_ch.domain.entities import DataProducer, DataSubmission, ColumnMap, User -def test_add_data_producer_to_repository_and_get_by_name(producers): - producer_name = "State X" - new_producer = DataProducer(producer_name) +def test_add_producer(producers): + new_producer = DataProducer("Producer A") + saved_producer = producers.add(new_producer) + assert saved_producer.id == 1 + assert saved_producer.created_at is not None + assert saved_producer.updated_at is not None + assert saved_producer.name == "Producer A" + assert isinstance(saved_producer, DataProducer) - producers.add(new_producer) - retrieved_producer = producers.get_by_name(producer_name) +def test_producer_repo_get_by_name(producers_xyz): + producer_name = "State X" + + retrieved_producer = producers_xyz.get_by_name(producer_name) assert retrieved_producer.id == 1 assert retrieved_producer.created_at is not None assert retrieved_producer.updated_at is not None assert retrieved_producer.name == producer_name - assert isinstance(retrieved_producer, DataProducer) is True + assert isinstance(retrieved_producer, DataProducer) -def test_add_data_producer_and_then_data_submission(repositories): - producers, submissions, column_maps, users = repositories - producer_name = "State X" - new_producer = DataProducer(producer_name) - saved_producer = producers.add(new_producer) - new_column_map = ColumnMap("TestMap", saved_producer, version_id=1) - saved_column_map = column_maps.add(new_column_map) - new_submission = DataSubmission("some-file-name", saved_producer, saved_column_map) +def test_producer_repo_get_all(producers_xyz): + producer_names = ("State X", "State Y", "State Z") - result = submissions.add(new_submission) + retrieved_producers = producers_xyz.get_all() + assert len(retrieved_producers) == 3 + assert all(producer.id == i + 1 for i, producer in enumerate(retrieved_producers)) + assert all(producer.created_at is not None for producer in retrieved_producers) + assert all(producer.updated_at is not None for producer in retrieved_producers) + assert all( + producer.name == producer_name + for producer, producer_name in list(zip(retrieved_producers, producer_names)) + ) + assert isinstance(retrieved_producers[0], DataProducer) + + +def test_add_user(users): + new_user = User( + email="userY@gmail.com", + login_provider="Y.com", + logout_url="dummy logout value for Y", + ) + saved_user = users.add(new_user) + assert saved_user.id == 1 + assert saved_user.created_at is not None + assert saved_user.updated_at is not None + assert saved_user.email == "userY@gmail.com" + assert saved_user.login_provider == "Y.com" + assert saved_user.logout_url == "dummy logout value for Y" + assert isinstance(saved_user, User) + + +def test_user_repo_get_by_email(users_xyz): + retrieved_user = users_xyz.get_by_email("userY@gmail.com") + assert retrieved_user.id == 2 + assert retrieved_user.created_at is not None + assert retrieved_user.updated_at is not None + assert retrieved_user.email == "userY@gmail.com" + assert retrieved_user.login_provider == "Y.com" + assert retrieved_user.logout_url == "dummy logout value for Y" + assert isinstance(retrieved_user, User) - assert result.id == 1 - assert result.created_at 
is not None - assert result.updated_at is not None - assert result.producer.id == saved_producer.id - assert result.filename == "some-file-name" +def test_user_repo_get_by_id(users_xyz): + retrieved_user = users_xyz.get_by_id(3) + assert retrieved_user.id == 3 + assert retrieved_user.created_at is not None + assert retrieved_user.updated_at is not None + assert retrieved_user.email == "userZ@gmail.com" + assert retrieved_user.login_provider == "Z.com" + assert retrieved_user.logout_url == "dummy logout value for Z" + assert isinstance(retrieved_user, User) + + +def test_user_repo_get_all(users_xyz): + user_names = ("X", "Y", "Z") + retrieved_users = users_xyz.get_all() + assert len(retrieved_users) == 3 + assert all(user.id == i + 1 for i, user in enumerate(retrieved_users)) + assert all(user.created_at is not None for user in retrieved_users) + assert all(user.updated_at is not None for user in retrieved_users) + assert all( + user.email == f"user{user_name}@gmail.com" + for user, user_name in list(zip(retrieved_users, user_names)) + ) + assert all( + user.login_provider == f"{user_name}.com" + for user, user_name in list(zip(retrieved_users, user_names)) + ) + assert all( + user.logout_url == f"dummy logout value for {user_name}" + for user, user_name in list(zip(retrieved_users, user_names)) + ) + assert isinstance(retrieved_users[0], User) -def test_retrieve_a_list_of_submissions_by_producer(repositories): + +def test_add_column_map(column_maps, producers_xyz): + producer = producers_xyz.get_by_name("State X") + mapping = {"a": "1", "b": "2"} + column_map = ColumnMap( + name="testmap", + producer=producer, + mapping=mapping, + version_id=2, + ) + saved_column_map = column_maps.add(column_map) + assert saved_column_map.id == 1 + assert saved_column_map.created_at is not None + assert saved_column_map.updated_at is not None + assert saved_column_map.name == "testmap" + assert saved_column_map.version_id == 2 + assert saved_column_map.mapping == mapping + assert isinstance(saved_column_map, ColumnMap) + + +def test_column_map_get_all(producer_column_maps): + column_maps = producer_column_maps.get_all() + for i, column_map in enumerate(column_maps): + name = f"testproducer{i + 1}" + assert column_map.id == i + 1 + assert column_map.created_at is not None + assert column_map.updated_at is not None + assert column_map.name == name + assert column_map.version_id == 1 + test_column_map_path = f"{TEST_COLUMN_MAPS_PATH}/{name}.yaml" + with open(test_column_map_path, "r") as file: + mapping = yaml.safe_load(file) + assert column_map.mapping == mapping + assert isinstance(column_map, ColumnMap) + + +def test_column_map_get_by_data_submission(producer_column_maps_and_submissions): + column_maps, submissions = producer_column_maps_and_submissions + column_map1 = column_maps.get_by_data_submission(submissions.get_by_id(1)) + column_map2 = column_maps.get_by_data_submission(submissions.get_by_id(2)) + for i, column_map in enumerate((column_map1, column_map2)): + name = f"testproducer{i + 1}" + assert column_map.id == i + 1 + assert column_map.name == name + test_column_map_path = f"{TEST_COLUMN_MAPS_PATH}/{name}.yaml" + with open(test_column_map_path, "r") as file: + mapping = yaml.safe_load(file) + assert column_map.mapping == mapping + assert isinstance(column_map, ColumnMap) + + +def test_column_map_get_by_name_and_version(producer_column_maps): + column_map = producer_column_maps.get_by_name_and_version("testproducer1", 1) + assert column_map.id == 1 + assert column_map.created_at is not None + 
assert column_map.updated_at is not None + assert column_map.name == "testproducer1" + assert column_map.version_id == 1 + test_column_map_path = f"{TEST_COLUMN_MAPS_PATH}/testproducer1.yaml" + with open(test_column_map_path, "r") as file: + mapping = yaml.safe_load(file) + assert column_map.mapping == mapping + assert isinstance(column_map, ColumnMap) + + +def test_add_data_submission(repositories): producers, submissions, column_maps, users = repositories producer_name = "State X" new_producer = DataProducer(producer_name) @@ -42,12 +171,64 @@ def test_retrieve_a_list_of_submissions_by_producer(repositories): new_column_map = ColumnMap("TestMap", saved_producer, version_id=1) saved_column_map = column_maps.add(new_column_map) new_submission = DataSubmission("some-file-name", saved_producer, saved_column_map) - submissions.add(new_submission) - another_new_submission = DataSubmission( - "some-other-file-name", saved_producer, saved_column_map - ) - submissions.add(another_new_submission) - submissions = submissions.get_by_producer(saved_producer) + saved_submission = submissions.add(new_submission) + + assert saved_submission.id == 1 + assert saved_submission.created_at is not None + assert saved_submission.updated_at is not None + assert saved_submission.producer.id == saved_producer.id + assert saved_submission.filename == "some-file-name" + + +def test_data_submission_get_by_id(producer_column_maps_and_submissions): + _, submissions = producer_column_maps_and_submissions + submission = submissions.get_by_id(2) + assert submission.id == 2 + assert submission.created_at is not None + assert submission.updated_at is not None + assert submission.report is None + assert submission.producer.id == 2 + assert submission.column_map.id == 2 + assert submission.filename == "testproducer2-submission" + + +def test_data_submission_get_by_producer(producer_column_maps_and_submissions): + column_maps, submissions = producer_column_maps_and_submissions + producer = column_maps.get_all()[1].producer + submissions_entities = submissions.get_by_producer(producer) + + assert len(submissions_entities) == 1 + submission = submissions_entities[0] + assert submission.id == 2 + assert submission.created_at is not None + assert submission.updated_at is not None + assert submission.report is None + assert submission.producer.id == 2 + assert submission.column_map.id == 2 + assert submission.filename == "testproducer2-submission" + + +def test_data_submission_get_by_filename(producer_column_maps_and_submissions): + _, submissions = producer_column_maps_and_submissions + filename = "testproducer2-submission" + submissions_entity = submissions.get_by_filename(filename) + assert submissions_entity.id == 2 + assert submissions_entity.filename == filename + + filename = "testproducer1-submission" + submissions_entity = submissions.get_by_filename(filename) + assert submissions_entity.id == 1 + assert submissions_entity.filename == filename + + +def test_data_submission_update_report(producer_column_maps_and_submissions): + _, submissions = producer_column_maps_and_submissions + submission = submissions.get_by_id(2) + assert submission.report is None + assert submission.filename == "testproducer2-submission" - assert len(submissions) == 2 + new_report = {"a": 1, "c": 2} + submission = submissions.update_report(submission.id, new_report) + assert submission.report == new_report + assert submission.filename == "testproducer2-submission" diff --git a/tests/infrastructure/test_task_queue.py 
b/tests/infrastructure/test_task_queue.py new file mode 100644 index 0000000..5f2332d --- /dev/null +++ b/tests/infrastructure/test_task_queue.py @@ -0,0 +1,29 @@ +import os +from nad_ch.infrastructure.task_queue import load_and_validate +from tests.application.test_data_reader import TEST_DATA_DIR +from conftest import NAPERVILLE_GDB_REPORT, MAJOR_CITIES_SHP_REPORT +import geopandas as gpd +import random +import numpy as np + + +def test_load_and_validate_testprovider1( + celery_worker, celery_app, producer_column_maps +): + column_map = producer_column_maps.get_by_name_and_version("testproducer1", 1) + file_path = os.path.join(TEST_DATA_DIR, "geodatabases/Naperville.gdb") + task_result = load_and_validate.delay(file_path, column_map.mapping) + report_dict = task_result.get() + assert report_dict == NAPERVILLE_GDB_REPORT + + +def test_load_and_validate_testprovider2( + celery_worker, celery_app, producer_column_maps +): + column_map = producer_column_maps.get_by_name_and_version("testproducer2", 1) + file_path = os.path.join( + TEST_DATA_DIR, "shapefiles/usa-major-cities/usa-major-cities.shp" + ) + task_result = load_and_validate.delay(file_path, column_map.mapping) + report_dict = task_result.get() + assert report_dict == MAJOR_CITIES_SHP_REPORT diff --git a/tests/test_data/baselines.py b/tests/test_data/baselines.py new file mode 100644 index 0000000..cd9997f --- /dev/null +++ b/tests/test_data/baselines.py @@ -0,0 +1,337 @@ +EXPECTED_DEFAULT_CONFIG = { + "data_required_fields": [ + "Add_Number", + "AddNo_Full", + "St_Name", + "StNam_Full", + "County", + "Inc_Muni", + "Post_City", + "State", + "UUID", + "AddAuth", + "Longitude", + "Latitude", + "NatGrid", + "Placement", + "AddrPoint", + "DateUpdate", + "NAD_Source", + "DataSet_ID", + ] +} + +TESTPRODUCER1_CONFIG = { + "data_column_mapping": { + "COL_0": ["ID"], + "COL_1": ["STCOFIPS"], + "COL_10": ["HISPPOP"], + "COL_11": ["AMERIND"], + "COL_12": ["ASIAN"], + "COL_13": ["PACIFIC"], + "COL_14": ["RACE2UP"], + "COL_15": ["OTHRACE"], + "COL_16": ["LASTUPDATE"], + "COL_17": ["LASTEDITOR"], + "COL_18": ["AGEMAJOR"], + "COL_19": ["AREASQMETER"], + "COL_2": ["TRACT", "Pacific"], + "COL_20": ["Shape_Length"], + "COL_21": ["Shape_Area"], + "COL_22": ["geometry"], + "COL_3": ["STFID"], + "COL_4": ["BLOCK"], + "COL_5": ["TOTPOP"], + "COL_6": ["POPDENS", "totPop"], + "COL_7": ["RACEBASE"], + "COL_8": ["WHITE"], + "COL_9": ["BLACK"], + } +} + +TESTPRODUCER2_CONFIG = { + "data_column_mapping": { + "COL_0": ["NAME"], + "COL_1": ["ST"], + "COL_2": ["ZIP"], + "COL_3": ["RuleID"], + "COL_4": ["geometry"], + } +} + +NAPERVILLE_GDB_REPORT = { + "overview": { + "feature_count": 23, + "features_flagged": 0, + "records_count": 6012, + "records_flagged": 0, + "etl_update_required": False, + "data_update_required": False, + }, + "features": [ + { + "provided_feature_name": "ID", + "nad_feature_name": "COL_0", + "populated_count": 6012, + "null_count": 0, + "invalid_domain_count": 0, + "valid_domain_count": 0, + "invalid_domains": [], + }, + { + "provided_feature_name": "STCOFIPS", + "nad_feature_name": "COL_1", + "populated_count": 6012, + "null_count": 0, + "invalid_domain_count": 0, + "valid_domain_count": 0, + "invalid_domains": [], + }, + { + "provided_feature_name": "HISPPOP", + "nad_feature_name": "COL_10", + "populated_count": 6012, + "null_count": 0, + "invalid_domain_count": 0, + "valid_domain_count": 0, + "invalid_domains": [], + }, + { + "provided_feature_name": "AMERIND", + "nad_feature_name": "COL_11", + "populated_count": 6012, + "null_count": 0, 
+ "invalid_domain_count": 0, + "valid_domain_count": 0, + "invalid_domains": [], + }, + { + "provided_feature_name": "ASIAN", + "nad_feature_name": "COL_12", + "populated_count": 6012, + "null_count": 0, + "invalid_domain_count": 0, + "valid_domain_count": 0, + "invalid_domains": [], + }, + { + "provided_feature_name": "PACIFIC", + "nad_feature_name": "COL_13", + "populated_count": 6012, + "null_count": 0, + "invalid_domain_count": 0, + "valid_domain_count": 0, + "invalid_domains": [], + }, + { + "provided_feature_name": "RACE2UP", + "nad_feature_name": "COL_14", + "populated_count": 6012, + "null_count": 0, + "invalid_domain_count": 0, + "valid_domain_count": 0, + "invalid_domains": [], + }, + { + "provided_feature_name": "OTHRACE", + "nad_feature_name": "COL_15", + "populated_count": 6012, + "null_count": 0, + "invalid_domain_count": 0, + "valid_domain_count": 0, + "invalid_domains": [], + }, + { + "provided_feature_name": "LASTUPDATE", + "nad_feature_name": "COL_16", + "populated_count": 6012, + "null_count": 0, + "invalid_domain_count": 0, + "valid_domain_count": 0, + "invalid_domains": [], + }, + { + "provided_feature_name": "LASTEDITOR", + "nad_feature_name": "COL_17", + "populated_count": 6012, + "null_count": 0, + "invalid_domain_count": 0, + "valid_domain_count": 0, + "invalid_domains": [], + }, + { + "provided_feature_name": "AGEMAJOR", + "nad_feature_name": "COL_18", + "populated_count": 6012, + "null_count": 0, + "invalid_domain_count": 0, + "valid_domain_count": 0, + "invalid_domains": [], + }, + { + "provided_feature_name": "AREASQMETER", + "nad_feature_name": "COL_19", + "populated_count": 6012, + "null_count": 0, + "invalid_domain_count": 0, + "valid_domain_count": 0, + "invalid_domains": [], + }, + { + "provided_feature_name": "TRACT", + "nad_feature_name": "COL_2", + "populated_count": 6012, + "null_count": 0, + "invalid_domain_count": 0, + "valid_domain_count": 0, + "invalid_domains": [], + }, + { + "provided_feature_name": "Shape_Length", + "nad_feature_name": "COL_20", + "populated_count": 6012, + "null_count": 0, + "invalid_domain_count": 0, + "valid_domain_count": 0, + "invalid_domains": [], + }, + { + "provided_feature_name": "Shape_Area", + "nad_feature_name": "COL_21", + "populated_count": 6012, + "null_count": 0, + "invalid_domain_count": 0, + "valid_domain_count": 0, + "invalid_domains": [], + }, + { + "provided_feature_name": "geometry", + "nad_feature_name": "COL_22", + "populated_count": 6012, + "null_count": 0, + "invalid_domain_count": 0, + "valid_domain_count": 0, + "invalid_domains": [], + }, + { + "provided_feature_name": "STFID", + "nad_feature_name": "COL_3", + "populated_count": 6012, + "null_count": 0, + "invalid_domain_count": 0, + "valid_domain_count": 0, + "invalid_domains": [], + }, + { + "provided_feature_name": "BLOCK", + "nad_feature_name": "COL_4", + "populated_count": 6012, + "null_count": 0, + "invalid_domain_count": 0, + "valid_domain_count": 0, + "invalid_domains": [], + }, + { + "provided_feature_name": "TOTPOP", + "nad_feature_name": "COL_5", + "populated_count": 6012, + "null_count": 0, + "invalid_domain_count": 0, + "valid_domain_count": 0, + "invalid_domains": [], + }, + { + "provided_feature_name": "POPDENS", + "nad_feature_name": "COL_6", + "populated_count": 6012, + "null_count": 0, + "invalid_domain_count": 0, + "valid_domain_count": 0, + "invalid_domains": [], + }, + { + "provided_feature_name": "RACEBASE", + "nad_feature_name": "COL_7", + "populated_count": 6012, + "null_count": 0, + "invalid_domain_count": 0, + 
"valid_domain_count": 0, + "invalid_domains": [], + }, + { + "provided_feature_name": "WHITE", + "nad_feature_name": "COL_8", + "populated_count": 6012, + "null_count": 0, + "invalid_domain_count": 0, + "valid_domain_count": 0, + "invalid_domains": [], + }, + { + "provided_feature_name": "BLACK", + "nad_feature_name": "COL_9", + "populated_count": 6012, + "null_count": 0, + "invalid_domain_count": 0, + "valid_domain_count": 0, + "invalid_domains": [], + }, + ], +} +MAJOR_CITIES_SHP_REPORT = { + "overview": { + "feature_count": 5, + "features_flagged": 2, + "records_count": 120, + "records_flagged": 6, + "etl_update_required": False, + "data_update_required": False, + }, + "features": [ + { + "provided_feature_name": "NAME", + "nad_feature_name": "COL_0", + "populated_count": 120, + "null_count": 0, + "invalid_domain_count": 0, + "valid_domain_count": 0, + "invalid_domains": [], + }, + { + "provided_feature_name": "ST", + "nad_feature_name": "COL_1", + "populated_count": 120, + "null_count": 0, + "invalid_domain_count": 0, + "valid_domain_count": 0, + "invalid_domains": [], + }, + { + "provided_feature_name": "ZIP", + "nad_feature_name": "COL_2", + "populated_count": 117, + "null_count": 3, + "invalid_domain_count": 0, + "valid_domain_count": 0, + "invalid_domains": [], + }, + { + "provided_feature_name": "RuleID", + "nad_feature_name": "COL_3", + "populated_count": 117, + "null_count": 3, + "invalid_domain_count": 0, + "valid_domain_count": 0, + "invalid_domains": [], + }, + { + "provided_feature_name": "geometry", + "nad_feature_name": "COL_4", + "populated_count": 120, + "null_count": 0, + "invalid_domain_count": 0, + "valid_domain_count": 0, + "invalid_domains": [], + }, + ], +} diff --git a/tests/test_data/column_maps/testprovider1.yaml b/tests/test_data/column_maps/testproducer1.yaml similarity index 100% rename from tests/test_data/column_maps/testprovider1.yaml rename to tests/test_data/column_maps/testproducer1.yaml diff --git a/tests/test_data/column_maps/testprovider2.yaml b/tests/test_data/column_maps/testproducer2.yaml similarity index 100% rename from tests/test_data/column_maps/testprovider2.yaml rename to tests/test_data/column_maps/testproducer2.yaml diff --git a/tests/test_data/config_baselines.py b/tests/test_data/config_baselines.py deleted file mode 100644 index df23eda..0000000 --- a/tests/test_data/config_baselines.py +++ /dev/null @@ -1,121 +0,0 @@ -EXPECTED_DEFAULT_CONFIG = { - "data_required_fields": [ - "Add_Number", - "AddNo_Full", - "St_Name", - "StNam_Full", - "County", - "Inc_Muni", - "Post_City", - "State", - "UUID", - "AddAuth", - "Longitude", - "Latitude", - "NatGrid", - "Placement", - "AddrPoint", - "DateUpdate", - "NAD_Source", - "DataSet_ID", - ], - "data_column_mapping": { - "AddNum_Pre": ["ANUMBERPRE"], - "Add_Number": ["ANUMBER"], - "AddNum_Suf": ["ANUMBERSUF"], - "AddNo_Full": ["ADR_NUM_COMP"], - "St_PreMod": ["ST_PRE_MOD"], - "St_PreDir": ["ST_PRE_DIR"], - "St_PreTyp": ["ST_PRE_TYP"], - "St_PreSep": ["ST_PRE_SEP"], - "St_Name": None, - "St_PosTyp": ["ST_POS_TYP"], - "St_PosDir": ["ST_POS_DIR"], - "St_PosMod": ["ST_POS_MOD"], - "StNam_Full": ["ST_FULNAM"], - "Building": None, - "Floor": None, - "Unit": None, - "Room": None, - "Seat": None, - "Addtl_Loc": None, - "SubAddress": None, - "LandmkName": ["LANDMARK"], - "County": ["CO_NAME"], - "Inc_Muni": None, - "Post_City": ["Post_Comm", "POSTCOMM"], - "Census_Plc": None, - "Uninc_Comm": None, - "Nbrhd_Comm": None, - "NatAmArea": None, - "NatAmSub": None, - "Urbnztn_PR": None, - "PlaceOther": None, - 
"State": None, - "Zip_Code": ["Post_Code", "ZIP"], - "Plus_4": ["Post_Code4", "ZIP4"], - "UUID": ["GlobalID"], - "AddAuth": ["DiscrpAgID", "AAUTHORITY"], - "AddrRefSys": None, - "Longitude": ["Long", "LONGITUDE"], - "Latitude": ["Lat", "LATITUDE"], - "NatGrid": ["USNG_CODE"], - "Elevation": ["Elev"], - "Placement": ["PLACE_LOC"], - "AddrPoint": None, - "Related_ID": None, - "RelateType": None, - "ParcelSrc": None, - "Parcel_ID": ["STATE_PIN"], - "AddrClass": None, - "Lifecycle": ["STATUS"], - "Effective": ["EFF_DATE"], - "Expire": ["RET_DATE"], - "DateUpdate": ["EDIT_DATE"], - "AnomStatus": ["VERROR_911"], - "LocatnDesc": ["LOC_DESC"], - "Addr_Type": ["Place_Type"], - "PlaceNmTyp": None, - "DeliverTyp": None, - "NAD_Source": None, - "DataSet_ID": ["Site_NGUID", "ADD_ID"], - }, -} - -TESTPROVIDER1_CONFIG = { - "data_column_mapping": { - "COL_0": ["ID"], - "COL_1": ["STCOFIPS"], - "COL_10": ["HISPPOP"], - "COL_11": ["AMERIND"], - "COL_12": ["ASIAN"], - "COL_13": ["PACIFIC"], - "COL_14": ["RACE2UP"], - "COL_15": ["OTHRACE"], - "COL_16": ["LASTUPDATE"], - "COL_17": ["LASTEDITOR"], - "COL_18": ["AGEMAJOR"], - "COL_19": ["AREASQMETER"], - "COL_2": ["TRACT", "Pacific"], - "COL_20": ["Shape_Length"], - "COL_21": ["Shape_Area"], - "COL_22": ["geometry"], - "COL_3": ["STFID"], - "COL_4": ["BLOCK"], - "COL_5": ["TOTPOP"], - "COL_6": ["POPDENS", "totPop"], - "COL_7": ["RACEBASE"], - "COL_8": ["WHITE"], - "COL_9": ["BLACK"], - } -} - -TESTPROVIDER2_CONFIG = { - "data_column_mapping": { - "COL_0": ["NAME"], - "COL_1": ["ST"], - "COL_2": ["ZIP"], - "COL_3": ["RuleID"], - "COL_4": ["geometry"], - } -} diff --git a/tests/test_data/shapefiles/baselines/usa-major-cities-gdf-0.pkl b/tests/test_data/shapefiles/baselines/usa-major-cities-gdf-0.pkl index 7cc9cd1..938c3b1 100644 Binary files a/tests/test_data/shapefiles/baselines/usa-major-cities-gdf-0.pkl and b/tests/test_data/shapefiles/baselines/usa-major-cities-gdf-0.pkl differ diff --git a/tests/test_data/shapefiles/baselines/usa-major-cities-gdf-1.pkl b/tests/test_data/shapefiles/baselines/usa-major-cities-gdf-1.pkl index 0ba3298..57e7522 100644 Binary files a/tests/test_data/shapefiles/baselines/usa-major-cities-gdf-1.pkl and b/tests/test_data/shapefiles/baselines/usa-major-cities-gdf-1.pkl differ diff --git a/tests/test_data/shapefiles/baselines/usa-major-cities-gdf-2.pkl b/tests/test_data/shapefiles/baselines/usa-major-cities-gdf-2.pkl index 326bbaf..e2b91a5 100644 Binary files a/tests/test_data/shapefiles/baselines/usa-major-cities-gdf-2.pkl and b/tests/test_data/shapefiles/baselines/usa-major-cities-gdf-2.pkl differ diff --git a/tests/test_data/shapefiles/usa-major-cities/usa-major-cities.cpg b/tests/test_data/shapefiles/usa-major-cities/usa-major-cities.cpg new file mode 100644 index 0000000..cd89cb9 --- /dev/null +++ b/tests/test_data/shapefiles/usa-major-cities/usa-major-cities.cpg @@ -0,0 +1 @@ +ISO-8859-1 \ No newline at end of file diff --git a/tests/test_data/shapefiles/usa-major-cities/usa-major-cities.dbf b/tests/test_data/shapefiles/usa-major-cities/usa-major-cities.dbf index 8cd4759..26ff775 100644 Binary files a/tests/test_data/shapefiles/usa-major-cities/usa-major-cities.dbf and b/tests/test_data/shapefiles/usa-major-cities/usa-major-cities.dbf differ diff --git a/tests/test_data/shapefiles/usa-major-cities/usa-major-cities.sbn b/tests/test_data/shapefiles/usa-major-cities/usa-major-cities.sbn deleted file mode 100644 index 8a33f07..0000000 Binary files a/tests/test_data/shapefiles/usa-major-cities/usa-major-cities.sbn and /dev/null differ 
diff --git a/tests/test_data/shapefiles/usa-major-cities/usa-major-cities.sbx b/tests/test_data/shapefiles/usa-major-cities/usa-major-cities.sbx deleted file mode 100644 index 4f981e1..0000000 Binary files a/tests/test_data/shapefiles/usa-major-cities/usa-major-cities.sbx and /dev/null differ diff --git a/tests/test_data/shapefiles/usa-major-cities/usa-major-cities.shp.xml b/tests/test_data/shapefiles/usa-major-cities/usa-major-cities.shp.xml deleted file mode 100644 index 1907432..0000000 --- a/tests/test_data/shapefiles/usa-major-cities/usa-major-cities.shp.xml +++ /dev/null @@ -1 +0,0 @@ -{BEE2416A-EC89-428E-AC85-A316F275D0D9}2006042112570600FALSE20130819115922002013081911592200CopyFeatures "C:\arcgis\Samples Net\Data\USZipCodeData\ZipCode_Boundaries_US_Major_Cities.shp" C:\Data\Data\Representations.gdb\ZipCode_Boundaries_US_Major_Cities # 0 0 0US_Major_Cities0020.000file://\\JIBBERJABBER\C$\WORK\GitHub\arcgis-runtime-samples-data\data\shapefiles\US_Major_Cities.shpLocal Area NetworkGeographicGCS_WGS_1984Angular Unit: Degree (0.017453)<GeographicCoordinateSystem xsi:type='typens:GeographicCoordinateSystem' xmlns:xsi='http://www.w3.org/2001/XMLSchema-instance' xmlns:xs='http://www.w3.org/2001/XMLSchema' xmlns:typens='http://www.esri.com/schemas/ArcGIS/10.1'><WKT>GEOGCS[&quot;GCS_WGS_1984&quot;,DATUM[&quot;D_WGS_1984&quot;,SPHEROID[&quot;WGS_1984&quot;,6378137.0,298.257223563]],PRIMEM[&quot;Greenwich&quot;,0.0],UNIT[&quot;Degree&quot;,0.0174532925199433],AUTHORITY[&quot;EPSG&quot;,4326]]</WKT><XOrigin>-400</XOrigin><YOrigin>-400</YOrigin><XYScale>11258999068426.238</XYScale><ZOrigin>-100000</ZOrigin><ZScale>10000</ZScale><MOrigin>-100000</MOrigin><MScale>10000</MScale><XYTolerance>8.983152841195215e-009</XYTolerance><ZTolerance>0.001</ZTolerance><MTolerance>0.001</MTolerance><HighPrecision>true</HighPrecision><LeftLongitude>-180</LeftLongitude><WKID>4326</WKID><LatestWKID>4326</LatestWKID></GeographicCoordinateSystem>Microsoft Windows XP Version 5.1 (Build 2600) Service Pack 2; ESRI ArcCatalog 9.2.0.1170enREQUIRED: A brief narrative summary of the data set.REQUIRED: A summary of the intentions with which the data set was developed.REQUIRED: The name of an organization or individual that developed the data set.REQUIRED: The date when the data set is published or otherwise made available for release.ZipCode_Boundaries_US_Major_CitiesZipCode_Boundaries_US_Major_Citiesvector digital data\\YARO4525\C$\Data\Data\Representations.gdbREQUIRED: The basis on which the time period of content information is determined.REQUIRED: The year (and optionally month, or month and day) for which the data set corresponds to the ground.REQUIRED: The state of the data set.REQUIRED: The frequency with which changes and additions are made to the data set after the initial data set is completed.REQUIRED: Western-most coordinate of the limit of coverage expressed in longitude.REQUIRED: Eastern-most coordinate of the limit of coverage expressed in longitude.REQUIRED: Northern-most coordinate of the limit of coverage expressed in latitude.REQUIRED: Southern-most coordinate of the limit of coverage expressed in latitude.REQUIRED: Reference to a formally registered thesaurus or a similar authoritative source of theme keywords.REQUIRED: Common-use word or phrase used to describe the subject of the data set.REQUIRED: Restrictions and legal prerequisites for accessing the data set.REQUIRED: Restrictions and legal prerequisites for using the data set after access is granted.File Geodatabase Feature Class Version 6.2 (Build 
9200) ; Esri ArcGIS 10.2.0.3348US_Major_CitiesenFGDC Content Standards for Digital Geospatial MetadataFGDC-STD-001-1998local timeREQUIRED: The person responsible for the metadata information.REQUIRED: The organization responsible for the metadata information.REQUIRED: The mailing and/or physical address for the organization or individual.REQUIRED: The city of the address.REQUIRED: The state or province of the address.REQUIRED: The ZIP or other postal code of the address.REQUIRED: The telephone number by which individuals can speak to the organization or individual.20060421ISO 19115 Geographic Information - MetadataDIS_ESRI1.0datasetDownloadable Data002file://\\YARO4525\C$\Data\Data\Representations.gdbLocal Area NetworkFile Geodatabase Feature ClassShapefile0.000VectorSimpleFALSE0FALSEFALSEGCS_WGS_1984Decimal degrees0.0000000.000000D_WGS_1984WGS_19846378137.000000298.257224Explicit elevation coordinate included with horizontal coordinates0.000100GCS_WGS_1984EPSG8.1.10US_Major_CitiesFeature Class0FIDFIDOID400Internal feature number.EsriSequential unique whole numbers that are automatically generated.ShapeShapeGeometry000Feature geometry.ESRICoordinates defining the features.NAMENAMEString4000STSTString200ZIPZIPString500RuleIDRuleIDInteger99020130819
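Reviewer note: the conftest.py added at the top of this diff wires tests into Celery's pytest plugin: `pytest_plugins = ("celery.contrib.pytest",)` makes the plugin's celery_app and celery_worker fixtures available, and they pick up the session-scoped celery_config fixture to start an embedded worker thread, which is how test_task_queue.py can call load_and_validate.delay(...) and block on .get(). One caveat: Celery documents broker_connection_retry, broker_connection_max_retries, and broker_connection_retry_on_startup; the broker_connection_retry_delay and broker_connection_retry_max keys used in conftest.py and task_queue.py do not appear among the documented settings, so they are likely ignored. Below is a minimal, self-contained sketch of the pattern, following Celery's testing docs; the mul task is illustrative and not part of nad_ch, and the in-memory broker/backend stand in for QUEUE_BROKER_URL / QUEUE_BACKEND_URL so the sketch runs without Redis:

```python
# Minimal sketch of the Celery/pytest wiring this PR sets up in conftest.py.
import pytest

pytest_plugins = ("celery.contrib.pytest",)  # belongs in a conftest.py


@pytest.fixture(scope="session")
def celery_config():
    # In-memory transport and result backend keep the sketch self-contained.
    return {
        "broker_url": "memory://",
        "result_backend": "cache+memory://",
    }


def test_task_round_trip(celery_app, celery_worker):
    @celery_app.task
    def mul(x, y):
        return x * y

    # The embedded worker started before this task was registered, so
    # re-scan the app's task registry before dispatching.
    celery_worker.reload()
    assert mul.delay(4, 4).get(timeout=10) == 16
```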