diff --git a/.gitignore b/.gitignore index bd08b1a..c83db22 100644 --- a/.gitignore +++ b/.gitignore @@ -182,3 +182,6 @@ node_modules/ # Mac Desktop Services Store *.DS_Store + +# Landing zone data storage +# /data/landing_zone/* diff --git a/README.md b/README.md index 48355c1..94e9c1e 100644 --- a/README.md +++ b/README.md @@ -58,6 +58,8 @@ docker exec nad-ch-dev-local poetry run alembic downgrade None: + self.column_map = column_map + self.mapped_data_dir = mapped_data_dir + self.mapped_data_path = ( + os.path.join( + self.mapped_data_dir, + self.mapped_data_dir.split("/")[-1] + ".shp", + ) + if self.mapped_data_dir + else None + ) + self.zip_file_path = ( + os.path.join( + self.mapped_data_dir, + self.mapped_data_dir.split("/")[-1] + ".zip", + ) + if self.mapped_data_dir + else None + ) + self.valid_renames = {} + self.__validate_column_map() + + def __validate_column_map(self): + column_map_reverse = {} + + for key, value in self.column_map.items(): + if value: + value_lcase = value.lower() + if value_lcase in column_map_reverse: + column_map_reverse[value_lcase].append(key) + else: + column_map_reverse[value_lcase] = [key] + duplicates = {k: v for k, v in column_map_reverse.items() if len(v) > 1} + if duplicates: + duplicate_nad_fields = ", ".join( + [" & ".join(nad_fields) for nad_fields in list(duplicates.values())] + ) + raise Exception( + f"Duplicate inputs found for destination fields: {duplicate_nad_fields}" + ) + + def __rename_columns(self, gdf: GeoDataFrame) -> GeoDataFrame: + column_map = self.column_map + column_map["geometry"] = "geometry" + original_names = {col.lower(): col for col in gdf.columns} + for nad_column, raw_field in column_map.items(): + orig_matched_name = original_names.get(nad_column.lower()) + if orig_matched_name: + self.valid_renames[orig_matched_name] = nad_column + continue + if raw_field: + orig_matched_name = original_names.get(raw_field.lower()) + if orig_matched_name: + self.valid_renames[orig_matched_name] = nad_column + gdf = gdf.rename(columns=self.valid_renames) + return gdf[[col for col in self.valid_renames.values()]] + + def read_file_in_batches( + self, path: str, table_name: Optional[str] = None, batch_size: int = 100000 + ) -> Iterator[GeoDataFrame]: + # TODO: Modify to return a joined table; for cases where 1 or more tables + # are needed to get all fields from source file. + if table_name and table_name not in fiona.listlayers(path): + raise Exception(f"Table name {table_name} does not exist") + i = 0 + while True: + gdf = read_file(path, rows=slice(i, i + batch_size)) + if gdf.shape[0] == 0: + if self.mapped_data_dir: + # No more batches to process, create zip file + self.__zip_mapped_data() + break + gdf = self.__rename_columns(gdf) + if self.mapped_data_dir: + self.__write_mapped_batch(gdf, i == 0) + yield gdf + i += batch_size + + def __write_mapped_batch(self, gdf: GeoDataFrame, first_batch: bool): + write_mode = "a" + if first_batch: + write_mode = "w" + os.makedirs(self.mapped_data_dir, exist_ok=True) + try: + gdf.to_file( + filename=self.mapped_data_path, + index=False, + mode=write_mode, + engine="fiona", + ) + except Exception: + shutil.rmtree(self.mapped_data_dir) + raise + + def __zip_mapped_data(self): + with ZipFile(self.zip_file_path, "w") as zipf: + # Walk through all the files and subdirectories in the given directory + for root, dirs, files in os.walk(self.mapped_data_dir): + for file in files: + file_path = os.path.join(root, file) + relative_path = os.path.relpath(file_path, self.mapped_data_dir) + zipf.write(file_path, arcname=relative_path) diff --git a/nad_ch/application/data_reader.py b/nad_ch/application/data_reader.py deleted file mode 100644 index b4390e4..0000000 --- a/nad_ch/application/data_reader.py +++ /dev/null @@ -1,67 +0,0 @@ -from geopandas import GeoDataFrame, read_file -import fiona -from typing import Optional, Dict, Iterator - - -class DataReader(object): - def __init__( - self, column_map: Dict[str, str], validate_mapping: bool = True - ) -> None: - self.column_map = column_map - self.valid_renames = {} - if validate_mapping: - self.validate_column_map() - - def validate_column_map(self): - column_map_reverse = {} - - for key, values in self.column_map.items(): - if values: - for value in values: - value_lcase = value.lower() - if value_lcase in column_map_reverse: - column_map_reverse[value_lcase].append(key) - else: - column_map_reverse[value_lcase] = [key] - duplicates = {k: v for k, v in column_map_reverse.items() if len(v) > 1} - if duplicates: - duplicate_nad_fields = ", ".join( - [" & ".join(nad_fields) for nad_fields in list(duplicates.values())] - ) - raise Exception( - f"Duplicate inputs found for destination fields: {duplicate_nad_fields}" - ) - - def rename_columns(self, gdf: GeoDataFrame) -> GeoDataFrame: - column_map = self.column_map - original_names = {col.lower(): col for col in gdf.columns} - for nad_column, fields_to_check in column_map.items(): - orig_matched_name = original_names.get(nad_column.lower()) - if orig_matched_name: - self.valid_renames[orig_matched_name] = nad_column - continue - if fields_to_check: - for field in fields_to_check: - orig_matched_name = original_names.get(field.lower()) - if orig_matched_name: - self.valid_renames[orig_matched_name] = nad_column - break - gdf = gdf.rename(columns=self.valid_renames) - return gdf[[col for col in self.valid_renames.values()]] - - def read_file_in_batches( - self, path: str, table_name: Optional[str] = None, batch_size: int = 100000 - ) -> Iterator[GeoDataFrame]: - # TODO: Modify to return a joined table; for cases where 1 or more tables - # are needed to get all fields from source file. - layers = fiona.listlayers(path) - if table_name and table_name not in layers: - raise Exception(f"Table name {table_name} does not exist") - i = 0 - while True: - gdf = read_file(path, rows=slice(i, i + batch_size)) - if gdf.shape[0] == 0: - break - gdf = self.rename_columns(gdf) - yield gdf - i += batch_size diff --git a/nad_ch/application/use_cases/data_submissions.py b/nad_ch/application/use_cases/data_submissions.py index 6d97223..413c785 100644 --- a/nad_ch/application/use_cases/data_submissions.py +++ b/nad_ch/application/use_cases/data_submissions.py @@ -7,6 +7,7 @@ DataSubmissionViewModel, ) from nad_ch.core.entities import DataSubmission, ColumnMap +from nad_ch.config import LANDING_ZONE def ingest_data_submission( @@ -85,14 +86,29 @@ def validate_data_submission( # Using version 1 for column maps for now, may add feature for user to select # version later - column_map = ctx.column_maps.get_by_name_and_version(column_map_name, 1) - report = ctx.task_queue.run_load_and_validate( - ctx.submissions, - submission.id, - download_result.extracted_dir, - column_map.mapping, - ) - - ctx.logger.info(f"Total number of features: {report.overview.feature_count}") - - ctx.storage.cleanup_temp_dir(download_result.temp_dir) + try: + column_map = ctx.column_maps.get_by_name_and_version(column_map_name, 1) + mapped_data_local_dir = submission.get_mapped_data_dir( + download_result.extracted_dir, LANDING_ZONE + ) + mapped_data_remote_dir = submission.get_mapped_data_dir( + download_result.extracted_dir, LANDING_ZONE, True + ) + report = ctx.task_queue.run_load_and_validate( + ctx.submissions, + submission.id, + download_result.extracted_dir, + column_map.mapping, + mapped_data_local_dir, + ) + _ = ctx.task_queue.run_copy_mapped_data_to_remote( + mapped_data_local_dir, + mapped_data_remote_dir, + ) + + ctx.logger.info(f"Total number of features: {report.overview.feature_count}") + except Exception: + raise + finally: + ctx.storage.cleanup_temp_dir(download_result.temp_dir) + ctx.storage.cleanup_temp_dir(mapped_data_local_dir) diff --git a/nad_ch/config/development_local.py b/nad_ch/config/development_local.py index 6092b48..26545fe 100644 --- a/nad_ch/config/development_local.py +++ b/nad_ch/config/development_local.py @@ -28,10 +28,6 @@ f"postgresql+psycopg2://{postgres_user}:{postgres_password}" f"@{postgres_host}:{postgres_port}/test_database" ) -# TEST_DATABASE_URL = ( -# f"postgresql://{postgres_user}" -# f"@localhost:/var/run/postgresql/.s.PGSQL.5432/test_database" -# ) QUEUE_BROKER_URL = os.getenv("QUEUE_BROKER_URL") QUEUE_BACKEND_URL = os.getenv("QUEUE_BACKEND_URL") S3_BUCKET_NAME = os.getenv("S3_BUCKET_NAME") @@ -39,6 +35,7 @@ S3_ACCESS_KEY = os.getenv("S3_ACCESS_KEY") S3_SECRET_ACCESS_KEY = os.getenv("S3_SECRET_ACCESS_KEY") S3_REGION = os.getenv("S3_REGION") +LANDING_ZONE = os.path.join(os.getcwd(), "data/landing_zone") class DevLocalApplicationContext(ApplicationContext): @@ -68,11 +65,13 @@ def create_column_map_repository(self): def create_logger(self): return BasicLogger(__name__, logging.DEBUG) - def create_storage(self): + @staticmethod + def create_storage(): return MinioStorage( S3_ENDPOINT, S3_ACCESS_KEY, S3_SECRET_ACCESS_KEY, + S3_REGION, S3_BUCKET_NAME, ) diff --git a/nad_ch/config/development_remote.py b/nad_ch/config/development_remote.py index 88e70ac..e22b331 100644 --- a/nad_ch/config/development_remote.py +++ b/nad_ch/config/development_remote.py @@ -14,6 +14,9 @@ from nad_ch.infrastructure.storage import S3Storage +LANDING_ZONE = os.path.join(os.getcwd(), "data/landing_zone") + + def get_credentials(service_name, default={}): service = vcap_services.get(service_name, [default]) return service[0].get("credentials", default) if service else default @@ -67,7 +70,8 @@ def create_column_map_repository(self): def create_logger(self): return BasicLogger(__name__) - def create_storage(self): + @staticmethod + def create_storage(): return S3Storage( S3_ACCESS_KEY, S3_SECRET_ACCESS_KEY, diff --git a/nad_ch/config/test.py b/nad_ch/config/test.py index 0832cb8..1daee34 100644 --- a/nad_ch/config/test.py +++ b/nad_ch/config/test.py @@ -16,6 +16,7 @@ DATABASE_URL = os.getenv("DATABASE_URL") QUEUE_BROKER_URL = os.getenv("QUEUE_BROKER_URL") QUEUE_BACKEND_URL = os.getenv("QUEUE_BACKEND_URL") +LANDING_ZONE = os.path.join(os.getcwd(), "data/landing_zone") class TestApplicationContext(ApplicationContext): @@ -45,7 +46,8 @@ def create_column_map_repository(self): def create_logger(self): return BasicLogger(__name__, logging.DEBUG) - def create_storage(self): + @staticmethod + def create_storage(): return FakeStorage() def create_task_queue(self): diff --git a/nad_ch/core/entities.py b/nad_ch/core/entities.py index 688efcb..d652e20 100644 --- a/nad_ch/core/entities.py +++ b/nad_ch/core/entities.py @@ -155,19 +155,22 @@ def __repr__(self): @staticmethod def generate_filename(file_path: str, producer: DataProducer) -> str: - s = re.sub(r"\W+", "_", producer.name) - s = s.lower() - s = s.strip("_") - formatted_producer_name = re.sub(r"_+", "_", s) - - current_time_utc = datetime.now(timezone.utc) - timestamp = current_time_utc.timestamp() - datetime_obj = datetime.fromtimestamp(timestamp, UTC) - datetime_str = datetime_obj.strftime("%Y%m%d_%H%M%S") - - _, file_extension = os.path.splitext(file_path) - filename = f"{formatted_producer_name}_{datetime_str}{file_extension}" - return filename + return os.path.basename(file_path) + + def get_mapped_data_dir( + self, source_path: str, base_path: str, remote: bool = False + ) -> str: + filename, _ = os.path.splitext( + self.generate_filename(source_path, self.producer) + ) + if remote: + # Defines the path for remote storage such as s3 + partition_dt = datetime.today().strftime("%Y_%m_%d") + path = f"data_submissions/{self.producer.name}/{partition_dt}/{filename}" + else: + # Defines the path for local storage of post-mapped data + path = os.path.join(base_path, f"data_submissions/{self.id}/{filename}") + return path def has_report(self) -> bool: return self.report is not None diff --git a/nad_ch/infrastructure/storage.py b/nad_ch/infrastructure/storage.py index 384b3e3..55858d6 100644 --- a/nad_ch/infrastructure/storage.py +++ b/nad_ch/infrastructure/storage.py @@ -8,6 +8,7 @@ from botocore.client import Config from nad_ch.application.dtos import DownloadResult from nad_ch.application.interfaces import Storage +from minio import Minio class S3Storage(Storage): @@ -25,17 +26,19 @@ def __init__( def upload(self, source: str, destination: str) -> bool: try: - self.client.upload_file(source, Bucket=self.bucket_name, Key=destination) - return True + response = self.client.upload_file( + source, Bucket=self.bucket_name, Key=destination + ) + return response except FileNotFoundError: - return False + return None def delete(self, key: str) -> bool: try: - self.client.delete_object(Bucket=self.bucket_name, Key=key) - return True + response = self.client.delete_object(Bucket=self.bucket_name, Key=key) + return response except Exception: - return False + return None def download_temp(self, key: str) -> Optional[DownloadResult]: try: @@ -69,20 +72,39 @@ def cleanup_temp_dir(self, temp_dir: str) -> bool: class MinioStorage(S3Storage): def __init__( - self, endpoint_url: str, access_key_id: str, secret_access_key: str, bucket: str + self, + endpoint_url: str, + access_key_id: str, + secret_access_key: str, + region: str, + bucket: str, ): - session = Session() - self.client = session.client( - "s3", - endpoint_url=endpoint_url, - aws_access_key_id=access_key_id, - aws_secret_access_key=secret_access_key, - aws_session_token=None, - region_name="us-east-1", - verify=False, - config=Config(signature_version="s3v4"), + self.client = Minio( + endpoint=endpoint_url, + access_key=access_key_id, + secret_key=secret_access_key, + region=region, + secure=False, ) self.bucket_name = bucket + self.create_bucket() + + def upload(self, source: str, destination: str) -> bool: + try: + response = self.client.fput_object( + file_path=source, bucket_name=self.bucket_name, object_name=destination + ) + return response + except FileNotFoundError: + return None + + def create_bucket(self): + # Make the bucket if it doesn't exist. + if not self.client.bucket_exists(self.bucket_name): + self.client.make_bucket(self.bucket_name) + print("Created bucket", self.bucket_name) + else: + print("Bucket", self.bucket_name, "already exists") class LocalStorage(Storage): diff --git a/nad_ch/infrastructure/task_queue.py b/nad_ch/infrastructure/task_queue.py index e2a68b1..1fc85e0 100644 --- a/nad_ch/infrastructure/task_queue.py +++ b/nad_ch/infrastructure/task_queue.py @@ -1,14 +1,23 @@ +import os +from nad_ch.config.development_local import ( + DevLocalApplicationContext as dev_local_app_context, +) +from nad_ch.config.development_remote import ( + DevRemoteApplicationContext as dev_remote_app_context, +) +from nad_ch.config.test import TestApplicationContext as test_app_context from celery import Celery from nad_ch.application.dtos import ( DataSubmissionReport, report_to_dict, report_from_dict, ) -from nad_ch.application.data_reader import DataReader +from nad_ch.application.data_handler import DataHandler from nad_ch.application.interfaces import TaskQueue from nad_ch.application.validation import DataValidator from nad_ch.config import QUEUE_BROKER_URL, QUEUE_BACKEND_URL from nad_ch.core.repositories import DataSubmissionRepository +from datetime import datetime, timezone from typing import Dict @@ -24,6 +33,7 @@ celery_app.conf.update( + task_concurrency=4, store_processed=True, result_persistent=True, task_serializer="json", @@ -32,22 +42,48 @@ ) -@celery_app.task -def load_and_validate(gdb_file_path: str, column_map: Dict[str, str]) -> dict: - data_reader = DataReader(column_map) - first_batch = True - for gdf in data_reader.read_file_in_batches(path=gdb_file_path): - if first_batch: - data_validator = DataValidator(data_reader.valid_renames) - data_validator.run(gdf) - first_batch = False - data_validator.finalize_overview_details() - report = DataSubmissionReport( - data_validator.report_overview, list(data_validator.report_features.values()) - ) +@celery_app.task(bind=True, max_retries=2) +def load_and_validate( + self, gdb_file_path: str, column_map: Dict[str, str], mapped_data_dir: str +) -> dict: + try: + data_handler = DataHandler(column_map, mapped_data_dir) + first_batch = True + for gdf in data_handler.read_file_in_batches(path=gdb_file_path): + if first_batch: + data_validator = DataValidator(data_handler.valid_renames) + data_validator.run(gdf) + first_batch = False + data_validator.finalize_overview_details() + report = DataSubmissionReport( + data_validator.report_overview, + list(data_validator.report_features.values()), + ) + except Exception as e: + raise self.retry(exec=e, countdown=30) return report_to_dict(report) +@celery_app.task(bind=True, max_retries=2) +def copy_mapped_data_to_remote( + self, mapped_data_local_dir: str, mapped_data_remote_dir: str +) -> bool: + try: + success = True + app_context = TaskHelperFunctions.get_app_context() + storage_interface = app_context.create_storage() + filename = mapped_data_remote_dir.split("/")[-1] + timestamp = datetime.now(timezone.utc).strftime("%Y_%m_%d_%H%M%S") + # Copy mapped dataset to remote storage + storage_interface.upload( + os.path.join(mapped_data_local_dir, f"{filename}.zip"), + os.path.join(mapped_data_remote_dir, f"{filename}_{timestamp}.zip"), + ) + except Exception as e: + raise self.retry(exec=e, countdown=30) + return success + + class CeleryTaskQueue(TaskQueue): def __init__(self, app): self.app = app @@ -58,8 +94,36 @@ def run_load_and_validate( submission_id: int, path: str, column_map: Dict[str, str], + mapped_data_dir: str, ): - task_result = load_and_validate.apply_async(args=[path, column_map]) + task_result = load_and_validate.apply_async( + args=[path, column_map, mapped_data_dir] + ) report_dict = task_result.get() submissions.update_report(submission_id, report_dict) return report_from_dict(report_dict) + + def run_copy_mapped_data_to_remote( + self, + mapped_data_dir: str, + mapped_data_remote_dir: str, + ): + task_result = copy_mapped_data_to_remote.apply_async( + args=[mapped_data_dir, mapped_data_remote_dir] + ) + success = task_result.get() + return success + + +class TaskHelperFunctions: + + @staticmethod + def get_app_context(): + APP_ENV = os.environ.get("APP_ENV") + if APP_ENV == "dev_local": + app_context = dev_local_app_context + elif APP_ENV == "dev_remote": + app_context = dev_remote_app_context + elif APP_ENV == "test": + app_context = test_app_context + return app_context diff --git a/poetry.lock b/poetry.lock index 7f8903b..b981384 100644 --- a/poetry.lock +++ b/poetry.lock @@ -33,6 +33,63 @@ files = [ [package.dependencies] vine = ">=5.0.0,<6.0.0" +[[package]] +name = "argon2-cffi" +version = "23.1.0" +description = "Argon2 for Python" +optional = false +python-versions = ">=3.7" +files = [ + {file = "argon2_cffi-23.1.0-py3-none-any.whl", hash = "sha256:c670642b78ba29641818ab2e68bd4e6a78ba53b7eff7b4c3815ae16abf91c7ea"}, + {file = "argon2_cffi-23.1.0.tar.gz", hash = "sha256:879c3e79a2729ce768ebb7d36d4609e3a78a4ca2ec3a9f12286ca057e3d0db08"}, +] + +[package.dependencies] +argon2-cffi-bindings = "*" + +[package.extras] +dev = ["argon2-cffi[tests,typing]", "tox (>4)"] +docs = ["furo", "myst-parser", "sphinx", "sphinx-copybutton", "sphinx-notfound-page"] +tests = ["hypothesis", "pytest"] +typing = ["mypy"] + +[[package]] +name = "argon2-cffi-bindings" +version = "21.2.0" +description = "Low-level CFFI bindings for Argon2" +optional = false +python-versions = ">=3.6" +files = [ + {file = "argon2-cffi-bindings-21.2.0.tar.gz", hash = "sha256:bb89ceffa6c791807d1305ceb77dbfacc5aa499891d2c55661c6459651fc39e3"}, + {file = "argon2_cffi_bindings-21.2.0-cp36-abi3-macosx_10_9_x86_64.whl", hash = "sha256:ccb949252cb2ab3a08c02024acb77cfb179492d5701c7cbdbfd776124d4d2367"}, + {file = "argon2_cffi_bindings-21.2.0-cp36-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9524464572e12979364b7d600abf96181d3541da11e23ddf565a32e70bd4dc0d"}, + {file = "argon2_cffi_bindings-21.2.0-cp36-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b746dba803a79238e925d9046a63aa26bf86ab2a2fe74ce6b009a1c3f5c8f2ae"}, + {file = "argon2_cffi_bindings-21.2.0-cp36-abi3-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:58ed19212051f49a523abb1dbe954337dc82d947fb6e5a0da60f7c8471a8476c"}, + {file = "argon2_cffi_bindings-21.2.0-cp36-abi3-musllinux_1_1_aarch64.whl", hash = "sha256:bd46088725ef7f58b5a1ef7ca06647ebaf0eb4baff7d1d0d177c6cc8744abd86"}, + {file = "argon2_cffi_bindings-21.2.0-cp36-abi3-musllinux_1_1_i686.whl", hash = "sha256:8cd69c07dd875537a824deec19f978e0f2078fdda07fd5c42ac29668dda5f40f"}, + {file = "argon2_cffi_bindings-21.2.0-cp36-abi3-musllinux_1_1_x86_64.whl", hash = "sha256:f1152ac548bd5b8bcecfb0b0371f082037e47128653df2e8ba6e914d384f3c3e"}, + {file = "argon2_cffi_bindings-21.2.0-cp36-abi3-win32.whl", hash = "sha256:603ca0aba86b1349b147cab91ae970c63118a0f30444d4bc80355937c950c082"}, + {file = "argon2_cffi_bindings-21.2.0-cp36-abi3-win_amd64.whl", hash = "sha256:b2ef1c30440dbbcba7a5dc3e319408b59676e2e039e2ae11a8775ecf482b192f"}, + {file = "argon2_cffi_bindings-21.2.0-cp38-abi3-macosx_10_9_universal2.whl", hash = "sha256:e415e3f62c8d124ee16018e491a009937f8cf7ebf5eb430ffc5de21b900dad93"}, + {file = "argon2_cffi_bindings-21.2.0-pp37-pypy37_pp73-macosx_10_9_x86_64.whl", hash = "sha256:3e385d1c39c520c08b53d63300c3ecc28622f076f4c2b0e6d7e796e9f6502194"}, + {file = "argon2_cffi_bindings-21.2.0-pp37-pypy37_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2c3e3cc67fdb7d82c4718f19b4e7a87123caf8a93fde7e23cf66ac0337d3cb3f"}, + {file = "argon2_cffi_bindings-21.2.0-pp37-pypy37_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6a22ad9800121b71099d0fb0a65323810a15f2e292f2ba450810a7316e128ee5"}, + {file = "argon2_cffi_bindings-21.2.0-pp37-pypy37_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f9f8b450ed0547e3d473fdc8612083fd08dd2120d6ac8f73828df9b7d45bb351"}, + {file = "argon2_cffi_bindings-21.2.0-pp37-pypy37_pp73-win_amd64.whl", hash = "sha256:93f9bf70084f97245ba10ee36575f0c3f1e7d7724d67d8e5b08e61787c320ed7"}, + {file = "argon2_cffi_bindings-21.2.0-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:3b9ef65804859d335dc6b31582cad2c5166f0c3e7975f324d9ffaa34ee7e6583"}, + {file = "argon2_cffi_bindings-21.2.0-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d4966ef5848d820776f5f562a7d45fdd70c2f330c961d0d745b784034bd9f48d"}, + {file = "argon2_cffi_bindings-21.2.0-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:20ef543a89dee4db46a1a6e206cd015360e5a75822f76df533845c3cbaf72670"}, + {file = "argon2_cffi_bindings-21.2.0-pp38-pypy38_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ed2937d286e2ad0cc79a7087d3c272832865f779430e0cc2b4f3718d3159b0cb"}, + {file = "argon2_cffi_bindings-21.2.0-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:5e00316dabdaea0b2dd82d141cc66889ced0cdcbfa599e8b471cf22c620c329a"}, +] + +[package.dependencies] +cffi = ">=1.0.1" + +[package.extras] +dev = ["cogapp", "pre-commit", "pytest", "wheel"] +tests = ["pytest"] + [[package]] name = "async-timeout" version = "4.0.3" @@ -233,6 +290,70 @@ files = [ {file = "certifi-2024.2.2.tar.gz", hash = "sha256:0569859f95fc761b18b45ef421b1290a0f65f147e92a1e5eb3e635f9a5e4e66f"}, ] +[[package]] +name = "cffi" +version = "1.16.0" +description = "Foreign Function Interface for Python calling C code." +optional = false +python-versions = ">=3.8" +files = [ + {file = "cffi-1.16.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:6b3d6606d369fc1da4fd8c357d026317fbb9c9b75d36dc16e90e84c26854b088"}, + {file = "cffi-1.16.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:ac0f5edd2360eea2f1daa9e26a41db02dd4b0451b48f7c318e217ee092a213e9"}, + {file = "cffi-1.16.0-cp310-cp310-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7e61e3e4fa664a8588aa25c883eab612a188c725755afff6289454d6362b9673"}, + {file = "cffi-1.16.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a72e8961a86d19bdb45851d8f1f08b041ea37d2bd8d4fd19903bc3083d80c896"}, + {file = "cffi-1.16.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5b50bf3f55561dac5438f8e70bfcdfd74543fd60df5fa5f62d94e5867deca684"}, + {file = "cffi-1.16.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:7651c50c8c5ef7bdb41108b7b8c5a83013bfaa8a935590c5d74627c047a583c7"}, + {file = "cffi-1.16.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e4108df7fe9b707191e55f33efbcb2d81928e10cea45527879a4749cbe472614"}, + {file = "cffi-1.16.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:32c68ef735dbe5857c810328cb2481e24722a59a2003018885514d4c09af9743"}, + {file = "cffi-1.16.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:673739cb539f8cdaa07d92d02efa93c9ccf87e345b9a0b556e3ecc666718468d"}, + {file = "cffi-1.16.0-cp310-cp310-win32.whl", hash = "sha256:9f90389693731ff1f659e55c7d1640e2ec43ff725cc61b04b2f9c6d8d017df6a"}, + {file = "cffi-1.16.0-cp310-cp310-win_amd64.whl", hash = "sha256:e6024675e67af929088fda399b2094574609396b1decb609c55fa58b028a32a1"}, + {file = "cffi-1.16.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:b84834d0cf97e7d27dd5b7f3aca7b6e9263c56308ab9dc8aae9784abb774d404"}, + {file = "cffi-1.16.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:1b8ebc27c014c59692bb2664c7d13ce7a6e9a629be20e54e7271fa696ff2b417"}, + {file = "cffi-1.16.0-cp311-cp311-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ee07e47c12890ef248766a6e55bd38ebfb2bb8edd4142d56db91b21ea68b7627"}, + {file = "cffi-1.16.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d8a9d3ebe49f084ad71f9269834ceccbf398253c9fac910c4fd7053ff1386936"}, + {file = "cffi-1.16.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e70f54f1796669ef691ca07d046cd81a29cb4deb1e5f942003f401c0c4a2695d"}, + {file = "cffi-1.16.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5bf44d66cdf9e893637896c7faa22298baebcd18d1ddb6d2626a6e39793a1d56"}, + {file = "cffi-1.16.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7b78010e7b97fef4bee1e896df8a4bbb6712b7f05b7ef630f9d1da00f6444d2e"}, + {file = "cffi-1.16.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:c6a164aa47843fb1b01e941d385aab7215563bb8816d80ff3a363a9f8448a8dc"}, + {file = "cffi-1.16.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:e09f3ff613345df5e8c3667da1d918f9149bd623cd9070c983c013792a9a62eb"}, + {file = "cffi-1.16.0-cp311-cp311-win32.whl", hash = "sha256:2c56b361916f390cd758a57f2e16233eb4f64bcbeee88a4881ea90fca14dc6ab"}, + {file = "cffi-1.16.0-cp311-cp311-win_amd64.whl", hash = "sha256:db8e577c19c0fda0beb7e0d4e09e0ba74b1e4c092e0e40bfa12fe05b6f6d75ba"}, + {file = "cffi-1.16.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:fa3a0128b152627161ce47201262d3140edb5a5c3da88d73a1b790a959126956"}, + {file = "cffi-1.16.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:68e7c44931cc171c54ccb702482e9fc723192e88d25a0e133edd7aff8fcd1f6e"}, + {file = "cffi-1.16.0-cp312-cp312-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:abd808f9c129ba2beda4cfc53bde801e5bcf9d6e0f22f095e45327c038bfe68e"}, + {file = "cffi-1.16.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:88e2b3c14bdb32e440be531ade29d3c50a1a59cd4e51b1dd8b0865c54ea5d2e2"}, + {file = "cffi-1.16.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:fcc8eb6d5902bb1cf6dc4f187ee3ea80a1eba0a89aba40a5cb20a5087d961357"}, + {file = "cffi-1.16.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b7be2d771cdba2942e13215c4e340bfd76398e9227ad10402a8767ab1865d2e6"}, + {file = "cffi-1.16.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e715596e683d2ce000574bae5d07bd522c781a822866c20495e52520564f0969"}, + {file = "cffi-1.16.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:2d92b25dbf6cae33f65005baf472d2c245c050b1ce709cc4588cdcdd5495b520"}, + {file = "cffi-1.16.0-cp312-cp312-win32.whl", hash = "sha256:b2ca4e77f9f47c55c194982e10f058db063937845bb2b7a86c84a6cfe0aefa8b"}, + {file = "cffi-1.16.0-cp312-cp312-win_amd64.whl", hash = "sha256:68678abf380b42ce21a5f2abde8efee05c114c2fdb2e9eef2efdb0257fba1235"}, + {file = "cffi-1.16.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:0c9ef6ff37e974b73c25eecc13952c55bceed9112be2d9d938ded8e856138bcc"}, + {file = "cffi-1.16.0-cp38-cp38-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a09582f178759ee8128d9270cd1344154fd473bb77d94ce0aeb2a93ebf0feaf0"}, + {file = "cffi-1.16.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e760191dd42581e023a68b758769e2da259b5d52e3103c6060ddc02c9edb8d7b"}, + {file = "cffi-1.16.0-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:80876338e19c951fdfed6198e70bc88f1c9758b94578d5a7c4c91a87af3cf31c"}, + {file = "cffi-1.16.0-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a6a14b17d7e17fa0d207ac08642c8820f84f25ce17a442fd15e27ea18d67c59b"}, + {file = "cffi-1.16.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6602bc8dc6f3a9e02b6c22c4fc1e47aa50f8f8e6d3f78a5e16ac33ef5fefa324"}, + {file = "cffi-1.16.0-cp38-cp38-win32.whl", hash = "sha256:131fd094d1065b19540c3d72594260f118b231090295d8c34e19a7bbcf2e860a"}, + {file = "cffi-1.16.0-cp38-cp38-win_amd64.whl", hash = "sha256:31d13b0f99e0836b7ff893d37af07366ebc90b678b6664c955b54561fc36ef36"}, + {file = "cffi-1.16.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:582215a0e9adbe0e379761260553ba11c58943e4bbe9c36430c4ca6ac74b15ed"}, + {file = "cffi-1.16.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:b29ebffcf550f9da55bec9e02ad430c992a87e5f512cd63388abb76f1036d8d2"}, + {file = "cffi-1.16.0-cp39-cp39-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:dc9b18bf40cc75f66f40a7379f6a9513244fe33c0e8aa72e2d56b0196a7ef872"}, + {file = "cffi-1.16.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9cb4a35b3642fc5c005a6755a5d17c6c8b6bcb6981baf81cea8bfbc8903e8ba8"}, + {file = "cffi-1.16.0-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b86851a328eedc692acf81fb05444bdf1891747c25af7529e39ddafaf68a4f3f"}, + {file = "cffi-1.16.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c0f31130ebc2d37cdd8e44605fb5fa7ad59049298b3f745c74fa74c62fbfcfc4"}, + {file = "cffi-1.16.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8f8e709127c6c77446a8c0a8c8bf3c8ee706a06cd44b1e827c3e6a2ee6b8c098"}, + {file = "cffi-1.16.0-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:748dcd1e3d3d7cd5443ef03ce8685043294ad6bd7c02a38d1bd367cfd968e000"}, + {file = "cffi-1.16.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:8895613bcc094d4a1b2dbe179d88d7fb4a15cee43c052e8885783fac397d91fe"}, + {file = "cffi-1.16.0-cp39-cp39-win32.whl", hash = "sha256:ed86a35631f7bfbb28e108dd96773b9d5a6ce4811cf6ea468bb6a359b256b1e4"}, + {file = "cffi-1.16.0-cp39-cp39-win_amd64.whl", hash = "sha256:3686dffb02459559c74dd3d81748269ffb0eb027c39a6fc99502de37d501faa8"}, + {file = "cffi-1.16.0.tar.gz", hash = "sha256:bcb3ef43e58665bbda2fb198698fcae6776483e0c4a631aa5647806c25e02cc0"}, +] + +[package.dependencies] +pycparser = "*" + [[package]] name = "charset-normalizer" version = "3.3.2" @@ -869,6 +990,24 @@ files = [ {file = "mccabe-0.7.0.tar.gz", hash = "sha256:348e0240c33b60bbdf4e523192ef919f28cb2c3d7d5c7794f74009290f236325"}, ] +[[package]] +name = "minio" +version = "7.2.5" +description = "MinIO Python SDK for Amazon S3 Compatible Cloud Storage" +optional = false +python-versions = "*" +files = [ + {file = "minio-7.2.5-py3-none-any.whl", hash = "sha256:ed9176c96d4271cb1022b9ecb8a538b1e55b32ae06add6de16425cab99ef2304"}, + {file = "minio-7.2.5.tar.gz", hash = "sha256:59d8906e2da248a9caac34d4958a859cc3a44abbe6447910c82b5abfa9d6a2e1"}, +] + +[package.dependencies] +argon2-cffi = "*" +certifi = "*" +pycryptodome = "*" +typing-extensions = "*" +urllib3 = "*" + [[package]] name = "mypy-extensions" version = "1.0.0" @@ -1203,6 +1342,58 @@ files = [ {file = "pycodestyle-2.11.1.tar.gz", hash = "sha256:41ba0e7afc9752dfb53ced5489e89f8186be00e599e712660695b7a75ff2663f"}, ] +[[package]] +name = "pycparser" +version = "2.21" +description = "C parser in Python" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" +files = [ + {file = "pycparser-2.21-py2.py3-none-any.whl", hash = "sha256:8ee45429555515e1f6b185e78100aea234072576aa43ab53aefcae078162fca9"}, + {file = "pycparser-2.21.tar.gz", hash = "sha256:e644fdec12f7872f86c58ff790da456218b10f863970249516d60a5eaca77206"}, +] + +[[package]] +name = "pycryptodome" +version = "3.20.0" +description = "Cryptographic library for Python" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" +files = [ + {file = "pycryptodome-3.20.0-cp27-cp27m-macosx_10_9_x86_64.whl", hash = "sha256:f0e6d631bae3f231d3634f91ae4da7a960f7ff87f2865b2d2b831af1dfb04e9a"}, + {file = "pycryptodome-3.20.0-cp27-cp27m-manylinux2010_i686.whl", hash = "sha256:baee115a9ba6c5d2709a1e88ffe62b73ecc044852a925dcb67713a288c4ec70f"}, + {file = "pycryptodome-3.20.0-cp27-cp27m-manylinux2010_x86_64.whl", hash = "sha256:417a276aaa9cb3be91f9014e9d18d10e840a7a9b9a9be64a42f553c5b50b4d1d"}, + {file = "pycryptodome-3.20.0-cp27-cp27m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2a1250b7ea809f752b68e3e6f3fd946b5939a52eaeea18c73bdab53e9ba3c2dd"}, + {file = "pycryptodome-3.20.0-cp27-cp27m-musllinux_1_1_aarch64.whl", hash = "sha256:d5954acfe9e00bc83ed9f5cb082ed22c592fbbef86dc48b907238be64ead5c33"}, + {file = "pycryptodome-3.20.0-cp27-cp27m-win32.whl", hash = "sha256:06d6de87c19f967f03b4cf9b34e538ef46e99a337e9a61a77dbe44b2cbcf0690"}, + {file = "pycryptodome-3.20.0-cp27-cp27m-win_amd64.whl", hash = "sha256:ec0bb1188c1d13426039af8ffcb4dbe3aad1d7680c35a62d8eaf2a529b5d3d4f"}, + {file = "pycryptodome-3.20.0-cp27-cp27mu-manylinux2010_i686.whl", hash = "sha256:5601c934c498cd267640b57569e73793cb9a83506f7c73a8ec57a516f5b0b091"}, + {file = "pycryptodome-3.20.0-cp27-cp27mu-manylinux2010_x86_64.whl", hash = "sha256:d29daa681517f4bc318cd8a23af87e1f2a7bad2fe361e8aa29c77d652a065de4"}, + {file = "pycryptodome-3.20.0-cp27-cp27mu-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3427d9e5310af6680678f4cce149f54e0bb4af60101c7f2c16fdf878b39ccccc"}, + {file = "pycryptodome-3.20.0-cp27-cp27mu-musllinux_1_1_aarch64.whl", hash = "sha256:3cd3ef3aee1079ae44afaeee13393cf68b1058f70576b11439483e34f93cf818"}, + {file = "pycryptodome-3.20.0-cp35-abi3-macosx_10_9_universal2.whl", hash = "sha256:ac1c7c0624a862f2e53438a15c9259d1655325fc2ec4392e66dc46cdae24d044"}, + {file = "pycryptodome-3.20.0-cp35-abi3-macosx_10_9_x86_64.whl", hash = "sha256:76658f0d942051d12a9bd08ca1b6b34fd762a8ee4240984f7c06ddfb55eaf15a"}, + {file = "pycryptodome-3.20.0-cp35-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f35d6cee81fa145333137009d9c8ba90951d7d77b67c79cbe5f03c7eb74d8fe2"}, + {file = "pycryptodome-3.20.0-cp35-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:76cb39afede7055127e35a444c1c041d2e8d2f1f9c121ecef573757ba4cd2c3c"}, + {file = "pycryptodome-3.20.0-cp35-abi3-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:49a4c4dc60b78ec41d2afa392491d788c2e06edf48580fbfb0dd0f828af49d25"}, + {file = "pycryptodome-3.20.0-cp35-abi3-musllinux_1_1_aarch64.whl", hash = "sha256:fb3b87461fa35afa19c971b0a2b7456a7b1db7b4eba9a8424666104925b78128"}, + {file = "pycryptodome-3.20.0-cp35-abi3-musllinux_1_1_i686.whl", hash = "sha256:acc2614e2e5346a4a4eab6e199203034924313626f9620b7b4b38e9ad74b7e0c"}, + {file = "pycryptodome-3.20.0-cp35-abi3-musllinux_1_1_x86_64.whl", hash = "sha256:210ba1b647837bfc42dd5a813cdecb5b86193ae11a3f5d972b9a0ae2c7e9e4b4"}, + {file = "pycryptodome-3.20.0-cp35-abi3-win32.whl", hash = "sha256:8d6b98d0d83d21fb757a182d52940d028564efe8147baa9ce0f38d057104ae72"}, + {file = "pycryptodome-3.20.0-cp35-abi3-win_amd64.whl", hash = "sha256:9b3ae153c89a480a0ec402e23db8d8d84a3833b65fa4b15b81b83be9d637aab9"}, + {file = "pycryptodome-3.20.0-pp27-pypy_73-manylinux2010_x86_64.whl", hash = "sha256:4401564ebf37dfde45d096974c7a159b52eeabd9969135f0426907db367a652a"}, + {file = "pycryptodome-3.20.0-pp27-pypy_73-win32.whl", hash = "sha256:ec1f93feb3bb93380ab0ebf8b859e8e5678c0f010d2d78367cf6bc30bfeb148e"}, + {file = "pycryptodome-3.20.0-pp310-pypy310_pp73-macosx_10_9_x86_64.whl", hash = "sha256:acae12b9ede49f38eb0ef76fdec2df2e94aad85ae46ec85be3648a57f0a7db04"}, + {file = "pycryptodome-3.20.0-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f47888542a0633baff535a04726948e876bf1ed880fddb7c10a736fa99146ab3"}, + {file = "pycryptodome-3.20.0-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6e0e4a987d38cfc2e71b4a1b591bae4891eeabe5fa0f56154f576e26287bfdea"}, + {file = "pycryptodome-3.20.0-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:c18b381553638414b38705f07d1ef0a7cf301bc78a5f9bc17a957eb19446834b"}, + {file = "pycryptodome-3.20.0-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:a60fedd2b37b4cb11ccb5d0399efe26db9e0dd149016c1cc6c8161974ceac2d6"}, + {file = "pycryptodome-3.20.0-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:405002eafad114a2f9a930f5db65feef7b53c4784495dd8758069b89baf68eab"}, + {file = "pycryptodome-3.20.0-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2ab6ab0cb755154ad14e507d1df72de9897e99fd2d4922851a276ccc14f4f1a5"}, + {file = "pycryptodome-3.20.0-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:acf6e43fa75aca2d33e93409f2dafe386fe051818ee79ee8a3e21de9caa2ac9e"}, + {file = "pycryptodome-3.20.0.tar.gz", hash = "sha256:09609209ed7de61c2b560cc5c8c4fbf892f8b15b1faf7e4cbffac97db1fffda7"}, +] + [[package]] name = "pyflakes" version = "3.1.0" @@ -1731,4 +1922,4 @@ watchdog = ["watchdog (>=2.3)"] [metadata] lock-version = "2.0" python-versions = "^3.11" -content-hash = "201a9ea0401ac540331a437dea78d07fd9260beb566af7b313663b74b7f4d480" +content-hash = "9e77a5e1e3d089f412ffb70907950168cc7e2afbeaa287cca858de9456e4bd5e" diff --git a/pyproject.toml b/pyproject.toml index 341e43c..c5d7d8a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -37,6 +37,7 @@ coverage = "^7.3.2" pytest-mock = "^3.12.0" black = "^24.3.0" pytest-env = "^1.1.3" +minio = "^7.2.5" [tool.poetry.scripts] cli = "nad_ch.main:run_cli" diff --git a/sample.env b/sample.env index 1133914..eaaa855 100644 --- a/sample.env +++ b/sample.env @@ -24,3 +24,4 @@ S3_ENDPOINT=http://storage:9000 S3_ACCESS_KEY= S3_SECRET_ACCESS_KEY= S3_REGION=us-east-1 +LANDING_ZONE=/data/landing_zone diff --git a/tests/application/test_data_reader.py b/tests/application/test_data_handler.py similarity index 65% rename from tests/application/test_data_reader.py rename to tests/application/test_data_handler.py index a59322b..91fa1c7 100644 --- a/tests/application/test_data_reader.py +++ b/tests/application/test_data_handler.py @@ -1,10 +1,6 @@ import os -from nad_ch.application.data_reader import DataReader -from conftest import ( - EXPECTED_DEFAULT_CONFIG, - TESTPRODUCER1_CONFIG, - TESTPRODUCER2_CONFIG, -) +from nad_ch.application.data_handler import DataHandler +from conftest import TESTPRODUCER1_CONFIG, TESTPRODUCER2_CONFIG import pickle from pandas.testing import assert_frame_equal import pytest @@ -14,32 +10,34 @@ def test_set_column_map(producer_column_maps): column_map_entity = producer_column_maps.get_by_name_and_version("testproducer1", 1) - reader = DataReader(column_map_entity.mapping, False) + reader = DataHandler(column_map_entity.mapping) assert reader.column_map == TESTPRODUCER1_CONFIG column_map_entity = producer_column_maps.get_by_name_and_version("testproducer2", 1) - reader = DataReader(column_map_entity.mapping, False) + reader = DataHandler(column_map_entity.mapping) assert reader.column_map == TESTPRODUCER2_CONFIG def test_validate_column_map_duplicate_inputs(producer_column_maps): - column_map_entity = producer_column_maps.get_by_name_and_version("testproducer1", 1) + column_map_entity = producer_column_maps.get_by_name_and_version("testproducer4", 1) with pytest.raises(Exception) as exc: - _ = DataReader(column_map_entity.mapping) + _ = DataHandler(column_map_entity.mapping) msg = "Duplicate inputs found for destination fields: COL_13 & COL_2, COL_5 & COL_6" assert str(exc.value) == msg def test_read_file_in_batches_shape1(producer_column_maps): file_path = os.path.join( - TEST_DATA_DIR, "shapefiles/usa-major-cities/usa-major-cities.shp" + os.getcwd(), TEST_DATA_DIR, "shapefiles/usa-major-cities.zip" ) column_map_entity = producer_column_maps.get_by_name_and_version("testproducer2", 1) - reader = DataReader(column_map_entity.mapping) + reader = DataHandler(column_map_entity.mapping) i = 0 for gdf in reader.read_file_in_batches(path=file_path, batch_size=50): baseline_path = os.path.join( - TEST_DATA_DIR, f"shapefiles/baselines/usa-major-cities-gdf-{i}.pkl" + TEST_DATA_DIR, + "shapefiles/baselines/test_read_file_in_batches_shape1/" + f"usa-major-cities-gdf-{i}.pkl", ) with open(baseline_path, "rb") as f: gdf_baseline = pickle.load(f) @@ -49,14 +47,16 @@ def test_read_file_in_batches_shape1(producer_column_maps): def test_read_file_in_batches_shape2(producer_column_maps): file_path = os.path.join( - TEST_DATA_DIR, "shapefiles/NM911_Address_202310/NM911_Address_202310.shp" + os.getcwd(), TEST_DATA_DIR, "shapefiles/NM911_Address_202310.zip" ) column_map_entity = producer_column_maps.get_by_name_and_version("testproducer3", 1) - reader = DataReader(column_map_entity.mapping) + reader = DataHandler(column_map_entity.mapping) i = 0 for gdf in reader.read_file_in_batches(path=file_path, batch_size=250): baseline_path = os.path.join( - TEST_DATA_DIR, f"shapefiles/baselines/NM911_Address_202310-gdf-{i}.pkl" + TEST_DATA_DIR, + "shapefiles/baselines/test_read_file_in_batches_shape2/" + f"NM911_Address_202310-gdf-{i}.pkl", ) with open(baseline_path, "rb") as f: gdf_baseline = pickle.load(f) @@ -65,13 +65,17 @@ def test_read_file_in_batches_shape2(producer_column_maps): def test_read_file_in_batches_gdb1(producer_column_maps): - file_path = os.path.join(TEST_DATA_DIR, "geodatabases/Naperville.gdb") + file_path = os.path.join( + os.getcwd(), TEST_DATA_DIR, "geodatabases/Naperville.gdb.zip" + ) column_map_entity = producer_column_maps.get_by_name_and_version("testproducer1", 1) - reader = DataReader(column_map_entity.mapping, False) + reader = DataHandler(column_map_entity.mapping) i = 0 for gdf in reader.read_file_in_batches(path=file_path, batch_size=2000): baseline_path = os.path.join( - TEST_DATA_DIR, f"geodatabases/baselines/naperville-gdf-{i}.pkl" + TEST_DATA_DIR, + "geodatabases/baselines/test_read_file_in_batches_gdb1/" + f"naperville-gdf-{i}.pkl", ) with open(baseline_path, "rb") as f: gdf_baseline = pickle.load(f) diff --git a/tests/application/use_cases/test_data_submissions.py b/tests/application/use_cases/test_data_submissions.py index a08b97f..11664a3 100644 --- a/tests/application/use_cases/test_data_submissions.py +++ b/tests/application/use_cases/test_data_submissions.py @@ -63,11 +63,18 @@ def run_load_and_validate( submission_id: int, path: str, column_map: Dict[str, str], + mapped_data_dir: str, ): return DataSubmissionReport( overview=DataSubmissionReportOverview(feature_count=1) ) + def run_copy_mapped_data_to_remote( + self, mapped_data_local_dir: str, mapped_data_remote_dir: str + ): + + return True + app_context._task_queue = CustomMockTestTaskQueue() column_map_name = "testproducer1" validate_data_submission(app_context, submission.filename, column_map_name) diff --git a/tests/core/test_entities.py b/tests/core/test_entities.py index a41dfe5..9d050eb 100644 --- a/tests/core/test_entities.py +++ b/tests/core/test_entities.py @@ -5,10 +5,7 @@ def test_data_submission_generates_filename(): producer = DataProducer("Some Producer") filename = DataSubmission.generate_filename("someupload.zip", producer) - todays_date = datetime.datetime.now().strftime("%Y%m%d") - assert filename.startswith("some_producer_") - assert todays_date in filename - assert filename.endswith(".zip") + assert filename == "someupload.zip" def test_data_submission_knows_if_it_has_a_report(): diff --git a/tests/fakes_and_mocks.py b/tests/fakes_and_mocks.py index e5a8d83..83c6963 100644 --- a/tests/fakes_and_mocks.py +++ b/tests/fakes_and_mocks.py @@ -9,6 +9,7 @@ ColumnMapRepository, ) import os +import shutil class FakeDataProducerRepository(DataProducerRepository): @@ -135,10 +136,14 @@ def upload(self, source: str, destination: str) -> bool: return True def download_temp(self, filename: str) -> Optional[DownloadResult]: - return DownloadResult(temp_dir=filename, extracted_dir=f"{filename}.gdb") + return DownloadResult(temp_dir=filename, extracted_dir=filename) def cleanup_temp_dir(self, temp_dir: str): - pass + try: + shutil.rmtree(temp_dir) + return True + except Exception: + return False def delete(self, file_path: str) -> bool: if os.path.exists(file_path): diff --git a/tests/infrastructure/test_task_queue.py b/tests/infrastructure/test_task_queue.py index 5103e36..4229fd7 100644 --- a/tests/infrastructure/test_task_queue.py +++ b/tests/infrastructure/test_task_queue.py @@ -1,6 +1,12 @@ import os -from nad_ch.infrastructure.task_queue import load_and_validate -from tests.application.test_data_reader import TEST_DATA_DIR +import pickle +from geopandas import read_file +from pandas.testing import assert_frame_equal +from nad_ch.infrastructure.task_queue import ( + load_and_validate, + copy_mapped_data_to_remote, +) +from tests.application.test_data_handler import TEST_DATA_DIR from conftest import MAJOR_CITIES_SHP_REPORT, NM911_ADDRESS_202310_REPORT import pytest @@ -9,8 +15,10 @@ def test_load_and_validate_testprovider1( celery_worker, celery_app, producer_column_maps ): column_map = producer_column_maps.get_by_name_and_version("testproducer1", 1) - file_path = os.path.join(TEST_DATA_DIR, "geodatabases/Naperville.gdb") - task_result = load_and_validate.delay(file_path, column_map.mapping) + file_path = os.path.join( + os.getcwd(), TEST_DATA_DIR, "geodatabases/Naperville.gdb.zip" + ) + task_result = load_and_validate.delay(file_path, column_map.mapping, "") msg = "Duplicate inputs found for destination fields: COL_13 & COL_2, COL_5 & COL_6" with pytest.raises(Exception) as exc: _ = task_result.get() @@ -18,13 +26,13 @@ def test_load_and_validate_testprovider1( def test_load_and_validate_testprovider2( - celery_worker, celery_app, producer_column_maps + celery_worker, celery_app, producer_column_maps, tmpdir ): column_map = producer_column_maps.get_by_name_and_version("testproducer2", 1) file_path = os.path.join( - TEST_DATA_DIR, "shapefiles/usa-major-cities/usa-major-cities.shp" + os.getcwd(), TEST_DATA_DIR, "shapefiles/usa-major-cities.zip" ) - task_result = load_and_validate.delay(file_path, column_map.mapping) + task_result = load_and_validate.delay(file_path, column_map.mapping, "") report_dict = task_result.get() # Check that sorted values from missing required fields match assert sorted(report_dict["overview"]["missing_required_fields"]) == sorted( @@ -38,14 +46,35 @@ def test_load_and_validate_testprovider2( def test_load_and_validate_testprovider3( - celery_worker, celery_app, producer_column_maps + celery_worker, celery_app, producer_column_maps, tmpdir ): column_map = producer_column_maps.get_by_name_and_version("testproducer3", 1) file_path = os.path.join( - TEST_DATA_DIR, "shapefiles/NM911_Address_202310/NM911_Address_202310.shp" + os.getcwd(), TEST_DATA_DIR, "shapefiles/NM911_Address_202310.zip" + ) + temp_dir_path = tmpdir.mkdir("landing_zone") + filename, _ = os.path.splitext(os.path.basename(file_path)) + mapped_data_dir = os.path.join( + temp_dir_path, + f"data_submissions/{column_map.producer.name}/1/{filename}", + ) + mapped_data_path = os.path.join(mapped_data_dir, f"{filename}.zip") + task_result = load_and_validate.delay( + file_path, column_map.mapping, mapped_data_dir ) - task_result = load_and_validate.delay(file_path, column_map.mapping) report_dict = task_result.get() + # Validate that mapped data was written to shape file correctly + assert os.path.exists(mapped_data_path) + baseline_path = os.path.join( + TEST_DATA_DIR, + "shapefiles/baselines/test_load_and_validate_testprovider3/" + "NM911_Address_202310.pkl", + ) + gdf = read_file(mapped_data_dir) + with open(baseline_path, "rb") as f: + gdf_baseline = pickle.load(f) + assert_frame_equal(gdf, gdf_baseline) + # Check that sorted values from missing required fields match assert sorted(report_dict["overview"]["missing_required_fields"]) == sorted( NM911_ADDRESS_202310_REPORT["overview"]["missing_required_fields"] @@ -55,3 +84,36 @@ def test_load_and_validate_testprovider3( del report_dict["overview"]["missing_required_fields"] del NM911_ADDRESS_202310_REPORT["overview"]["missing_required_fields"] assert report_dict == NM911_ADDRESS_202310_REPORT + + +def test_copy_mapped_data_to_remote(celery_worker, celery_app): + mapped_data_local_dir = os.path.join(os.getcwd(), TEST_DATA_DIR, "shapefiles") + mapped_data_remote_dir = ( + "data_submissions/Producer A/2024_04_02/NM911_Address_202310" + ) + task_result = copy_mapped_data_to_remote.delay( + mapped_data_local_dir, + mapped_data_remote_dir, + ) + result = task_result.get() + assert result is True + + +@pytest.mark.skip( + "Skipping to avoid build up of data in minio storage; " + "this test is to ensure mapped data is uploaded to 'remote' " + "storage successfully." +) +def test_copy_mapped_data_to_remote_2(celery_worker, celery_app, monkeypatch): + # This test requires Minio to be running locally + mapped_data_local_dir = os.path.join(os.getcwd(), TEST_DATA_DIR, "shapefiles") + mapped_data_remote_dir = ( + "data_submissions/Producer A/2024_04_02/NM911_Address_202310" + ) + monkeypatch.setenv("APP_ENV", "dev_local") + task_result = copy_mapped_data_to_remote.delay( + mapped_data_local_dir, + mapped_data_remote_dir, + ) + result = task_result.get() + assert result is True diff --git a/tests/test_data/baselines.py b/tests/test_data/baselines.py index 9272c5b..b150487 100644 --- a/tests/test_data/baselines.py +++ b/tests/test_data/baselines.py @@ -1,58 +1,35 @@ -EXPECTED_DEFAULT_CONFIG = { - "data_required_fields": [ - "Add_Number", - "AddNo_Full", - "St_Name", - "StNam_Full", - "County", - "Inc_Muni", - "Post_City", - "State", - "UUID", - "AddAuth", - "Longitude", - "Latitude", - "NatGrid", - "Placement", - "AddrPoint", - "DateUpdate", - "NAD_Source", - "DataSet_ID", - ] -} +# Baseline objects used for testing purposes TESTPRODUCER1_CONFIG = { - "COL_0": ["ID"], - "COL_1": ["STCOFIPS"], - "COL_10": ["HISPPOP"], - "COL_11": ["AMERIND"], - "COL_12": ["ASIAN"], - "COL_13": ["PACIFIC"], - "COL_14": ["RACE2UP"], - "COL_15": ["OTHRACE"], - "COL_16": ["LASTUPDATE"], - "COL_17": ["LASTEDITOR"], - "COL_18": ["AGEMAJOR"], - "COL_19": ["AREASQMETER"], - "COL_2": ["TRACT", "Pacific"], - "COL_20": ["Shape_Length"], - "COL_21": ["Shape_Area"], - "COL_22": ["geometry"], - "COL_3": ["STFID"], - "COL_4": ["BLOCK"], - "COL_5": ["TOTPOP"], - "COL_6": ["POPDENS", "totPop"], - "COL_7": ["RACEBASE"], - "COL_8": ["WHITE"], - "COL_9": ["BLACK"], + "COL_0": "ID", + "COL_1": "STCOFIPS", + "COL_10": "HISPPOP", + "COL_11": "AMERIND", + "COL_12": "ASIAN", + "COL_13": "PACIFIC", + "COL_14": "RACE2UP", + "COL_15": "OTHRACE", + "COL_16": "LASTUPDATE", + "COL_17": "LASTEDITOR", + "COL_18": "AGEMAJOR", + "COL_19": "AREASQMETER", + "COL_2": "TRACT", + "COL_20": "Shape_Length", + "COL_21": "Shape_Area", + "COL_3": "STFID", + "COL_4": "BLOCK", + "COL_5": "TOTPOP", + "COL_6": "POPDENS", + "COL_7": "RACEBASE", + "COL_8": "WHITE", + "COL_9": "BLACK", } TESTPRODUCER2_CONFIG = { - "COL_0": ["NAME"], - "COL_1": ["ST"], - "COL_2": ["ZIP"], - "COL_3": ["RuleID"], - "COL_4": ["geometry"], + "COL_0": "NAME", + "COL_1": "ST", + "COL_2": "ZIP", + "COL_3": "RuleID", } NAPERVILLE_GDB_REPORT = { @@ -274,6 +251,7 @@ }, ], } + MAJOR_CITIES_SHP_REPORT = { "overview": { "feature_count": 5, @@ -350,7 +328,7 @@ }, { "provided_feature_name": "geometry", - "nad_feature_name": "COL_4", + "nad_feature_name": "geometry", "populated_count": 120, "null_count": 0, "invalid_domain_count": 0, @@ -364,7 +342,7 @@ NM911_ADDRESS_202310_REPORT = { "overview": { - "feature_count": 15, + "feature_count": 16, "features_flagged": 9, "records_count": 1000, "records_flagged": 1000, @@ -591,5 +569,16 @@ "domain_frequency": {}, "high_domain_cardinality": False, }, + { + "provided_feature_name": "geometry", + "nad_feature_name": "geometry", + "populated_count": 1000, + "null_count": 0, + "invalid_domain_count": 0, + "valid_domain_count": 0, + "invalid_domains": [], + "domain_frequency": {}, + "high_domain_cardinality": False, + }, ], } diff --git a/tests/test_data/column_maps/testproducer1.yaml b/tests/test_data/column_maps/testproducer1.yaml index 5c65c21..b7cd3c6 100644 --- a/tests/test_data/column_maps/testproducer1.yaml +++ b/tests/test_data/column_maps/testproducer1.yaml @@ -1,48 +1,22 @@ -COL_0: - - ID -COL_1: - - STCOFIPS -COL_10: - - HISPPOP -COL_11: - - AMERIND -COL_12: - - ASIAN -COL_13: - - PACIFIC -COL_14: - - RACE2UP -COL_15: - - OTHRACE -COL_16: - - LASTUPDATE -COL_17: - - LASTEDITOR -COL_18: - - AGEMAJOR -COL_19: - - AREASQMETER -COL_2: - - TRACT - - Pacific -COL_20: - - Shape_Length -COL_21: - - Shape_Area -COL_22: - - geometry -COL_3: - - STFID -COL_4: - - BLOCK -COL_5: - - TOTPOP -COL_6: - - POPDENS - - totPop -COL_7: - - RACEBASE -COL_8: - - WHITE -COL_9: - - BLACK +COL_0: ID +COL_1: STCOFIPS +COL_10: HISPPOP +COL_11: AMERIND +COL_12: ASIAN +COL_13: PACIFIC +COL_14: RACE2UP +COL_15: OTHRACE +COL_16: LASTUPDATE +COL_17: LASTEDITOR +COL_18: AGEMAJOR +COL_19: AREASQMETER +COL_2: TRACT +COL_20: Shape_Length +COL_21: Shape_Area +COL_3: STFID +COL_4: BLOCK +COL_5: TOTPOP +COL_6: POPDENS +COL_7: RACEBASE +COL_8: WHITE +COL_9: BLACK diff --git a/tests/test_data/column_maps/testproducer2.yaml b/tests/test_data/column_maps/testproducer2.yaml index 0f25a7f..5ce40c0 100644 --- a/tests/test_data/column_maps/testproducer2.yaml +++ b/tests/test_data/column_maps/testproducer2.yaml @@ -1,10 +1,4 @@ -COL_0: - - NAME -COL_1: - - ST -COL_2: - - ZIP -COL_3: - - RuleID -COL_4: - - geometry +COL_0: NAME +COL_1: ST +COL_2: ZIP +COL_3: RuleID diff --git a/tests/test_data/column_maps/testproducer3.yaml b/tests/test_data/column_maps/testproducer3.yaml index e533d73..80133df 100644 --- a/tests/test_data/column_maps/testproducer3.yaml +++ b/tests/test_data/column_maps/testproducer3.yaml @@ -1,26 +1,18 @@ AddNum_Pre: -Add_Number: - - ADD_NUMBER -AddNum_Suf: - - ADD_SUFFIX +Add_Number: ADD_NUMBER +AddNum_Suf: ADD_SUFFIX AddNo_Full: St_PreMod: -St_PreDir: - - STR_DIR -St_PreTyp: - - STR_PRETYP +St_PreDir: STR_DIR +St_PreTyp: STR_PRETYP St_PreSep: -St_Name: - - STR_NAME -St_PosTyp: - - STR_SUFFIX -St_PosDir: - - POST_DIR +St_Name: STR_NAME +St_PosTyp: STR_SUFFIX +St_PosDir: POST_DIR St_PosMod: St_PosSep: St_PosMod: -StNam_Full: - - ROAD_LABEL +StNam_Full: ROAD_LABEL Building: Floor: Unit: @@ -31,23 +23,19 @@ SubAddress: LandmkName: County: Inc_Muni: -Post_City: - - COMNAME +Post_City: COMNAME Census_Plc: -Uninc_Comm: - - MSAG_COM +Uninc_Comm: MSAG_COM Nbrhd_Comm: NatAmArea: NatAmSub: Urbnztn_PR: PlaceOther: State: -Zip_Code: - - ZIPCODE +Zip_Code: ZIPCODE Plus_4: UUID: -AddAuth: - - DPID +AddAuth: DPID AddrRefSys: Longitude: Latitude: @@ -61,17 +49,13 @@ ParcelSrc: Parcel_ID: AddrClass: Lifecycle: -Effective: - - DATE_UPD +Effective: DATE_UPD Expire: -DateUpdate: - - last_edi_1 -AnomStatus: - - EXCEPTION +DateUpdate: last_edi_1 +AnomStatus: EXCEPTION LocatnDesc: Addr_Type: PlaceNmTyp: DeliverTyp: NAD_Source: -DataSet_ID: - - FID +DataSet_ID: FID diff --git a/tests/test_data/column_maps/testproducer4.yaml b/tests/test_data/column_maps/testproducer4.yaml new file mode 100644 index 0000000..34f9e99 --- /dev/null +++ b/tests/test_data/column_maps/testproducer4.yaml @@ -0,0 +1,22 @@ +COL_0: ID +COL_1: STCOFIPS +COL_10: HISPPOP +COL_11: AMERIND +COL_12: ASIAN +COL_13: PACIFIC +COL_14: RACE2UP +COL_15: OTHRACE +COL_16: LASTUPDATE +COL_17: LASTEDITOR +COL_18: AGEMAJOR +COL_19: AREASQMETER +COL_2: pacific +COL_20: Shape_Length +COL_21: Shape_Area +COL_3: STFID +COL_4: BLOCK +COL_5: TOTPOP +COL_6: totPop +COL_7: RACEBASE +COL_8: WHITE +COL_9: BLACK diff --git a/tests/test_data/geodatabases/Naperville.gdb/a00000001.TablesByName.atx b/tests/test_data/geodatabases/Naperville.gdb/a00000001.TablesByName.atx deleted file mode 100644 index 5dc4379..0000000 Binary files a/tests/test_data/geodatabases/Naperville.gdb/a00000001.TablesByName.atx and /dev/null differ diff --git a/tests/test_data/geodatabases/Naperville.gdb/a00000001.freelist b/tests/test_data/geodatabases/Naperville.gdb/a00000001.freelist deleted file mode 100644 index 6a5ab8e..0000000 Binary files a/tests/test_data/geodatabases/Naperville.gdb/a00000001.freelist and /dev/null differ diff --git a/tests/test_data/geodatabases/Naperville.gdb/a00000001.gdbindexes b/tests/test_data/geodatabases/Naperville.gdb/a00000001.gdbindexes deleted file mode 100644 index b02aa75..0000000 Binary files a/tests/test_data/geodatabases/Naperville.gdb/a00000001.gdbindexes and /dev/null differ diff --git a/tests/test_data/geodatabases/Naperville.gdb/a00000001.gdbtable b/tests/test_data/geodatabases/Naperville.gdb/a00000001.gdbtable deleted file mode 100644 index 141a2a7..0000000 Binary files a/tests/test_data/geodatabases/Naperville.gdb/a00000001.gdbtable and /dev/null differ diff --git a/tests/test_data/geodatabases/Naperville.gdb/a00000001.gdbtablx b/tests/test_data/geodatabases/Naperville.gdb/a00000001.gdbtablx deleted file mode 100644 index b3c26d8..0000000 Binary files a/tests/test_data/geodatabases/Naperville.gdb/a00000001.gdbtablx and /dev/null differ diff --git a/tests/test_data/geodatabases/Naperville.gdb/a00000002.gdbtable b/tests/test_data/geodatabases/Naperville.gdb/a00000002.gdbtable deleted file mode 100644 index a0af90e..0000000 Binary files a/tests/test_data/geodatabases/Naperville.gdb/a00000002.gdbtable and /dev/null differ diff --git a/tests/test_data/geodatabases/Naperville.gdb/a00000002.gdbtablx b/tests/test_data/geodatabases/Naperville.gdb/a00000002.gdbtablx deleted file mode 100644 index 7c12c56..0000000 Binary files a/tests/test_data/geodatabases/Naperville.gdb/a00000002.gdbtablx and /dev/null differ diff --git a/tests/test_data/geodatabases/Naperville.gdb/a00000003.gdbindexes b/tests/test_data/geodatabases/Naperville.gdb/a00000003.gdbindexes deleted file mode 100644 index 58df68d..0000000 Binary files a/tests/test_data/geodatabases/Naperville.gdb/a00000003.gdbindexes and /dev/null differ diff --git a/tests/test_data/geodatabases/Naperville.gdb/a00000003.gdbtable b/tests/test_data/geodatabases/Naperville.gdb/a00000003.gdbtable deleted file mode 100644 index 4f97dd3..0000000 Binary files a/tests/test_data/geodatabases/Naperville.gdb/a00000003.gdbtable and /dev/null differ diff --git a/tests/test_data/geodatabases/Naperville.gdb/a00000003.gdbtablx b/tests/test_data/geodatabases/Naperville.gdb/a00000003.gdbtablx deleted file mode 100644 index c8868b6..0000000 Binary files a/tests/test_data/geodatabases/Naperville.gdb/a00000003.gdbtablx and /dev/null differ diff --git a/tests/test_data/geodatabases/Naperville.gdb/a00000004.CatItemsByPhysicalName.atx b/tests/test_data/geodatabases/Naperville.gdb/a00000004.CatItemsByPhysicalName.atx deleted file mode 100644 index 9ebb2b2..0000000 Binary files a/tests/test_data/geodatabases/Naperville.gdb/a00000004.CatItemsByPhysicalName.atx and /dev/null differ diff --git a/tests/test_data/geodatabases/Naperville.gdb/a00000004.CatItemsByType.atx b/tests/test_data/geodatabases/Naperville.gdb/a00000004.CatItemsByType.atx deleted file mode 100644 index 3c4b1db..0000000 Binary files a/tests/test_data/geodatabases/Naperville.gdb/a00000004.CatItemsByType.atx and /dev/null differ diff --git a/tests/test_data/geodatabases/Naperville.gdb/a00000004.FDO_UUID.atx b/tests/test_data/geodatabases/Naperville.gdb/a00000004.FDO_UUID.atx deleted file mode 100644 index b124269..0000000 Binary files a/tests/test_data/geodatabases/Naperville.gdb/a00000004.FDO_UUID.atx and /dev/null differ diff --git a/tests/test_data/geodatabases/Naperville.gdb/a00000004.freelist b/tests/test_data/geodatabases/Naperville.gdb/a00000004.freelist deleted file mode 100644 index 7256c27..0000000 Binary files a/tests/test_data/geodatabases/Naperville.gdb/a00000004.freelist and /dev/null differ diff --git a/tests/test_data/geodatabases/Naperville.gdb/a00000004.gdbindexes b/tests/test_data/geodatabases/Naperville.gdb/a00000004.gdbindexes deleted file mode 100644 index a4f334d..0000000 Binary files a/tests/test_data/geodatabases/Naperville.gdb/a00000004.gdbindexes and /dev/null differ diff --git a/tests/test_data/geodatabases/Naperville.gdb/a00000004.gdbtable b/tests/test_data/geodatabases/Naperville.gdb/a00000004.gdbtable deleted file mode 100644 index 383c576..0000000 Binary files a/tests/test_data/geodatabases/Naperville.gdb/a00000004.gdbtable and /dev/null differ diff --git a/tests/test_data/geodatabases/Naperville.gdb/a00000004.gdbtablx b/tests/test_data/geodatabases/Naperville.gdb/a00000004.gdbtablx deleted file mode 100644 index 26641b3..0000000 Binary files a/tests/test_data/geodatabases/Naperville.gdb/a00000004.gdbtablx and /dev/null differ diff --git a/tests/test_data/geodatabases/Naperville.gdb/a00000004.spx b/tests/test_data/geodatabases/Naperville.gdb/a00000004.spx deleted file mode 100644 index e4498ac..0000000 Binary files a/tests/test_data/geodatabases/Naperville.gdb/a00000004.spx and /dev/null differ diff --git a/tests/test_data/geodatabases/Naperville.gdb/a00000005.CatItemTypesByName.atx b/tests/test_data/geodatabases/Naperville.gdb/a00000005.CatItemTypesByName.atx deleted file mode 100644 index 7624c03..0000000 Binary files a/tests/test_data/geodatabases/Naperville.gdb/a00000005.CatItemTypesByName.atx and /dev/null differ diff --git a/tests/test_data/geodatabases/Naperville.gdb/a00000005.CatItemTypesByParentTypeID.atx b/tests/test_data/geodatabases/Naperville.gdb/a00000005.CatItemTypesByParentTypeID.atx deleted file mode 100644 index 39aabd1..0000000 Binary files a/tests/test_data/geodatabases/Naperville.gdb/a00000005.CatItemTypesByParentTypeID.atx and /dev/null differ diff --git a/tests/test_data/geodatabases/Naperville.gdb/a00000005.CatItemTypesByUUID.atx b/tests/test_data/geodatabases/Naperville.gdb/a00000005.CatItemTypesByUUID.atx deleted file mode 100644 index 08c32a8..0000000 Binary files a/tests/test_data/geodatabases/Naperville.gdb/a00000005.CatItemTypesByUUID.atx and /dev/null differ diff --git a/tests/test_data/geodatabases/Naperville.gdb/a00000005.gdbindexes b/tests/test_data/geodatabases/Naperville.gdb/a00000005.gdbindexes deleted file mode 100644 index bc88709..0000000 Binary files a/tests/test_data/geodatabases/Naperville.gdb/a00000005.gdbindexes and /dev/null differ diff --git a/tests/test_data/geodatabases/Naperville.gdb/a00000005.gdbtable b/tests/test_data/geodatabases/Naperville.gdb/a00000005.gdbtable deleted file mode 100644 index be05104..0000000 Binary files a/tests/test_data/geodatabases/Naperville.gdb/a00000005.gdbtable and /dev/null differ diff --git a/tests/test_data/geodatabases/Naperville.gdb/a00000005.gdbtablx b/tests/test_data/geodatabases/Naperville.gdb/a00000005.gdbtablx deleted file mode 100644 index 39bb502..0000000 Binary files a/tests/test_data/geodatabases/Naperville.gdb/a00000005.gdbtablx and /dev/null differ diff --git a/tests/test_data/geodatabases/Naperville.gdb/a00000006.CatRelsByDestinationID.atx b/tests/test_data/geodatabases/Naperville.gdb/a00000006.CatRelsByDestinationID.atx deleted file mode 100644 index 44e0b2d..0000000 Binary files a/tests/test_data/geodatabases/Naperville.gdb/a00000006.CatRelsByDestinationID.atx and /dev/null differ diff --git a/tests/test_data/geodatabases/Naperville.gdb/a00000006.CatRelsByOriginID.atx b/tests/test_data/geodatabases/Naperville.gdb/a00000006.CatRelsByOriginID.atx deleted file mode 100644 index 2d24657..0000000 Binary files a/tests/test_data/geodatabases/Naperville.gdb/a00000006.CatRelsByOriginID.atx and /dev/null differ diff --git a/tests/test_data/geodatabases/Naperville.gdb/a00000006.CatRelsByType.atx b/tests/test_data/geodatabases/Naperville.gdb/a00000006.CatRelsByType.atx deleted file mode 100644 index 650c255..0000000 Binary files a/tests/test_data/geodatabases/Naperville.gdb/a00000006.CatRelsByType.atx and /dev/null differ diff --git a/tests/test_data/geodatabases/Naperville.gdb/a00000006.FDO_UUID.atx b/tests/test_data/geodatabases/Naperville.gdb/a00000006.FDO_UUID.atx deleted file mode 100644 index bef6804..0000000 Binary files a/tests/test_data/geodatabases/Naperville.gdb/a00000006.FDO_UUID.atx and /dev/null differ diff --git a/tests/test_data/geodatabases/Naperville.gdb/a00000006.freelist b/tests/test_data/geodatabases/Naperville.gdb/a00000006.freelist deleted file mode 100644 index 81a4eeb..0000000 Binary files a/tests/test_data/geodatabases/Naperville.gdb/a00000006.freelist and /dev/null differ diff --git a/tests/test_data/geodatabases/Naperville.gdb/a00000006.gdbindexes b/tests/test_data/geodatabases/Naperville.gdb/a00000006.gdbindexes deleted file mode 100644 index c608a88..0000000 Binary files a/tests/test_data/geodatabases/Naperville.gdb/a00000006.gdbindexes and /dev/null differ diff --git a/tests/test_data/geodatabases/Naperville.gdb/a00000006.gdbtable b/tests/test_data/geodatabases/Naperville.gdb/a00000006.gdbtable deleted file mode 100644 index 662a1d7..0000000 Binary files a/tests/test_data/geodatabases/Naperville.gdb/a00000006.gdbtable and /dev/null differ diff --git a/tests/test_data/geodatabases/Naperville.gdb/a00000006.gdbtablx b/tests/test_data/geodatabases/Naperville.gdb/a00000006.gdbtablx deleted file mode 100644 index 9ea17c0..0000000 Binary files a/tests/test_data/geodatabases/Naperville.gdb/a00000006.gdbtablx and /dev/null differ diff --git a/tests/test_data/geodatabases/Naperville.gdb/a00000007.CatRelTypesByBackwardLabel.atx b/tests/test_data/geodatabases/Naperville.gdb/a00000007.CatRelTypesByBackwardLabel.atx deleted file mode 100644 index 7b44805..0000000 Binary files a/tests/test_data/geodatabases/Naperville.gdb/a00000007.CatRelTypesByBackwardLabel.atx and /dev/null differ diff --git a/tests/test_data/geodatabases/Naperville.gdb/a00000007.CatRelTypesByDestItemTypeID.atx b/tests/test_data/geodatabases/Naperville.gdb/a00000007.CatRelTypesByDestItemTypeID.atx deleted file mode 100644 index 3f5da5b..0000000 Binary files a/tests/test_data/geodatabases/Naperville.gdb/a00000007.CatRelTypesByDestItemTypeID.atx and /dev/null differ diff --git a/tests/test_data/geodatabases/Naperville.gdb/a00000007.CatRelTypesByForwardLabel.atx b/tests/test_data/geodatabases/Naperville.gdb/a00000007.CatRelTypesByForwardLabel.atx deleted file mode 100644 index f7311a3..0000000 Binary files a/tests/test_data/geodatabases/Naperville.gdb/a00000007.CatRelTypesByForwardLabel.atx and /dev/null differ diff --git a/tests/test_data/geodatabases/Naperville.gdb/a00000007.CatRelTypesByName.atx b/tests/test_data/geodatabases/Naperville.gdb/a00000007.CatRelTypesByName.atx deleted file mode 100644 index d9d5c8b..0000000 Binary files a/tests/test_data/geodatabases/Naperville.gdb/a00000007.CatRelTypesByName.atx and /dev/null differ diff --git a/tests/test_data/geodatabases/Naperville.gdb/a00000007.CatRelTypesByOriginItemTypeID.atx b/tests/test_data/geodatabases/Naperville.gdb/a00000007.CatRelTypesByOriginItemTypeID.atx deleted file mode 100644 index 46066e6..0000000 Binary files a/tests/test_data/geodatabases/Naperville.gdb/a00000007.CatRelTypesByOriginItemTypeID.atx and /dev/null differ diff --git a/tests/test_data/geodatabases/Naperville.gdb/a00000007.CatRelTypesByUUID.atx b/tests/test_data/geodatabases/Naperville.gdb/a00000007.CatRelTypesByUUID.atx deleted file mode 100644 index 67b9370..0000000 Binary files a/tests/test_data/geodatabases/Naperville.gdb/a00000007.CatRelTypesByUUID.atx and /dev/null differ diff --git a/tests/test_data/geodatabases/Naperville.gdb/a00000007.gdbindexes b/tests/test_data/geodatabases/Naperville.gdb/a00000007.gdbindexes deleted file mode 100644 index 2a98c93..0000000 Binary files a/tests/test_data/geodatabases/Naperville.gdb/a00000007.gdbindexes and /dev/null differ diff --git a/tests/test_data/geodatabases/Naperville.gdb/a00000007.gdbtable b/tests/test_data/geodatabases/Naperville.gdb/a00000007.gdbtable deleted file mode 100644 index bf0447a..0000000 Binary files a/tests/test_data/geodatabases/Naperville.gdb/a00000007.gdbtable and /dev/null differ diff --git a/tests/test_data/geodatabases/Naperville.gdb/a00000007.gdbtablx b/tests/test_data/geodatabases/Naperville.gdb/a00000007.gdbtablx deleted file mode 100644 index 9c84c42..0000000 Binary files a/tests/test_data/geodatabases/Naperville.gdb/a00000007.gdbtablx and /dev/null differ diff --git a/tests/test_data/geodatabases/Naperville.gdb/a00000009.gdbindexes b/tests/test_data/geodatabases/Naperville.gdb/a00000009.gdbindexes deleted file mode 100644 index c9d0caa..0000000 Binary files a/tests/test_data/geodatabases/Naperville.gdb/a00000009.gdbindexes and /dev/null differ diff --git a/tests/test_data/geodatabases/Naperville.gdb/a00000009.gdbtable b/tests/test_data/geodatabases/Naperville.gdb/a00000009.gdbtable deleted file mode 100644 index 3d67b11..0000000 Binary files a/tests/test_data/geodatabases/Naperville.gdb/a00000009.gdbtable and /dev/null differ diff --git a/tests/test_data/geodatabases/Naperville.gdb/a00000009.gdbtablx b/tests/test_data/geodatabases/Naperville.gdb/a00000009.gdbtablx deleted file mode 100644 index 7cd1a47..0000000 Binary files a/tests/test_data/geodatabases/Naperville.gdb/a00000009.gdbtablx and /dev/null differ diff --git a/tests/test_data/geodatabases/Naperville.gdb/a00000009.spx b/tests/test_data/geodatabases/Naperville.gdb/a00000009.spx deleted file mode 100644 index ea86f2d..0000000 Binary files a/tests/test_data/geodatabases/Naperville.gdb/a00000009.spx and /dev/null differ diff --git a/tests/test_data/geodatabases/Naperville.gdb/a0000000a.gdbindexes b/tests/test_data/geodatabases/Naperville.gdb/a0000000a.gdbindexes deleted file mode 100644 index cc24e2a..0000000 Binary files a/tests/test_data/geodatabases/Naperville.gdb/a0000000a.gdbindexes and /dev/null differ diff --git a/tests/test_data/geodatabases/Naperville.gdb/a0000000a.gdbtable b/tests/test_data/geodatabases/Naperville.gdb/a0000000a.gdbtable deleted file mode 100644 index 86ac447..0000000 Binary files a/tests/test_data/geodatabases/Naperville.gdb/a0000000a.gdbtable and /dev/null differ diff --git a/tests/test_data/geodatabases/Naperville.gdb/a0000000a.gdbtablx b/tests/test_data/geodatabases/Naperville.gdb/a0000000a.gdbtablx deleted file mode 100644 index be033c2..0000000 Binary files a/tests/test_data/geodatabases/Naperville.gdb/a0000000a.gdbtablx and /dev/null differ diff --git a/tests/test_data/geodatabases/Naperville.gdb/a0000000a.spx b/tests/test_data/geodatabases/Naperville.gdb/a0000000a.spx deleted file mode 100644 index f98218c..0000000 Binary files a/tests/test_data/geodatabases/Naperville.gdb/a0000000a.spx and /dev/null differ diff --git a/tests/test_data/geodatabases/Naperville.gdb/a0000000b.FDO_GlobalID.atx b/tests/test_data/geodatabases/Naperville.gdb/a0000000b.FDO_GlobalID.atx deleted file mode 100644 index de635c1..0000000 Binary files a/tests/test_data/geodatabases/Naperville.gdb/a0000000b.FDO_GlobalID.atx and /dev/null differ diff --git a/tests/test_data/geodatabases/Naperville.gdb/a0000000b.gdbindexes b/tests/test_data/geodatabases/Naperville.gdb/a0000000b.gdbindexes deleted file mode 100644 index eb57873..0000000 Binary files a/tests/test_data/geodatabases/Naperville.gdb/a0000000b.gdbindexes and /dev/null differ diff --git a/tests/test_data/geodatabases/Naperville.gdb/a0000000b.gdbtable b/tests/test_data/geodatabases/Naperville.gdb/a0000000b.gdbtable deleted file mode 100644 index bbd1f6c..0000000 Binary files a/tests/test_data/geodatabases/Naperville.gdb/a0000000b.gdbtable and /dev/null differ diff --git a/tests/test_data/geodatabases/Naperville.gdb/a0000000b.gdbtablx b/tests/test_data/geodatabases/Naperville.gdb/a0000000b.gdbtablx deleted file mode 100644 index 7d562f5..0000000 Binary files a/tests/test_data/geodatabases/Naperville.gdb/a0000000b.gdbtablx and /dev/null differ diff --git a/tests/test_data/geodatabases/Naperville.gdb/a0000000b.spx b/tests/test_data/geodatabases/Naperville.gdb/a0000000b.spx deleted file mode 100644 index 41dbd6a..0000000 Binary files a/tests/test_data/geodatabases/Naperville.gdb/a0000000b.spx and /dev/null differ diff --git a/tests/test_data/geodatabases/Naperville.gdb/a0000000c.gdbindexes b/tests/test_data/geodatabases/Naperville.gdb/a0000000c.gdbindexes deleted file mode 100644 index cc24e2a..0000000 Binary files a/tests/test_data/geodatabases/Naperville.gdb/a0000000c.gdbindexes and /dev/null differ diff --git a/tests/test_data/geodatabases/Naperville.gdb/a0000000c.gdbtable b/tests/test_data/geodatabases/Naperville.gdb/a0000000c.gdbtable deleted file mode 100644 index 0827759..0000000 Binary files a/tests/test_data/geodatabases/Naperville.gdb/a0000000c.gdbtable and /dev/null differ diff --git a/tests/test_data/geodatabases/Naperville.gdb/a0000000c.gdbtablx b/tests/test_data/geodatabases/Naperville.gdb/a0000000c.gdbtablx deleted file mode 100644 index 36c2d2a..0000000 Binary files a/tests/test_data/geodatabases/Naperville.gdb/a0000000c.gdbtablx and /dev/null differ diff --git a/tests/test_data/geodatabases/Naperville.gdb/a0000000c.spx b/tests/test_data/geodatabases/Naperville.gdb/a0000000c.spx deleted file mode 100644 index e19a4ab..0000000 Binary files a/tests/test_data/geodatabases/Naperville.gdb/a0000000c.spx and /dev/null differ diff --git a/tests/test_data/geodatabases/Naperville.gdb/a0000000e.G15POLLINGID.atx b/tests/test_data/geodatabases/Naperville.gdb/a0000000e.G15POLLINGID.atx deleted file mode 100644 index 955d764..0000000 Binary files a/tests/test_data/geodatabases/Naperville.gdb/a0000000e.G15POLLINGID.atx and /dev/null differ diff --git a/tests/test_data/geodatabases/Naperville.gdb/a0000000e.gdbindexes b/tests/test_data/geodatabases/Naperville.gdb/a0000000e.gdbindexes deleted file mode 100644 index 72e7357..0000000 Binary files a/tests/test_data/geodatabases/Naperville.gdb/a0000000e.gdbindexes and /dev/null differ diff --git a/tests/test_data/geodatabases/Naperville.gdb/a0000000e.gdbtable b/tests/test_data/geodatabases/Naperville.gdb/a0000000e.gdbtable deleted file mode 100644 index 3ee878e..0000000 Binary files a/tests/test_data/geodatabases/Naperville.gdb/a0000000e.gdbtable and /dev/null differ diff --git a/tests/test_data/geodatabases/Naperville.gdb/a0000000e.gdbtablx b/tests/test_data/geodatabases/Naperville.gdb/a0000000e.gdbtablx deleted file mode 100644 index 8624824..0000000 Binary files a/tests/test_data/geodatabases/Naperville.gdb/a0000000e.gdbtablx and /dev/null differ diff --git a/tests/test_data/geodatabases/Naperville.gdb/a0000000e.spx b/tests/test_data/geodatabases/Naperville.gdb/a0000000e.spx deleted file mode 100644 index 3f5f4bb..0000000 Binary files a/tests/test_data/geodatabases/Naperville.gdb/a0000000e.spx and /dev/null differ diff --git a/tests/test_data/geodatabases/Naperville.gdb/a0000000f.gdbindexes b/tests/test_data/geodatabases/Naperville.gdb/a0000000f.gdbindexes deleted file mode 100644 index cc24e2a..0000000 Binary files a/tests/test_data/geodatabases/Naperville.gdb/a0000000f.gdbindexes and /dev/null differ diff --git a/tests/test_data/geodatabases/Naperville.gdb/a0000000f.gdbtable b/tests/test_data/geodatabases/Naperville.gdb/a0000000f.gdbtable deleted file mode 100644 index 0839460..0000000 Binary files a/tests/test_data/geodatabases/Naperville.gdb/a0000000f.gdbtable and /dev/null differ diff --git a/tests/test_data/geodatabases/Naperville.gdb/a0000000f.gdbtablx b/tests/test_data/geodatabases/Naperville.gdb/a0000000f.gdbtablx deleted file mode 100644 index 740fc4b..0000000 Binary files a/tests/test_data/geodatabases/Naperville.gdb/a0000000f.gdbtablx and /dev/null differ diff --git a/tests/test_data/geodatabases/Naperville.gdb/a0000000f.spx b/tests/test_data/geodatabases/Naperville.gdb/a0000000f.spx deleted file mode 100644 index 7b778c7..0000000 Binary files a/tests/test_data/geodatabases/Naperville.gdb/a0000000f.spx and /dev/null differ diff --git a/tests/test_data/geodatabases/Naperville.gdb/a0000001e.gdbindexes b/tests/test_data/geodatabases/Naperville.gdb/a0000001e.gdbindexes deleted file mode 100644 index c9d0caa..0000000 Binary files a/tests/test_data/geodatabases/Naperville.gdb/a0000001e.gdbindexes and /dev/null differ diff --git a/tests/test_data/geodatabases/Naperville.gdb/a0000001e.gdbtable b/tests/test_data/geodatabases/Naperville.gdb/a0000001e.gdbtable deleted file mode 100644 index 1e56ea4..0000000 Binary files a/tests/test_data/geodatabases/Naperville.gdb/a0000001e.gdbtable and /dev/null differ diff --git a/tests/test_data/geodatabases/Naperville.gdb/a0000001e.gdbtablx b/tests/test_data/geodatabases/Naperville.gdb/a0000001e.gdbtablx deleted file mode 100644 index 329b9ef..0000000 Binary files a/tests/test_data/geodatabases/Naperville.gdb/a0000001e.gdbtablx and /dev/null differ diff --git a/tests/test_data/geodatabases/Naperville.gdb/a0000001e.spx b/tests/test_data/geodatabases/Naperville.gdb/a0000001e.spx deleted file mode 100644 index a457ee9..0000000 Binary files a/tests/test_data/geodatabases/Naperville.gdb/a0000001e.spx and /dev/null differ diff --git a/tests/test_data/geodatabases/Naperville.gdb/gdb b/tests/test_data/geodatabases/Naperville.gdb/gdb deleted file mode 100644 index a786e12..0000000 Binary files a/tests/test_data/geodatabases/Naperville.gdb/gdb and /dev/null differ diff --git a/tests/test_data/geodatabases/Naperville.gdb/timestamps b/tests/test_data/geodatabases/Naperville.gdb/timestamps deleted file mode 100644 index f1c7f81..0000000 Binary files a/tests/test_data/geodatabases/Naperville.gdb/timestamps and /dev/null differ diff --git a/tests/test_data/geodatabases/baselines/naperville-gdf-0.pkl b/tests/test_data/geodatabases/baselines/test_read_file_in_batches_gdb1/naperville-gdf-0.pkl similarity index 99% rename from tests/test_data/geodatabases/baselines/naperville-gdf-0.pkl rename to tests/test_data/geodatabases/baselines/test_read_file_in_batches_gdb1/naperville-gdf-0.pkl index 6ddbb98..fb951ef 100644 Binary files a/tests/test_data/geodatabases/baselines/naperville-gdf-0.pkl and b/tests/test_data/geodatabases/baselines/test_read_file_in_batches_gdb1/naperville-gdf-0.pkl differ diff --git a/tests/test_data/geodatabases/baselines/naperville-gdf-1.pkl b/tests/test_data/geodatabases/baselines/test_read_file_in_batches_gdb1/naperville-gdf-1.pkl similarity index 99% rename from tests/test_data/geodatabases/baselines/naperville-gdf-1.pkl rename to tests/test_data/geodatabases/baselines/test_read_file_in_batches_gdb1/naperville-gdf-1.pkl index 8c2f489..58d5355 100644 Binary files a/tests/test_data/geodatabases/baselines/naperville-gdf-1.pkl and b/tests/test_data/geodatabases/baselines/test_read_file_in_batches_gdb1/naperville-gdf-1.pkl differ diff --git a/tests/test_data/geodatabases/baselines/naperville-gdf-2.pkl b/tests/test_data/geodatabases/baselines/test_read_file_in_batches_gdb1/naperville-gdf-2.pkl similarity index 99% rename from tests/test_data/geodatabases/baselines/naperville-gdf-2.pkl rename to tests/test_data/geodatabases/baselines/test_read_file_in_batches_gdb1/naperville-gdf-2.pkl index a279306..9c4ed32 100644 Binary files a/tests/test_data/geodatabases/baselines/naperville-gdf-2.pkl and b/tests/test_data/geodatabases/baselines/test_read_file_in_batches_gdb1/naperville-gdf-2.pkl differ diff --git a/tests/test_data/geodatabases/baselines/naperville-gdf-3.pkl b/tests/test_data/geodatabases/baselines/test_read_file_in_batches_gdb1/naperville-gdf-3.pkl similarity index 96% rename from tests/test_data/geodatabases/baselines/naperville-gdf-3.pkl rename to tests/test_data/geodatabases/baselines/test_read_file_in_batches_gdb1/naperville-gdf-3.pkl index aa72e75..3d9f1d9 100644 Binary files a/tests/test_data/geodatabases/baselines/naperville-gdf-3.pkl and b/tests/test_data/geodatabases/baselines/test_read_file_in_batches_gdb1/naperville-gdf-3.pkl differ diff --git a/tests/test_data/shapefiles/NM911_Address_202310/NM911_Address_202310.cpg b/tests/test_data/shapefiles/NM911_Address_202310/NM911_Address_202310.cpg deleted file mode 100644 index cd89cb9..0000000 --- a/tests/test_data/shapefiles/NM911_Address_202310/NM911_Address_202310.cpg +++ /dev/null @@ -1 +0,0 @@ -ISO-8859-1 \ No newline at end of file diff --git a/tests/test_data/shapefiles/NM911_Address_202310/NM911_Address_202310.dbf b/tests/test_data/shapefiles/NM911_Address_202310/NM911_Address_202310.dbf deleted file mode 100644 index e15b8df..0000000 Binary files a/tests/test_data/shapefiles/NM911_Address_202310/NM911_Address_202310.dbf and /dev/null differ diff --git a/tests/test_data/shapefiles/NM911_Address_202310/NM911_Address_202310.prj b/tests/test_data/shapefiles/NM911_Address_202310/NM911_Address_202310.prj deleted file mode 100644 index 91504ca..0000000 --- a/tests/test_data/shapefiles/NM911_Address_202310/NM911_Address_202310.prj +++ /dev/null @@ -1 +0,0 @@ -PROJCS["NAD_1983_UTM_Zone_13N",GEOGCS["GCS_North_American_1983",DATUM["D_North_American_1983",SPHEROID["GRS_1980",6378137.0,298.257222101]],PRIMEM["Greenwich",0.0],UNIT["Degree",0.0174532925199433]],PROJECTION["Transverse_Mercator"],PARAMETER["False_Easting",500000.0],PARAMETER["False_Northing",0.0],PARAMETER["Central_Meridian",-105.0],PARAMETER["Scale_Factor",0.9996],PARAMETER["Latitude_Of_Origin",0.0],UNIT["Meter",1.0]] \ No newline at end of file diff --git a/tests/test_data/shapefiles/NM911_Address_202310/NM911_Address_202310.shp b/tests/test_data/shapefiles/NM911_Address_202310/NM911_Address_202310.shp deleted file mode 100644 index 2d2b3fd..0000000 Binary files a/tests/test_data/shapefiles/NM911_Address_202310/NM911_Address_202310.shp and /dev/null differ diff --git a/tests/test_data/shapefiles/NM911_Address_202310/NM911_Address_202310.shx b/tests/test_data/shapefiles/NM911_Address_202310/NM911_Address_202310.shx deleted file mode 100644 index 8d7259e..0000000 Binary files a/tests/test_data/shapefiles/NM911_Address_202310/NM911_Address_202310.shx and /dev/null differ diff --git a/tests/test_data/shapefiles/baselines/test_load_and_validate_testprovider3/NM911_Address_202310.pkl b/tests/test_data/shapefiles/baselines/test_load_and_validate_testprovider3/NM911_Address_202310.pkl new file mode 100644 index 0000000..3e879ab Binary files /dev/null and b/tests/test_data/shapefiles/baselines/test_load_and_validate_testprovider3/NM911_Address_202310.pkl differ diff --git a/tests/test_data/shapefiles/baselines/usa-major-cities-gdf-0.pkl b/tests/test_data/shapefiles/baselines/test_read_file_in_batches_shape1/usa-major-cities-gdf-0.pkl similarity index 94% rename from tests/test_data/shapefiles/baselines/usa-major-cities-gdf-0.pkl rename to tests/test_data/shapefiles/baselines/test_read_file_in_batches_shape1/usa-major-cities-gdf-0.pkl index 938c3b1..f6bce82 100644 Binary files a/tests/test_data/shapefiles/baselines/usa-major-cities-gdf-0.pkl and b/tests/test_data/shapefiles/baselines/test_read_file_in_batches_shape1/usa-major-cities-gdf-0.pkl differ diff --git a/tests/test_data/shapefiles/baselines/usa-major-cities-gdf-1.pkl b/tests/test_data/shapefiles/baselines/test_read_file_in_batches_shape1/usa-major-cities-gdf-1.pkl similarity index 94% rename from tests/test_data/shapefiles/baselines/usa-major-cities-gdf-1.pkl rename to tests/test_data/shapefiles/baselines/test_read_file_in_batches_shape1/usa-major-cities-gdf-1.pkl index 57e7522..00739be 100644 Binary files a/tests/test_data/shapefiles/baselines/usa-major-cities-gdf-1.pkl and b/tests/test_data/shapefiles/baselines/test_read_file_in_batches_shape1/usa-major-cities-gdf-1.pkl differ diff --git a/tests/test_data/shapefiles/baselines/usa-major-cities-gdf-2.pkl b/tests/test_data/shapefiles/baselines/test_read_file_in_batches_shape1/usa-major-cities-gdf-2.pkl similarity index 90% rename from tests/test_data/shapefiles/baselines/usa-major-cities-gdf-2.pkl rename to tests/test_data/shapefiles/baselines/test_read_file_in_batches_shape1/usa-major-cities-gdf-2.pkl index e2b91a5..e34c2f5 100644 Binary files a/tests/test_data/shapefiles/baselines/usa-major-cities-gdf-2.pkl and b/tests/test_data/shapefiles/baselines/test_read_file_in_batches_shape1/usa-major-cities-gdf-2.pkl differ diff --git a/tests/test_data/shapefiles/baselines/NM911_Address_202310-gdf-0.pkl b/tests/test_data/shapefiles/baselines/test_read_file_in_batches_shape2/NM911_Address_202310-gdf-0.pkl similarity index 78% rename from tests/test_data/shapefiles/baselines/NM911_Address_202310-gdf-0.pkl rename to tests/test_data/shapefiles/baselines/test_read_file_in_batches_shape2/NM911_Address_202310-gdf-0.pkl index d913feb..396f6b0 100644 Binary files a/tests/test_data/shapefiles/baselines/NM911_Address_202310-gdf-0.pkl and b/tests/test_data/shapefiles/baselines/test_read_file_in_batches_shape2/NM911_Address_202310-gdf-0.pkl differ diff --git a/tests/test_data/shapefiles/baselines/NM911_Address_202310-gdf-1.pkl b/tests/test_data/shapefiles/baselines/test_read_file_in_batches_shape2/NM911_Address_202310-gdf-1.pkl similarity index 79% rename from tests/test_data/shapefiles/baselines/NM911_Address_202310-gdf-1.pkl rename to tests/test_data/shapefiles/baselines/test_read_file_in_batches_shape2/NM911_Address_202310-gdf-1.pkl index d99fce3..29f6313 100644 Binary files a/tests/test_data/shapefiles/baselines/NM911_Address_202310-gdf-1.pkl and b/tests/test_data/shapefiles/baselines/test_read_file_in_batches_shape2/NM911_Address_202310-gdf-1.pkl differ diff --git a/tests/test_data/shapefiles/baselines/NM911_Address_202310-gdf-2.pkl b/tests/test_data/shapefiles/baselines/test_read_file_in_batches_shape2/NM911_Address_202310-gdf-2.pkl similarity index 79% rename from tests/test_data/shapefiles/baselines/NM911_Address_202310-gdf-2.pkl rename to tests/test_data/shapefiles/baselines/test_read_file_in_batches_shape2/NM911_Address_202310-gdf-2.pkl index c5632d5..cb59fdd 100644 Binary files a/tests/test_data/shapefiles/baselines/NM911_Address_202310-gdf-2.pkl and b/tests/test_data/shapefiles/baselines/test_read_file_in_batches_shape2/NM911_Address_202310-gdf-2.pkl differ diff --git a/tests/test_data/shapefiles/baselines/NM911_Address_202310-gdf-3.pkl b/tests/test_data/shapefiles/baselines/test_read_file_in_batches_shape2/NM911_Address_202310-gdf-3.pkl similarity index 79% rename from tests/test_data/shapefiles/baselines/NM911_Address_202310-gdf-3.pkl rename to tests/test_data/shapefiles/baselines/test_read_file_in_batches_shape2/NM911_Address_202310-gdf-3.pkl index a3e106e..074e8ac 100644 Binary files a/tests/test_data/shapefiles/baselines/NM911_Address_202310-gdf-3.pkl and b/tests/test_data/shapefiles/baselines/test_read_file_in_batches_shape2/NM911_Address_202310-gdf-3.pkl differ diff --git a/tests/test_data/shapefiles/usa-major-cities/usa-major-cities.cpg b/tests/test_data/shapefiles/usa-major-cities/usa-major-cities.cpg deleted file mode 100644 index cd89cb9..0000000 --- a/tests/test_data/shapefiles/usa-major-cities/usa-major-cities.cpg +++ /dev/null @@ -1 +0,0 @@ -ISO-8859-1 \ No newline at end of file diff --git a/tests/test_data/shapefiles/usa-major-cities/usa-major-cities.dbf b/tests/test_data/shapefiles/usa-major-cities/usa-major-cities.dbf deleted file mode 100644 index 26ff775..0000000 Binary files a/tests/test_data/shapefiles/usa-major-cities/usa-major-cities.dbf and /dev/null differ diff --git a/tests/test_data/shapefiles/usa-major-cities/usa-major-cities.prj b/tests/test_data/shapefiles/usa-major-cities/usa-major-cities.prj deleted file mode 100644 index f45cbad..0000000 --- a/tests/test_data/shapefiles/usa-major-cities/usa-major-cities.prj +++ /dev/null @@ -1 +0,0 @@ -GEOGCS["GCS_WGS_1984",DATUM["D_WGS_1984",SPHEROID["WGS_1984",6378137.0,298.257223563]],PRIMEM["Greenwich",0.0],UNIT["Degree",0.0174532925199433]] \ No newline at end of file diff --git a/tests/test_data/shapefiles/usa-major-cities/usa-major-cities.shp b/tests/test_data/shapefiles/usa-major-cities/usa-major-cities.shp deleted file mode 100644 index bb96ae2..0000000 Binary files a/tests/test_data/shapefiles/usa-major-cities/usa-major-cities.shp and /dev/null differ diff --git a/tests/test_data/shapefiles/usa-major-cities/usa-major-cities.shx b/tests/test_data/shapefiles/usa-major-cities/usa-major-cities.shx deleted file mode 100644 index 2968ef1..0000000 Binary files a/tests/test_data/shapefiles/usa-major-cities/usa-major-cities.shx and /dev/null differ