diff --git a/airbyte_cdk/sources/file_based/exceptions.py b/airbyte_cdk/sources/file_based/exceptions.py index 1a879dda1..832ce06fb 100644 --- a/airbyte_cdk/sources/file_based/exceptions.py +++ b/airbyte_cdk/sources/file_based/exceptions.py @@ -129,7 +129,9 @@ class FileSizeLimitError(CustomFileBasedException): pass -def format_duplicate_files_error_message(stream_name: str, duplicated_files_names: List): +def format_duplicate_files_error_message( + stream_name: str, duplicated_files_names: List[dict[str, List[str]]] +) -> str: duplicated_files_messages = [] for duplicated_file in duplicated_files_names: for duplicated_file_name, file_paths in duplicated_file.items(): diff --git a/airbyte_cdk/sources/file_based/file_based_source.py b/airbyte_cdk/sources/file_based/file_based_source.py index b1cc11587..e63d1b87e 100644 --- a/airbyte_cdk/sources/file_based/file_based_source.py +++ b/airbyte_cdk/sources/file_based/file_based_source.py @@ -390,10 +390,12 @@ def _use_file_transfer(parsed_config: AbstractFileBasedSpec) -> bool: return use_file_transfer @staticmethod - def _preserve_subdirectories_directories(parsed_config: AbstractFileBasedSpec): + def _preserve_subdirectories_directories(parsed_config: AbstractFileBasedSpec) -> bool: # fall back to preserve subdirectories if config is not present or incomplete - if hasattr(parsed_config, "delivery_options") and hasattr( - parsed_config.delivery_options, "preserve_subdirectories_directories" + if ( + hasattr(parsed_config, "delivery_options") + and parsed_config.delivery_options is not None + and hasattr(parsed_config.delivery_options, "preserve_subdirectories_directories") ): return parsed_config.delivery_options.preserve_subdirectories_directories return True diff --git a/airbyte_cdk/sources/file_based/file_based_stream_reader.py b/airbyte_cdk/sources/file_based/file_based_stream_reader.py index 4c58d850f..babac114b 100644 --- a/airbyte_cdk/sources/file_based/file_based_stream_reader.py +++ b/airbyte_cdk/sources/file_based/file_based_stream_reader.py @@ -140,6 +140,7 @@ def preserve_subdirectories_directories(self) -> bool: if ( self.config and hasattr(self.config, "delivery_options") + and self.config.delivery_options is not None and hasattr(self.config.delivery_options, "preserve_subdirectories_directories") ): return self.config.delivery_options.preserve_subdirectories_directories diff --git a/airbyte_cdk/sources/file_based/stream/default_file_based_stream.py b/airbyte_cdk/sources/file_based/stream/default_file_based_stream.py index 6c43237db..babb2825c 100644 --- a/airbyte_cdk/sources/file_based/stream/default_file_based_stream.py +++ b/airbyte_cdk/sources/file_based/stream/default_file_based_stream.py @@ -108,7 +108,9 @@ def _filter_schema_invalid_properties( else: return super()._filter_schema_invalid_properties(configured_catalog_json_schema) - def _duplicated_files_names(self, slices: List) -> list[dict]: + def _duplicated_files_names( + self, slices: List[dict[str, List[RemoteFile]]] + ) -> List[dict[str, List[str]]]: seen_file_names = set() duplicates_file_names = set() file_paths = defaultdict(list)