Skip to content

Commit

Permalink
try to remove in memory cache for httpclient
Browse files Browse the repository at this point in the history
  • Loading branch information
maxi297 committed Dec 5, 2024
1 parent d357f67 commit 494f5c6
Show file tree
Hide file tree
Showing 2 changed files with 22 additions and 16 deletions.
37 changes: 22 additions & 15 deletions airbyte_cdk/sources/declarative/concurrent_declarative_source.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,9 @@ def __init__(
emit_connector_builder_messages=emit_connector_builder_messages,
disable_resumable_full_refresh=True,
)
self._config = config
self._concurrent_streams: Optional[List[AbstractStream]] = None
self._synchronous_streams: Optional[List[Stream]] = None

super().__init__(
source_config=source_config,
Expand All @@ -89,21 +92,6 @@ def __init__(

self._state = state

self._concurrent_streams: Optional[List[AbstractStream]]
self._synchronous_streams: Optional[List[Stream]]

# If the connector command was SPEC, there is no incoming config, and we cannot instantiate streams because
# they might depend on it. Ideally we want to have a static method on this class to get the spec without
# any other arguments, but the existing entrypoint.py isn't designed to support this. Just noting this
# for our future improvements to the CDK.
if config:
self._concurrent_streams, self._synchronous_streams = self._group_streams(
config=config or {}
)
else:
self._concurrent_streams = None
self._synchronous_streams = None

concurrency_level_from_manifest = self._source_config.get("concurrency_level")
if concurrency_level_from_manifest:
concurrency_level_component = self._constructor.create_component(
Expand Down Expand Up @@ -132,6 +120,19 @@ def __init__(
message_repository=self.message_repository, # type: ignore # message_repository is always instantiated with a value by factory
)

def _actually_group(self) -> None:
# If the connector command was SPEC, there is no incoming config, and we cannot instantiate streams because
# they might depend on it. Ideally we want to have a static method on this class to get the spec without
# any other arguments, but the existing entrypoint.py isn't designed to support this. Just noting this
# for our future improvements to the CDK.
if self._config:
self._concurrent_streams, self._synchronous_streams = self._group_streams(
config=self._config or {}
)
else:
self._concurrent_streams = None
self._synchronous_streams = None

def read(
self,
logger: logging.Logger,
Expand All @@ -141,6 +142,9 @@ def read(
) -> Iterator[AirbyteMessage]:
# ConcurrentReadProcessor pops streams that are finished being read so before syncing, the names of the concurrent
# streams must be saved so that they can be removed from the catalog before starting synchronous streams
if self._concurrent_streams is None:
self._actually_group()

if self._concurrent_streams:
concurrent_stream_names = set(
[concurrent_stream.name for concurrent_stream in self._concurrent_streams]
Expand All @@ -166,6 +170,9 @@ def read(
yield from super().read(logger, config, filtered_catalog, state)

def discover(self, logger: logging.Logger, config: Mapping[str, Any]) -> AirbyteCatalog:
if self._concurrent_streams is None:
self._actually_group()

concurrent_streams = self._concurrent_streams or []
synchronous_streams = self._synchronous_streams or []
return AirbyteCatalog(
Expand Down
1 change: 0 additions & 1 deletion airbyte_cdk/sources/streams/http/http_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -140,7 +140,6 @@ def _request_session(self) -> requests.Session:
# Use in-memory cache if cache_dir is not set
# This is a non-obvious interface, but it ensures we don't write sql files when running unit tests
if cache_dir:
self._logger.info(f"Using path {cache_dir} for HTTP cache") # TODO: remove
sqlite_path = str(Path(cache_dir) / self.cache_filename)
else:
self._logger.info("Using memory for cache") # TODO: remove
Expand Down

0 comments on commit 494f5c6

Please sign in to comment.