Skip to content

Commit

Permalink
Try fixing sqlite errors
Browse files Browse the repository at this point in the history
  • Loading branch information
maxi297 committed Dec 5, 2024
1 parent 50d6f21 commit d357f67
Show file tree
Hide file tree
Showing 2 changed files with 13 additions and 7 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -56,8 +56,9 @@


class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
# By default, we defer to a value of 1 which represents running a connector using the Concurrent CDK engine on only one thread.
SINGLE_THREADED_CONCURRENCY_LEVEL = 1
# By default, we defer to a value of 2. A value lower than that could cause a PartitionEnqueuer to be stuck in a state of deadlock
# because it has hit the limit of futures but no partition reader is consuming them.
SINGLE_THREADED_CONCURRENCY_LEVEL = 2

def __init__(
self,
Expand Down Expand Up @@ -121,7 +122,7 @@ def __init__(
) # Partition_generation iterates using range based on this value. If this is floored to zero we end up in a dead lock during start up
else:
concurrency_level = self.SINGLE_THREADED_CONCURRENCY_LEVEL
initial_number_of_partitions_to_generate = self.SINGLE_THREADED_CONCURRENCY_LEVEL
initial_number_of_partitions_to_generate = self.SINGLE_THREADED_CONCURRENCY_LEVEL // 2

self._concurrent_source = ConcurrentSource.create(
num_workers=concurrency_level,
Expand Down
13 changes: 9 additions & 4 deletions airbyte_cdk/sources/streams/http/http_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,7 @@ def __init__(
):
self._name = name
self._api_budget: APIBudget = api_budget or APIBudget(policies=[])
self._logger = logger
if session:
self._session = session
else:
Expand All @@ -108,7 +109,6 @@ def __init__(
)
if isinstance(authenticator, AuthBase):
self._session.auth = authenticator
self._logger = logger
self._error_handler = error_handler or HttpStatusErrorHandler(self._logger)
if backoff_strategy is not None:
if isinstance(backoff_strategy, list):
Expand Down Expand Up @@ -140,10 +140,12 @@ def _request_session(self) -> requests.Session:
# Use in-memory cache if cache_dir is not set
# This is a non-obvious interface, but it ensures we don't write sql files when running unit tests
if cache_dir:
self._logger.info(f"Using path {cache_dir} for HTTP cache") # TODO: remove
sqlite_path = str(Path(cache_dir) / self.cache_filename)
else:
self._logger.info("Using memory for cache") # TODO: remove
sqlite_path = "file::memory:?cache=shared"
backend = SkipFailureSQLiteCache(sqlite_path)
backend = SkipFailureSQLiteCache(self._name, sqlite_path) # TODO maybe add a busy timeout
return CachedLimiterSession(
sqlite_path, backend=backend, api_budget=self._api_budget, match_headers=True
) # type: ignore # there are no typeshed stubs for requests_cache
Expand Down Expand Up @@ -541,18 +543,21 @@ def _write(self, key: str, value: str) -> None:
class SkipFailureSQLiteCache(requests_cache.backends.sqlite.SQLiteCache):
def __init__( # type: ignore # ignoring as lib is not typed
self,
table_name="response",
db_path="http_cache",
serializer=None,
**kwargs,
) -> None:
super().__init__(db_path, serializer, **kwargs)
skwargs = {"serializer": serializer, **kwargs} if serializer else kwargs
self.responses: requests_cache.backends.sqlite.SQLiteDict = SkipFailureSQLiteDict(
db_path, table_name="responses", **skwargs
db_path, table_name=table_name, fast_save=True, wal=True, **skwargs
)
self.redirects: requests_cache.backends.sqlite.SQLiteDict = SkipFailureSQLiteDict(
db_path,
table_name="redirects",
table_name=f"redirects_{table_name}",
fast_save=True,
wal=True,
lock=self.responses._lock,
serializer=None,
**kwargs,
Expand Down

0 comments on commit d357f67

Please sign in to comment.