Skip to content

Commit

Permalink
Middleware is done
Browse files Browse the repository at this point in the history
  • Loading branch information
MatthewZMSU committed May 29, 2024
1 parent afc5af2 commit bc2f698
Showing 1 changed file with 8 additions and 8 deletions.
16 changes: 8 additions & 8 deletions scrapypuppeteer/middleware.py
Original file line number Diff line number Diff line change
@@ -410,7 +410,7 @@ def __init__(self, restoring_length: int, n_retry_restoring: int):
self.restoring_length = restoring_length
self.n_retry_restoring = n_retry_restoring
self.context_requests = {}
- self.context_counters = {}
+ self.context_length = {}

@classmethod
def from_crawler(cls, crawler: Crawler):
@@ -450,9 +450,9 @@ def process_response(self, request: Request, response, spider):
if isinstance(response, PuppeteerResponse):
if request_binding:
self._bind_context(request, response)
- if response.context_id in self.context_counters:
+ if response.context_id in self.context_length:
# Update number of actions in context
- self.context_counters[response.context_id] += 1
+ self.context_length[response.context_id] += 1
elif puppeteer_request is not None and response.status == HTTPStatus.UNPROCESSABLE_ENTITY:
# One PuppeteerRequest has failed with 422 error
if request_binding:
@@ -472,29 +472,29 @@ def _bind_context(self, request, response):
restoring_request.meta['__restore_count'] = restoring_request.meta.get('__restore_count', 0)
restoring_request.meta['__context_id'] = response.context_id
self.context_requests[response.context_id] = restoring_request
- self.context_counters[response.context_id] = 0
+ self.context_length[response.context_id] = 0

def _restore_context(self, response):
context_id = json.loads(response.text).get('contextId', None)

if context_id in self.context_requests:
restoring_request = self.context_requests[context_id]

- if self.context_counters[context_id] > self.restoring_length: # TODO: not informative variables
+ if self.context_length[context_id] >= self.restoring_length + 1:
# Too many actions in context
self.__delete_context(context_id, "TOO MANY ACTIONS IN CONTEXT")
- elif restoring_request.meta['__restore_count'] >= self.n_retry_restoring: # TODO: try to fix the > and >= (why not the same???)
+ elif restoring_request.meta['__restore_count'] >= self.n_retry_restoring:
# Too many retries
self.__delete_context(context_id, "TOO MANY RETRIES")
else:
# Restoring
restoring_request.meta['__restore_count'] += 1
print(f"Restoring the request {restoring_request}") # TODO: to make logging
- self.context_counters[context_id] = 1
+ self.context_length[context_id] = 1
return restoring_request
return response

def __delete_context(self, context_id: str, reason: str):
- del self.context_counters[context_id]
+ del self.context_length[context_id]
del self.context_requests[context_id]
print(reason)

0 comments on commit bc2f698

Please sign in to comment.