From cc0590d2235fb89592e241919f0c950ea1fcf611 Mon Sep 17 00:00:00 2001
From: matthew
Date: Fri, 12 Jul 2024 12:54:11 +0300
Subject: [PATCH] Fixed not closing page after some actions with activated Recaptcha middleware

---
 scrapypuppeteer/middleware.py | 33 +++++++++++++++++++--------------
 1 file changed, 19 insertions(+), 14 deletions(-)

diff --git a/scrapypuppeteer/middleware.py b/scrapypuppeteer/middleware.py
index bd77fb2..6907ae0 100644
--- a/scrapypuppeteer/middleware.py
+++ b/scrapypuppeteer/middleware.py
@@ -98,7 +98,7 @@ def from_crawler(cls, crawler):
         )
         return middleware
 
-    def process_request(self, request, spider):
+    def process_request(self, request, **_):
         if isinstance(request, CloseContextRequest):
             return self.process_close_context_request(request)
 
@@ -343,19 +343,27 @@ def from_crawler(cls, crawler: Crawler):
         )
         return cls(recaptcha_solving, submit_selectors)
 
-    def process_request(self, request, spider):
+    @staticmethod
+    def is_recaptcha_producing_action(action) -> bool:
+        return not isinstance(
+            action,
+            (Screenshot, Scroll, CustomJsAction, RecaptchaSolver),
+        )
+
+    def process_request(self, request, **_):
         if request.meta.get("dont_recaptcha", False):
             return None
 
+        # Checking if we need to close page after action
         if isinstance(request, PuppeteerRequest):
-            if request.close_page and not request.meta.get(
-                "_captcha_submission", False
-            ):
-                request.close_page = False
-                request.dont_filter = True
-                self._page_closing.add(request)
-                return request
-        return None
+            if self.is_recaptcha_producing_action(request.action):
+                if request.close_page and not request.meta.get(
+                    "_captcha_submission", False
+                ):
+                    request.close_page = False
+                    request.dont_filter = True
+                    self._page_closing.add(request)
+                    return request
 
     def process_response(self, request, response, spider):
         if not isinstance(
@@ -376,10 +384,7 @@ def process_response(self, request, response, spider):
             # RECaptchaSolver was called by recaptcha middleware
             return self._submit_recaptcha(request, response, spider)
 
-        if isinstance(
-            puppeteer_request.action,
-            (Screenshot, Scroll, CustomJsAction, RecaptchaSolver),
-        ):
+        if not self.is_recaptcha_producing_action(puppeteer_request.action):
             # No recaptcha after these actions
             return response
 