From da3b321cc4906904f1cc1feac3c08a135cf20e28 Mon Sep 17 00:00:00 2001 From: AndrewKorzh <92707967+AndrewKorzh@users.noreply.github.com> Date: Fri, 9 Aug 2024 17:05:27 +0300 Subject: [PATCH] add-FormAction --- scrapypuppeteer/actions.py | 12 +++++++ .../browser_managers/local_browser_manager.py | 34 ++++++++++++++++--- .../service_browser_manager.py | 3 +- scrapypuppeteer/middleware.py | 1 - setup.py | 2 +- 5 files changed, 45 insertions(+), 7 deletions(-) diff --git a/scrapypuppeteer/actions.py b/scrapypuppeteer/actions.py index 369039b..a285400 100644 --- a/scrapypuppeteer/actions.py +++ b/scrapypuppeteer/actions.py @@ -230,6 +230,18 @@ class Har(PuppeteerServiceAction): def payload(self): return {} + + +class FormAction(PuppeteerServiceAction): + endpoint = "form_action" + + def __init__(self, input_mapping: dict, submit_button: str = None): + self.input_mapping = input_mapping + self.submit_button = submit_button + + def payload(self): + return {"inputMapping": self.input_mapping, "submitButton": self.submit_button} + diff --git a/scrapypuppeteer/browser_managers/local_browser_manager.py b/scrapypuppeteer/browser_managers/local_browser_manager.py index 55d727f..131c2ae 100644 --- a/scrapypuppeteer/browser_managers/local_browser_manager.py +++ b/scrapypuppeteer/browser_managers/local_browser_manager.py @@ -71,7 +71,8 @@ def __init__(self): "screenshot": self.screenshot, "action": self.action, "recaptcha_solver": self.recaptcha_solver, - "har": self.har + "har": self.har, + "form_action": self.form_action } def process_request(self, request): @@ -243,6 +244,34 @@ async def async_scroll(): return syncer.sync(async_scroll()) + + def form_action(self, request: PuppeteerRequest): + context_id, page_id = syncer.sync(self.context_manager.check_context_and_page(request.context_id, request.page_id)) + page = self.context_manager.get_page_by_id(context_id, page_id) + + async def async_form_action(): + input_mapping = request.action.payload().get("inputMapping") + submit_button = request.action.payload().get("submitButton", None) + cookies = request.cookies + + for selector, params in input_mapping.items(): + value = params.get("value", "no value was provided") + delay = params.get("delay", 0) + await page.type(selector, value, {"delay": delay}) + + if submit_button: + await page.click(submit_button) + + response_html = await page.content() + return PuppeteerHtmlResponse(request.url, + request, + context_id = context_id, + page_id = page_id, + html = response_html, + cookies=cookies) + + return syncer.sync(async_form_action()) + def action(self, request: PuppeteerRequest): raise ValueError("CustomJsAction is not available in local mode") @@ -252,6 +281,3 @@ def recaptcha_solver(self, request: PuppeteerRequest): def har(self, request: PuppeteerRequest): raise ValueError("Har is not available in local mode") - - - diff --git a/scrapypuppeteer/browser_managers/service_browser_manager.py b/scrapypuppeteer/browser_managers/service_browser_manager.py index 2e7e488..3c0459c 100644 --- a/scrapypuppeteer/browser_managers/service_browser_manager.py +++ b/scrapypuppeteer/browser_managers/service_browser_manager.py @@ -23,6 +23,7 @@ Scroll, CustomJsAction, Har, + FormAction ) from scrapypuppeteer.response import ( PuppeteerResponse, @@ -210,7 +211,7 @@ def _form_response( @staticmethod def _get_response_class(request_action): - if isinstance(request_action, (GoTo, GoForward, GoBack, Click, Scroll)): + if isinstance(request_action, (GoTo, GoForward, GoBack, Click, Scroll, FormAction)): return PuppeteerHtmlResponse if isinstance(request_action, Screenshot): return PuppeteerScreenshotResponse diff --git a/scrapypuppeteer/middleware.py b/scrapypuppeteer/middleware.py index 8c48371..88587ff 100644 --- a/scrapypuppeteer/middleware.py +++ b/scrapypuppeteer/middleware.py @@ -31,7 +31,6 @@ PuppeteerJsonResponse, ) from scrapypuppeteer.request import ActionRequest, PuppeteerRequest, CloseContextRequest -# from scrapypuppeteer.browser_managers.local_browser_manager import LocalBrowserManager from scrapypuppeteer.browser_managers.service_browser_manager import ServiceBrowserManager diff --git a/setup.py b/setup.py index f0e383d..fb96091 100644 --- a/setup.py +++ b/setup.py @@ -8,7 +8,7 @@ def read_long_description(file_path): setup( name="scrapy-puppeteer-client", - version="0.3.4", + version="0.3.5", description="A library to use Puppeteer-managed browser in Scrapy spiders", long_description=read_long_description("README.md"), long_description_content_type="text/markdown",