diff --git a/lavague-integrations/drivers/lavague-drivers-playwright/lavague/drivers/playwright/base.py b/lavague-integrations/drivers/lavague-drivers-playwright/lavague/drivers/playwright/base.py index b8829e6f..4c5bd9ba 100644 --- a/lavague-integrations/drivers/lavague-drivers-playwright/lavague/drivers/playwright/base.py +++ b/lavague-integrations/drivers/lavague-drivers-playwright/lavague/drivers/playwright/base.py @@ -1,7 +1,5 @@ -from io import BytesIO import json import os -from PIL import Image from typing import Callable, Optional, Any, Mapping, Dict, List from playwright.sync_api import Page, Locator from lavague.sdk.base_driver import BaseDriver diff --git a/lavague-integrations/drivers/lavague-drivers-selenium/lavague/drivers/selenium/base.py b/lavague-integrations/drivers/lavague-drivers-selenium/lavague/drivers/selenium/base.py index bf3beb13..35a1c170 100644 --- a/lavague-integrations/drivers/lavague-drivers-selenium/lavague/drivers/selenium/base.py +++ b/lavague-integrations/drivers/lavague-drivers-selenium/lavague/drivers/selenium/base.py @@ -3,7 +3,7 @@ from typing import Callable, Dict, List, Optional from lavague.drivers.selenium.node import SeleniumNode -from lavague.sdk.action.navigation import NavigationOutput +from lavague.drivers.selenium.prompt import SELENIUM_PROMPT_TEMPLATE from lavague.sdk.base_driver import BaseDriver from lavague.sdk.base_driver.interaction import ( InteractionType, @@ -36,7 +36,7 @@ from selenium.webdriver.common.by import By from selenium.webdriver.remote.webdriver import WebDriver from selenium.webdriver.remote.webelement import WebElement -from selenium.webdriver.support.ui import Select, WebDriverWait +from selenium.webdriver.support.ui import WebDriverWait class SeleniumDriver(BaseDriver[SeleniumNode]): @@ -80,33 +80,6 @@ def init(self) -> None: {"source": JS_SETUP_GET_EVENTS}, ) - def execute(self, action: NavigationOutput) -> None: - """Execute an action""" - with self.resolve_xpath(action.xpath) as node: - match action.navigation_command: - case InteractionType.CLICK: - node.element.click() - - case InteractionType.TYPE: - value = action.value or "" - if node.element.tag_name == "input": - node.element.clear() - if node.element.tag_name == "select": - select = Select(node.element) - try: - select.select_by_value(value) - except NoSuchElementException: - select.select_by_visible_text(value) - else: - node.element.send_keys(value) - - case InteractionType.HOVER: - ActionChains(self.driver).move_to_element(node.element).perform() - - case InteractionType.SCROLL: - direction = ScrollDirection.from_string(action.value or "DOWN") - self.scroll(action.xpath, direction) - def destroy(self) -> None: """Cleanly destroy the underlying driver""" self.driver.quit() @@ -394,144 +367,3 @@ def switch_frame(self, xpath: str) -> None: def switch_parent_frame(self) -> None: self.driver.switch_to.parent_frame() - - -SELENIUM_PROMPT_TEMPLATE = """ -You are a chrome extension and your goal is to interact with web pages. You have been given a series of HTML snippets and queries. -Your goal is to return a list of actions that should be done in order to execute the actions. -Always target elements by using the full XPATH. You can only use one of the Xpaths included in the HTML. Do not derive new Xpaths. - -Your response must always be in the YAML format with the yaml markdown indicator and must include the main item "actions" , which will contains the objects "action", which contains the string "name" of tool of choice, and necessary arguments ("args") if required by the tool. -There must be only ONE args sub-object, such as args (if the tool has multiple arguments). -You must always include the comments as well, describing your actions step by step, following strictly the format in the examples provided. - -Provide high level explanations about why you think this element is the right one. -Your answer must be short and concise. Always includes comments in the YAML before listing the actions. - -The actions available are: - -Name: click -Description: Click on an element with a specific xpath -Arguments: - - xpath (string) - -Name: setValue -Description: Focus on and set the value of an input element with a specific xpath -Arguments: - - xpath (string) - - value (string) - -Name: dropdownSelect -Description: Select an option from a dropdown menu by its value -Arguments: - - xpath (string) - - value (string) - -Name: setValueAndEnter -Description: Like "setValue", except then it presses ENTER. Use this tool can submit the form when there's no "submit" button. -Arguments: - - xpath (string) - - value (string) - -Name: hover -Description: Move the mouse cursor over an element identified by the given xpath. It can be used to reveal tooltips or dropdown that appear on hover. It can also be used before scrolling to ensure the focus is in the correct container before performing the scroll action. -Arguments: - - xpath (string) - -Name: scroll -Description: Scroll the container that holds the element identified by the given xpath -Arguments: - - xpath (string) - - value (string): UP or DOWN - -Here are examples of previous answers: -HTML: -