diff --git a/lavague-integrations/drivers/lavague-drivers-playwright/lavague/drivers/playwright/base.py b/lavague-integrations/drivers/lavague-drivers-playwright/lavague/drivers/playwright/base.py index b8829e6f..4c5bd9ba 100644 --- a/lavague-integrations/drivers/lavague-drivers-playwright/lavague/drivers/playwright/base.py +++ b/lavague-integrations/drivers/lavague-drivers-playwright/lavague/drivers/playwright/base.py @@ -1,7 +1,5 @@ -from io import BytesIO import json import os -from PIL import Image from typing import Callable, Optional, Any, Mapping, Dict, List from playwright.sync_api import Page, Locator from lavague.sdk.base_driver import BaseDriver diff --git a/lavague-integrations/drivers/lavague-drivers-selenium/lavague/drivers/selenium/base.py b/lavague-integrations/drivers/lavague-drivers-selenium/lavague/drivers/selenium/base.py index bf3beb13..35a1c170 100644 --- a/lavague-integrations/drivers/lavague-drivers-selenium/lavague/drivers/selenium/base.py +++ b/lavague-integrations/drivers/lavague-drivers-selenium/lavague/drivers/selenium/base.py @@ -3,7 +3,7 @@ from typing import Callable, Dict, List, Optional from lavague.drivers.selenium.node import SeleniumNode -from lavague.sdk.action.navigation import NavigationOutput +from lavague.drivers.selenium.prompt import SELENIUM_PROMPT_TEMPLATE from lavague.sdk.base_driver import BaseDriver from lavague.sdk.base_driver.interaction import ( InteractionType, @@ -36,7 +36,7 @@ from selenium.webdriver.common.by import By from selenium.webdriver.remote.webdriver import WebDriver from selenium.webdriver.remote.webelement import WebElement -from selenium.webdriver.support.ui import Select, WebDriverWait +from selenium.webdriver.support.ui import WebDriverWait class SeleniumDriver(BaseDriver[SeleniumNode]): @@ -80,33 +80,6 @@ def init(self) -> None: {"source": JS_SETUP_GET_EVENTS}, ) - def execute(self, action: NavigationOutput) -> None: - """Execute an action""" - with self.resolve_xpath(action.xpath) as node: - 
match action.navigation_command: - case InteractionType.CLICK: - node.element.click() - - case InteractionType.TYPE: - value = action.value or "" - if node.element.tag_name == "input": - node.element.clear() - if node.element.tag_name == "select": - select = Select(node.element) - try: - select.select_by_value(value) - except NoSuchElementException: - select.select_by_visible_text(value) - else: - node.element.send_keys(value) - - case InteractionType.HOVER: - ActionChains(self.driver).move_to_element(node.element).perform() - - case InteractionType.SCROLL: - direction = ScrollDirection.from_string(action.value or "DOWN") - self.scroll(action.xpath, direction) - def destroy(self) -> None: """Cleanly destroy the underlying driver""" self.driver.quit() @@ -394,144 +367,3 @@ def switch_frame(self, xpath: str) -> None: def switch_parent_frame(self) -> None: self.driver.switch_to.parent_frame() - - -SELENIUM_PROMPT_TEMPLATE = """ -You are a chrome extension and your goal is to interact with web pages. You have been given a series of HTML snippets and queries. -Your goal is to return a list of actions that should be done in order to execute the actions. -Always target elements by using the full XPATH. You can only use one of the Xpaths included in the HTML. Do not derive new Xpaths. - -Your response must always be in the YAML format with the yaml markdown indicator and must include the main item "actions" , which will contains the objects "action", which contains the string "name" of tool of choice, and necessary arguments ("args") if required by the tool. -There must be only ONE args sub-object, such as args (if the tool has multiple arguments). -You must always include the comments as well, describing your actions step by step, following strictly the format in the examples provided. - -Provide high level explanations about why you think this element is the right one. -Your answer must be short and concise. 
Always includes comments in the YAML before listing the actions. - -The actions available are: - -Name: click -Description: Click on an element with a specific xpath -Arguments: - - xpath (string) - -Name: setValue -Description: Focus on and set the value of an input element with a specific xpath -Arguments: - - xpath (string) - - value (string) - -Name: dropdownSelect -Description: Select an option from a dropdown menu by its value -Arguments: - - xpath (string) - - value (string) - -Name: setValueAndEnter -Description: Like "setValue", except then it presses ENTER. Use this tool can submit the form when there's no "submit" button. -Arguments: - - xpath (string) - - value (string) - -Name: hover -Description: Move the mouse cursor over an element identified by the given xpath. It can be used to reveal tooltips or dropdown that appear on hover. It can also be used before scrolling to ensure the focus is in the correct container before performing the scroll action. -Arguments: - - xpath (string) - -Name: scroll -Description: Scroll the container that holds the element identified by the given xpath -Arguments: - - xpath (string) - - value (string): UP or DOWN - -Here are examples of previous answers: -HTML: -
Check in / Check out
-
-Query: Click on 'Home in Ploubazlanec' -Authorized Xpaths: "{'/html/body/div[5]/div/div/div/div/div[3]/header/div/div/div/div/div/div[2]/div/div/span[2]', '/html/body/div[5]/div/div/div/div/div[3]/header/div/div/div/div/div/div[2]/div/div', '/html/body/div[5]/d iv/div/div/div/div[3]/div/main/div[2]/div/div[2]/div/div/div/div/div/div/div/div[2]/div/div', '/html/body/div[5]/div/div/div/div/div[3]/header/div/div/div/div/div/div[2]/div/div/span[2]/button/div', '/html/body/div[5]/div/div/div/div/div[3]/div/main/div[2]/div/div[2]/div/div/div/div/div/div/div/div[2]/div/div/div', '/html/body/div[5]/div/div/div/div/div[3]/div/main/div[2]/div/div[2]/div/div/div/div/div/div/div/div[2]/div/div/div/div/div/div/div/div/div[2]/div/div/div/div/a/div', '/html/body/div[5]/div/div/div/div/div[3]/div/main/div[2]/div/div[2]/div/div/div/div/div/div/div/div[2]/div/div/div/div', '/html/body/div[5]/div/div/div/div/div[3]/header/div/div/div/div/div/div[2]/div/div/span[2]/button[2]', '/html/body/div[5]/div/div/div/div/div[3]/div/main/div[2]/div/div[2]/div/div/div/div/div/div/div/div[2]/div/div/div/div/div/div/div/div/div[2]/div/div/div/div', '/html/body/div[5]/div/div/div/div/div[3]/div/main/div[2]/div/div[2]/div/div/div/div/div/div/div/div[2]/div/div/div/div/div', '/html/body/div[5]/div/div/div/div/div[3]/header/div/div/div/div/div/div[2]/div/div/span[2]/button'}" -Completion: -```yaml -# Let's think through this step-by-step: -# 1. The query asks us to click on 'Home in Ploubazlanec' -# 2. In the HTML, we need to find an element that represents this listing -# 3. We can see a div with the text "Home in Ploubazlanec" in the title -# 4. The parent element of this div is an anchor tag, which is likely the clickable link for the listing -# 5. 
We should use the XPath of this anchor tag to perform the click action - -- actions: - - action: - # Click on the anchor tag that contains the listing title - args: - xpath: "/html/body/div[5]/div/div/div/div/div[3]/div/main/div[2]/div/div[2]/div/div/div/div/div/div/div/div[2]/div/div/div/div/div/div/div/div/div[2]/div/div/div/div/a" - name: "click" -``` ------ -HTML: -
-
- -More -
- - -Authorized Xpaths: "{'/html/body/section/devsite-header/div/div[1]/div/div/div[2]/div[1]/devsite-tabs', '/html/body/section/devsite-header/div/div[1]/div/div/div[2]/div[1]/devsite-tabs/nav/tab[2]/a', '/html/body/section/devsite-header/div/div[1]/div/div/div[2]/div[1]/devsite-tabs/nav/tab[2]/div', '/html/body/section/devsite-header/div/div[1]/div/div/div[2]/div[1]/devsite-tabs/nav/tab[2]/div/tab[1]/a', '/html/body/section/devsite-header/div/div[1]/div/div/div[2]/div[1]', '/html/body/section/devsite-header/div/div[1]/div/div/div[2]/div[1]/devsite-tabs/nav/tab[2]/div/tab[1]', '/html/body/section/devsite-header/div/div[1]/div/div/div[2]'}" -Query: Click on "Gemma" under the "More" dropdown menu. -Completion: -```yaml -# Let's think step by step -# First, we notice that the query asks us to click on the "Gemma" option under the "More" dropdown menu. -# In the provided HTML, we see that the "More" dropdown menu is within a tab element with a specific class and role attribute. -# The "More" dropdown menu can be identified by its class 'devsite-overflow-tab' and contains a link element with the text 'More'. -# We need to interact with this dropdown menu to reveal the hidden options. -# Specifically, for the "More" dropdown menu, there is an anchor element within a tab element: -# /html/body/section/devsite-header/div/div[1]/div/div/div[2]/div[1]/devsite-tabs/nav/tab[2]/a - -- actions: - - action: - # We can use this XPATH to identify and click on the "More" dropdown menu: - args: - xpath: "/html/body/section/devsite-header/div/div[1]/div/div/div[2]/div[1]/devsite-tabs/nav/tab[2]/a" - value: "" - name: "click" - - action: - # After clicking the "More" dropdown, we need to select the "Gemma" option from the revealed menu. 
- # The "Gemma" option is located within the dropdown menu and can be identified by its anchor element with the corresponding text: - # /html/body/section/devsite-header/div/div[1]/div/div/div[2]/div[1]/devsite-tabs/nav/tab[2]/div/tab[1]/a - # Thus, we use this XPATH to identify and click on the "Gemma" option: - args: - xpath: "/html/body/section/devsite-header/div/div[1]/div/div/div[2]/div[1]/devsite-tabs/nav/tab[2]/div/tab[1]/a" - value: "" - name: "click" -``` ------ -HTML: - -Authorized Xpaths: "{'/html/body/div/main/form/section/div/select'}" -Query: Select the 2:00 AM - 3:00 AM option from the dropdown menu -Completion: -```yaml -# Let's think step by step -# The query asks us to select the "2:00 AM - 3:00 AM" option from a dropdown menu. -# We need to identify the correct option within the dropdown menu based on its value attribute. -# The dropdown menu is specified by its XPATH, and the value of the option we need to select is "2". -# We can use the following "select" XPATH to locate the dropdown menu and the value "2" to select the appropriate option: -# /html/body/div/main/form/section/div/select - -- actions: - - action: - # Select the "3:00 AM - 4:00 AM" option by targeting the dropdown menu with the specified XPATH. 
- args: - xpath: "/html/body/div/main/form/section/div/select" - value: "2" - name: "dropdownSelect" -``` -""" diff --git a/lavague-integrations/drivers/lavague-drivers-selenium/lavague/drivers/selenium/node.py b/lavague-integrations/drivers/lavague-drivers-selenium/lavague/drivers/selenium/node.py index 2f37140f..c79d2d36 100644 --- a/lavague-integrations/drivers/lavague-drivers-selenium/lavague/drivers/selenium/node.py +++ b/lavague-integrations/drivers/lavague-drivers-selenium/lavague/drivers/selenium/node.py @@ -3,14 +3,21 @@ from typing import Optional from lavague.sdk.base_driver import DOMNode -from lavague.sdk.exceptions import NoElementException +from lavague.sdk.exceptions import ElementNotFoundException from PIL import Image -from selenium.common.exceptions import WebDriverException +from selenium.common.exceptions import ( + ElementClickInterceptedException, + NoSuchElementException, + WebDriverException, +) +from selenium.webdriver.common.action_chains import ActionChains from selenium.webdriver.common.by import By +from selenium.webdriver.common.keys import Keys from selenium.webdriver.remote.shadowroot import ShadowRoot from selenium.webdriver.remote.webdriver import WebDriver from selenium.webdriver.remote.webelement import WebElement +from selenium.webdriver.support.ui import Select class SeleniumNode(DOMNode[WebElement]): @@ -32,7 +39,7 @@ def element(self) -> WebElement: print("WARN: DOMNode context manager missing") self.__enter__() if self._element is None: - raise NoElementException() + raise ElementNotFoundException(self.xpath) return self._element @property @@ -60,6 +67,49 @@ def take_screenshot(self): pass return Image.new("RGB", (0, 0)) + def click(self): + with self: + try: + self.element.click() + except ElementClickInterceptedException: + try: + # Move to the element and click at its position + ActionChains(self.driver).move_to_element( + self.element + ).click().perform() + except Exception as click_error: + raise Exception( + f"Failed 
to click at element coordinates of {self.xpath} : {str(click_error)}" + ) + + def set_value(self, value: str): + with self: + if self.element.tag_name == "input": + try: + self.element.clear() + except WebDriverException: + pass + if self.element.tag_name == "select": + select = Select(self.element) + try: + select.select_by_value(value) + except NoSuchElementException: + select.select_by_visible_text(value) + else: + ( + ActionChains(self.driver) + .key_down(Keys.CONTROL) + .send_keys("a") + .key_up(Keys.CONTROL) + .send_keys(Keys.DELETE) # clear the input field + .send_keys(value) + .perform() + ) + + def hover(self): + with self: + ActionChains(self.driver).move_to_element(self.element).perform() + def enter_context(self): if hasattr(self, "_element"): return diff --git a/lavague-integrations/drivers/lavague-drivers-selenium/lavague/drivers/selenium/prompt.py b/lavague-integrations/drivers/lavague-drivers-selenium/lavague/drivers/selenium/prompt.py new file mode 100644 index 00000000..4aea401b --- /dev/null +++ b/lavague-integrations/drivers/lavague-drivers-selenium/lavague/drivers/selenium/prompt.py @@ -0,0 +1,139 @@ +SELENIUM_PROMPT_TEMPLATE = """ +You are a chrome extension and your goal is to interact with web pages. You have been given a series of HTML snippets and queries. +Your goal is to return a list of actions that should be done in order to execute the actions. +Always target elements by using the full XPATH. You can only use one of the Xpaths included in the HTML. Do not derive new Xpaths. + +Your response must always be in the YAML format with the yaml markdown indicator and must include the main item "actions" , which will contains the objects "action", which contains the string "name" of tool of choice, and necessary arguments ("args") if required by the tool. +There must be only ONE args sub-object, such as args (if the tool has multiple arguments). 
+You must always include the comments as well, describing your actions step by step, following strictly the format in the examples provided. + +Provide high level explanations about why you think this element is the right one. +Your answer must be short and concise. Always include comments in the YAML before listing the actions. + +The actions available are: + +Name: click +Description: Click on an element with a specific xpath +Arguments: + - xpath (string) + +Name: setValue +Description: Focus on and set the value of an input element with a specific xpath +Arguments: + - xpath (string) + - value (string) + +Name: dropdownSelect +Description: Select an option from a dropdown menu by its value +Arguments: + - xpath (string) + - value (string) + +Name: setValueAndEnter +Description: Like "setValue", except then it presses ENTER. Use this tool to submit the form when there's no "submit" button. +Arguments: + - xpath (string) + - value (string) + +Name: hover +Description: Move the mouse cursor over an element identified by the given xpath. It can be used to reveal tooltips or dropdowns that appear on hover. It can also be used before scrolling to ensure the focus is in the correct container before performing the scroll action. +Arguments: + - xpath (string) + +Name: scroll +Description: Scroll the container that holds the element identified by the given xpath +Arguments: + - xpath (string) + - value (string): UP or DOWN + +Here are examples of previous answers: +HTML: +
Check in / Check out
+
+Query: Click on 'Home in Ploubazlanec' +Authorized Xpaths: "{'/html/body/div[5]/div/div/div/div/div[3]/header/div/div/div/div/div/div[2]/div/div/span[2]', '/html/body/div[5]/div/div/div/div/div[3]/header/div/div/div/div/div/div[2]/div/div', '/html/body/div[5]/div/div/div/div/div[3]/div/main/div[2]/div/div[2]/div/div/div/div/div/div/div/div[2]/div/div', '/html/body/div[5]/div/div/div/div/div[3]/header/div/div/div/div/div/div[2]/div/div/span[2]/button/div', '/html/body/div[5]/div/div/div/div/div[3]/div/main/div[2]/div/div[2]/div/div/div/div/div/div/div/div[2]/div/div/div', '/html/body/div[5]/div/div/div/div/div[3]/div/main/div[2]/div/div[2]/div/div/div/div/div/div/div/div[2]/div/div/div/div/div/div/div/div/div[2]/div/div/div/div/a/div', '/html/body/div[5]/div/div/div/div/div[3]/div/main/div[2]/div/div[2]/div/div/div/div/div/div/div/div[2]/div/div/div/div', '/html/body/div[5]/div/div/div/div/div[3]/header/div/div/div/div/div/div[2]/div/div/span[2]/button[2]', '/html/body/div[5]/div/div/div/div/div[3]/div/main/div[2]/div/div[2]/div/div/div/div/div/div/div/div[2]/div/div/div/div/div/div/div/div/div[2]/div/div/div/div', '/html/body/div[5]/div/div/div/div/div[3]/div/main/div[2]/div/div[2]/div/div/div/div/div/div/div/div[2]/div/div/div/div/div', '/html/body/div[5]/div/div/div/div/div[3]/header/div/div/div/div/div/div[2]/div/div/span[2]/button'}" +Completion: +```yaml +# Let's think through this step-by-step: +# 1. The query asks us to click on 'Home in Ploubazlanec' +# 2. In the HTML, we need to find an element that represents this listing +# 3. We can see a div with the text "Home in Ploubazlanec" in the title +# 4. The parent element of this div is an anchor tag, which is likely the clickable link for the listing +# 5. 
We should use the XPath of this anchor tag to perform the click action + +- actions: + - action: + # Click on the anchor tag that contains the listing title + args: + xpath: "/html/body/div[5]/div/div/div/div/div[3]/div/main/div[2]/div/div[2]/div/div/div/div/div/div/div/div[2]/div/div/div/div/div/div/div/div/div[2]/div/div/div/div/a" + name: "click" +``` +----- +HTML: +
+
+ +More +
+ + +Authorized Xpaths: "{'/html/body/section/devsite-header/div/div[1]/div/div/div[2]/div[1]/devsite-tabs', '/html/body/section/devsite-header/div/div[1]/div/div/div[2]/div[1]/devsite-tabs/nav/tab[2]/a', '/html/body/section/devsite-header/div/div[1]/div/div/div[2]/div[1]/devsite-tabs/nav/tab[2]/div', '/html/body/section/devsite-header/div/div[1]/div/div/div[2]/div[1]/devsite-tabs/nav/tab[2]/div/tab[1]/a', '/html/body/section/devsite-header/div/div[1]/div/div/div[2]/div[1]', '/html/body/section/devsite-header/div/div[1]/div/div/div[2]/div[1]/devsite-tabs/nav/tab[2]/div/tab[1]', '/html/body/section/devsite-header/div/div[1]/div/div/div[2]'}" +Query: Click on "Gemma" under the "More" dropdown menu. +Completion: +```yaml +# Let's think step by step +# First, we notice that the query asks us to click on the "Gemma" option under the "More" dropdown menu. +# In the provided HTML, we see that the "More" dropdown menu is within a tab element with a specific class and role attribute. +# The "More" dropdown menu can be identified by its class 'devsite-overflow-tab' and contains a link element with the text 'More'. +# We need to interact with this dropdown menu to reveal the hidden options. +# Specifically, for the "More" dropdown menu, there is an anchor element within a tab element: +# /html/body/section/devsite-header/div/div[1]/div/div/div[2]/div[1]/devsite-tabs/nav/tab[2]/a + +- actions: + - action: + # We can use this XPATH to identify and click on the "More" dropdown menu: + args: + xpath: "/html/body/section/devsite-header/div/div[1]/div/div/div[2]/div[1]/devsite-tabs/nav/tab[2]/a" + value: "" + name: "click" + - action: + # After clicking the "More" dropdown, we need to select the "Gemma" option from the revealed menu. 
+ # The "Gemma" option is located within the dropdown menu and can be identified by its anchor element with the corresponding text: + # /html/body/section/devsite-header/div/div[1]/div/div/div[2]/div[1]/devsite-tabs/nav/tab[2]/div/tab[1]/a + # Thus, we use this XPATH to identify and click on the "Gemma" option: + args: + xpath: "/html/body/section/devsite-header/div/div[1]/div/div/div[2]/div[1]/devsite-tabs/nav/tab[2]/div/tab[1]/a" + value: "" + name: "click" +``` +----- +HTML: + +Authorized Xpaths: "{'/html/body/div/main/form/section/div/select'}" +Query: Select the 2:00 AM - 3:00 AM option from the dropdown menu +Completion: +```yaml +# Let's think step by step +# The query asks us to select the "2:00 AM - 3:00 AM" option from a dropdown menu. +# We need to identify the correct option within the dropdown menu based on its value attribute. +# The dropdown menu is specified by its XPATH, and the value of the option we need to select is "2". +# We can use the following "select" XPATH to locate the dropdown menu and the value "2" to select the appropriate option: +# /html/body/div/main/form/section/div/select + +- actions: + - action: + # Select the "2:00 AM - 3:00 AM" option by targeting the dropdown menu with the specified XPATH. 
+ args: + xpath: "/html/body/div/main/form/section/div/select" + value: "2" + name: "dropdownSelect" +``` +""" diff --git a/lavague-sdk/lavague/sdk/base_driver/base.py b/lavague-sdk/lavague/sdk/base_driver/base.py index 5698506e..6571edb8 100644 --- a/lavague-sdk/lavague/sdk/base_driver/base.py +++ b/lavague-sdk/lavague/sdk/base_driver/base.py @@ -31,10 +31,27 @@ def init(self) -> None: """Init the underlying driver""" pass - @abstractmethod def execute(self, action: NavigationOutput) -> None: """Execute an action""" - pass + with self.resolve_xpath(action.xpath) as node: + match action.navigation_command: + case InteractionType.CLICK: + node.click() + + case InteractionType.TYPE: + node.set_value(action.value or "") + + case InteractionType.HOVER: + node.hover() + + case InteractionType.SCROLL: + direction = ScrollDirection.from_string(action.value or "DOWN") + self.scroll(action.xpath, direction) + + case _: + raise NotImplementedError( + f"Action {action.navigation_command} not implemented" + ) @abstractmethod def destroy(self) -> None: diff --git a/lavague-sdk/lavague/sdk/base_driver/node.py b/lavague-sdk/lavague/sdk/base_driver/node.py index 7bd2fa1f..0935ea9f 100644 --- a/lavague-sdk/lavague/sdk/base_driver/node.py +++ b/lavague-sdk/lavague/sdk/base_driver/node.py @@ -37,6 +37,18 @@ def inner_html(self) -> str: def take_screenshot(self) -> Image.Image: pass + @abstractmethod + def click(self): + pass + + @abstractmethod + def hover(self): + pass + + @abstractmethod + def set_value(self, value: str): + pass + @abstractmethod def enter_context(self): pass diff --git a/lavague-sdk/lavague/sdk/exceptions.py b/lavague-sdk/lavague/sdk/exceptions.py index 915961f1..8a785fdf 100644 --- a/lavague-sdk/lavague/sdk/exceptions.py +++ b/lavague-sdk/lavague/sdk/exceptions.py @@ -10,3 +10,8 @@ def __init__(self, message="History root reached, cannot go back"): class NoPageException(DriverException): def __init__(self, message="No page loaded"): super().__init__(message) + + 
+class ElementNotFoundException(DriverException): + def __init__(self, xpath: str): + super().__init__(f"Element not found: {xpath}") \ No newline at end of file diff --git a/lavague-sdk/lavague/sdk/utilities/format_utils.py b/lavague-sdk/lavague/sdk/utilities/format_utils.py index 0647d934..c7ba7031 100644 --- a/lavague-sdk/lavague/sdk/utilities/format_utils.py +++ b/lavague-sdk/lavague/sdk/utilities/format_utils.py @@ -1,5 +1,6 @@ import re + def quote_numeric_yaml_values(yaml_string: str) -> str: """Wrap numeric values in quotes in a YAML string. @@ -24,4 +25,4 @@ def replace_value(match): # Replace values that are numeric modified_yaml = re.sub(pattern, replace_value, yaml_string) - return modified_yaml \ No newline at end of file + return modified_yaml