Skip to content

Commit

Permalink
add llama index skyvern client
Browse files Browse the repository at this point in the history
  • Loading branch information
LawyZheng committed Feb 21, 2025
1 parent eec5ede commit 58fa0cc
Show file tree
Hide file tree
Showing 4 changed files with 336 additions and 2 deletions.
4 changes: 4 additions & 0 deletions integrations/langchain/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ from langchain_openai import ChatOpenAI
from langchain.agents import initialize_agent, AgentType
from skyvern_langchain.agent import run_task_v2

# load OpenAI API key from .env
load_dotenv()

llm = ChatOpenAI(model="gpt-4o", temperature=0)
Expand Down Expand Up @@ -72,6 +73,7 @@ from skyvern_langchain.agent import queue_task_v2, get_task_v2

from langchain_community.tools.sleep.tool import SleepTool

# load OpenAI API key from .env
load_dotenv()

llm = ChatOpenAI(model="gpt-4o", temperature=0)
Expand Down Expand Up @@ -110,6 +112,7 @@ from langchain_openai import ChatOpenAI
from langchain.agents import initialize_agent, AgentType
from skyvern_langchain.client import RunSkyvernClientTaskV2Tool

# load OpenAI API key from .env
load_dotenv()

llm = ChatOpenAI(model="gpt-4o", temperature=0)
Expand Down Expand Up @@ -150,6 +153,7 @@ from skyvern_langchain.client import (

from langchain_community.tools.sleep.tool import SleepTool

# load OpenAI API key from .env
load_dotenv()

llm = ChatOpenAI(model="gpt-4o", temperature=0)
Expand Down
184 changes: 184 additions & 0 deletions integrations/llama_index/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,184 @@
<!-- START doctoc generated TOC please keep comment here to allow auto update -->
<!-- DON'T EDIT THIS SECTION, INSTEAD RE-RUN doctoc TO UPDATE -->
**Table of Contents** *generated with [DocToc](https://github.com/thlorenz/doctoc)*

- [Skyvern LlamaIndex](#skyvern-llamaindex)
- [Installation](#installation)
- [Usage](#usage)
- [Run a task(sync) with skyvern agent (calling skyvern agent function directly in the tool)](#run-a-tasksync-with-skyvern-agent-calling-skyvern-agent-function-directly-in-the-tool)
- [Run a task(async) with skyvern agent (calling skyvern agent function directly in the tool)](#run-a-taskasync-with-skyvern-agent-calling-skyvern-agent-function-directly-in-the-tool)
- [Run a task(sync) with skyvern client (calling skyvern OpenAPI in the tool)](#run-a-tasksync-with-skyvern-client-calling-skyvern-openapi-in-the-tool)
- [Run a task(async) with skyvern client (calling skyvern OpenAPI in the tool)](#run-a-taskasync-with-skyvern-client-calling-skyvern-openapi-in-the-tool)

<!-- END doctoc generated TOC please keep comment here to allow auto update -->

# Skyvern LlamaIndex

This is a LlamaIndex integration for Skyvern.

## Installation

```bash
pip install skyvern-llamaindex
```

## Usage

### Run a task(sync) with skyvern agent (calling skyvern agent function directly in the tool)
> sync task won't return until the task is finished.
:warning: :warning: if you want to run this code block, you need to run `skyvern init --openai-api-key <your_openai_api_key>` command in your terminal to set up skyvern first.


```python
import asyncio
from dotenv import load_dotenv
from llama_index.agent.openai import OpenAIAgent
from llama_index.llms.openai import OpenAI
from skyvern_llamaindex.agent import SkyvernAgentToolSpec

# load OpenAI API key from .env
load_dotenv()

skyvern_tool = SkyvernAgentToolSpec()

tools = skyvern_tool.to_tool_list(["run_task_v2"])

agent = OpenAIAgent.from_tools(
tools=tools,
llm=OpenAI(model="gpt-4o"),
verbose=True,
max_function_calls=10,
)

# to run skyvern agent locally, must run `skyvern init` first
response = agent.chat("Run the task with skyvern. The task is about 'Navigate to the Hacker News homepage and get the top 3 posts.'")
print(response)
```

### Run a task(async) with skyvern agent (calling skyvern agent function directly in the tool)
> async task will return immediately and the task will be running in the background. You can use `get_task_v2` tool to poll the task information until the task is finished.
:warning: :warning: if you want to run this code block, you need to run `skyvern init --openai-api-key <your_openai_api_key>` command in your terminal to set up skyvern first.

```python
import asyncio
from dotenv import load_dotenv
from llama_index.agent.openai import OpenAIAgent
from llama_index.llms.openai import OpenAI
from llama_index.core.tools import FunctionTool
from skyvern_llamaindex.agent import SkyvernAgentToolSpec

async def sleep(seconds: int) -> str:
await asyncio.sleep(seconds)
return f"Slept for {seconds} seconds"

# load OpenAI API key from .env
load_dotenv()

skyvern_tool = SkyvernAgentToolSpec()

sleep_tool = FunctionTool.from_defaults(
async_fn=sleep,
description="Sleep for a given number of seconds",
name="sleep",
)

tools = skyvern_tool.to_tool_list(["queue_task_v2", "get_task_v2"])
tools.append(sleep_tool)

agent = OpenAIAgent.from_tools(
tools=tools,
llm=OpenAI(model="gpt-4o"),
verbose=True,
max_function_calls=10,
)

response = agent.chat("Queue a task with Skyvern. The task is about 'Navigate to the Hacker News homepage and get the top 3 posts.' Then, get this task information until it's completed. The task information re-get interval should be 60s.")
print(response)

```

### Run a task(sync) with skyvern client (calling skyvern OpenAPI in the tool)
> sync task won't return until the task is finished.
There is no need to run the `skyvern init` command in your terminal to set up Skyvern before using this integration.

```python
import asyncio
from dotenv import load_dotenv
from llama_index.agent.openai import OpenAIAgent
from llama_index.llms.openai import OpenAI
from skyvern_llamaindex.client import SkyvernClientToolSpec


async def sleep(seconds: int) -> str:
await asyncio.sleep(seconds)
return f"Slept for {seconds} seconds"

# load OpenAI API key from .env
load_dotenv()

skyvern_client_tool = SkyvernClientToolSpec(
credential="<your_organization_api_key>",
)

tools = skyvern_client_tool.to_tool_list(["run_task_v2"])

agent = OpenAIAgent.from_tools(
tools=tools,
llm=OpenAI(model="gpt-4o"),
verbose=True,
max_function_calls=10,
)

response = agent.chat("Run the task with skyvern. The task is about 'Navigate to the Hacker News homepage and get the top 3 posts.'")
print(response)

```

### Run a task(async) with skyvern client (calling skyvern OpenAPI in the tool)
> async task will return immediately and the task will be running in the background. You can use `GetSkyvernClientTaskV2Tool` tool to poll the task information until the task is finished.
There is no need to run the `skyvern init` command in your terminal to set up Skyvern before using this integration.

```python
import asyncio
from dotenv import load_dotenv
from llama_index.agent.openai import OpenAIAgent
from llama_index.llms.openai import OpenAI
from llama_index.core.tools import FunctionTool
from skyvern_llamaindex.client import SkyvernClientToolSpec


async def sleep(seconds: int) -> str:
await asyncio.sleep(seconds)
return f"Slept for {seconds} seconds"

# load OpenAI API key from .env
load_dotenv()

skyvern_client_tool = SkyvernClientToolSpec(
credential="<your_organization_api_key>",
)

sleep_tool = FunctionTool.from_defaults(
async_fn=sleep,
description="Sleep for a given number of seconds",
name="sleep",
)

tools = skyvern_client_tool.to_tool_list(["queue_task_v2", "get_task_v2"])
tools.append(sleep_tool)

agent = OpenAIAgent.from_tools(
tools=tools,
llm=OpenAI(model="gpt-4o"),
verbose=True,
max_function_calls=10,
)

response = agent.chat("Queue a task with Skyvern. The task is about 'Navigate to the Hacker News homepage and get the top 3 posts.' Then, get this task information until it's completed. The task information re-get interval should be 60s.")
print(response)

```
4 changes: 2 additions & 2 deletions integrations/llama_index/skyvern_llamaindex/agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ class SkyvernAgentToolSpec(BaseToolSpec):
"queue_task_v2",
"get_task_v2",
]
spec_schemas: Dict[str, ToolMetadata] = {
spec_metadata: Dict[str, ToolMetadata] = {
"run_task_v1": ToolMetadata(
name="run-skyvern-agent-task-v1",
description="Use Skyvern agent to run a v1 task. It is usually used for the simple tasks. This function won't return until the task is finished.",
Expand Down Expand Up @@ -62,7 +62,7 @@ def get_metadata_from_fn_name(
except AttributeError:
return None

return self.spec_schemas.get(fn_name)
return self.spec_metadata.get(fn_name)

async def run_task_v1(self, **kwargs: Dict[str, Any]) -> TaskResponse:
"""Use Skyvern agent to run a v1 task. It is usually used for the simple tasks. This function won't return until the task is finished."""
Expand Down
146 changes: 146 additions & 0 deletions integrations/llama_index/skyvern_llamaindex/client.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,146 @@
from typing import Any, Dict, List, Tuple

from httpx import AsyncClient
from llama_index.core.tools.tool_spec.base import SPEC_FUNCTION_TYPE, BaseToolSpec
from llama_index.core.tools.types import ToolMetadata
from skyvern_llamaindex.schema import GetTaskInput, TaskV1Request, TaskV2Request

from skyvern.client import AsyncSkyvern
from skyvern.forge.sdk.schemas.tasks import CreateTaskResponse, TaskResponse


class SkyvernClientToolSpec(BaseToolSpec):
    """LlamaIndex tool spec that drives Skyvern tasks through the Skyvern HTTP API.

    Unlike the agent-based tool spec, this calls the hosted Skyvern OpenAPI via
    ``AsyncSkyvern``, so no local ``skyvern init`` setup is required — only an
    organization API key (``credential``).
    """

    # Names of the methods on this class that are exposed as tools.
    spec_functions: List[SPEC_FUNCTION_TYPE] = [
        "run_task_v1",
        "queue_task_v1",
        "get_task_v1",
        "run_task_v2",
        "queue_task_v2",
        "get_task_v2",
    ]

    # Per-function tool metadata (name, description, input schema) keyed by
    # the method name in ``spec_functions``.
    spec_metadata: Dict[str, ToolMetadata] = {
        "run_task_v1": ToolMetadata(
            name="run-skyvern-client-task-v1",
            description="Use Skyvern client to run a v1 task. It is usually used for the simple tasks. This function won't return until the task is finished.",
            fn_schema=TaskV1Request,
        ),
        "queue_task_v1": ToolMetadata(
            name="queue-skyvern-client-task-v1",
            description="Use Skyvern client to queue a v1 task. It is usually used for the simple tasks. This function will return immediately and the task will be running in the background.",
            fn_schema=TaskV1Request,
        ),
        "get_task_v1": ToolMetadata(
            name="get-skyvern-client-task-v1",
            description="Use Skyvern client to get a v1 task. v1 tasks are usually simple tasks.",
            fn_schema=GetTaskInput,
        ),
        "run_task_v2": ToolMetadata(
            name="run-skyvern-client-task-v2",
            description="Use Skyvern client to run a v2 task. It is usually used for the complicated tasks. This function won't return until the task is finished.",
            fn_schema=TaskV2Request,
        ),
        "queue_task_v2": ToolMetadata(
            name="queue-skyvern-client-task-v2",
            description="Use Skyvern client to queue a v2 task. It is usually used for the complicated tasks. This function will return immediately and the task will be running in the background.",
            fn_schema=TaskV2Request,
        ),
        "get_task_v2": ToolMetadata(
            name="get-skyvern-client-task-v2",
            description="Use Skyvern client to get a v2 task. It is usually used for the complicated tasks.",
            fn_schema=GetTaskInput,
        ),
    }

    def __init__(self, credential: str, base_url: str = "https://api.skyvern.com"):
        """Create the tool spec.

        Args:
            credential: Skyvern organization API key, sent as the ``x-api-key`` header.
            base_url: Skyvern API endpoint; defaults to the hosted service.
        """
        httpx_client = AsyncClient(
            headers={
                "Content-Type": "application/json",
                "x-api-key": credential,
            },
        )
        self.client = AsyncSkyvern(base_url=base_url, httpx_client=httpx_client)

    def get_metadata_from_fn_name(
        self, fn_name: str, spec_functions: List[str | Tuple[str, str]] | None = None
    ) -> ToolMetadata | None:
        """Return the tool metadata for *fn_name*, or None if no such method exists."""
        try:
            getattr(self, fn_name)
        except AttributeError:
            return None

        return self.spec_metadata.get(fn_name)

    async def run_task_v1(self, **kwargs: Dict[str, Any]) -> TaskResponse:
        """Run a v1 task via the Skyvern API; blocks until the task finishes."""
        task_request = TaskV1Request(**kwargs)
        return await self.client.agent.run_task(
            max_steps_override=task_request.max_steps,
            timeout_seconds=task_request.timeout_seconds,
            url=task_request.url,
            title=task_request.title,
            webhook_callback_url=task_request.webhook_callback_url,
            totp_verification_url=task_request.totp_verification_url,
            totp_identifier=task_request.totp_identifier,
            navigation_goal=task_request.navigation_goal,
            data_extraction_goal=task_request.data_extraction_goal,
            # BUG FIX: was task_request.navigation_goal (copy-paste error)
            navigation_payload=task_request.navigation_payload,
            error_code_mapping=task_request.error_code_mapping,
            proxy_location=task_request.proxy_location,
            extracted_information_schema=task_request.extracted_information_schema,
            complete_criterion=task_request.complete_criterion,
            terminate_criterion=task_request.terminate_criterion,
            browser_session_id=task_request.browser_session_id,
        )

    async def queue_task_v1(self, **kwargs: Dict[str, Any]) -> CreateTaskResponse:
        """Queue a v1 task via the Skyvern API; returns immediately while the task runs in the background."""
        task_request = TaskV1Request(**kwargs)
        return await self.client.agent.create_task(
            max_steps_override=task_request.max_steps,
            url=task_request.url,
            title=task_request.title,
            webhook_callback_url=task_request.webhook_callback_url,
            totp_verification_url=task_request.totp_verification_url,
            totp_identifier=task_request.totp_identifier,
            navigation_goal=task_request.navigation_goal,
            data_extraction_goal=task_request.data_extraction_goal,
            # BUG FIX: was task_request.navigation_goal (copy-paste error)
            navigation_payload=task_request.navigation_payload,
            error_code_mapping=task_request.error_code_mapping,
            proxy_location=task_request.proxy_location,
            extracted_information_schema=task_request.extracted_information_schema,
            complete_criterion=task_request.complete_criterion,
            terminate_criterion=task_request.terminate_criterion,
            browser_session_id=task_request.browser_session_id,
        )

    async def get_task_v1(self, task_id: str) -> TaskResponse:
        """Fetch the current state of a v1 task by id."""
        return await self.client.agent.get_task(task_id=task_id)

    async def run_task_v2(self, **kwargs: Dict[str, Any]) -> Dict[str, Any | None]:
        """Run a v2 (observer) task via the Skyvern API; blocks until the task finishes."""
        task_request = TaskV2Request(**kwargs)
        return await self.client.agent.run_observer_task_v_2(
            max_iterations_override=task_request.max_iterations,
            timeout_seconds=task_request.timeout_seconds,
            user_prompt=task_request.user_prompt,
            url=task_request.url,
            browser_session_id=task_request.browser_session_id,
            webhook_callback_url=task_request.webhook_callback_url,
            totp_verification_url=task_request.totp_verification_url,
            totp_identifier=task_request.totp_identifier,
            proxy_location=task_request.proxy_location,
        )

    async def queue_task_v2(self, **kwargs: Dict[str, Any]) -> Dict[str, Any | None]:
        """Queue a v2 (observer) task via the Skyvern API; returns immediately while the task runs in the background."""
        task_request = TaskV2Request(**kwargs)
        return await self.client.agent.observer_task_v_2(
            max_iterations_override=task_request.max_iterations,
            user_prompt=task_request.user_prompt,
            url=task_request.url,
            browser_session_id=task_request.browser_session_id,
            webhook_callback_url=task_request.webhook_callback_url,
            totp_verification_url=task_request.totp_verification_url,
            totp_identifier=task_request.totp_identifier,
            proxy_location=task_request.proxy_location,
        )

    async def get_task_v2(self, task_id: str) -> Dict[str, Any | None]:
        """Fetch the current state of a v2 (observer) task by id."""
        return await self.client.agent.get_observer_task_v_2(task_id=task_id)

0 comments on commit 58fa0cc

Please sign in to comment.