
Commit

feat(integrations): spider tool refactored + readme/typespec updates
Vedantsahai18 committed Dec 3, 2024
1 parent a256a69 commit 0c55e12
Showing 10 changed files with 276 additions and 61 deletions.
10 changes: 5 additions & 5 deletions README.md
@@ -1281,7 +1281,7 @@ arguments:
   query: string # The search query for searching with Brave
 output:
-  result: string # The result of the Brave Search
+  result: list[dict] # A list of search results, each containing: title, link, snippet
 ```

</td>
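The Brave integration's output is now structured rather than a single string. A minimal sketch of consuming the new shape, assuming the keys named in the README comment above (title, link, snippet); the sample data is illustrative, not a real API response:

```python
# Sketch of the new Brave Search output shape; keys follow the README
# comment above, values are illustrative.
result = [
    {
        "title": "Example Domain",
        "link": "https://example.com",
        "snippet": "This domain is for use in illustrative examples.",
    },
]

for item in result:
    print(f"{item['title']} ({item['link']}): {item['snippet']}")
```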
@@ -1356,11 +1356,11 @@ setup:
 arguments:
   url: string # The URL for which to fetch data
-  mode: string # The type of crawlers (default: "scrape")
   params: dict # (Optional) The parameters for the Spider API
+  content_type: string # (Optional) The content type to return. Default is "application/json". Other options: "text/csv", "application/xml", "application/jsonl"
 output:
-  documents: list # The documents returned from the spider
+  result: list[dict] # A list of results, each containing: content, error, status, costs, url
 ```

</td>
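The Spider entry drops `mode` and gains `content_type`, which selects the response format. A sketch of an argument payload under the new schema (field names mirror the README entry above; values are illustrative, and the surrounding task plumbing is omitted):

```python
# Sketch of a Spider tool-call payload under the new schema.
spider_arguments = {
    "url": "https://example.com",
    "params": {"limit": 1},  # optional Spider API parameters
    # Defaults to "application/json"; other options per the README:
    # "text/csv", "application/xml", "application/jsonl".
    "content_type": "application/json",
}
```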
@@ -1452,7 +1452,7 @@ arguments:
   base64: boolean # Whether the input file is base64 encoded. Default is false.
 output:
-  documents: list # The parsed data from the document
+  documents: list[Document] # A list of parsed documents
 ```

</td>
@@ -1520,7 +1520,7 @@ arguments:
   sort_order: string # The sort order for the results, options: ascending, descending
 output:
-  result: list # A list of search results, each containing: entry_id, title, updated, published, authors, summary, comment, journal_ref, doi, primary_category, categories, links, pdf_url, pdf_downloaded
+  result: list[dict] # A list of search results, each containing: entry_id, title, updated, published, authors, summary, comment, journal_ref, doi, primary_category, categories, links, pdf_url, pdf_downloaded
 ```

</td>
16 changes: 10 additions & 6 deletions agents-api/agents_api/autogen/Tools.py
@@ -1639,9 +1639,11 @@ class SpiderFetchArguments(BaseModel):
     """
     The URL to fetch data from
     """
-    mode: Literal["crawl", "scrape"] = "scrape"
+    content_type: Literal[
+        "application/json", "text/csv", "application/xml", "application/jsonl"
+    ] = "application/json"
     """
-    The type of crawler to use
+    The content type to return
     """
     params: dict[str, Any] | None = None
     """
@@ -1661,9 +1663,11 @@ class SpiderFetchArgumentsUpdate(BaseModel):
     """
     The URL to fetch data from
     """
-    mode: Literal["crawl", "scrape"] = "scrape"
+    content_type: Literal[
+        "application/json", "text/csv", "application/xml", "application/jsonl"
+    ] = "application/json"
     """
-    The type of crawler to use
+    The content type to return
     """
     params: dict[str, Any] | None = None
     """
@@ -1683,7 +1687,7 @@ class SpiderIntegrationDef(BaseIntegrationDef):
     """
     The provider must be "spider"
     """
-    method: str | None = None
+    method: Literal["crawl", "links", "screenshot", "search"] | None = None
     """
     The specific method of the integration to call
     """
@@ -1709,7 +1713,7 @@ class SpiderIntegrationDefUpdate(BaseIntegrationDefUpdate):
     """
     The provider must be "spider"
     """
-    method: str | None = None
+    method: Literal["crawl", "links", "screenshot", "search"] | None = None
     """
     The specific method of the integration to call
     """
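The Literal types make invalid values fail at model construction rather than later in the integration call. A minimal sketch of the regenerated model in use, assuming the autogen import path shown in this file's header; the call site itself is hypothetical:

```python
# Sketch: `mode` is gone; `content_type` replaces it and is checked
# against the four Literal values at validation time.
from agents_api.autogen.Tools import SpiderFetchArguments

args = SpiderFetchArguments(
    url="https://example.com",
    content_type="text/csv",  # defaults to "application/json"
    params={"limit": 1},
)
```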
16 changes: 10 additions & 6 deletions integrations-service/integrations/autogen/Tools.py
@@ -1639,9 +1639,11 @@ class SpiderFetchArguments(BaseModel):
     """
     The URL to fetch data from
     """
-    mode: Literal["crawl", "scrape"] = "scrape"
+    content_type: Literal[
+        "application/json", "text/csv", "application/xml", "application/jsonl"
+    ] = "application/json"
     """
-    The type of crawler to use
+    The content type to return
     """
     params: dict[str, Any] | None = None
     """
@@ -1661,9 +1663,11 @@ class SpiderFetchArgumentsUpdate(BaseModel):
     """
     The URL to fetch data from
     """
-    mode: Literal["crawl", "scrape"] = "scrape"
+    content_type: Literal[
+        "application/json", "text/csv", "application/xml", "application/jsonl"
+    ] = "application/json"
     """
-    The type of crawler to use
+    The content type to return
     """
     params: dict[str, Any] | None = None
     """
@@ -1683,7 +1687,7 @@ class SpiderIntegrationDef(BaseIntegrationDef):
     """
     The provider must be "spider"
     """
-    method: str | None = None
+    method: Literal["crawl", "links", "screenshot", "search"] | None = None
     """
     The specific method of the integration to call
     """
@@ -1709,7 +1713,7 @@ class SpiderIntegrationDefUpdate(BaseIntegrationDefUpdate):
     """
     The provider must be "spider"
     """
-    method: str | None = None
+    method: Literal["crawl", "links", "screenshot", "search"] | None = None
     """
     The specific method of the integration to call
     """
7 changes: 6 additions & 1 deletion integrations-service/integrations/models/__init__.py
@@ -53,6 +53,11 @@
 from .ffmpeg import FfmpegSearchOutput as FfmpegSearchOutput
 from .llama_parse import LlamaParseFetchOutput as LlamaParseFetchOutput
 from .remote_browser import RemoteBrowserOutput as RemoteBrowserOutput
-from .spider import SpiderFetchOutput as SpiderFetchOutput
+from .spider import (
+    SpiderOutput as SpiderOutput,
+)
+from .spider import (
+    SpiderResponse as SpiderResponse,
+)
 from .weather import WeatherGetOutput as WeatherGetOutput
 from .wikipedia import WikipediaSearchOutput as WikipediaSearchOutput
4 changes: 2 additions & 2 deletions integrations-service/integrations/models/execution.py
@@ -50,7 +50,7 @@
 from .ffmpeg import FfmpegSearchOutput
 from .llama_parse import LlamaParseFetchOutput
 from .remote_browser import RemoteBrowserOutput
-from .spider import SpiderFetchOutput
+from .spider import SpiderOutput
 from .weather import WeatherGetOutput
 from .wikipedia import WikipediaSearchOutput

@@ -98,7 +98,6 @@ class ExecutionError(BaseModel):
 ]

 ExecutionResponse = Union[
-    SpiderFetchOutput,
     WeatherGetOutput,
     EmailOutput,
     WikipediaSearchOutput,
@@ -118,6 +117,7 @@ class ExecutionError(BaseModel):
     CloudinaryUploadOutput,
     ExecutionError,
     ArxivSearchOutput,
+    SpiderOutput,
 ]


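With the union member swapped, SpiderOutput now flows through ExecutionResponse. A minimal sketch (not the service's actual dispatch code), assuming the repo's import paths:

```python
# Sketch: SpiderOutput satisfies the ExecutionResponse union after this
# change. Import paths assume the integrations package layout above.
from integrations.models import SpiderOutput, SpiderResponse
from integrations.models.execution import ExecutionResponse

response: ExecutionResponse = SpiderOutput(
    result=[SpiderResponse(url="https://example.com", status=200)]
)
```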
19 changes: 14 additions & 5 deletions integrations-service/integrations/models/spider.py
@@ -1,10 +1,19 @@
-from langchain_core.documents import Document
-from pydantic import Field
+from typing import Any, List, Optional
+
+from pydantic import BaseModel, Field

 from .base_models import BaseOutput


-class SpiderFetchOutput(BaseOutput):
-    documents: list[Document] = Field(
-        ..., description="The documents returned from the spider"
+class SpiderResponse(BaseModel):
+    content: Optional[str] = None
+    error: Optional[str] = None
+    status: Optional[int] = None
+    costs: Optional[dict[Any, Any]] = None
+    url: Optional[str] = None
+
+
+class SpiderOutput(BaseOutput):
+    result: List[SpiderResponse] = Field(
+        ..., description="The responses from the spider"
     )
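The new models in action (values illustrative). Because every SpiderResponse field is Optional, partial provider responses still validate:

```python
# Sketch using the models defined above; the import path assumes the
# integrations package layout.
from integrations.models.spider import SpiderOutput, SpiderResponse

output = SpiderOutput(
    result=[
        SpiderResponse(
            content="<html>...</html>",
            status=200,
            costs={"total_cost": 0.0001},  # illustrative cost payload
            url="https://example.com",
        ),
        SpiderResponse(error="timeout", status=504, url="https://example.org"),
    ]
)
print(output.result[0].status)  # 200
```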
23 changes: 20 additions & 3 deletions integrations-service/integrations/providers.py
@@ -3,7 +3,6 @@
     # Arguments imports
     BraveSearchArguments,
-    # Setup imports
     # Setup imports
     BraveSearchSetup,
     BrowserbaseCompleteSessionArguments,
     BrowserbaseCreateSessionArguments,
@@ -48,7 +47,7 @@
     LlamaParseFetchOutput,
     ProviderInfo,
     RemoteBrowserOutput,
-    SpiderFetchOutput,
+    SpiderOutput,
     WeatherGetOutput,
     WikipediaSearchOutput,
 )
@@ -99,7 +98,25 @@
             method="crawl",
             description="Crawl a website and extract data",
             arguments=SpiderFetchArguments,
-            output=SpiderFetchOutput,
+            output=SpiderOutput,
         ),
+        BaseProviderMethod(
+            method="links",
+            description="Extract all links from the webpage",
+            arguments=SpiderFetchArguments,
+            output=SpiderOutput,
+        ),
+        BaseProviderMethod(
+            method="screenshot",
+            description="Take a screenshot of the webpage",
+            arguments=SpiderFetchArguments,
+            output=SpiderOutput,
+        ),
+        BaseProviderMethod(
+            method="search",
+            description="Search content within the webpage",
+            arguments=SpiderFetchArguments,
+            output=SpiderOutput,
+        ),
     ],
     info=ProviderInfo(
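All four spider methods share SpiderFetchArguments and SpiderOutput, differing only in name and description. A self-contained sketch of that method table (names and descriptions copied from the diff; the helper itself is hypothetical, not service code):

```python
# Method names and descriptions as registered for the spider provider.
SPIDER_METHODS = {
    "crawl": "Crawl a website and extract data",
    "links": "Extract all links from the webpage",
    "screenshot": "Take a screenshot of the webpage",
    "search": "Search content within the webpage",
}

def describe_spider_methods() -> None:
    """Print each spider method with its description."""
    for name, description in SPIDER_METHODS.items():
        print(f"spider.{name}: {description}")

describe_spider_methods()
```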
