diff --git a/nextpy/ai/agent/assistant_agent.py b/nextpy/ai/agent/assistant_agent.py
new file mode 100644
index 00000000..6b1425ef
--- /dev/null
+++ b/nextpy/ai/agent/assistant_agent.py
@@ -0,0 +1,214 @@
+import asyncio
+import inspect
+import logging
+from pathlib import Path
+from typing import Any, Callable, Tuple
+
+from nextpy.ai import engine
+from nextpy.ai.agent.base_agent import BaseAgent
+
+
+def _call_functions(functions):
+    for function, arguments, keyword_args in functions:
+        if inspect.iscoroutinefunction(function):
+            try:
+                other_loop = asyncio.get_event_loop()
+                import nest_asyncio
+
+                nest_asyncio.apply(other_loop)
+            except RuntimeError:
+                pass
+            loop = asyncio.new_event_loop()
+            loop.run_until_complete(function(*arguments, **keyword_args))
+        else:
+            function(*arguments, **keyword_args)
+
+
+async def _a_call_functions(functions):
+    for function, arguments, keyword_args in functions:
+        if inspect.iscoroutinefunction(function):
+            await function(*arguments, **keyword_args)
+        else:
+            function(*arguments, **keyword_args)
+
+
+class AssistantAgent(BaseAgent):
+    """An assistant agent that interacts with users in a conversational manner.
+
+    :param name: The name of the assistant agent.
+    :type name: str
+    :param llm: The language model used by the assistant agent.
+    :type llm: LanguageModel
+    :param memory: The memory used by the assistant agent.
+    :type memory: Memory
+    :param async_mode: Whether the assistant agent runs in asynchronous mode. Defaults to False.
+    :type async_mode: bool, optional
+    :param system_message: The system message included in the prompt. Defaults to None.
+    :type system_message: str, optional
+    :param functions_before_call: Functions to call before the main call, each as a (function, args, kwargs) triple. Defaults to None.
+    :type functions_before_call: List[Callable], optional
+    :param functions_after_call: Functions to call after the main call, each as a (function, args, kwargs) triple. Defaults to None.
+    :type functions_after_call: List[Callable], optional
+
+    Built on top of the existing BaseAgent, AssistantAgent is a simple interface for defining an
+    AI assistant that converses with users. It can be customized with a name, language model,
+    memory, and other parameters, and its asynchronous mode allows it to handle multiple
+    conversations concurrently.
+
+    MultiAgentManager can be used to manage multiple assistant agents and coordinate their
+    interactions with users.
+
+    Example:
+
+        tailwind_agent = AssistantAgent(
+            name='Tailwind Class Generator',
+            llm=llm,
+            memory=None,
+            async_mode=False,
+            system_message='''Automates the creation of Tailwind CSS classes, streamlining the
+            process of building stylish and responsive user interfaces. By leveraging advanced
+            algorithms and design principles, the Tailwind Class Generator analyzes your design
+            elements and dynamically generates the optimal set of Tailwind utility classes,
+            letting developers focus on high-level design decisions rather than manually
+            crafting individual CSS rules.''',
+        )
+    """
+
+    DEFAULT_PROMPT = '''
+    {{#system~}} {{name}}, you are working in the following team :{{agents}}
+    {{~/system}}
+
+    {{#user~}}
+    Read the following CONVERSATION :
+    {{messages}}
+    Respond as {{name}}. Do not thank any team member or show appreciation.
+    {{~/user}}
+
+    {{#assistant~}}
+    {{gen 'answer' temperature=0 max_tokens=500}}
+    {{~/assistant}}
+    '''
+
+    def __init__(self,
+                 name,
+                 llm=None,
+                 memory=None,
+                 async_mode: bool = False,
+                 system_message: str | None = None,
+                 custom_engine=None,
+                 functions_before_call: Tuple[Callable, Tuple[Any], Tuple[Any]] | None = None,
+                 functions_after_call: Tuple[Callable, Tuple[Any], Tuple[Any]] | None = None,
+                 description: str = "Helpful AI Assistant Agent",
+                 **kwargs):
+        """Initialize an instance of the AssistantAgent class.
+
+        :param name: The name of the assistant agent.
+        :type name: str
+        :param llm: The language model used by the assistant agent.
+        :type llm: LanguageModel
+        :param memory: The memory used by the assistant agent.
+        :type memory: Memory
+        :param async_mode: Whether the assistant agent runs in asynchronous mode. Defaults to False.
+        :type async_mode: bool, optional
+        :param system_message: The system message included in the prompt. Defaults to None.
+        :type system_message: str, optional
+        :param custom_engine: The engine used by the assistant agent. Either llm or custom_engine must be provided.
+        :type custom_engine: Engine, optional
+        :param functions_before_call: Functions, with their args and kwargs, to call before the main call. Defaults to None.
+        :type functions_before_call: List[Callable], optional
+        :param functions_after_call: Functions, with their args and kwargs, to call after the main call. Defaults to None.
+        :type functions_after_call: List[Callable], optional
+        :param kwargs: Additional keyword arguments.
+        """
+        super().__init__(llm=llm, **kwargs)
+        self.name = name
+        self.prompt = self.DEFAULT_PROMPT
+        self.system_message = system_message
+        # Used by the multiagent manager to decide between receive and a_receive
+        self.async_mode = async_mode
+
+        if system_message is not None:
+            try:
+                system_message = Path(system_message).read_text()
+            except Exception:
+                pass
+            self.prompt = self.prompt[:self.prompt.find(
+                '{{~/system}}')] + system_message + self.prompt[self.prompt.find('{{~/system}}'):]
+
+        # Either llm or a custom engine must be provided
+        if llm is None and custom_engine is None:
+            logging.warning("Either llm or custom_engine must be provided.")
+
+        self.engine = custom_engine if custom_engine is not None else engine(
+            template=self.prompt, llm=llm, memory=memory, async_mode=async_mode, **kwargs)
+        self.output_key = 'answer'
+        self.functions_before_call = functions_before_call
+        self.functions_after_call = functions_after_call
+        self.description = description

+    @staticmethod
+    def function_call_decorator(func):
+        """Wrap the main function call with the functions to be called before and after it.
+
+        :param func: The main function to be called.
+        :type func: Callable
+        :return: The wrapped function.
+        :rtype: Callable
+        """
+        if inspect.iscoroutinefunction(func):
+            async def a_inner(self, *args, **kwargs):
+                if self.functions_before_call is not None:
+                    await _a_call_functions(self.functions_before_call)
+
+                result = await func(self, *args, **kwargs)
+
+                if self.functions_after_call is not None:
+                    await _a_call_functions(self.functions_after_call)
+
+                return result
+            return a_inner
+        else:
+            def inner(self, *args, **kwargs):
+                if self.functions_before_call is not None:
+                    _call_functions(self.functions_before_call)
+
+                result = func(self, *args, **kwargs)
+
+                if self.functions_after_call is not None:
+                    _call_functions(self.functions_after_call)
+
+                return result
+            return inner
+
+    @function_call_decorator
+    def receive(self, agents, messages, termination_message):
+        """Receive messages from other agents and generate a response.
+
+        :param agents: The list of agents involved in the conversation.
+        :type agents: List[str]
+        :param messages: The list of messages in the conversation.
+        :type messages: List[str]
+        :param termination_message: The termination message for the conversation.
+        :type termination_message: str
+        :return: The generated response.
+        :rtype: str
+        """
+        output = self.run(agents=agents, messages=messages, name=self.name)
+        return output
+
+    @function_call_decorator
+    async def a_receive(self, agents, messages, termination_message):
+        """Asynchronously receive messages from other agents and generate a response.
+
+        :param agents: The list of agents involved in the conversation.
+        :type agents: List[str]
+        :param messages: The list of messages in the conversation.
+        :type messages: List[str]
+        :param termination_message: The termination message for the conversation.
+        :type termination_message: str
+        :return: The generated response.
+        :rtype: str
+        """
+        output = await self.arun(agents=agents, messages=messages, name=self.name)
+        return output
diff --git a/nextpy/ai/agent/base_agent.py b/nextpy/ai/agent/base_agent.py
index 52eaa40c..c68cae30 100644
--- a/nextpy/ai/agent/base_agent.py
+++ b/nextpy/ai/agent/base_agent.py
@@ -1,4 +1,4 @@
-# This file has been modified by the Nextpy Team in 2023 using AI tools and automation scripts.
+# This file has been modified by the Nextpy Team in 2023 using AI skills and automation scripts.
 # We have rigorously tested these modifications to ensure reliability and performance. Based on successful test results, we are confident in the quality and stability of these changes.
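# Illustrative usage sketch for the AssistantAgent above — a minimal example,
# assuming `engine.llms.OpenAI` accepts a model name as shown; the hook
# function is hypothetical. _call_functions iterates (function, args, kwargs)
# triples, so each pre/post hook must be supplied in that shape.
from nextpy.ai import engine
from nextpy.ai.agent.assistant_agent import AssistantAgent

llm = engine.llms.OpenAI("gpt-3.5-turbo")  # assumed constructor signature


def announce(stage):
    # Simple side-effect hook run around every receive()/a_receive() call.
    print(f"[researcher] {stage}")


researcher = AssistantAgent(
    name="Researcher",
    llm=llm,
    memory=None,
    async_mode=False,
    system_message="You research topics and answer tersely.",
    functions_before_call=[(announce, ("starting call",), {})],
    functions_after_call=[(announce, ("call finished",), {})],
)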
import argparse @@ -7,6 +7,7 @@ import json import logging from enum import Enum +from nextpy.ai.skills.base import BaseSkill from typing import Any, Dict, List, Optional, Union import nest_asyncio @@ -16,7 +17,6 @@ from nextpy.ai.engine._program import extract_text from nextpy.ai.memory.base import BaseMemory from nextpy.ai.rag.doc_loader import document_loader -from nextpy.ai.tools.basetool import BaseTool log = logging.getLogger(__name__) @@ -32,7 +32,7 @@ class AgentState(Enum): IDLE = 0 BUSY = 1 - USED_AS_TOOL = 2 + USED_AS_SKILL = 2 ERROR = 3 @@ -40,8 +40,8 @@ class BaseAgent: def __init__( self, rag: Optional[Any] = None, - tools: Optional[List[BaseTool]] = None, llm: Optional[Any] = None, + skills: Optional[List[BaseSkill]] = None, prompt_template: str = None, input_variables: Dict[str, Any] = {}, agent_id: str = "default", @@ -52,8 +52,8 @@ def __init__( ): self.agent_id = agent_id self.rag = rag - self.tools = tools self.llm = llm + self.skills = skills self.prompt_template = prompt_template self.input_variables = input_variables self.memory = memory @@ -98,14 +98,14 @@ def get_knowledge_variable(self): def default_llm_model(self): pass - def add_tool(self, tool: BaseTool) -> None: - """Add a tool to the agent's tool list.""" - self.tools.append(tool) + def add_skill(self, skill: BaseSkill) -> None: + """Add a skill to the agent's skill list.""" + self.skills.append(skill) - def remove_tool(self, tool: BaseTool) -> None: - """Remove a tool from the agent's tool list.""" - if tool in self.tools: - self.tools.remove(tool) + def remove_skill(self, skill: BaseSkill) -> None: + """Remove a skill from the agent's skill list.""" + if skill in self.skills: + self.skills.remove(skill) def llm_instance(self) -> engine.llms.OpenAI: """Create an instance of the language model.""" @@ -146,7 +146,8 @@ def run(self, **kwargs) -> Union[str, Dict[str, Any]]: RETRIEVED_KNOWLEDGE=retrieved_knowledge, **kwargs, silent=True ) else: - raise ValueError("knowledge_variable not found in input kwargs") + raise ValueError( + "knowledge_variable not found in input kwargs") else: output = self.engine(**kwargs, silent=True, from_agent=True) @@ -166,7 +167,8 @@ def run(self, **kwargs) -> Union[str, Dict[str, Any]]: if output.variables().get(_output_key): return output[_output_key] else: - logging.warning("Output key not found in output, so full output returned") + logging.warning( + "Output key not found in output, so full output returned") return output async def arun(self, **kwargs) -> Union[str, Dict[str, Any]]: @@ -188,7 +190,8 @@ async def arun(self, **kwargs) -> Union[str, Dict[str, Any]]: RETRIEVED_KNOWLEDGE=retrieved_knowledge, **kwargs, silent=True ) else: - raise ValueError("knowledge_variable not found in input kwargs") + raise ValueError( + "knowledge_variable not found in input kwargs") else: output = await self.engine(**kwargs, silent=True, from_agent=True) # Handle memory here @@ -207,7 +210,8 @@ async def arun(self, **kwargs) -> Union[str, Dict[str, Any]]: if output.variables().get(_output_key): return output[_output_key] else: - logging.warning("Output key not found in output, so full output returned") + logging.warning( + "Output key not found in output, so full output returned") return output def _handle_memory(self, new_program): @@ -260,7 +264,8 @@ def cli(self): ) for var in _vars: - parser.add_argument(f"--{var}", help=f"Pass {var} as an input variable") + parser.add_argument( + f"--{var}", help=f"Pass {var} as an input variable") args = parser.parse_args() @@ -329,7 +334,7 @@ def 
export_agent_config(self, config_path, export_json=False): "prompt_template": self.prompt_template, "input_variables": self.input_variables, "output_key": self.output_key, - # 'tools': None if self.tools is None else self.tools + # 'skills': None if self.skills is None else self.skills } with open(config_path, "w") as f: yaml.dump(config, f) @@ -352,7 +357,8 @@ def load_from_config(cls, config_file): rag = None if config["rag"] is not None: - rag_module_name, rag_class_name = config["rag"]["type"].rsplit(".", 1) + rag_module_name, rag_class_name = config["rag"]["type"].rsplit( + ".", 1) rag_module = importlib.import_module(rag_module_name) rag_class = getattr(rag_module, rag_class_name) @@ -391,9 +397,12 @@ def load_from_config(cls, config_file): vector_store_module_name, vector_store_class_name = config["rag"][ "vector_store" ]["type"].rsplit(".", 1) - vector_store_module = importlib.import_module(vector_store_module_name) - vector_store_class = getattr(vector_store_module, vector_store_class_name) - vector_store = vector_store_class(embedding_function=embedding_function) + vector_store_module = importlib.import_module( + vector_store_module_name) + vector_store_class = getattr( + vector_store_module, vector_store_class_name) + vector_store = vector_store_class( + embedding_function=embedding_function) rag = rag_class( raw_data=raw_data, diff --git a/nextpy/ai/agent/multiagent_manager.py b/nextpy/ai/agent/multiagent_manager.py new file mode 100644 index 00000000..48faf095 --- /dev/null +++ b/nextpy/ai/agent/multiagent_manager.py @@ -0,0 +1,261 @@ +from typing import Tuple, List, Any +from nextpy.ai.agent.assistant_agent import AssistantAgent +from nextpy.ai import engine + + +class MultiAgentManager: + """ + A class that manages multiple agents in a role-playing game. + + Attributes: + DEFAULT_PROMPT (str): The default prompt for the game. + SOLUTION_PROMPT (str): The prompt for generating the final solution. + agents (Tuple[AssistantAgent]): A tuple of AssistantAgent objects representing the participating agents. + messages (List[Any]): A list of messages exchanged between the agents and the user. + termination_message (str): The termination message indicating the end of the game. + error_message (str): The error message indicating an error in the game. + mode (str): The mode of the game (e.g., 'BROADCAST', 'ROUND_ROBIN'). + rounds (int): The number of rounds to play. + round_robin (bool): A flag indicating whether to use round-robin mode. + llm: The language model used by the agents. + memory: The memory used by the agents. + async_mode (bool): A flag indicating whether to use asynchronous mode. + debug_mode (bool): A flag indicating whether to enable debug mode. + """ + DEFAULT_PROMPT = ''' + {{#system~}} You are playing a role playing game with the following participants : \n{{agents}}{{~/system}} + + {{#user~}} + Read the following conversation and choose who the next speaker will be: + {{messages}} + Simply respond with the NAME of the next speaker without any other characters such as numbers or punctuations. + {{~/user}} + + {{#assistant~}} + {{gen 'answer' temperature=0 max_tokens=500}} + {{~/assistant}} + ''' + + SOLUTION_PROMPT = ''' + {{#system~}} You are a helpful and terse AI assistant{{~/system}} + + {{#user~}} + Read the following conversation: + {{messages}} + Now generate the final solution to the User's query. 
+ {{~/user}} + + {{#assistant~}} + {{gen 'answer' temperature=0 max_tokens=500}} + {{~/assistant}} + ''' + + def __init__(self, + agents: Tuple[AssistantAgent], + messages: List[Any] | None = None, + termination_message: str = 'TERMINATE SUCCESSFULLY', + error_message: str = 'ERROR', + mode: str = 'BROADCAST', + rounds: int = 5, + round_robin: bool = True, + llm=None, + memory=None, + async_mode=False, + debug_mode=False): + + if messages is None: + messages = [] + + self.debug_mode = debug_mode + + if not any([isinstance(agent, AssistantAgent) + for agent in agents]): + self.DEFAULT_PROMPT = self.DEFAULT_PROMPT[:self.DEFAULT_PROMPT.find( + '{{~/system}}')] + '\nNote, User is also a participant, you can choose User.' + self.DEFAULT_PROMPT[self.DEFAULT_PROMPT.find('{{~/system}}'):] + else: + self.DEFAULT_PROMPT = self.DEFAULT_PROMPT[:self.DEFAULT_PROMPT.find( + '{{~/system}}')] + '\nNote, User is not a participant, you cannot choose User.' + self.DEFAULT_PROMPT[self.DEFAULT_PROMPT.find('{{~/system}}'):] + self.engine = engine( + self.DEFAULT_PROMPT, llm=llm, memory=memory, async_mode=async_mode) + self.solution_summarizer = engine( + self.SOLUTION_PROMPT, llm=llm, memory=memory, async_mode=async_mode) + + self.agents = agents + self.agent_dict = {agent.name: agent for agent in agents} + self.messages = messages + self.termination_message = termination_message + self.error_message = error_message + self.mode = mode + self.rounds = rounds + self.round_robin = round_robin + self.current_agent = 0 # Used to keep track of next agent in sequence + + @property + def agent_string(self): + """ + Returns a string representation of all the agent names separated by commas. + """ + return '\n\n'.join([f'NAME: {agent.name}\n DESC: {agent.description}' for agent in self.agents]) + + def run_sequence(self, context): + """ + Runs the sequence of agents in the multi-agent system. + + Args: + context: The context for the current round. + + Returns: + A list of messages exchanged between agents during the sequence. + """ + self.messages.append(['User', context]) + while self.rounds != 0 and not self._termination_message_received(): + if self.debug_mode: + print( + f'{"-"*5}Messaging next agent : {self.agents[self.current_agent].name}{"-"*5}\n\n') + + self._message_next_agent() + + if self.debug_mode: + print(f'{self.messages[-1][0]}\n\n{self.messages[-1][1]}') + + if self.current_agent == 0 and not self.round_robin: + break + + self.rounds -= 1 + return self.messages + + async def a_run_sequence(self, context): + """ + Runs the sequence of agents in the multi-agent system in async. + + Args: + context: The context for the current round. + + Returns: + A list of messages exchanged between agents during the sequence. + """ + self.messages.append(['User', context]) + while self.rounds != 0 and not self._termination_message_received(): + if self.debug_mode: + print( + f'{"-"*5}Messaging next agent : {self.agents[self.current_agent].name}{"-"*5}\n\n') + + await self._a_message_next_agent() + if self.debug_mode: + print( + f'{self.messages[-1][0]}\n\n{self.messages[-1][1]}') + + if self.current_agent == 0 and not self.round_robin: + break + + self.rounds -= 1 + return self.messages + + def run_auto(self, context): + """ + Runs the multi-agent manager in auto mode. + + Args: + context: The context for the multi-agent manager. + + Returns: + A list containing the messages exchanged between agents and the final solution. 
+ """ + self.messages.append(['User', context]) + while self.rounds != 0 and not self._termination_message_received(): + next_agent = self._choose_next_agent() + if self.debug_mode: + print( + f'{"-" * 5}Messaging next agent : {next_agent.name}{"-" * 5}\n\n') + + self._message_next_agent(next_agent) + if self.debug_mode: + print( + f'{self.messages[-1][0]}\n\n{self.messages[-1][1]}') + + self.rounds -= 1 + final_solution = self.solution_summarizer( + messages=self._parse_messages()).get('answer') + + if self.debug_mode: + print(final_solution) + + return [self.messages, final_solution] + + async def _a_message_next_agent(self, next_agent=None): + """ + Sends a message to the next agent in the list and receives a response. + + Args: + next_agent (Agent, optional): The next agent to send the message to. If not provided, + the next agent in the list will be selected. Defaults to None. + + Returns: + None + """ + if next_agent is None: + next_agent = self.agents[self.current_agent] + self.current_agent = (self.current_agent + 1) % len(self.agents) + + if next_agent.async_mode: + received_message = await next_agent.a_receive( + self.agent_string, self._parse_messages(), self.termination_message) + else: + received_message = next_agent.receive( + self.agent_string, self._parse_messages(), self.termination_message) + + self.messages.append([next_agent.name, received_message]) + + def _message_next_agent(self, next_agent=None): + """ + Sends a message to the next agent in the sequence and receives a response. + + Args: + next_agent (Agent, optional): The next agent to send the message to. If None, the next agent in the sequence is used. + + Returns: + None + """ + + if next_agent is None: + next_agent = self.agents[self.current_agent] + self.current_agent = (self.current_agent + 1) % len(self.agents) + + assert not next_agent.async_mode, "Don't use run_sequence for async agents, use a_run_sequence instead" + + received_message = next_agent.receive( + self.agent_string, self._parse_messages(), self.termination_message) + + self.messages.append([next_agent.name, received_message]) + + def _termination_message_received(self): + """ + Checks if the termination message is present in the last received message. + + Returns: + bool: True if the termination message is present, False otherwise. + """ + return self.termination_message in self.messages[-1][1] + + def _parse_messages(self): + """ + Parses the messages stored in the `self.messages` list and returns a formatted string. + + Returns: + str: A formatted string containing the parsed messages. + """ + return f'\n\n{"-"*20}'.join([f'{index}) {message[0]}\n{message[1]}' for index, message in enumerate(self.messages)]) + + def _choose_next_agent(self): + """ + Chooses the next agent based on the output of the engine. + + Returns: + The next agent to be used. + + """ + output = self.engine(agents=self.agent_string, + messages=self._parse_messages()) + if self.debug_mode: + print(f"Chosen next agent as {output.get('answer')}") + return self.agent_dict[output.get('answer')] diff --git a/nextpy/ai/agent/userproxy_agent.py b/nextpy/ai/agent/userproxy_agent.py new file mode 100644 index 00000000..5297a128 --- /dev/null +++ b/nextpy/ai/agent/userproxy_agent.py @@ -0,0 +1,79 @@ +from nextpy.ai.agent.assistant_agent import AssistantAgent +from typing import Any, Tuple, Callable + + +class UserProxyAgent(AssistantAgent): + """ + Initializes a UserProxyAgent instance. + + :param async_mode: Indicates if the agent should operate in asynchronous mode. 
+ :param functions_before_call: A tuple of functions to be called before the main function call. + :param functions_after_call: A tuple of functions to be called after the main function call. + :param description: A brief description of the agent's capabilities. + :param kwargs: Additional keyword arguments. + """ + + def __init__(self, + async_mode: bool = False, + functions_before_call: Tuple[Callable, + Tuple[Any], Tuple[Any]] | None = None, + functions_after_call: Tuple[Callable, + Tuple[Any], Tuple[Any]] | None = None, + description: str = "User Proxy Agent capable of receiving user input.", + **kwargs): + self.name = 'User' + self.description = description + self.async_mode = async_mode + self.functions_before_call = functions_before_call + self.functions_after_call = functions_after_call + + @AssistantAgent.function_call_decorator + def receive(self, *args, **kwargs): + """ + Receives messages from other agents and generates a response. + + :param agents: The list of agents involved in the conversation. + :type agents: List[str] + :param messages: The list of messages in the conversation. + :type messages: List[str] + :param termination_message: The termination message for the conversation. + :type termination_message: str + :return: The generated response. + :rtype: str + """ + return self._receive_user_input() + + @AssistantAgent.function_call_decorator + async def a_receive(self, *args, **kwargs): + """ + Asynchronously receives messages from other agents and generates a response. + + :param agents: The list of agents involved in the conversation. + :type agents: List[str] + :param messages: The list of messages in the conversation. + :type messages: List[str] + :param termination_message: The termination message for the conversation. + :type termination_message: str + :return: The generated response. + :rtype: str + """ + return await self.a_receive_user_input() + + def _receive_user_input(self): + """ + Receives user input and returns it as the response. + + :return: The user input. + :rtype: str + """ + return input('Provide feedback to chat_manager:') + + async def a_receive_user_input(self): + """ + Asynchronously receives user input and returns it as the response. + + :return: The user input. + :rtype: str + """ + import asyncio + return await asyncio.to_thread(input, 'Provide feedback to chat_manager:') diff --git a/nextpy/ai/config.py b/nextpy/ai/config.py deleted file mode 100644 index 9704dd7a..00000000 --- a/nextpy/ai/config.py +++ /dev/null @@ -1,53 +0,0 @@ -# This file has been modified by the Nextpy Team in 2023 using AI tools and automation scripts. -# We have rigorously tested these modifications to ensure reliability and performance. Based on successful test results, we are confident in the quality and stability of these changes. - -"""Env values will be passed from from os.environ -Automatically loads environment variables from .env file. 
-""" - -import os -from pathlib import Path -from typing import Optional - -import yaml -from dotenv import load_dotenv -from pydantic import BaseSettings - -load_dotenv("./.env") - - -CONFIG_FILE = "config.yaml" -ROOT_DIR = os.path.dirname(Path(__file__).parent.parent) -config_path = ROOT_DIR + "/" + CONFIG_FILE - -if os.path.exists(config_path): - with open(config_path, "r") as file: - config_data = yaml.safe_load(file) - -else: - config_data = {"OPENAI_API_KEY": ""} - config_data["OPENAI_API_KEY"] = input("Enter OPENAI_API_KEY:") - config_data["OPENAI_ORG_ID"] = input("Enter OPENAI_ORG_ID:") - config_data["SERP_API_KEY"] = input("Enter SERP_API_KEY:") - config_data["GOOGLE_SEARCH_API_KEY"] = input("Enter GOOGLE_SEARCH_API_KEY:") - - -class Config: - OPENAI_API_KEY = config_data["OPENAI_API_KEY"] - OPENAI_ORG_ID = config_data["OPENAI_ORG_ID"] - SERP_API_KEY = config_data["SERP_API_KEY"] - GOOGLE_SEARCH_API_KEY = config_data["GOOGLE_SEARCH_API_KEY"] - - -class AgentBoxSettings(BaseSettings): - """AgentBox API Config.""" - - VERBOSE: bool = False - SHOW_INFO: bool = True - - AGENTBOX_API_KEY: Optional[str] = None - AGENTBOX_BASE_URL: str = "https://agentboxapi.com/api/v1" - AGENTBOX_TIMEOUT: int = 20 - - -settings = AgentBoxSettings() diff --git "a/nextpy/ai/config.\360\237\244\226" "b/nextpy/ai/config.\360\237\244\226" deleted file mode 100644 index 52d94419..00000000 --- "a/nextpy/ai/config.\360\237\244\226" +++ /dev/null @@ -1,41 +0,0 @@ -llm: - type: "OpenAI" - model: "gpt-3.5-turbo" -rag: - data_source: "./test_data/meteoric" - data_loader: "SimpleDirectoryReader" - data_transformer: - type: "CharacterTextSplitter" - chunk_overlap: 40 - chunk_size: 1024 - vector_store: - type: "Chroma" - embedding_function: "OpenAIEmbeddings" -agent: - type: "ChatAgent" - prompt_template: | - {{#user~}} - You will use this FORMAT only to answer user's QUERY - FORMAT: {{format}} - QUERY: {{input}} - - Use the below knowledge to answer QUERY in given FORMAT:- - {{RETRIEVED_KNOWLEDGE}} - {{~/user}} - - {{#assistant~}} - Yes, I will tell you about with that - {{~/assistant}} - - {{#user~}} - Yes, tell me - {{~/user}} - - {{#assistant~}} - {{gen 'response' temperature=0 max_tokens=300}} - {{~/assistant}} - input_variables: - knowledge_variable: "input" - extras: "format" - output_key: "response" - diff --git a/nextpy/ai/config/__init__.py b/nextpy/ai/config/__init__.py new file mode 100644 index 00000000..e6ee1211 --- /dev/null +++ b/nextpy/ai/config/__init__.py @@ -0,0 +1 @@ +# init file for config diff --git a/nextpy/ai/finetune/LLMFinetune.py b/nextpy/ai/finetune/LLMFinetune.py deleted file mode 100644 index 82839a08..00000000 --- a/nextpy/ai/finetune/LLMFinetune.py +++ /dev/null @@ -1,27 +0,0 @@ -# This file has been modified by the Nextpy Team in 2023 using AI tools and automation scripts. -# We have rigorously tested these modifications to ensure reliability and performance. Based on successful test results, we are confident in the quality and stability of these changes. 
- -from abc import ABC, abstractmethod -from logging import Logger - -import openai - - -class LLMFinetune(ABC): - def __init__(self, logger: Logger, openai_key: str): - self.logger = logger - openai.api_key = openai_key - - @abstractmethod - def transform_data( - self, - train_csv_file: str, - val_csv_file: str, - train_output_file: str, - val_output_file: str, - ) -> str: - pass - - @abstractmethod - def finetune(self, **kwargs): - pass diff --git a/nextpy/ai/finetune/openai_finetune.py b/nextpy/ai/finetune/openai_finetune.py deleted file mode 100644 index e0f5c966..00000000 --- a/nextpy/ai/finetune/openai_finetune.py +++ /dev/null @@ -1,207 +0,0 @@ -# This file has been modified by the Nextpy Team in 2023 using AI tools and automation scripts. -# We have rigorously tested these modifications to ensure reliability and performance. Based on successful test results, we are confident in the quality and stability of these changes. - -import csv -import json -import logging -import time -from logging import Logger -from typing import List, Optional - -import openai - -from . import LLMFinetune - -# openai.organization = "YOUR_ORG_ID" -# APIKEY -# openai.Model.list() - - -class OpenaiFinetune(LLMFinetune): - def __init__(self, logger: Logger, openai_key: str): - self.logger = logger - openai.api_key = openai_key - - def transform_data( - self, - train_csv_file: str, - val_csv_file: str, - train_output_file: str, - val_output_file: str, - llm_model: str = "openai", - ) -> str: - """Transforms CSV files into JSONL and creates files for fine-tuning.""" - # Verify llm_model - if llm_model != "openai": - raise ValueError("Unsupported model:", llm_model) - - # Paths and Output files - paths = [train_csv_file, val_csv_file] - output_files = [train_output_file, val_output_file] - - # Extracting prompt-completion pairs - prompt_completion_pairs = [] - for csv_file in paths: - with open(csv_file, "r") as f: - reader = csv.reader(f) - for row in reader: - if len(row) >= 2: - prompt = row[0] - completion = row[1] - prompt_completion_pairs.append((prompt, completion)) - - # Writing to JSONL - for output_file, pairs in zip(output_files, prompt_completion_pairs): - with open(output_file, "w") as f: - for pair in pairs: - json_obj = {"prompt": pair[0], "completion": pair[1]} - json_str = json.dumps(json_obj) - f.write(json_str + "\n") - - # Creating Files - ids = [] - for output_file in output_files: - if not output_file.endswith(".jsonl"): - raise Exception( - "args `output_file` must be the **file** path to the .jsonl file" - ) - try: - _ = openai.File.create( - file=open(output_file, "rb"), purpose="fine-tune" - ) - ids.append(_) - except Exception as e: - self.logger.error(f"Error creating file: {e}") - raise e - - return output_files, ids - - # TODO: Specify use of the method - # def model( - # self, - # model_name: str, - # input: str, - # instruction: str, - # n: int, - # temperature: float, - # top_p: float, - # ): - # try: - # model = openai.Edit.create( - # model=model_name, - # temperature=temperature, - # top_p=top_p, - # input=input, - # instruction=instruction, - # n=n, - # ) - # return model - # except Exception as e: - # self.logger.error(f"Error creating model: {e}") - # raise e - - def finetune( - self, - training_file: str, - model_name: Optional[str] = "curie", - n_epoch: Optional[int] = 4, - validation_file: Optional[str] = None, - batch_size: Optional[int] = None, - learning_rate_multiplier: Optional[int] = None, - prompt_loss_weight: Optional[int] = 0.01, - 
compute_classification_metrics: Optional[bool] = False, - classification_n_classes: Optional[int] = None, - classification_positive_class: Optional[str] = None, - classification_betas: Optional[List[float]] = None, - suffix: Optional[str] = None, - ): - """_summary_. - - Args: - training_file (str): The ID of an uploaded file that contains training data. - model_name (Optional[str], optional): The name of the base model to fine-tune. You can select one of "ada", "babbage", "curie", "davinci", or a fine-tuned model created after 2022-04-21. Defaults to "curie". - n_epoch (Optional[int], optional): Number of epochs to train the model for. Defaults to 4. - validation_file (Optional[str], optional): The ID of an uploaded file that contains validation data. Defaults to None. - batch_size (Optional[int], optional): Batch size to use for training. Defaults to None. - learning_rate_multiplier (Optional[int], optional): Learning rate multiplier to use for training. Defaults to None. - prompt_loss_weight (Optional[int], optional): Weight to use for loss on the prompt tokens. Defaults to 0.01. - compute_classification_metrics (Optional[bool], optional): If True, classification metrics such as accuracy and f1-score are computed for validation set. Defaults to False. - classification_n_classes (Optional[int], optional): Number of classes in a classification task. Defaults to None. - classification_positive_class (Optional[str], optional): This parameter is needed to generate precision, recall, and F1 metrics when doing binary classification. Defaults to None. - classification_betas (Optional[List[float]], optional): If this is provided, we calculate F-beta scores at the specified beta values. Defaults to None. - suffix (Optional[str], optional): A string of up to 40 characters that will be added to your fine-tuned model name. Defaults to None. 
- - Raises: - e: Errors generated while creating fine-tune job - Exception: If fine-tuning job fails - - Returns: - _type_: _description_ - """ - # openai.FineTune.create(training_file="file-XGinujblHPwGLSztz8cPS8XY") - - job_id = None - try: - job_id = openai.FineTune.create( - training_file=training_file, - model=model_name, - n_epochs=n_epoch, - validation_file=validation_file, - batch_size=batch_size, - learning_rate_multiplier=learning_rate_multiplier, - prompt_loss_weight=prompt_loss_weight, - compute_classification_metrics=compute_classification_metrics, - classification_n_classes=classification_n_classes, - classification_positive_class=classification_positive_class, - classification_betas=classification_betas, - suffix=suffix, - ) - while openai.FineTune.retrieve(job_id.get("id")).get("status") == "pending": - time.sleep(1) - self.logger.info( - "Fine-tuning job status: %s", - openai.FineTune.retrieve(job_id.get("id")).get("status"), - ) - - if openai.FineTune.retrieve(job_id.get("id")).get("status") == "failed": - self.logger.error("Fine-tuning job failed") - raise Exception("Fine-tuning job failed") - - self.logger.info("Fine-tuning job completed successfully") - return job_id - - except Exception as e: - self.logger.error(f"Error creating fine-tune job: {e}") - raise e - - -if __name__ == "__main__": - from creds import OPENAI_KEY - - logger = logging.getLogger(__name__) - logger.setLevel(logging.DEBUG) - logger.addHandler(logging.StreamHandler()) - finetune = Finetune(logger, openai_key=OPENAI_KEY) - train_path, val_path = finetune.generate_jsonl_from_csv( - "sports_train.csv", "sports_val.csv", "sports_train.jsonl", "sports_val.jsonl" - ) - output_paths, ids = finetune.create_file(output_files=[train_path, val_path]) - train_file, val_file = output_paths - train_id, val_id = ids - job_id = finetune.finetune( - training_file=train_id.get("id"), - n_epoch=1, - validation_file=val_id.get("id"), - suffix="sports", - batch_size=4, - compute_classification_metrics=True, - classification_n_classes=2, - classification_positive_class="hockey", - classification_betas=[0.5, 1, 2], - prompt_loss_weight=0.01, - model_name="curie", - learning_rate_multiplier=1.0, - ) - print("#" * 5, end="\n\n") - print(type(openai.FineTune.retrieve(job_id.get("id")))) - print(openai.FineTune.retrieve(job_id.get("id"))) diff --git a/nextpy/ai/finetune/transformer_finetune.py b/nextpy/ai/finetune/transformer_finetune.py deleted file mode 100644 index dc7afa27..00000000 --- a/nextpy/ai/finetune/transformer_finetune.py +++ /dev/null @@ -1,86 +0,0 @@ -# This file has been modified by the Nextpy Team in 2023 using AI tools and automation scripts. -# We have rigorously tested these modifications to ensure reliability and performance. Based on successful test results, we are confident in the quality and stability of these changes. - -from logging import Logger - -from datasets import load_dataset -from transformers import AutoModelForCausalLM, AutoTokenizer, Trainer, TrainingArguments - -from . 
import LLMFinetune - - -class TransformersFinetune(LLMFinetune): - def __init__(self, logger: Logger, base_model: str): - super().__init__(logger, openai_key=None) - self.model = AutoModelForCausalLM.from_pretrained(base_model) - self.tokenizer = AutoTokenizer.from_pretrained(base_model) - - def transform_data( - self, - train_csv_file: str, - val_csv_file: str, - train_output_file: str, - val_output_file: str, - ) -> str: - # Implement logic to transform CSV files to desired JSON or other formats - # You can load, process, and save the CSV data here - # Return the path or message confirming the transformation - pass - - def finetune( - self, - data_path, - output_dir, - num_epochs=1, - batch_size=32, - learning_rate=5e-5, - val_set_size=0.1, - max_length=512, - ): - # Load dataset - data = load_dataset("json", data_files={"train": data_path}) - - # Split data into training and validation sets - train_val = data["train"].train_test_split( - test_size=val_set_size, shuffle=True, seed=42 - ) - train_data = train_val["train"] - valid_data = train_val["test"] - - # Tokenization function - def tokenize_function(examples): - return self.tokenizer( - examples["text"], - truncation=True, - max_length=max_length, - padding="max_length", - ) - - # Tokenize dataset - train_data = train_data.map(tokenize_function, batched=True) - valid_data = valid_data.map(tokenize_function, batched=True) - - # Training arguments - training_args = TrainingArguments( - per_device_train_batch_size=batch_size, - per_device_eval_batch_size=batch_size, - num_train_epochs=num_epochs, - learning_rate=learning_rate, - output_dir=output_dir, - evaluation_strategy="steps" if val_set_size > 0 else "no", - logging_dir="./logs", - ) - - # Trainer - trainer = Trainer( - model=self.model, - args=training_args, - train_dataset=train_data, - eval_dataset=valid_data, - ) - - # Training - trainer.train() - - # Save model - self.model.save_pretrained(output_dir) diff --git a/nextpy/ai/hooks/__init__.py b/nextpy/ai/hooks/__init__.py new file mode 100644 index 00000000..9cd84a05 --- /dev/null +++ b/nextpy/ai/hooks/__init__.py @@ -0,0 +1 @@ +# init file for hooks diff --git a/nextpy/ai/hooks/hook_base.py b/nextpy/ai/hooks/hook_base.py new file mode 100644 index 00000000..7ce95f18 --- /dev/null +++ b/nextpy/ai/hooks/hook_base.py @@ -0,0 +1 @@ +# base class for all hooks diff --git a/nextpy/ai/hooks/hook_manager.py b/nextpy/ai/hooks/hook_manager.py new file mode 100644 index 00000000..763640be --- /dev/null +++ b/nextpy/ai/hooks/hook_manager.py @@ -0,0 +1 @@ +# manager to retrieve and register hooks diff --git a/nextpy/ai/models/audio/README.md b/nextpy/ai/models/audio/README.md deleted file mode 100644 index 2709c840..00000000 --- a/nextpy/ai/models/audio/README.md +++ /dev/null @@ -1,61 +0,0 @@ -Source : https://github.com/Shaunwei/RealChar/tree/main ( RealChar. - Your Realtime AI Character) - -# ElevenLabs Voice Cloning Guide - - -This README serves as a guide on how to use ElevenLabs for voice cloning. Follow the steps below to clone a voice, test it, and fine-tune it for the best results. - -## Collecting Data - -Before you start, you'll need voice data. Download high quality vocal only audio clips. Check the [training_data](.ai-example/audio/training_data) folder for reference. - -If you're creating your own dataset, ensure the audio is high quality. It should have no background noise, clear pronunciation. - -The audio format must be mp3 and should be about 1 minute long in total. 
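# Illustrative sketch of preparing a voice sample as described above, using
# pydub (also used by the Whisper transcriber elsewhere in this tree). The
# input path is hypothetical; the goal is mp3 output totalling roughly one
# minute of clean speech.
from pydub import AudioSegment

clip = AudioSegment.from_file("my_voice_sample.wav")   # assumed local file
print(f"duration: {clip.duration_seconds:.1f}s")       # aim for ~60s in total
clip.export("my_voice_sample.mp3", format="mp3")       # ElevenLabs wants mp3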
- -## Creating an ElevenLabs Account - -Visit [ElevenLabs](https://beta.elevenlabs.io/) to create an account. You'll need this to access the speech synthesis and voice cloning features. - -Get your `ELEVEN_LABS_API_KEY`: -1. Click profile icon and select 'profile'. -2. Copy API Key - -## Speech Synthesis/Voice Cloning - -Follow these steps to clone a voice: - -1. Go to the [speech synthesis page](https://beta.elevenlabs.io/speech-synthesis). -2. Click "Add Voice". -3. Click "Add Generative or Cloned Voice". -4. Click "Instant Voice Cloning". -5. Fill in all the required information and upload your audio samples. -6. Click "Add Voice". - -## Testing Your Voice - -To test the voice you've just created: - -1. Go back to the [speech synthesis page](https://beta.elevenlabs.io/speech-synthesis). -2. Choose the voice you just created in Settings. -4. Type some text and click "Generate". - -## Fine-tuning Your Voice - -You can make the voice read better by adjusting system and user prompts. -Here are some tips: - -- If the voice is too monotone, lower the Stability to make it more emotional. However, setting the Stability to zero can sometimes lead to a strange accent. -- Longer sentences tend to be spoken better because they provide more context for the AI speaker to understand. -- For shorter sentences that are spoken too quickly, replace "." with "...". Add "-" or a newline for a pause. -- Add emotion-related words or phrases, or use punctuation marks like “!”, “?” to add emotions to the voice. - -## Using Your Custom Voice in Our Project - -You need the voice id of cloned voice. Here's how: -1. go to https://api.elevenlabs.io/docs -2. choose Get Voices api -3. follow the instruction and find the specific voice_id in the Responses. -4. Do not forget to update your .env file with `ELEVEN_LABS_API_KEY` and voice ids. - - diff --git a/nextpy/ai/models/audio/__init__.py b/nextpy/ai/models/audio/__init__.py deleted file mode 100644 index 847433fd..00000000 --- a/nextpy/ai/models/audio/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -# This file has been modified by the Nextpy Team in 2023 using AI tools and automation scripts. -# We have rigorously tested these modifications to ensure reliability and performance. Based on successful test results, we are confident in the quality and stability of these changes. - diff --git a/nextpy/ai/models/audio/speech_to_text/__init__.py b/nextpy/ai/models/audio/speech_to_text/__init__.py deleted file mode 100644 index 8922896f..00000000 --- a/nextpy/ai/models/audio/speech_to_text/__init__.py +++ /dev/null @@ -1,27 +0,0 @@ -# This file has been modified by the Nextpy Team in 2023 using AI tools and automation scripts. -# We have rigorously tested these modifications to ensure reliability and performance. Based on successful test results, we are confident in the quality and stability of these changes. 
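# Illustrative sketch of the "Get Voices" lookup described in the README
# above. The /v1/voices endpoint and response shape are assumptions based on
# ElevenLabs' public API docs; the xi-api-key header matches the streaming
# client further below.
import os
import httpx

resp = httpx.get(
    "https://api.elevenlabs.io/v1/voices",
    headers={"xi-api-key": os.environ["ELEVEN_LABS_API_KEY"]},
)
for voice in resp.json().get("voices", []):
    print(voice["voice_id"], voice["name"])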
- -import os - -from nextpy.ai.models.audio.speech_to_text.base import SpeechToText - - -def get_speech_to_text() -> SpeechToText: - use = os.getenv("SPEECH_TO_TEXT_USE", "LOCAL_WHISPER") - if use == "GOOGLE": - from nextpy.ai.audio.speech_to_text.google import Google - - Google.initialize() - return Google.get_instance() - elif use == "LOCAL_WHISPER": - from nextpy.ai.audio.speech_to_text.whisper import Whisper - - Whisper.initialize(use="local") - return Whisper.get_instance() - elif use == "OPENAI_WHISPER": - from nextpy.ai.audio.speech_to_text.whisper import Whisper - - Whisper.initialize(use="api") - return Whisper.get_instance() - else: - raise NotImplementedError(f"Unknown speech to text engine: {use}") diff --git a/nextpy/ai/models/audio/speech_to_text/base.py b/nextpy/ai/models/audio/speech_to_text/base.py deleted file mode 100644 index d30cb846..00000000 --- a/nextpy/ai/models/audio/speech_to_text/base.py +++ /dev/null @@ -1,13 +0,0 @@ -# This file has been modified by the Nextpy Team in 2023 using AI tools and automation scripts. -# We have rigorously tested these modifications to ensure reliability and performance. Based on successful test results, we are confident in the quality and stability of these changes. - -from abc import ABC, abstractmethod - - -class SpeechToText(ABC): - @abstractmethod - def transcribe( - self, audio_bytes, platform="web", prompt="", language="en-US" - ) -> str: - # platform: 'web' | 'mobile' | 'terminal' - pass diff --git a/nextpy/ai/models/audio/speech_to_text/google.py b/nextpy/ai/models/audio/speech_to_text/google.py deleted file mode 100644 index 8f7d4df8..00000000 --- a/nextpy/ai/models/audio/speech_to_text/google.py +++ /dev/null @@ -1,55 +0,0 @@ -# This file has been modified by the Nextpy Team in 2023 using AI tools and automation scripts. -# We have rigorously tested these modifications to ensure reliability and performance. Based on successful test results, we are confident in the quality and stability of these changes. 
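# Illustrative sketch of how the (now removed) factory above selected a
# transcriber via the SPEECH_TO_TEXT_USE environment variable. The audio
# bytes are hypothetical.
import os

os.environ["SPEECH_TO_TEXT_USE"] = "LOCAL_WHISPER"
stt = get_speech_to_text()  # returns the Whisper singleton in this mode
text = stt.transcribe(audio_bytes, platform="terminal", prompt="", language="en-US")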
- -import types - -from google.cloud import speech - -from nextpy.ai.models.audio.speech_to_text.base import SpeechToText -from nextpy.utils.logger import get_logger -from nextpy.utils.singleton import Singleton - -logger = get_logger(__name__) -config = types.SimpleNamespace( - **{ - "web": { - "encoding": speech.RecognitionConfig.AudioEncoding.WEBM_OPUS, - "sample_rate_hertz": 48000, - "language_code": "en-US", - "max_alternatives": 1, - }, - "terminal": { - "encoding": speech.RecognitionConfig.AudioEncoding.LINEAR16, - "sample_rate_hertz": 44100, - "language_code": "en-US", - "max_alternatives": 1, - }, - } -) - - -class Google(Singleton, SpeechToText): - def __init__(self): - super().__init__() - logger.info("Setting up [Google Speech to Text]...") - self.client = speech.SpeechClient() - - def transcribe(self, audio_bytes, platform, prompt="", language="en-US") -> str: - batch_config = speech.RecognitionConfig( - { - "speech_contexts": [speech.SpeechContext(phrases=prompt.split(","))], - **config.__dict__[platform], - } - ) - batch_config.language_code = language - if language != "en-US": - batch_config.alternative_language_codes = ["en-US"] - response = self.client.recognize( - config=batch_config, audio=speech.RecognitionAudio(content=audio_bytes) - ) - if not response.results: - return "" - result = response.results[0] - if not result.alternatives: - return "" - return result.alternatives[0].transcript diff --git a/nextpy/ai/models/audio/speech_to_text/whisper.py b/nextpy/ai/models/audio/speech_to_text/whisper.py deleted file mode 100644 index 9252a45e..00000000 --- a/nextpy/ai/models/audio/speech_to_text/whisper.py +++ /dev/null @@ -1,103 +0,0 @@ -# This file has been modified by the Nextpy Team in 2023 using AI tools and automation scripts. -# We have rigorously tested these modifications to ensure reliability and performance. Based on successful test results, we are confident in the quality and stability of these changes. - -import io -import os -import types -import wave - -import speech_recognition as sr -from faster_whisper import WhisperModel -from pydub import AudioSegment -from torch.cuda import is_available as is_cuda_available - -from nextpy.ai.models.audio.speech_to_text.base import SpeechToText -from nextpy.utils.logger import get_logger -from nextpy.utils.singleton import Singleton - -DEBUG = False -logger = get_logger(__name__) -config = types.SimpleNamespace( - **{ - "model": os.getenv("LOCAL_WHISPER_MODEL", "base"), - "language": "en", - "api_key": os.getenv("OPENAI_API_KEY"), - } -) - -# Whisper use a shorter version for language code. Provide a mapping to convert -# from the standard language code to the whisper language code. -WHISPER_LANGUAGE_CODE_MAPPING = { - "en-US": "en", - "es-ES": "es", - "fr-FR": "fr", - "de-DE": "de", - "it-IT": "it", - "pt-PT": "pt", - "hi-IN": "hi", - "pl-PL": "pl", -} - - -class Whisper(Singleton, SpeechToText): - def __init__(self, use="local"): - super().__init__() - if use == "local": - device = "cuda" if is_cuda_available() else "cpu" - logger.info( - f"Loading [Local Whisper] model: [{config.model}]({device}) ..." 
- ) - self.model = WhisperModel( - model_size_or_path=config.model, - device="auto", - download_root=None, - ) - self.recognizer = sr.Recognizer() - self.use = use - if DEBUG: - self.wf = wave.open("output.wav", "wb") - self.wf.setnchannels(1) # Assuming mono audio - self.wf.setsampwidth(2) # Assuming 16-bit audio - self.wf.setframerate(44100) # Assuming 44100Hz sample rate - - def transcribe(self, audio_bytes, platform, prompt="", language="en-US"): - logger.info("Transcribing audio...") - if platform == "web": - audio = self._convert_webm_to_wav(audio_bytes, self.use == "local") - else: - audio = self._convert_bytes_to_wav(audio_bytes, self.use == "local") - if self.use == "local": - return self._transcribe(audio, prompt) - elif self.use == "api": - return self._transcribe_api(audio, prompt) - - def _transcribe(self, audio, prompt="", language="en-US"): - language = WHISPER_LANGUAGE_CODE_MAPPING.get(language, config.language) - segs, _ = self.model.transcribe( - audio, language=language, vad_filter=True, initial_prompt=prompt - ) - text = " ".join([seg.text for seg in segs]) - return text - - def _transcribe_api(self, audio, prompt=""): - text = self.recognizer.recognize_whisper_api( - audio, - api_key=config.api_key, - ) - return text - - def _convert_webm_to_wav(self, webm_data, local=True): - webm_audio = AudioSegment.from_file(io.BytesIO(webm_data), format="webm") - wav_data = io.BytesIO() - webm_audio.export(wav_data, format="wav") - if local: - return wav_data - with sr.AudioFile(wav_data) as source: - audio = self.recognizer.record(source) - return audio - - def _convert_bytes_to_wav(self, audio_bytes, local=True): - if local: - audio = io.BytesIO(sr.AudioData(audio_bytes, 44100, 2).get_wav_data()) - return audio - return sr.AudioData(audio_bytes, 44100, 2) diff --git a/nextpy/ai/models/audio/text_to_speech/__init__.py b/nextpy/ai/models/audio/text_to_speech/__init__.py deleted file mode 100644 index 7d19b4f5..00000000 --- a/nextpy/ai/models/audio/text_to_speech/__init__.py +++ /dev/null @@ -1,28 +0,0 @@ -# This file has been modified by the Nextpy Team in 2023 using AI tools and automation scripts. -# We have rigorously tested these modifications to ensure reliability and performance. Based on successful test results, we are confident in the quality and stability of these changes. - -import os - -from nextpy.ai.models.audio.text_to_speech.base import TextToSpeech - - -def get_text_to_speech(tts: str = None) -> TextToSpeech: - if not tts: - tts = os.getenv("TEXT_TO_SPEECH_USE", "ELEVEN_LABS") - if tts == "ELEVEN_LABS": - from nextpy.ai.audio.text_to_speech.elevenlabs import ElevenLabs - - ElevenLabs.initialize() - return ElevenLabs.get_instance() - elif tts == "GOOGLE_TTS": - from nextpy.ai.audio.text_to_speech.google_cloud_tts import GoogleCloudTTS - - GoogleCloudTTS.initialize() - return GoogleCloudTTS.get_instance() - elif tts == "UNREAL_SPEECH": - from nextpy.ai.audio.text_to_speech.unreal_speech import UnrealSpeech - - UnrealSpeech.initialize() - return UnrealSpeech.get_instance() - else: - raise NotImplementedError(f"Unknown text to speech engine: {tts}") diff --git a/nextpy/ai/models/audio/text_to_speech/base.py b/nextpy/ai/models/audio/text_to_speech/base.py deleted file mode 100644 index c972e822..00000000 --- a/nextpy/ai/models/audio/text_to_speech/base.py +++ /dev/null @@ -1,10 +0,0 @@ -# This file has been modified by the Nextpy Team in 2023 using AI tools and automation scripts. -# We have rigorously tested these modifications to ensure reliability and performance. 
Based on successful test results, we are confident in the quality and stability of these changes. - -from abc import ABC, abstractmethod - - -class TextToSpeech(ABC): - @abstractmethod - async def stream(self, *args, **kwargs): - pass diff --git a/nextpy/ai/models/audio/text_to_speech/elevenlabs.py b/nextpy/ai/models/audio/text_to_speech/elevenlabs.py deleted file mode 100644 index 434eebae..00000000 --- a/nextpy/ai/models/audio/text_to_speech/elevenlabs.py +++ /dev/null @@ -1,74 +0,0 @@ -# This file has been modified by the Nextpy Team in 2023 using AI tools and automation scripts. -# We have rigorously tested these modifications to ensure reliability and performance. Based on successful test results, we are confident in the quality and stability of these changes. - -import asyncio -import os -import types - -import httpx - -from nextpy.ai.models.audio.text_to_speech.base import TextToSpeech -from nextpy.utils.logger import get_logger -from nextpy.utils.singleton import Singleton - -logger = get_logger(__name__) -DEBUG = False - -config = types.SimpleNamespace( - **{ - "chunk_size": 1024, - "url": "https://api.elevenlabs.io/v1/text-to-speech/{voice_id}/stream", - "headers": { - "Accept": "audio/mpeg", - "Content-Type": "application/json", - "xi-api-key": os.environ["ELEVEN_LABS_API_KEY"], - }, - "data": { - "model_id": "eleven_monolingual_v1", - "voice_settings": {"stability": 0.5, "similarity_boost": 0.75}, - }, - } -) - - -class ElevenLabs(Singleton, TextToSpeech): - def __init__(self): - super().__init__() - logger.info("Initializing [ElevenLabs Text To Speech] voices...") - - async def stream( - self, - text, - websocket, - tts_event: asyncio.Event, - voice_id="21m00Tcm4TlvDq8ikWAM", - first_sentence=False, - language="en-US", - ) -> None: - if DEBUG: - return - if voice_id == "": - logger.info( - f"voice_id is not found in .env file, using ElevenLabs default voice" - ) - voice_id = "21m00Tcm4TlvDq8ikWAM" - headers = config.headers - if language != "en-US": - config.data["model_id"] = "eleven_multilingual_v1" - data = { - "text": text, - **config.data, - } - url = config.url.format(voice_id=voice_id) - if first_sentence: - url = url + "?optimize_streaming_latency=4" - async with httpx.AsyncClient() as client: - response = await client.post(url, json=data, headers=headers) - if response.status_code != 200: - logger.error(f"ElevenLabs returns response {response.status_code}") - async for chunk in response.aiter_bytes(): - await asyncio.sleep(0.1) - if tts_event.is_set(): - # stop streaming audio - break - await websocket.send_bytes(chunk) diff --git a/nextpy/ai/models/audio/text_to_speech/google_cloud_tts.py b/nextpy/ai/models/audio/text_to_speech/google_cloud_tts.py deleted file mode 100644 index 0abc11a1..00000000 --- a/nextpy/ai/models/audio/text_to_speech/google_cloud_tts.py +++ /dev/null @@ -1,100 +0,0 @@ -# This file has been modified by the Nextpy Team in 2023 using AI tools and automation scripts. -# We have rigorously tested these modifications to ensure reliability and performance. Based on successful test results, we are confident in the quality and stability of these changes. 
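# Illustrative sketch of the ElevenLabs streaming call above without the
# websocket plumbing: httpx's client.stream() yields audio chunks as they
# arrive. The voice id is the ElevenLabs default used in the code above; the
# output path is hypothetical.
import os
import httpx

url = "https://api.elevenlabs.io/v1/text-to-speech/21m00Tcm4TlvDq8ikWAM/stream"
headers = {"xi-api-key": os.environ["ELEVEN_LABS_API_KEY"],
           "Content-Type": "application/json"}
data = {"text": "Hello there!", "model_id": "eleven_monolingual_v1"}

with httpx.Client() as client, open("out.mp3", "wb") as f:
    with client.stream("POST", url, json=data, headers=headers) as resp:
        for chunk in resp.iter_bytes():
            f.write(chunk)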
- -import asyncio -import base64 -import os -import types - -import google.auth.transport.requests -import httpx -from google.oauth2 import service_account - -from nextpy.ai.models.audio.text_to_speech.base import TextToSpeech -from nextpy.utils.logger import get_logger -from nextpy.utils.singleton import Singleton - -logger = get_logger(__name__) -DEBUG = False - -config = types.SimpleNamespace( - **{ - "url": "https://texttospeech.googleapis.com/v1/text:synthesize", - "headers": { - "Content-Type": "application/json", - }, - "data": { - "voice": { - "languageCode": "en-US", - "name": "en-US-Studio-M", - "ssmlGender": "NEUTRAL", - }, - "audioConfig": {"audioEncoding": "MP3"}, - }, - "service_account_file": os.getenv( - "GOOGLE_APPLICATION_CREDENTIALS", "default/path.json" - ), - } -) - - -class GoogleCloudTTS(Singleton, TextToSpeech): - def __init__(self): - super().__init__() - logger.info("Initializing [Google Cloud Text To Speech] voices...") - - # Load the service account key - credentials = service_account.Credentials.from_service_account_file( - config.service_account_file, - scopes=["https://www.googleapis.com/auth/cloud-platform"], - ) - - # Request an access token - auth_req = google.auth.transport.requests.Request() - credentials.refresh(auth_req) - - # Now credentials.valid is True and credentials.token contains the access token - self.access_token = credentials.token - - # Set the Authorization header with the access token - config.headers["Authorization"] = f"Bearer {self.access_token}" - - async def stream( - self, - text, - websocket, - tts_event: asyncio.Event, - voice_id="en-US-Standard-C", - first_sentence=False, - language="en-US", - ) -> None: - if DEBUG: - return - if voice_id == "": - logger.info( - "voice_id is not found in .env file, using Google default voice" - ) - voice_id = "en-US-Standard-C" - headers = config.headers - # For customized voices - - # if language != 'en-US': - # config.data["voice"]["languageCode"] = language - # config.data["voice"]["name"] = voice_id - data = { - "input": {"text": text}, - **config.data, - } - url = config.url - async with httpx.AsyncClient() as client: - response = await client.post(url, json=data, headers=headers) - # Google Cloud TTS API does not support streaming, we send the whole content at once - if response.status_code != 200: - logger.error( - f"Google Cloud TTS returns response {response.status_code}" - ) - else: - audio_content = response.content - # Decode the base64-encoded audio content - audio_content = base64.b64decode(audio_content) - await websocket.send_bytes(audio_content) diff --git a/nextpy/ai/models/audio/text_to_speech/unreal_speech.py b/nextpy/ai/models/audio/text_to_speech/unreal_speech.py deleted file mode 100644 index 7f09a651..00000000 --- a/nextpy/ai/models/audio/text_to_speech/unreal_speech.py +++ /dev/null @@ -1,56 +0,0 @@ -# This file has been modified by the Nextpy Team in 2023 using AI tools and automation scripts. -# We have rigorously tested these modifications to ensure reliability and performance. Based on successful test results, we are confident in the quality and stability of these changes. 
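# Sketch of decoding the synthesize response above. Per Google's v1 REST API
# (an assumption worth verifying), text:synthesize returns JSON whose
# "audioContent" field holds base64-encoded audio, so the decode step would
# look roughly like this; `response` is the httpx response from the code above.
import base64

payload = response.json()
mp3_bytes = base64.b64decode(payload["audioContent"])
with open("tts_out.mp3", "wb") as f:
    f.write(mp3_bytes)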
- -import asyncio -import types - -import httpx - -from nextpy.ai.models.audio.text_to_speech.base import TextToSpeech -from nextpy.utils.logger import get_logger -from nextpy.utils.singleton import Singleton - -logger = get_logger(__name__) -DEBUG = False - -config = types.SimpleNamespace( - **{ - "chunk_size": 1024, - "url": "https://lab.api.unrealspeech.com/stream", - "headers": { - "Accept": "audio/mpeg", - "Content-Type": "application/json", - }, - "data": { - "speed": -0.2, - }, - } -) - - -class UnrealSpeech(Singleton, TextToSpeech): - def __init__(self): - super().__init__() - logger.info("Initializing [Unreal Speech] voices...") - - async def stream( - self, text, websocket, tts_event: asyncio.Event, voice_id=5, *args, **kwargs - ) -> None: - if DEBUG: - return - params = { - "text": text, - "speaker_index": voice_id, - **config.data, - } - - async with httpx.AsyncClient() as client: - response = await client.get(config.url, params=params) - if response.status_code != 200: - logger.error(f"Unreal Speech returns response {response.status_code}") - async for chunk in response.aiter_bytes(): - await asyncio.sleep(0.1) - if tts_event.is_set(): - # stop streaming audio - break - await websocket.send_bytes(chunk) diff --git a/nextpy/ai/models/embedding/__init__.py b/nextpy/ai/models/embedding/__init__.py deleted file mode 100644 index bf61f8a2..00000000 --- a/nextpy/ai/models/embedding/__init__.py +++ /dev/null @@ -1,56 +0,0 @@ -# This file has been modified by the Nextpy Team in 2023 using AI tools and automation scripts. -# We have rigorously tested these modifications to ensure reliability and performance. Based on successful test results, we are confident in the quality and stability of these changes. - -"""Wrappers around embedding modules.""" -import logging -from typing import Any - -from nextpy.ai.models.embedding.aleph_alpha import ( - AlephAlphaAsymmetricSemanticEmbedding, - AlephAlphaSymmetricSemanticEmbedding, -) -from nextpy.ai.models.embedding.bedrock import BedrockEmbeddings -from nextpy.ai.models.embedding.cohere import CohereEmbeddings -from nextpy.ai.models.embedding.dashscope import DashScopeEmbeddings -from nextpy.ai.models.embedding.deepinfra import DeepInfraEmbeddings -from nextpy.ai.models.embedding.elasticsearch import ElasticsearchEmbeddings -from nextpy.ai.models.embedding.embaas import EmbaasEmbeddings -from nextpy.ai.models.embedding.fake import FakeEmbeddings -from nextpy.ai.models.embedding.google_palm import GooglePalmEmbeddings -from nextpy.ai.models.embedding.huggingface import ( - HuggingFaceHubEmbeddings, - HuggingFaceInstructEmbeddings, - HuggingFaceSetenceTransformersEmbeddings, -) -from nextpy.ai.models.embedding.jina import JinaEmbeddings -from nextpy.ai.models.embedding.llamacpp import LlamaCppEmbeddings -from nextpy.ai.models.embedding.minimax import MiniMaxEmbeddings -from nextpy.ai.models.embedding.modelscopehub import ModelScopeEmbeddings -from nextpy.ai.models.embedding.mosaicml import MosaicMLInstructorEmbeddings -from nextpy.ai.models.embedding.openai import OpenAIEmbeddings -from nextpy.ai.models.embedding.tensorflowhub import TensorflowHubEmbeddings - -logger = logging.getLogger(__name__) - -__all__ = [ - "AlephAlphaAsymmetricSemanticEmbedding", - "AlephAlphaSymmetricSemanticEmbedding", - "BedrockEmbeddings", - "CohereEmbeddings", - "DashScopeEmbeddings", - "DeepInfraEmbeddings", - "ElasticsearchEmbeddings", - "EmbaasEmbeddings", - "FakeEmbeddings", - "GooglePalmEmbeddings", - "HuggingFaceSetenceTransformersEmbeddings", - 
"HuggingFaceInstructEmbeddings", - "HuggingFaceHubEmbeddings", - "JinaEmbeddings", - "LlamaCppEmbeddings", - "MiniMaxEmbeddings", - "ModelScopeEmbeddings", - "MosaicMLInstructorEmbeddings", - "OpenAIEmbeddings", - "TensorflowHubEmbeddings", -] diff --git a/nextpy/ai/models/embedding/aleph_alpha.py b/nextpy/ai/models/embedding/aleph_alpha.py deleted file mode 100644 index 4015f459..00000000 --- a/nextpy/ai/models/embedding/aleph_alpha.py +++ /dev/null @@ -1,183 +0,0 @@ -# This file has been modified by the Nextpy Team in 2023 using AI tools and automation scripts. -# We have rigorously tested these modifications to ensure reliability and performance. Based on successful test results, we are confident in the quality and stability of these changes. - -from typing import Any, Dict, List, Optional -from pydantic import BaseModel, root_validator -from nextpy.ai.models.embedding.base import Embeddings -from nextpy.utils.data_ops import get_from_dict_or_env - - -class AlephAlphaAsymmetricSemanticEmbedding(BaseModel, Embeddings): - - client: Any #: :meta private: - - model: Optional[str] = "luminous-base" - hosting: Optional[str] = "https://api.aleph-alpha.com" - normalize: Optional[bool] = True - compress_to_size: Optional[int] = 128 - contextual_control_threshold: Optional[int] = None - control_log_additive: Optional[bool] = True - aleph_alpha_api_key: Optional[str] = None - - @root_validator() - def validate_environment(cls, values: Dict) -> Dict: - """Validate that api key and python package exists in environment.""" - aleph_alpha_api_key = get_from_dict_or_env( - values, "aleph_alpha_api_key", "ALEPH_ALPHA_API_KEY" - ) - try: - from aleph_alpha_client import Client - except ImportError: - raise ValueError( - "Could not import aleph_alpha_client python package. " - "Please install it with `pip install aleph_alpha_client`." - ) - values["client"] = Client(token=aleph_alpha_api_key) - return values - - def embed_documents(self, texts: List[str]) -> List[List[float]]: - """Call out to Aleph Alpha's asymmetric Document endpoint. - - Args: - texts: The list of texts to embed. - - Returns: - List of embeddings, one for each text. - """ - try: - from aleph_alpha_client import ( - Prompt, - SemanticEmbeddingRequest, - SemanticRepresentation, - ) - except ImportError: - raise ValueError( - "Could not import aleph_alpha_client python package. " - "Please install it with `pip install aleph_alpha_client`." - ) - document_embeddings = [] - - for text in texts: - document_params = { - "prompt": Prompt.from_text(text), - "representation": SemanticRepresentation.Document, - "compress_to_size": self.compress_to_size, - "normalize": self.normalize, - "contextual_control_threshold": self.contextual_control_threshold, - "control_log_additive": self.control_log_additive, - } - - document_request = SemanticEmbeddingRequest(**document_params) - document_response = self.client.semantic_embed( - request=document_request, model=self.model - ) - - document_embeddings.append(document_response.embedding) - - return document_embeddings - - def embed_query(self, text: str) -> List[float]: - """Call out to Aleph Alpha's asymmetric, query embedding endpoint - Args: - text: The text to embed. - - Returns: - Embeddings for the text. - """ - try: - from aleph_alpha_client import ( - Prompt, - SemanticEmbeddingRequest, - SemanticRepresentation, - ) - except ImportError: - raise ValueError( - "Could not import aleph_alpha_client python package. " - "Please install it with `pip install aleph_alpha_client`." 
- ) - symmetric_params = { - "prompt": Prompt.from_text(text), - "representation": SemanticRepresentation.Query, - "compress_to_size": self.compress_to_size, - "normalize": self.normalize, - "contextual_control_threshold": self.contextual_control_threshold, - "control_log_additive": self.control_log_additive, - } - - symmetric_request = SemanticEmbeddingRequest(**symmetric_params) - symmetric_response = self.client.semantic_embed( - request=symmetric_request, model=self.model - ) - - return symmetric_response.embedding - - -class AlephAlphaSymmetricSemanticEmbedding(AlephAlphaAsymmetricSemanticEmbedding): - """The symmetric version of the Aleph Alpha's semantic embeddings. - - The main difference is that here, both the documents and - queries are embedded with a SemanticRepresentation.Symmetric - Example: - .. code-block:: python - - from aleph_alpha import AlephAlphaSymmetricSemanticEmbedding - - embeddings = AlephAlphaAsymmetricSemanticEmbedding() - text = "This is a test text" - - doc_result = embeddings.embed_documents([text]) - query_result = embeddings.embed_query(text) - """ - - def _embed(self, text: str) -> List[float]: - try: - from aleph_alpha_client import ( - Prompt, - SemanticEmbeddingRequest, - SemanticRepresentation, - ) - except ImportError: - raise ValueError( - "Could not import aleph_alpha_client python package. " - "Please install it with `pip install aleph_alpha_client`." - ) - query_params = { - "prompt": Prompt.from_text(text), - "representation": SemanticRepresentation.Symmetric, - "compress_to_size": self.compress_to_size, - "normalize": self.normalize, - "contextual_control_threshold": self.contextual_control_threshold, - "control_log_additive": self.control_log_additive, - } - - query_request = SemanticEmbeddingRequest(**query_params) - query_response = self.client.semantic_embed( - request=query_request, model=self.model - ) - - return query_response.embedding - - def embed_documents(self, texts: List[str]) -> List[List[float]]: - """Call out to Aleph Alpha's Document endpoint. - - Args: - texts: The list of texts to embed. - - Returns: - List of embeddings, one for each text. - """ - document_embeddings = [] - - for text in texts: - document_embeddings.append(self._embed(text)) - return document_embeddings - - def embed_query(self, text: str) -> List[float]: - """Call out to Aleph Alpha's asymmetric, query embedding endpoint - Args: - text: The text to embed. - - Returns: - Embeddings for the text. - """ - return self._embed(text) diff --git a/nextpy/ai/models/embedding/base.py b/nextpy/ai/models/embedding/base.py deleted file mode 100644 index 662b1a2a..00000000 --- a/nextpy/ai/models/embedding/base.py +++ /dev/null @@ -1,18 +0,0 @@ -# This file has been modified by the Nextpy Team in 2023 using AI tools and automation scripts. -# We have rigorously tested these modifications to ensure reliability and performance. Based on successful test results, we are confident in the quality and stability of these changes. 
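The asymmetric and symmetric classes above differ only in which `SemanticRepresentation` they request: documents and queries get distinct representations in the asymmetric model, while the symmetric variant embeds both sides identically. A condensed sketch of that distinction, reusing the client calls from the deleted classes (a valid API key and the `luminous-base` model are assumed; the token below is a placeholder):

.. code-block:: python

    from aleph_alpha_client import (
        Client,
        Prompt,
        SemanticEmbeddingRequest,
        SemanticRepresentation,
    )

    client = Client(token="YOUR_ALEPH_ALPHA_API_KEY")  # placeholder token

    def embed(text: str, representation: SemanticRepresentation) -> list:
        request = SemanticEmbeddingRequest(
            prompt=Prompt.from_text(text),
            representation=representation,
            compress_to_size=128,
        )
        return client.semantic_embed(request=request, model="luminous-base").embedding

    # Asymmetric retrieval: each side gets its own representation.
    doc_vec = embed("Rome is the capital of Italy.", SemanticRepresentation.Document)
    query_vec = embed("capital of Italy", SemanticRepresentation.Query)

    # Symmetric similarity: one representation for both sides.
    vec_a = embed("A cat sits on the mat.", SemanticRepresentation.Symmetric)
    vec_b = embed("The mat has a cat on it.", SemanticRepresentation.Symmetric)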
- -"""Interface for embedding models.""" -from abc import ABC, abstractmethod -from typing import List - - -class Embeddings(ABC): - """Interface for embedding models.""" - - @abstractmethod - def embed_documents(self, texts: List[str]) -> List[List[float]]: - """Embed search docs.""" - - @abstractmethod - def embed_query(self, text: str) -> List[float]: - """Embed query text.""" diff --git a/nextpy/ai/models/embedding/bedrock.py b/nextpy/ai/models/embedding/bedrock.py deleted file mode 100644 index 01bce089..00000000 --- a/nextpy/ai/models/embedding/bedrock.py +++ /dev/null @@ -1,163 +0,0 @@ -# This file has been modified by the Nextpy Team in 2023 using AI tools and automation scripts. -# We have rigorously tested these modifications to ensure reliability and performance. Based on successful test results, we are confident in the quality and stability of these changes. - -import json -import os -from typing import Any, Dict, List, Optional - -from pydantic import BaseModel, Extra, root_validator - -from nextpy.ai.models.embedding.base import Embeddings - - -class BedrockEmbeddings(BaseModel, Embeddings): - """Embeddings provider to invoke Bedrock embedding models. - - To authenticate, the AWS client uses the following methods to - automatically load credentials: - https://boto3.amazonaws.com/v1/documentation/api/latest/guide/credentials.html - - If a specific credential profile should be used, you must pass - the name of the profile from the ~/.aws/credentials file that is to be used. - - Make sure the credentials / roles used have the required policies to - access the Bedrock service. - """ - - """ - Example: - .. code-block:: python - - from nextpy.ai.bedrock_embedding import BedrockEmbeddings - - region_name ="us-east-1" - credentials_profile_name = "default" - model_id = "amazon.titan-e1t-medium" - - be = BedrockEmbeddings( - credentials_profile_name=credentials_profile_name, - region_name=region_name, - model_id=model_id - ) - """ - - client: Any #: :meta private: - - region_name: Optional[str] = None - """The aws region e.g., `us-west-2`. Fallsback to AWS_DEFAULT_REGION env variable - or region specified in ~/.aws/config in case it is not provided here. - """ - - credentials_profile_name: Optional[str] = None - """The name of the profile in the ~/.aws/credentials or ~/.aws/config files, which - has either access keys or role information specified. - If not specified, the default credential profile or, if on an EC2 instance, - credentials from IMDS will be used. 
- See: https://boto3.amazonaws.com/v1/documentation/api/latest/guide/credentials.html - """ - - model_id: str = "amazon.titan-e1t-medium" - """Id of the model to call, e.g., amazon.titan-e1t-medium, this is - equivalent to the modelId property in the list-foundation-models api""" - - model_kwargs: Optional[Dict] = None - """Key word arguments to pass to the model.""" - - class Config: - """Configuration for this pydantic object.""" - - extra = Extra.forbid - - @root_validator() - def validate_environment(cls, values: Dict) -> Dict: - """Validate that AWS credentials to and python package exists in environment.""" - if values["client"] is not None: - return values - - try: - import boto3 - - if values["credentials_profile_name"] is not None: - session = boto3.Session(profile_name=values["credentials_profile_name"]) - else: - # use default credentials - session = boto3.Session() - - client_params = {} - if values["region_name"]: - client_params["region_name"] = values["region_name"] - - values["client"] = session.client("bedrock", **client_params) - - except ImportError: - raise ModuleNotFoundError( - "Could not import boto3 python package. " - "Please install it with `pip install boto3`." - ) - except Exception as e: - raise ValueError( - "Could not load credentials to authenticate with AWS client. " - "Please check that credentials in the specified " - "profile name are valid." - ) from e - - return values - - def _embedding_func(self, text: str) -> List[float]: - """Call out to Bedrock embedding endpoint.""" - # replace newlines, which can negatively affect performance. - text = text.replace(os.linesep, " ") - _model_kwargs = self.model_kwargs or {} - - input_body = {**_model_kwargs} - input_body["inputText"] = text - body = json.dumps(input_body) - content_type = "application/json" - accepts = "application/json" - - embeddings = [] - try: - response = self.client.invoke_model( - body=body, - modelId=self.model_id, - accept=accepts, - contentType=content_type, - ) - response_body = json.loads(response.get("body").read()) - embeddings = response_body.get("embedding") - except Exception as e: - raise ValueError(f"Error raised by inference endpoint: {e}") - - return embeddings - - def embed_documents( - self, texts: List[str], chunk_size: int = 1 - ) -> List[List[float]]: - """Compute doc embeddings using a Bedrock model. - - Args: - texts: The list of texts to embed. - chunk_size: Bedrock currently only allows single string - inputs, so chunk size is always 1. This input is here - only for compatibility with the embeddings interface. - - - Returns: - List of embeddings, one for each text. - """ - results = [] - for text in texts: - response = self._embedding_func(text) - results.append(response) - return results - - def embed_query(self, text: str) -> List[float]: - """Compute query embeddings using a Bedrock model. - - Args: - text: The text to embed. - - Returns: - Embeddings for the text. - """ - return self._embedding_func(text) diff --git a/nextpy/ai/models/embedding/cohere.py b/nextpy/ai/models/embedding/cohere.py deleted file mode 100644 index 573a75c4..00000000 --- a/nextpy/ai/models/embedding/cohere.py +++ /dev/null @@ -1,86 +0,0 @@ -# This file has been modified by the Nextpy Team in 2023 using AI tools and automation scripts. -# We have rigorously tested these modifications to ensure reliability and performance. Based on successful test results, we are confident in the quality and stability of these changes. 
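Stripped of the pydantic scaffolding, the Bedrock exchange in `_embedding_func` above is one `invoke_model` call: a JSON body carrying `inputText` in, a JSON body carrying `embedding` out. A bare sketch of that round trip, assuming default AWS credentials and the same `bedrock` client name the deleted validator creates (newer boto3 releases expose this call on the `bedrock-runtime` client instead):

.. code-block:: python

    import json

    import boto3

    client = boto3.Session().client("bedrock")

    def titan_embed(text: str, model_id: str = "amazon.titan-e1t-medium") -> list:
        """Mirror of the deleted _embedding_func, without the pydantic wrapper."""
        body = json.dumps({"inputText": text.replace("\n", " ")})
        response = client.invoke_model(
            body=body,
            modelId=model_id,
            accept="application/json",
            contentType="application/json",
        )
        return json.loads(response["body"].read())["embedding"]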
- -"""Wrapper around Cohere embedding models.""" -from typing import Any, Dict, List, Optional - -from pydantic import BaseModel, Extra, root_validator - -from nextpy.ai.models.embedding.base import Embeddings -from nextpy.utils.data_ops import get_from_dict_or_env - - -class CohereEmbeddings(BaseModel, Embeddings): - """Wrapper around Cohere embedding models. - - To use, you should have the ``cohere`` python package installed, and the - environment variable ``COHERE_API_KEY`` set with your API key or pass it - as a named parameter to the constructor. - - Example: - .. code-block:: python - - from nextpy.ai.models.embedding import CohereEmbeddings - cohere = CohereEmbeddings( - model="embed-english-light-v2.0", cohere_api_key="my-api-key" - ) - """ - - client: Any #: :meta private: - model: str = "embed-english-v2.0" - """Model name to use.""" - - truncate: Optional[str] = None - """Truncate embeddings that are too long from start or end ("NONE"|"START"|"END")""" - - cohere_api_key: Optional[str] = None - - class Config: - """Configuration for this pydantic object.""" - - extra = Extra.forbid - - @root_validator() - def validate_environment(cls, values: Dict) -> Dict: - """Validate that api key and python package exists in environment.""" - cohere_api_key = get_from_dict_or_env( - values, "cohere_api_key", "COHERE_API_KEY" - ) - try: - import cohere - - values["client"] = cohere.Client(cohere_api_key) - except ImportError: - raise ValueError( - "Could not import cohere python package. " - "Please install it with `pip install cohere`." - ) - return values - - def embed_documents(self, texts: List[str]) -> List[List[float]]: - """Call out to Cohere's embedding endpoint. - - Args: - texts: The list of texts to embed. - - Returns: - List of embeddings, one for each text. - """ - embeddings = self.client.embed( - model=self.model, texts=texts, truncate=self.truncate - ).embeddings - return [list(map(float, e)) for e in embeddings] - - def embed_query(self, text: str) -> List[float]: - """Call out to Cohere's embedding endpoint. - - Args: - text: The text to embed. - - Returns: - Embeddings for the text. - """ - embedding = self.client.embed( - model=self.model, texts=[text], truncate=self.truncate - ).embeddings[0] - return list(map(float, embedding)) diff --git a/nextpy/ai/models/embedding/dashscope.py b/nextpy/ai/models/embedding/dashscope.py deleted file mode 100644 index 655ef80e..00000000 --- a/nextpy/ai/models/embedding/dashscope.py +++ /dev/null @@ -1,156 +0,0 @@ -# This file has been modified by the Nextpy Team in 2023 using AI tools and automation scripts. -# We have rigorously tested these modifications to ensure reliability and performance. Based on successful test results, we are confident in the quality and stability of these changes. - -"""Wrapper around DashScope embedding models.""" -import logging -from typing import ( - Any, - Callable, - Dict, - List, - Optional, -) - -from pydantic import BaseModel, Extra, root_validator -from requests.exceptions import HTTPError -from tenacity import ( - before_sleep_log, - retry, - retry_if_exception_type, - stop_after_attempt, - wait_exponential, -) - -from nextpy.ai.models.embedding.base import Embeddings -from nextpy.utils.data_ops import get_from_dict_or_env - -logger = logging.getLogger(__name__) - - -class DashScopeEmbeddings(BaseModel, Embeddings): - """Wrapper around DashScope embedding models. 
- - To use, you should have the ``dashscope`` python package installed, and the - environment variable ``DASHSCOPE_API_KEY`` set with your API key or pass it - as a named parameter to the constructor. - - Example: - .. code-block:: python - - from nextpy.ai.models.embedding import DashScopeEmbeddings - embeddings = DashScopeEmbeddings(dashscope_api_key="my-api-key") - - Example: - .. code-block:: python - - import os - os.environ["DASHSCOPE_API_KEY"] = "your DashScope API KEY" - - from nextpy.ai.models.embeddings.dashscope import DashScopeEmbeddings - embeddings = DashScopeEmbeddings( - model="text-embedding-v1", - ) - text = "This is a test query." - query_result = embeddings.embed_query(text) - - """ - - client: Any #: :meta private: - model: str = "text-embedding-v1" - dashscope_api_key: Optional[str] = None - """Maximum number of retries to make when generating.""" - max_retries: int = 5 - - class Config: - """Configuration for this pydantic object.""" - - extra = Extra.forbid - - @root_validator() - def validate_environment(cls, values: Dict) -> Dict: - import dashscope - - """Validate that api key and python package exists in environment.""" - values["dashscope_api_key"] = get_from_dict_or_env( - values, "dashscope_api_key", "DASHSCOPE_API_KEY" - ) - dashscope.api_key = values["dashscope_api_key"] - try: - import dashscope - - values["client"] = dashscope.TextEmbedding - except ImportError: - raise ImportError( - "Could not import dashscope python package. " - "Please install it with `pip install dashscope`." - ) - return values - - def embed_documents(self, texts: List[str]) -> List[List[float]]: - """Call out to DashScope's embedding endpoint for embedding search docs. - - Args: - texts: The list of texts to embed. - chunk_size: The chunk size of embeddings. If None, will use the chunk size - specified by the class. - - Returns: - List of embeddings, one for each text. - """ - embeddings = embed_with_retry( - self, input=texts, text_type="document", model=self.model - ) - embedding_list = [item["embedding"] for item in embeddings] - return embedding_list - - def embed_query(self, text: str) -> List[float]: - """Call out to DashScope's embedding endpoint for embedding query text. - - Args: - text: The text to embed. - - Returns: - Embedding for the text. 
- """ - embedding = embed_with_retry( - self, input=text, text_type="query", model=self.model - )[0]["embedding"] - return embedding - - -def _create_retry_decorator(embeddings: DashScopeEmbeddings) -> Callable[[Any], Any]: - multiplier = 1 - min_seconds = 1 - max_seconds = 4 - # Wait 2^x * 1 second between each retry starting with - # 1 seconds, then up to 4 seconds, then 4 seconds afterwards - return retry( - reraise=True, - stop=stop_after_attempt(embeddings.max_retries), - wait=wait_exponential(multiplier, min=min_seconds, max=max_seconds), - retry=(retry_if_exception_type(HTTPError)), - before_sleep=before_sleep_log(logger, logging.WARNING), - ) - - -def embed_with_retry(embeddings: DashScopeEmbeddings, **kwargs: Any) -> Any: - """Use tenacity to retry the embedding call.""" - retry_decorator = _create_retry_decorator(embeddings) - - @retry_decorator - def _embed_with_retry(**kwargs: Any) -> Any: - resp = embeddings.client.call(**kwargs) - if resp.status_code == 200: - return resp.output["embeddings"] - elif resp.status_code in [400, 401]: - raise ValueError( - f"status_code: {resp.status_code} \n " - f"code: {resp.code} \n message: {resp.message}" - ) - else: - raise HTTPError( - f"HTTP error occurred: status_code: {resp.status_code} \n " - f"code: {resp.code} \n message: {resp.message}" - ) - - return _embed_with_retry(**kwargs) diff --git a/nextpy/ai/models/embedding/deepinfra.py b/nextpy/ai/models/embedding/deepinfra.py deleted file mode 100644 index a51c3066..00000000 --- a/nextpy/ai/models/embedding/deepinfra.py +++ /dev/null @@ -1,132 +0,0 @@ -# This file has been modified by the Nextpy Team in 2023 using AI tools and automation scripts. -# We have rigorously tested these modifications to ensure reliability and performance. Based on successful test results, we are confident in the quality and stability of these changes. - -from typing import Any, Dict, List, Mapping, Optional - -import requests -from pydantic import BaseModel, Extra, root_validator - -from nextpy.ai.models.embedding.base import Embeddings -from nextpy.utils.data_ops import get_from_dict_or_env - -DEFAULT_MODEL_ID = "sentence-transformers/clip-ViT-B-32" - - -class DeepInfraEmbeddings(BaseModel, Embeddings): - """Wrapper around Deep Infra's embedding inference service. - - To use, you should have the - environment variable ``DEEPINFRA_API_TOKEN`` set with your API token, or pass - it as a named parameter to the constructor. - There are multiple embedding models available, - see https://deepinfra.com/models?type=embeddings. - - Example: - .. 
code-block:: python - - from nextpy.ai.models.embeddings import DeepInfraEmbeddings - deepinfra_emb = DeepInfraEmbeddings( - model_id="sentence-transformers/clip-ViT-B-32", - deepinfra_api_token="my-api-key" - ) - r1 = deepinfra_emb.embed_documents( - [ - "Alpha is the first letter of Greek alphabet", - "Beta is the second letter of Greek alphabet", - ] - ) - r2 = deepinfra_emb.embed_query( - "What is the second letter of Greek alphabet" - ) - - """ - - model_id: str = DEFAULT_MODEL_ID - """Embeddings model to use.""" - normalize: bool = False - """whether to normalize the computed embeddings""" - embed_instruction: str = "passage: " - """Instruction used to embed documents.""" - query_instruction: str = "query: " - """Instruction used to embed the query.""" - model_kwargs: Optional[dict] = None - """Other model keyword args""" - - deepinfra_api_token: Optional[str] = None - - class Config: - """Configuration for this pydantic object.""" - - extra = Extra.forbid - - @root_validator() - def validate_environment(cls, values: Dict) -> Dict: - """Validate that api key and python package exists in environment.""" - deepinfra_api_token = get_from_dict_or_env( - values, "deepinfra_api_token", "DEEPINFRA_API_TOKEN" - ) - values["deepinfra_api_token"] = deepinfra_api_token - return values - - @property - def _identifying_params(self) -> Mapping[str, Any]: - """Get the identifying parameters.""" - return {"model_id": self.model_id} - - def _embed(self, input: List[str]) -> List[List[float]]: - _model_kwargs = self.model_kwargs or {} - # HTTP headers for authorization - headers = { - "Authorization": f"bearer {self.deepinfra_api_token}", - "Content-Type": "application/json", - } - # send request - try: - res = requests.post( - f"https://api.deepinfra.com/v1/inference/{self.model_id}", - headers=headers, - json={"inputs": input, "normalize": self.normalize, **_model_kwargs}, - ) - except requests.exceptions.RequestException as e: - raise ValueError(f"Error raised by inference endpoint: {e}") - - if res.status_code != 200: - raise ValueError( - "Error raised by inference API HTTP code: %s, %s" - % (res.status_code, res.text) - ) - try: - t = res.json() - embeddings = t["embeddings"] - except requests.exceptions.JSONDecodeError as e: - raise ValueError( - f"Error raised by inference API: {e}.\nResponse: {res.text}" - ) - - return embeddings - - def embed_documents(self, texts: List[str]) -> List[List[float]]: - """Embed documents using a Deep Infra deployed embedding model. - - Args: - texts: The list of texts to embed. - - Returns: - List of embeddings, one for each text. - """ - instruction_pairs = [f"{self.query_instruction}{text}" for text in texts] - embeddings = self._embed(instruction_pairs) - return embeddings - - def embed_query(self, text: str) -> List[float]: - """Embed a query using a Deep Infra deployed embedding model. - - Args: - text: The text to embed. - - Returns: - Embeddings for the text. - """ - instruction_pair = f"{self.query_instruction}{text}" - embedding = self._embed([instruction_pair])[0] - return embedding diff --git a/nextpy/ai/models/embedding/elasticsearch.py b/nextpy/ai/models/embedding/elasticsearch.py deleted file mode 100644 index 9f95a973..00000000 --- a/nextpy/ai/models/embedding/elasticsearch.py +++ /dev/null @@ -1,219 +0,0 @@ -# This file has been modified by the Nextpy Team in 2023 using AI tools and automation scripts. -# We have rigorously tested these modifications to ensure reliability and performance. 
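The DeepInfra defaults above ("passage: " for `embed_instruction`, "query: " for `query_instruction`) follow the e5 prefixing convention, under which documents and queries are marked differently before encoding. Note that the deleted `embed_documents` prepends `query_instruction` to documents; the conventional pairing, sketched below for an assumed e5-style model (the `prefix_for_e5` helper is a hypothetical name), applies the passage prefix on the document side:

.. code-block:: python

    def prefix_for_e5(texts: list[str], *, is_query: bool) -> list[str]:
        """Apply e5-style role prefixes before sending texts to the model."""
        prefix = "query: " if is_query else "passage: "
        return [f"{prefix}{t}" for t in texts]

    docs = prefix_for_e5(
        ["Alpha is the first letter.", "Beta is the second letter."], is_query=False
    )
    queries = prefix_for_e5(["second Greek letter"], is_query=True)
    # Feed `docs` and `queries` to the inference endpoint in place of raw strings.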
Based on successful test results, we are confident in the quality and stability of these changes. - -from __future__ import annotations - -from typing import TYPE_CHECKING, Any, List, Optional - -from nextpy.utils.data_ops import get_from_dict_or_env - -if TYPE_CHECKING: - from elasticsearch import Elasticsearch - -from nextpy.ai.models.embedding.base import Embeddings - - -class ElasticsearchEmbeddings(Embeddings): - """Wrapper around Elasticsearch embedding models. - - This class provides an interface to generate embedding using a model deployed - in an Elasticsearch cluster. It requires an Elasticsearch connection object - and the model_id of the model deployed in the cluster. - - In Elasticsearch you need to have an embedding model loaded and deployed. - - https://www.elastic.co/guide/en/elasticsearch/reference/current/infer-trained-model.html - - https://www.elastic.co/guide/en/machine-learning/current/ml-nlp-deploy-models.html - """ # noqa: E501 - - def __init__( - self, - client: Any, - model_id: str, - *, - input_field: str = "text_field", - ): - """Initialize the ElasticsearchEmbeddings instance. - - Args: - client (MlClient): An Elasticsearch ML client object. - model_id (str): The model_id of the model deployed in the Elasticsearch - cluster. - input_field (str): The name of the key for the input text field in the - document. Defaults to 'text_field'. - """ - self.client = client - self.model_id = model_id - self.input_field = input_field - - @classmethod - def from_credentials( - cls, - model_id: str, - *, - es_cloud_id: Optional[str] = None, - es_user: Optional[str] = None, - es_password: Optional[str] = None, - input_field: str = "text_field", - ) -> ElasticsearchEmbeddings: - """Instantiate embeddings from Elasticsearch credentials. - - Args: - model_id (str): The model_id of the model deployed in the Elasticsearch - cluster. - input_field (str): The name of the key for the input text field in the - document. Defaults to 'text_field'. - es_cloud_id: (str, optional): The Elasticsearch cloud ID to connect to. - es_user: (str, optional): Elasticsearch username. - es_password: (str, optional): Elasticsearch password. - - Example: - .. code-block:: python - - from langchain.embeddings import ElasticsearchEmbeddings - - # Define the model ID and input field name (if different from default) - model_id = "your_model_id" - # Optional, only if different from 'text_field' - input_field = "your_input_field" - - # Credentials can be passed in two ways. Either set the env vars - # ES_CLOUD_ID, ES_USER, ES_PASSWORD and they will be automatically - # pulled in, or pass them in directly as kwargs. 
- embeddings = ElasticsearchEmbeddings.from_credentials( - model_id, - input_field=input_field, - # es_cloud_id="foo", - # es_user="bar", - # es_password="baz", - ) - - documents = [ - "This is an example document.", - "Another example document to generate embeddings for.", - ] - embeddings_generator.embed_documents(documents) - """ - try: - from elasticsearch import Elasticsearch - from elasticsearch.client import MlClient - except ImportError: - raise ImportError( - "elasticsearch package not found, please install with 'pip install " - "elasticsearch'" - ) - - es_cloud_id = es_cloud_id or get_from_dict_or_env("es_cloud_id", "ES_CLOUD_ID") - es_user = es_user or get_from_dict_or_env("es_user", "ES_USER") - es_password = es_password or get_from_dict_or_env("es_password", "ES_PASSWORD") - - # Connect to Elasticsearch - es_connection = Elasticsearch( - cloud_id=es_cloud_id, basic_auth=(es_user, es_password) - ) - client = MlClient(es_connection) - return cls(client, model_id, input_field=input_field) - - @classmethod - def from_es_connection( - cls, - model_id: str, - es_connection: Elasticsearch, - input_field: str = "text_field", - ) -> ElasticsearchEmbeddings: - """Instantiate embeddings from an existing Elasticsearch connection. - - This method provides a way to create an instance of the ElasticsearchEmbeddings - class using an existing Elasticsearch connection. The connection object is used - to create an MlClient, which is then used to initialize the - ElasticsearchEmbeddings instance. - - Args: - model_id (str): The model_id of the model deployed in the Elasticsearch cluster. - es_connection (elasticsearch.Elasticsearch): An existing Elasticsearch - connection object. input_field (str, optional): The name of the key for the - input text field in the document. Defaults to 'text_field'. - - Returns: - ElasticsearchEmbeddings: An instance of the ElasticsearchEmbeddings class. - - Example: - .. code-block:: python - - from elasticsearch import Elasticsearch - - from langchain.embeddings import ElasticsearchEmbeddings - - # Define the model ID and input field name (if different from default) - model_id = "your_model_id" - # Optional, only if different from 'text_field' - input_field = "your_input_field" - - # Create Elasticsearch connection - es_connection = Elasticsearch( - hosts=["localhost:9200"], http_auth=("user", "password") - ) - - # Instantiate ElasticsearchEmbeddings using the existing connection - embeddings = ElasticsearchEmbeddings.from_es_connection( - model_id, - es_connection, - input_field=input_field, - ) - - documents = [ - "This is an example document.", - "Another example document to generate embeddings for.", - ] - embeddings_generator.embed_documents(documents) - """ - # Importing MlClient from elasticsearch.client within the method to - # avoid unnecessary import if the method is not used - from elasticsearch.client import MlClient - - # Create an MlClient from the given Elasticsearch connection - client = MlClient(es_connection) - - # Return a new instance of the ElasticsearchEmbeddings class with - # the MlClient, model_id, and input_field - return cls(client, model_id, input_field=input_field) - - def _embedding_func(self, texts: List[str]) -> List[List[float]]: - """Generate embeddings for the given texts using the Elasticsearch model. - - Args: - texts (List[str]): A list of text strings to generate embeddings for. - - Returns: - List[List[float]]: A list of embeddings, one for each text in the input - list. 
- """ - response = self.client.infer_trained_model( - model_id=self.model_id, docs=[{self.input_field: text} for text in texts] - ) - - embeddings = [doc["predicted_value"] for doc in response["inference_results"]] - return embeddings - - def embed_documents(self, texts: List[str]) -> List[List[float]]: - """Generate embeddings for a list of documents. - - Args: - texts (List[str]): A list of document text strings to generate embeddings - for. - - Returns: - List[List[float]]: A list of embeddings, one for each document in the input - list. - """ - return self._embedding_func(texts) - - def embed_query(self, text: str) -> List[float]: - """Generate an embedding for a single query text. - - Args: - text (str): The query text to generate an embedding for. - - Returns: - List[float]: The embedding for the input query text. - """ - return self._embedding_func([text])[0] diff --git a/nextpy/ai/models/embedding/embaas.py b/nextpy/ai/models/embedding/embaas.py deleted file mode 100644 index c1d58346..00000000 --- a/nextpy/ai/models/embedding/embaas.py +++ /dev/null @@ -1,142 +0,0 @@ -# This file has been modified by the Nextpy Team in 2023 using AI tools and automation scripts. -# We have rigorously tested these modifications to ensure reliability and performance. Based on successful test results, we are confident in the quality and stability of these changes. - -"""Wrapper around embaas embeddings API.""" -from typing import Any, Dict, List, Mapping, Optional - -import requests -from pydantic import BaseModel, Extra, root_validator -from typing_extensions import NotRequired, TypedDict - -from nextpy.ai.models.embedding.base import Embeddings -from nextpy.utils.data_ops import get_from_dict_or_env - -# Currently supported maximum batch size for embedding requests -MAX_BATCH_SIZE = 256 -EMBAAS_API_URL = "https://api.embaas.io/v1/embeddings/" - - -class EmbaasEmbeddingsPayload(TypedDict): - """Payload for the embaas embeddings API.""" - - model: str - texts: List[str] - instruction: NotRequired[str] - - -class EmbaasEmbeddings(BaseModel, Embeddings): - """Wrapper around embaas's embedding service. - - To use, you should have the - environment variable ``EMBAAS_API_KEY`` set with your API key, or pass - it as a named parameter to the constructor. - - Example: - .. 
code-block:: python - - # Initialise with default model and instruction - from nextpy.ai.models.embeddings import EmbaasEmbeddings - emb = EmbaasEmbeddings() - - # Initialise with custom model and instruction - from nextpy.ai.models.embeddings import EmbaasEmbeddings - emb_model = "instructor-large" - emb_inst = "Represent the Wikipedia document for retrieval" - emb = EmbaasEmbeddings( - model=emb_model, - instruction=emb_inst - ) - """ - - model: str = "e5-large-v2" - """The model used for embeddings.""" - instruction: Optional[str] = None - """Instruction used for domain-specific embeddings.""" - api_url: str = EMBAAS_API_URL - """The URL for the embaas embeddings API.""" - embaas_api_key: Optional[str] = None - - class Config: - """Configuration for this pydantic object.""" - - extra = Extra.forbid - - @root_validator() - def validate_environment(cls, values: Dict) -> Dict: - """Validate that api key and python package exists in environment.""" - embaas_api_key = get_from_dict_or_env( - values, "embaas_api_key", "EMBAAS_API_KEY" - ) - values["embaas_api_key"] = embaas_api_key - return values - - @property - def _identifying_params(self) -> Mapping[str, Any]: - """Get the identifying params.""" - return {"model": self.model, "instruction": self.instruction} - - def _generate_payload(self, texts: List[str]) -> EmbaasEmbeddingsPayload: - """Generates payload for the API request.""" - payload = EmbaasEmbeddingsPayload(texts=texts, model=self.model) - if self.instruction: - payload["instruction"] = self.instruction - return payload - - def _handle_request(self, payload: EmbaasEmbeddingsPayload) -> List[List[float]]: - """Sends a request to the Embaas API and handles the response.""" - headers = { - "Authorization": f"Bearer {self.embaas_api_key}", - "Content-Type": "application/json", - } - - response = requests.post(self.api_url, headers=headers, json=payload) - response.raise_for_status() - - parsed_response = response.json() - embeddings = [item["embedding"] for item in parsed_response["data"]] - - return embeddings - - def _generate_embeddings(self, texts: List[str]) -> List[List[float]]: - """Generate embeddings using the Embaas API.""" - payload = self._generate_payload(texts) - try: - return self._handle_request(payload) - except requests.exceptions.RequestException as e: - if e.response is None or not e.response.text: - raise ValueError(f"Error raised by embaas embeddings API: {e}") - - parsed_response = e.response.json() - if "message" in parsed_response: - raise ValueError( - "Validation Error raised by embaas embeddings API:" - f"{parsed_response['message']}" - ) - raise - - def embed_documents(self, texts: List[str]) -> List[List[float]]: - """Get embeddings for a list of texts. - - Args: - texts: The list of texts to get embeddings for. - - Returns: - List of embeddings, one for each text. - """ - batches = [ - texts[i : i + MAX_BATCH_SIZE] for i in range(0, len(texts), MAX_BATCH_SIZE) - ] - embeddings = [self._generate_embeddings(batch) for batch in batches] - # flatten the list of lists into a single list - return [embedding for batch in embeddings for embedding in batch] - - def embed_query(self, text: str) -> List[float]: - """Get embeddings for a single text. - - Args: - text: The text to get embeddings for. - - Returns: - List of embeddings. 
- """ - return self.embed_documents([text])[0] diff --git a/nextpy/ai/models/embedding/fake.py b/nextpy/ai/models/embedding/fake.py deleted file mode 100644 index aa24f3f0..00000000 --- a/nextpy/ai/models/embedding/fake.py +++ /dev/null @@ -1,22 +0,0 @@ -# This file has been modified by the Nextpy Team in 2023 using AI tools and automation scripts. -# We have rigorously tested these modifications to ensure reliability and performance. Based on successful test results, we are confident in the quality and stability of these changes. - -from typing import List - -import numpy as np -from pydantic import BaseModel - -from nextpy.ai.models.embedding.base import Embeddings - - -class FakeEmbeddings(Embeddings, BaseModel): - size: int - - def _get_embedding(self) -> List[float]: - return list(np.random.normal(size=self.size)) - - def embed_documents(self, texts: List[str]) -> List[List[float]]: - return [self._get_embedding() for _ in texts] - - def embed_query(self, text: str) -> List[float]: - return self._get_embedding() diff --git a/nextpy/ai/models/embedding/google_palm.py b/nextpy/ai/models/embedding/google_palm.py deleted file mode 100644 index 0befcc13..00000000 --- a/nextpy/ai/models/embedding/google_palm.py +++ /dev/null @@ -1,86 +0,0 @@ -# This file has been modified by the Nextpy Team in 2023 using AI tools and automation scripts. -# We have rigorously tested these modifications to ensure reliability and performance. Based on successful test results, we are confident in the quality and stability of these changes. - -import logging -from typing import Any, Callable, Dict, List, Optional - -from pydantic import BaseModel, root_validator -from tenacity import ( - before_sleep_log, - retry, - retry_if_exception_type, - stop_after_attempt, - wait_exponential, -) - -from nextpy.ai.models.embedding.base import Embeddings -from nextpy.utils.data_ops import get_from_dict_or_env - -logger = logging.getLogger(__name__) - - -class GooglePalmEmbeddings(BaseModel, Embeddings): - client: Any - google_api_key: Optional[str] - model_name: str = "models/embedding-gecko-001" - """Model name to use.""" - - @root_validator() - def validate_environment(cls, values: Dict) -> Dict: - """Validate api key, python package exists.""" - google_api_key = get_from_dict_or_env( - values, "google_api_key", "GOOGLE_API_KEY" - ) - try: - import google.generativeai as genai - - genai.configure(api_key=google_api_key) - except ImportError: - raise ImportError("Could not import google.generativeai python package.") - - values["client"] = genai - - return values - - def embed_documents(self, texts: List[str]) -> List[List[float]]: - return [self.embed_query(text) for text in texts] - - def embed_query(self, text: str) -> List[float]: - """Embed query text.""" - embedding = embed_with_retry(self, self.model_name, text) - return embedding["embedding"] - - -def _create_retry_decorator() -> Callable[[Any], Any]: - """Returns a tenacity retry decorator, preconfigured to handle PaLM exceptions.""" - import google.api_core.exceptions - - multiplier = 2 - min_seconds = 1 - max_seconds = 60 - max_retries = 10 - - return retry( - reraise=True, - stop=stop_after_attempt(max_retries), - wait=wait_exponential(multiplier=multiplier, min=min_seconds, max=max_seconds), - retry=( - retry_if_exception_type(google.api_core.exceptions.ResourceExhausted) - | retry_if_exception_type(google.api_core.exceptions.ServiceUnavailable) - | retry_if_exception_type(google.api_core.exceptions.GoogleAPIError) - ), - before_sleep=before_sleep_log(logger, 
logging.WARNING), - ) - - -def embed_with_retry( - embeddings: GooglePalmEmbeddings, *args: Any, **kwargs: Any -) -> Any: - """Use tenacity to retry the completion call.""" - retry_decorator = _create_retry_decorator() - - @retry_decorator - def _embed_with_retry(*args: Any, **kwargs: Any) -> Any: - return embeddings.client.generate_embeddings(*args, **kwargs) - - return _embed_with_retry(*args, **kwargs) diff --git a/nextpy/ai/models/embedding/huggingface.py b/nextpy/ai/models/embedding/huggingface.py deleted file mode 100644 index a270f7c1..00000000 --- a/nextpy/ai/models/embedding/huggingface.py +++ /dev/null @@ -1,274 +0,0 @@ -# This file has been modified by the Nextpy Team in 2023 using AI tools and automation scripts. -# We have rigorously tested these modifications to ensure reliability and performance. Based on successful test results, we are confident in the quality and stability of these changes. - -"""Wrapper around HuggingFace embedding models: hub, sentence-transformers and instruct embeddings.""" -from typing import Any, Dict, List, Optional - -from pydantic import BaseModel, Extra, Field, root_validator - -from nextpy.ai.models.embedding.base import Embeddings -from nextpy.utils.data_ops import get_from_dict_or_env - -DEFAULT_MODEL_NAME = "sentence-transformers/all-mpnet-base-v2" -DEFAULT_INSTRUCT_MODEL = "hkunlp/instructor-large" -DEFAULT_EMBED_INSTRUCTION = "Represent the document for retrieval: " -DEFAULT_QUERY_INSTRUCTION = ( - "Represent the question for retrieving supporting documents: " -) - -DEFAULT_REPO_ID = "sentence-transformers/all-mpnet-base-v2" -VALID_TASKS = ("feature-extraction",) - - -class HuggingFaceHubEmbeddings(BaseModel, Embeddings): - """Wrapper around HuggingFaceHub embedding models. - - To use, you should have the ``huggingface_hub`` python package installed, and the - environment variable ``HUGGINGFACEHUB_API_TOKEN`` set with your API token, or pass - it as a named parameter to the constructor. - - Example: - .. code-block:: python - - from nextpy.ai.models.embeddings import HuggingFaceHubEmbeddings - repo_id = "sentence-transformers/all-mpnet-base-v2" - hf = HuggingFaceHubEmbeddings( - repo_id=repo_id, - task="feature-extraction", - huggingfacehub_api_token="my-api-key", - ) - """ - - client: Any #: :meta private: - repo_id: str = DEFAULT_REPO_ID - """Model name to use.""" - task: Optional[str] = "feature-extraction" - """Task to call the model with.""" - model_kwargs: Optional[dict] = None - """Key word arguments to pass to the model.""" - - huggingfacehub_api_token: Optional[str] = None - - class Config: - """Configuration for this pydantic object.""" - - extra = Extra.forbid - - @root_validator() - def validate_environment(cls, values: Dict) -> Dict: - """Validate that api key and python package exists in environment.""" - huggingfacehub_api_token = get_from_dict_or_env( - values, "huggingfacehub_api_token", "HUGGINGFACEHUB_API_TOKEN" - ) - try: - from huggingface_hub.inference_api import InferenceApi - - repo_id = values["repo_id"] - if not repo_id.startswith("sentence-transformers"): - raise ValueError( - "Currently only 'sentence-transformers' embedding models " - f"are supported. Got invalid 'repo_id' {repo_id}." 
- ) - client = InferenceApi( - repo_id=repo_id, - token=huggingfacehub_api_token, - task=values.get("task"), - ) - if client.task not in VALID_TASKS: - raise ValueError( - f"Got invalid task {client.task}, " - f"currently only {VALID_TASKS} are supported" - ) - values["client"] = client - except ImportError: - raise ValueError( - "Could not import huggingface_hub python package. " - "Please install it with `pip install huggingface_hub`." - ) - return values - - def embed_documents(self, texts: List[str]) -> List[List[float]]: - """Call out to HuggingFaceHub's embedding endpoint for embedding search docs. - - Args: - texts: The list of texts to embed. - - Returns: - List of embeddings, one for each text. - """ - # replace newlines, which can negatively affect performance. - texts = [text.replace("\n", " ") for text in texts] - _model_kwargs = self.model_kwargs or {} - responses = self.client(inputs=texts, params=_model_kwargs) - return responses - - def embed_query(self, text: str) -> List[float]: - """Call out to HuggingFaceHub's embedding endpoint for embedding query text. - - Args: - text: The text to embed. - - Returns: - Embeddings for the text. - """ - response = self.embed_documents([text])[0] - return response - - -class HuggingFaceSetenceTransformersEmbeddings(BaseModel, Embeddings): - """Wrapper around sentence_transformers embedding models. - - To use, you should have the ``sentence_transformers`` python package installed. - - Example: - .. code-block:: python - - from nextpy.ai.models.embeddings import HuggingFaceEmbeddings - - model_name = "sentence-transformers/all-mpnet-base-v2" - model_kwargs = {'device': 'cpu'} - encode_kwargs = {'normalize_embeddings': False} - hf = HuggingFaceEmbeddings( - model_name=model_name, - model_kwargs=model_kwargs, - encode_kwargs=encode_kwargs - ) - """ - - client: Any #: :meta private: - model_name: str = DEFAULT_MODEL_NAME - """Model name to use.""" - cache_folder: Optional[str] = None - """Path to store models. - Can be also set by SENTENCE_TRANSFORMERS_HOME environment variable.""" - model_kwargs: Dict[str, Any] = Field(default_factory=dict) - """Key word arguments to pass to the model.""" - encode_kwargs: Dict[str, Any] = Field(default_factory=dict) - """Key word arguments to pass when calling the `encode` method of the model.""" - - def __init__(self, **kwargs: Any): - """Initialize the sentence_transformer.""" - super().__init__(**kwargs) - try: - import sentence_transformers - - except ImportError as exc: - raise ImportError( - "Could not import sentence_transformers python package. " - "Please install it with `pip install sentence_transformers`." - ) from exc - - self.client = sentence_transformers.SentenceTransformer( - self.model_name, cache_folder=self.cache_folder, **self.model_kwargs - ) - - class Config: - """Configuration for this pydantic object.""" - - extra = Extra.forbid - - def embed_documents(self, texts: List[str]) -> List[List[float]]: - """Compute doc embeddings using a HuggingFace transformer model. - - Args: - texts: The list of texts to embed. - - Returns: - List of embeddings, one for each text. - """ - texts = list(map(lambda x: x.replace("\n", " "), texts)) - embeddings = self.client.encode(texts, **self.encode_kwargs) - return embeddings.tolist() - - def embed_query(self, text: str) -> List[float]: - """Compute query embeddings using a HuggingFace transformer model. - - Args: - text: The text to embed. - - Returns: - Embeddings for the text. 
- """ - text = text.replace("\n", " ") - embedding = self.client.encode(text, **self.encode_kwargs) - return embedding.tolist() - - -class HuggingFaceInstructEmbeddings(BaseModel, Embeddings): - """Wrapper around sentence_transformers embedding models. - - To use, you should have the ``sentence_transformers`` - and ``InstructorEmbedding`` python packages installed. - - Example: - .. code-block:: python - - from nextpy.ai.models.embeddings import HuggingFaceInstructEmbeddings - - model_name = "hkunlp/instructor-large" - model_kwargs = {'device': 'cpu'} - encode_kwargs = {'normalize_embeddings': True} - hf = HuggingFaceInstructEmbeddings( - model_name=model_name, - model_kwargs=model_kwargs, - encode_kwargs=encode_kwargs - ) - """ - - client: Any #: :meta private: - model_name: str = DEFAULT_INSTRUCT_MODEL - """Model name to use.""" - cache_folder: Optional[str] = None - """Path to store models. - Can be also set by SENTENCE_TRANSFORMERS_HOME environment variable.""" - model_kwargs: Dict[str, Any] = Field(default_factory=dict) - """Key word arguments to pass to the model.""" - encode_kwargs: Dict[str, Any] = Field(default_factory=dict) - """Key word arguments to pass when calling the `encode` method of the model.""" - embed_instruction: str = DEFAULT_EMBED_INSTRUCTION - """Instruction to use for embedding documents.""" - query_instruction: str = DEFAULT_QUERY_INSTRUCTION - """Instruction to use for embedding query.""" - - def __init__(self, **kwargs: Any): - """Initialize the sentence_transformer.""" - super().__init__(**kwargs) - try: - from InstructorEmbedding import INSTRUCTOR - - self.client = INSTRUCTOR( - self.model_name, cache_folder=self.cache_folder, **self.model_kwargs - ) - except ImportError as e: - raise ValueError("Dependencies for InstructorEmbedding not found.") from e - - class Config: - """Configuration for this pydantic object.""" - - extra = Extra.forbid - - def embed_documents(self, texts: List[str]) -> List[List[float]]: - """Compute doc embeddings using a HuggingFace instruct model. - - Args: - texts: The list of texts to embed. - - Returns: - List of embeddings, one for each text. - """ - instruction_pairs = [[self.embed_instruction, text] for text in texts] - embeddings = self.client.encode(instruction_pairs, **self.encode_kwargs) - return embeddings.tolist() - - def embed_query(self, text: str) -> List[float]: - """Compute query embeddings using a HuggingFace instruct model. - - Args: - text: The text to embed. - - Returns: - Embeddings for the text. - """ - instruction_pair = [self.query_instruction, text] - embedding = self.client.encode([instruction_pair], **self.encode_kwargs)[0] - return embedding.tolist() diff --git a/nextpy/ai/models/embedding/jina.py b/nextpy/ai/models/embedding/jina.py deleted file mode 100644 index 92779714..00000000 --- a/nextpy/ai/models/embedding/jina.py +++ /dev/null @@ -1,101 +0,0 @@ -# This file has been modified by the Nextpy Team in 2023 using AI tools and automation scripts. -# We have rigorously tested these modifications to ensure reliability and performance. Based on successful test results, we are confident in the quality and stability of these changes. 
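Because every wrapper in this package implements the same two-method `Embeddings` interface, retrieval-style ranking can be written once and run against any backend. A backend-agnostic sketch using cosine similarity (the `rank` helper is illustrative; any `Embeddings` subclass, the sentence-transformers wrapper included, slots into the `embedder` argument):

.. code-block:: python

    import numpy as np

    def rank(embedder, query: str, docs: list[str]) -> list[tuple[float, str]]:
        """Order docs by cosine similarity to the query, best match first."""
        doc_vecs = np.array(embedder.embed_documents(docs))
        q = np.array(embedder.embed_query(query))
        scores = doc_vecs @ q / (np.linalg.norm(doc_vecs, axis=1) * np.linalg.norm(q))
        return sorted(zip(scores.tolist(), docs), reverse=True)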
- -import os -from typing import Any, Dict, List, Optional - -import requests -from pydantic import BaseModel, root_validator - -from nextpy.ai.models.embedding.base import Embeddings -from nextpy.utils.data_ops import get_from_dict_or_env - - -class JinaEmbeddings(BaseModel, Embeddings): - client: Any #: :meta private: - - model_name: str = "ViT-B-32::openai" - """Model name to use.""" - - jina_auth_token: Optional[str] = None - jina_api_url: str = "https://api.clip.jina.ai/api/v1/models/" - request_headers: Optional[dict] = None - - @root_validator() - def validate_environment(cls, values: Dict) -> Dict: - """Validate that auth token exists in environment.""" - # Set Auth - jina_auth_token = get_from_dict_or_env( - values, "jina_auth_token", "JINA_AUTH_TOKEN" - ) - values["jina_auth_token"] = jina_auth_token - values["request_headers"] = (("authorization", jina_auth_token),) - - # Test that package is installed - try: - import jina - except ImportError: - raise ImportError( - "Could not import `jina` python package. " - "Please install it with `pip install jina`." - ) - - # Setup client - jina_api_url = os.environ.get("JINA_API_URL", values["jina_api_url"]) - model_name = values["model_name"] - try: - resp = requests.get( - jina_api_url + f"?model_name={model_name}", - headers={"Authorization": jina_auth_token}, - ) - - if resp.status_code == 401: - raise ValueError( - "The given Jina auth token is invalid. " - "Please check your Jina auth token." - ) - elif resp.status_code == 404: - raise ValueError( - f"The given model name `{model_name}` is not valid. " - f"Please go to https://cloud.jina.ai/user/inference " - f"and create a model with the given model name." - ) - resp.raise_for_status() - - endpoint = resp.json()["endpoints"]["grpc"] - values["client"] = jina.Client(host=endpoint) - except requests.exceptions.HTTPError as err: - raise ValueError(f"Error: {err!r}") - return values - - def _post(self, docs: List[Any], **kwargs: Any) -> Any: - payload = dict(inputs=docs, metadata=self.request_headers, **kwargs) - return self.client.post(on="/encode", **payload) - - def embed_documents(self, texts: List[str]) -> List[List[float]]: - """Call out to Jina's embedding endpoint. - Args: - texts: The list of texts to embed. - - Returns: - List of embedding, one for each text. - """ - from docarray import Document, DocumentArray - - embeddings = self._post( - docs=DocumentArray([Document(text=t) for t in texts]) - ).embeddings - return [list(map(float, e)) for e in embeddings] - - def embed_query(self, text: str) -> List[float]: - """Call out to Jina's embedding endpoint. - Args: - text: The text to embed. - - Returns: - Embeddings for the text. - """ - from docarray import Document, DocumentArray - - embedding = self._post(docs=DocumentArray([Document(text=text)])).embeddings[0] - return list(map(float, embedding)) diff --git a/nextpy/ai/models/embedding/llamacpp.py b/nextpy/ai/models/embedding/llamacpp.py deleted file mode 100644 index 289834f3..00000000 --- a/nextpy/ai/models/embedding/llamacpp.py +++ /dev/null @@ -1,127 +0,0 @@ -# This file has been modified by the Nextpy Team in 2023 using AI tools and automation scripts. -# We have rigorously tested these modifications to ensure reliability and performance. Based on successful test results, we are confident in the quality and stability of these changes. 
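The Jina wrapper above rides on docarray: texts are wrapped in `Document` objects, batched into a `DocumentArray`, and posted to the deployed model's `/encode` endpoint. A compressed sketch of that round trip using the same calls the deleted class makes (the gRPC host below is a placeholder; a deployed Jina inference model is assumed):

.. code-block:: python

    from docarray import Document, DocumentArray
    from jina import Client

    client = Client(host="grpc://your-model.inference.jina.ai")  # placeholder host

    docs = DocumentArray([Document(text=t) for t in ["first text", "second text"]])
    result = client.post(on="/encode", inputs=docs)
    vectors = [list(map(float, e)) for e in result.embeddings]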
- -"""Wrapper around llama.cpp embedding models.""" -from typing import Any, Dict, List, Optional - -from pydantic import BaseModel, Extra, Field, root_validator - -from nextpy.ai.models.embedding.base import Embeddings - - -class LlamaCppEmbeddings(BaseModel, Embeddings): - """Wrapper around llama.cpp embedding models. - - To use, you should have the llama-cpp-python library installed, and provide the - path to the Llama model as a named parameter to the constructor. - Check out: https://github.com/abetlen/llama-cpp-python - - Example: - .. code-block:: python - - from nextpy.ai.embedding import LlamaCppEmbeddings - llama = LlamaCppEmbeddings(model_path="/path/to/model.bin") - """ - - client: Any #: :meta private: - model_path: str - - n_ctx: int = Field(512, alias="n_ctx") - """Token context window.""" - - n_parts: int = Field(-1, alias="n_parts") - """Number of parts to split the model into. - If -1, the number of parts is automatically determined.""" - - seed: int = Field(-1, alias="seed") - """Seed. If -1, a random seed is used.""" - - f16_kv: bool = Field(False, alias="f16_kv") - """Use half-precision for key/value cache.""" - - logits_all: bool = Field(False, alias="logits_all") - """Return logits for all tokens, not just the last token.""" - - vocab_only: bool = Field(False, alias="vocab_only") - """Only load the vocabulary, no weights.""" - - use_mlock: bool = Field(False, alias="use_mlock") - """Force system to keep model in RAM.""" - - n_threads: Optional[int] = Field(None, alias="n_threads") - """Number of threads to use. If None, the number - of threads is automatically determined.""" - - n_batch: Optional[int] = Field(8, alias="n_batch") - """Number of tokens to process in parallel. - Should be a number between 1 and n_ctx.""" - - n_gpu_layers: Optional[int] = Field(None, alias="n_gpu_layers") - """Number of layers to be loaded into gpu memory. Default None.""" - - class Config: - """Configuration for this pydantic object.""" - - extra = Extra.forbid - - @root_validator() - def validate_environment(cls, values: Dict) -> Dict: - """Validate that llama-cpp-python library is installed.""" - model_path = values["model_path"] - model_param_names = [ - "n_ctx", - "n_parts", - "seed", - "f16_kv", - "logits_all", - "vocab_only", - "use_mlock", - "n_threads", - "n_batch", - ] - model_params = {k: values[k] for k in model_param_names} - # For backwards compatibility, only include if non-null. - if values["n_gpu_layers"] is not None: - model_params["n_gpu_layers"] = values["n_gpu_layers"] - - try: - from llama_cpp import Llama - - values["client"] = Llama(model_path, embedding=True, **model_params) - except ImportError: - raise ModuleNotFoundError( - "Could not import llama-cpp-python library. " - "Please install the llama-cpp-python library to " - "use this embedding model: pip install llama-cpp-python" - ) - except Exception as e: - raise ValueError( - f"Could not load Llama model from path: {model_path}. " - f"Received error {e}" - ) - - return values - - def embed_documents(self, texts: List[str]) -> List[List[float]]: - """Embed a list of documents using the Llama model. - - Args: - texts: The list of texts to embed. - - Returns: - List of embeddings, one for each text. - """ - embeddings = [self.client.embed(text) for text in texts] - return [list(map(float, e)) for e in embeddings] - - def embed_query(self, text: str) -> List[float]: - """Embed a query using the Llama model. - - Args: - text: The text to embed. - - Returns: - Embeddings for the text. 
- """ - embedding = self.client.embed(text) - return list(map(float, embedding)) diff --git a/nextpy/ai/models/embedding/minimax.py b/nextpy/ai/models/embedding/minimax.py deleted file mode 100644 index e9a7d7a2..00000000 --- a/nextpy/ai/models/embedding/minimax.py +++ /dev/null @@ -1,164 +0,0 @@ -# This file has been modified by the Nextpy Team in 2023 using AI tools and automation scripts. -# We have rigorously tested these modifications to ensure reliability and performance. Based on successful test results, we are confident in the quality and stability of these changes. - -from __future__ import annotations - -import logging -from typing import Any, Callable, Dict, List, Optional - -import requests -from pydantic import BaseModel, Extra, root_validator -from tenacity import ( - before_sleep_log, - retry, - stop_after_attempt, - wait_exponential, -) - -from nextpy.ai.models.embedding.base import Embeddings -from nextpy.utils.data_ops import get_from_dict_or_env - -logger = logging.getLogger(__name__) - - -def _create_retry_decorator() -> Callable[[Any], Any]: - """Returns a tenacity retry decorator.""" - multiplier = 1 - min_seconds = 1 - max_seconds = 4 - max_retries = 6 - - return retry( - reraise=True, - stop=stop_after_attempt(max_retries), - wait=wait_exponential(multiplier=multiplier, min=min_seconds, max=max_seconds), - before_sleep=before_sleep_log(logger, logging.WARNING), - ) - - -def embed_with_retry(embeddings: MiniMaxEmbeddings, *args: Any, **kwargs: Any) -> Any: - """Use tenacity to retry the completion call.""" - retry_decorator = _create_retry_decorator() - - @retry_decorator - def _embed_with_retry(*args: Any, **kwargs: Any) -> Any: - return embeddings.embed(*args, **kwargs) - - return _embed_with_retry(*args, **kwargs) - - -class MiniMaxEmbeddings(BaseModel, Embeddings): - """Wrapper around MiniMax's embedding inference service. - - To use, you should have the environment variable ``MINIMAX_GROUP_ID`` and - ``MINIMAX_API_KEY`` set with your API token, or pass it as a named parameter to - the constructor. - - Example: - .. code-block:: python - - from nextpy.ai.models.embeddings import MiniMaxEmbeddings - embeddings = MiniMaxEmbeddings() - - query_text = "This is a test query." - query_result = embeddings.embed_query(query_text) - - document_text = "This is a test document." 
- document_result = embeddings.embed_documents([document_text]) - - """ - - endpoint_url: str = "https://api.minimax.chat/v1/embeddings" - """Endpoint URL to use.""" - model: str = "embo-01" - """Embeddings model name to use.""" - embed_type_db: str = "db" - """For embed_documents""" - embed_type_query: str = "query" - """For embed_query""" - - minimax_group_id: Optional[str] = None - """Group ID for MiniMax API.""" - minimax_api_key: Optional[str] = None - """API Key for MiniMax API.""" - - class Config: - """Configuration for this pydantic object.""" - - extra = Extra.forbid - - @root_validator() - def validate_environment(cls, values: Dict) -> Dict: - """Validate that group id and api key exists in environment.""" - minimax_group_id = get_from_dict_or_env( - values, "minimax_group_id", "MINIMAX_GROUP_ID" - ) - minimax_api_key = get_from_dict_or_env( - values, "minimax_api_key", "MINIMAX_API_KEY" - ) - values["minimax_group_id"] = minimax_group_id - values["minimax_api_key"] = minimax_api_key - return values - - def embed( - self, - texts: List[str], - embed_type: str, - ) -> List[List[float]]: - payload = { - "model": self.model, - "type": embed_type, - "texts": texts, - } - - # HTTP headers for authorization - headers = { - "Authorization": f"Bearer {self.minimax_api_key}", - "Content-Type": "application/json", - } - - params = { - "GroupId": self.minimax_group_id, - } - - # send request - response = requests.post( - self.endpoint_url, params=params, headers=headers, json=payload - ) - parsed_response = response.json() - - # check for errors - if parsed_response["base_resp"]["status_code"] != 0: - raise ValueError( - f"MiniMax API returned an error: {parsed_response['base_resp']}" - ) - - embeddings = parsed_response["vectors"] - - return embeddings - - def embed_documents(self, texts: List[str]) -> List[List[float]]: - """Embed documents using a MiniMax embedding endpoint. - - Args: - texts: The list of texts to embed. - - Returns: - List of embeddings, one for each text. - """ - embeddings = embed_with_retry(self, texts=texts, embed_type=self.embed_type_db) - return embeddings - - def embed_query(self, text: str) -> List[float]: - """Embed a query using a MiniMax embedding endpoint. - - Args: - text: The text to embed. - - Returns: - Embeddings for the text. - """ - embeddings = embed_with_retry( - self, texts=[text], embed_type=self.embed_type_query - ) - return embeddings[0] diff --git a/nextpy/ai/models/embedding/modelscopehub.py b/nextpy/ai/models/embedding/modelscopehub.py deleted file mode 100644 index a676c2f8..00000000 --- a/nextpy/ai/models/embedding/modelscopehub.py +++ /dev/null @@ -1,75 +0,0 @@ -# This file has been modified by the Nextpy Team in 2023 using AI tools and automation scripts. -# We have rigorously tested these modifications to ensure reliability and performance. Based on successful test results, we are confident in the quality and stability of these changes. - -"""Wrapper around ModelScopeHub embedding models.""" -from typing import Any, List - -from pydantic import BaseModel, Extra - -from nextpy.ai.models.embedding.base import Embeddings - - -class ModelScopeEmbeddings(BaseModel, Embeddings): - """Wrapper around modelscope_hub embedding models. - - To use, you should have the ``modelscope`` python package installed. - - Example: - .. 
code-block:: python - - from nextpy.ai.models.embeddings import ModelScopeEmbeddings - model_id = "damo/nlp_corom_sentence-embedding_english-base" - embed = ModelScopeEmbeddings(model_id=model_id) - """ - - embed: Any - model_id: str = "damo/nlp_corom_sentence-embedding_english-base" - """Model name to use.""" - - def __init__(self, **kwargs: Any): - """Initialize the modelscope pipeline.""" - super().__init__(**kwargs) - try: - from modelscope.pipelines import pipeline - from modelscope.utils.constant import Tasks - - self.embed = pipeline(Tasks.sentence_embedding, model=self.model_id) - - except ImportError as e: - raise ImportError( - "Could not import the modelscope python package. " - "Please install it with `pip install modelscope`." - ) from e - - class Config: - """Configuration for this pydantic object.""" - - extra = Extra.forbid - - def embed_documents(self, texts: List[str]) -> List[List[float]]: - """Compute doc embeddings using a modelscope embedding model. - - Args: - texts: The list of texts to embed. - - Returns: - List of embeddings, one for each text. - """ - texts = list(map(lambda x: x.replace("\n", " "), texts)) - inputs = {"source_sentence": texts} - embeddings = self.embed(input=inputs)["text_embedding"] - return embeddings.tolist() - - def embed_query(self, text: str) -> List[float]: - """Compute query embeddings using a modelscope embedding model. - - Args: - text: The text to embed. - - Returns: - Embeddings for the text. - """ - text = text.replace("\n", " ") - inputs = {"source_sentence": [text]} - embedding = self.embed(input=inputs)["text_embedding"][0] - return embedding.tolist() diff --git a/nextpy/ai/models/embedding/mosaicml.py b/nextpy/ai/models/embedding/mosaicml.py deleted file mode 100644 index b7882992..00000000 --- a/nextpy/ai/models/embedding/mosaicml.py +++ /dev/null @@ -1,169 +0,0 @@ -# This file has been modified by the Nextpy Team in 2023 using AI tools and automation scripts. -# We have rigorously tested these modifications to ensure reliability and performance. Based on successful test results, we are confident in the quality and stability of these changes. - -"""Wrapper around MosaicML APIs.""" -from __future__ import annotations - -from typing import Any, Dict, List, Mapping, Optional, Tuple - -import requests -from pydantic import BaseModel, Extra, root_validator - -from nextpy.ai.models.embedding.base import Embeddings -from nextpy.utils.data_ops import get_from_dict_or_env - - -class MosaicMLInstructorEmbeddings(BaseModel, Embeddings): - """Wrapper around MosaicML's embedding inference service. - - To use, you should have the - environment variable ``MOSAICML_API_TOKEN`` set with your API token, or pass - it as a named parameter to the constructor. - - Example: - ..
code-block:: python - - from nextpy.ai.endpoints import MosaicMLInstructorEmbeddings - endpoint_url = ( - "https://models.hosted-on.mosaicml.hosting/instructor-large/v1/predict" - ) - mosaic_llm = MosaicMLInstructorEmbeddings( - endpoint_url=endpoint_url, - mosaicml_api_token="my-api-key" - ) - """ - - endpoint_url: str = ( - "https://models.hosted-on.mosaicml.hosting/instructor-xl/v1/predict" - ) - """Endpoint URL to use.""" - embed_instruction: str = "Represent the document for retrieval: " - """Instruction used to embed documents.""" - query_instruction: str = ( - "Represent the question for retrieving supporting documents: " - ) - """Instruction used to embed the query.""" - retry_sleep: float = 1.0 - """How long to try sleeping for if a rate limit is encountered""" - - mosaicml_api_token: Optional[str] = None - - class Config: - """Configuration for this pydantic object.""" - - extra = Extra.forbid - - @root_validator() - def validate_environment(cls, values: Dict) -> Dict: - """Validate that api key and python package exists in environment.""" - mosaicml_api_token = get_from_dict_or_env( - values, "mosaicml_api_token", "MOSAICML_API_TOKEN" - ) - values["mosaicml_api_token"] = mosaicml_api_token - return values - - @property - def _identifying_params(self) -> Mapping[str, Any]: - """Get the identifying parameters.""" - return {"endpoint_url": self.endpoint_url} - - def _embed( - self, input: List[Tuple[str, str]], is_retry: bool = False - ) -> List[List[float]]: - payload = {"input_strings": input} - - # HTTP headers for authorization - headers = { - "Authorization": f"{self.mosaicml_api_token}", - "Content-Type": "application/json", - } - - # send request - try: - response = requests.post(self.endpoint_url, headers=headers, json=payload) - except requests.exceptions.RequestException as e: - raise ValueError(f"Error raised by inference endpoint: {e}") - - try: - parsed_response = response.json() - - if "error" in parsed_response: - # if we get rate limited, try sleeping for 1 second - if ( - not is_retry - and "rate limit exceeded" in parsed_response["error"].lower() - ): - import time - - time.sleep(self.retry_sleep) - - return self._embed(input, is_retry=True) - - raise ValueError( - f"Error raised by inference API: {parsed_response['error']}" - ) - - # The inference API has changed a couple of times, so we add some handling - # to be robust to multiple response formats. 
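- # Response shapes handled by the branches below (inferred from this code, - # not from MosaicML docs): {"data": [...]}, {"output": [...]}, a bare list - # of vectors, or a list of {"output": [...]} dicts; anything else raises - # ValueError.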
- if isinstance(parsed_response, dict): - if "data" in parsed_response: - output_item = parsed_response["data"] - elif "output" in parsed_response: - output_item = parsed_response["output"] - else: - raise ValueError( - f"No key data or output in response: {parsed_response}" - ) - - if isinstance(output_item, list) and isinstance(output_item[0], list): - embeddings = output_item - else: - embeddings = [output_item] - elif isinstance(parsed_response, list): - first_item = parsed_response[0] - if isinstance(first_item, list): - embeddings = parsed_response - elif isinstance(first_item, dict): - if "output" in first_item: - embeddings = [item["output"] for item in parsed_response] - else: - raise ValueError( - f"No key data or output in response: {parsed_response}" - ) - else: - raise ValueError(f"Unexpected response format: {parsed_response}") - else: - raise ValueError(f"Unexpected response type: {parsed_response}") - - except requests.exceptions.JSONDecodeError as e: - raise ValueError( - f"Error raised by inference API: {e}.\nResponse: {response.text}" - ) - - return embeddings - - def embed_documents(self, texts: List[str]) -> List[List[float]]: - """Embed documents using a MosaicML deployed instructor embedding model. - - Args: - texts: The list of texts to embed. - - Returns: - List of embeddings, one for each text. - """ - instruction_pairs = [(self.embed_instruction, text) for text in texts] - embeddings = self._embed(instruction_pairs) - return embeddings - - def embed_query(self, text: str) -> List[float]: - """Embed a query using a MosaicML deployed instructor embedding model. - - Args: - text: The text to embed. - - Returns: - Embeddings for the text. - """ - instruction_pair = (self.query_instruction, text) - embedding = self._embed([instruction_pair])[0] - return embedding diff --git a/nextpy/ai/models/embedding/openai.py b/nextpy/ai/models/embedding/openai.py deleted file mode 100644 index 9db23568..00000000 --- a/nextpy/ai/models/embedding/openai.py +++ /dev/null @@ -1,311 +0,0 @@ -# This file has been modified by the Nextpy Team in 2023 using AI tools and automation scripts. -# We have rigorously tested these modifications to ensure reliability and performance. Based on successful test results, we are confident in the quality and stability of these changes. - -from __future__ import annotations - -import logging -from typing import ( - Any, - Callable, - Dict, - List, - Literal, - Optional, - Sequence, - Set, - Tuple, - Union, -) - -import numpy as np -from pydantic import BaseModel, Extra, root_validator -from tenacity import ( - before_sleep_log, - retry, - retry_if_exception_type, - stop_after_attempt, - wait_exponential, -) - -from nextpy.ai.models.embedding.base import Embeddings -from nextpy.utils.data_ops import get_from_dict_or_env - -logger = logging.getLogger(__name__) - - -class OpenAIEmbeddings(BaseModel, Embeddings): - """Wrapper around OpenAI embedding models. - - To use, you should have the ``openai`` python package installed, and the - environment variable ``OPENAI_API_KEY`` set with your API key or pass it - as a named parameter to the constructor. - - Example: - .. code-block:: python - - from nextpy.ai.models.embeddings import OpenAIEmbeddings - openai = OpenAIEmbeddings(openai_api_key="my-api-key") - - In order to use the library with Microsoft Azure endpoints, you need to set - the OPENAI_API_TYPE, OPENAI_API_BASE, OPENAI_API_KEY and OPENAI_API_VERSION. 
- The OPENAI_API_TYPE must be set to 'azure' and the others correspond to - the properties of your endpoint. - In addition, the deployment name must be passed as the model parameter. - - Example: - .. code-block:: python - - import os - os.environ["OPENAI_API_TYPE"] = "azure" - os.environ["OPENAI_API_BASE"] = "https://<your-endpoint>.openai.azure.com/" - os.environ["OPENAI_API_KEY"] = "your-azure-api-key" - os.environ["OPENAI_API_VERSION"] = "2022-12-01" - - embeddings = OpenAIEmbeddings(deployment="your-embeddings-deployment-name") - """ - - client: Any #: :meta private: - model: str = "text-embedding-ada-002" - deployment: str = model - openai_api_version: Optional[str] = None - openai_api_base: Optional[str] = None - openai_api_type: Optional[str] = None - openai_proxy: Optional[str] = None - embedding_ctx_length: int = 8191 - openai_api_key: Optional[str] = None - openai_organization: Optional[str] = None - allowed_special: Union[Literal["all"], Set[str]] = set() - disallowed_special: Union[Literal["all"], Set[str], Sequence[str]] = "all" - chunk_size: int = 1000 - """Maximum number of texts to embed in each batch""" - max_retries: int = 6 - """Maximum number of retries to make when generating.""" - request_timeout: Optional[Union[float, Tuple[float, float]]] = None - headers: Any = None - - class Config: - """Configuration for this pydantic object.""" - - extra = Extra.forbid - - @root_validator() - def validate_environment(cls, values: Dict) -> Dict: - """Validate that api key and python package exists in environment.""" - values["openai_api_key"] = get_from_dict_or_env( - values, "openai_api_key", "OPENAI_API_KEY" - ) - values["openai_api_base"] = get_from_dict_or_env( - values, - "openai_api_base", - "OPENAI_API_BASE", - default="", - ) - values["openai_api_type"] = get_from_dict_or_env( - values, - "openai_api_type", - "OPENAI_API_TYPE", - default="", - ) - values["openai_proxy"] = get_from_dict_or_env( - values, - "openai_proxy", - "OPENAI_PROXY", - default="", - ) - if values["openai_api_type"] in ("azure", "azure_ad", "azuread"): - default_api_version = "2022-12-01" - else: - default_api_version = "" - values["openai_api_version"] = get_from_dict_or_env( - values, - "openai_api_version", - "OPENAI_API_VERSION", - default=default_api_version, - ) - values["openai_organization"] = get_from_dict_or_env( - values, - "openai_organization", - "OPENAI_ORGANIZATION", - default="", - ) - try: - import openai - - values["client"] = openai.Embedding - except ImportError: - raise ImportError( - "Could not import openai python package. " - "Please install it with `pip install openai`." - ) - return values - - @property - def _invocation_params(self) -> Dict: - openai_args = { - "engine": self.deployment, - "request_timeout": self.request_timeout, - "headers": self.headers, - "api_key": self.openai_api_key, - "organization": self.openai_organization, - "api_base": self.openai_api_base, - "api_type": self.openai_api_type, - "api_version": self.openai_api_version, - } - if self.openai_proxy: - import openai - - openai.proxy = { - "http": self.openai_proxy, - "https": self.openai_proxy, - } # type: ignore[assignment] # noqa: E501 - return openai_args - - # please refer to - # https://github.com/openai/openai-cookbook/blob/main/examples/Embedding_long_inputs.ipynb - def _get_len_safe_embeddings( - self, texts: List[str], *, engine: str, chunk_size: Optional[int] = None - ) -> List[List[float]]: - embeddings: List[List[float]] = [[] for _ in range(len(texts))] - try: - import tiktoken - except ImportError: - raise ImportError( - "Could not import tiktoken python package. " - "This is needed in order to use OpenAIEmbeddings. " - "Please install it with `pip install tiktoken`." - ) - - tokens = [] - indices = [] - encoding = tiktoken.model.encoding_for_model(self.model) - for i, text in enumerate(texts): - if self.model.endswith("001"): - # See: https://github.com/openai/openai-python/issues/418#issuecomment-1525939500 - # replace newlines, which can negatively affect performance.
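- # After the newline cleanup, each text is tokenized and split into windows - # of at most embedding_ctx_length tokens; `indices` records which source - # text each window came from so the per-window embeddings can later be - # recombined as a token-weighted average.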
- text = text.replace("\n", " ") - token = encoding.encode( - text, - allowed_special=self.allowed_special, - disallowed_special=self.disallowed_special, - ) - for j in range(0, len(token), self.embedding_ctx_length): - tokens += [token[j : j + self.embedding_ctx_length]] - indices += [i] - - batched_embeddings = [] - _chunk_size = chunk_size or self.chunk_size - for i in range(0, len(tokens), _chunk_size): - response = embed_with_retry( - self, - input=tokens[i : i + _chunk_size], - **self._invocation_params, - ) - batched_embeddings += [r["embedding"] for r in response["data"]] - - results: List[List[List[float]]] = [[] for _ in range(len(texts))] - num_tokens_in_batch: List[List[int]] = [[] for _ in range(len(texts))] - for i in range(len(indices)): - results[indices[i]].append(batched_embeddings[i]) - num_tokens_in_batch[indices[i]].append(len(tokens[i])) - - for i in range(len(texts)): - _result = results[i] - if len(_result) == 0: - average = embed_with_retry(self, input="", **self._invocation_params,)[ - "data" - ][0]["embedding"] - else: - average = np.average(_result, axis=0, weights=num_tokens_in_batch[i]) - embeddings[i] = (average / np.linalg.norm(average)).tolist() - - return embeddings - - def _embedding_func(self, text: str, *, engine: str) -> List[float]: - """Call out to OpenAI's embedding endpoint.""" - # handle large input text - if len(text) > self.embedding_ctx_length: - return self._get_len_safe_embeddings([text], engine=engine)[0] - else: - if self.model.endswith("001"): - # See: https://github.com/openai/openai-python/issues/418#issuecomment-1525939500 - # replace newlines, which can negatively affect performance. - text = text.replace("\n", " ") - return embed_with_retry(self, input=[text], **self._invocation_params,)[ - "data" - ][0]["embedding"] - - def embed_documents( - self, texts: List[str], chunk_size: Optional[int] = 0 - ) -> List[List[float]]: - """Call out to OpenAI's embedding endpoint for embedding search docs. - - Args: - texts: The list of texts to embed. - chunk_size: The chunk size of embeddings. If None, will use the chunk size - specified by the class. - - Returns: - List of embeddings, one for each text. - """ - # NOTE: to keep things simple, we assume the list may contain texts longer - # than the maximum context and use length-safe embedding function. - return self._get_len_safe_embeddings(texts, engine=self.deployment) - - def embed_query(self, text: str) -> List[float]: - """Call out to OpenAI's embedding endpoint for embedding query text. - - Args: - text: The text to embed. - - Returns: - Embedding for the text. 
- """ - embedding = self._embedding_func(text, engine=self.deployment) - return embedding - - -def _create_retry_decorator(embeddings: OpenAIEmbeddings) -> Callable[[Any], Any]: - import openai - - min_seconds = 4 - max_seconds = 10 - # Wait 2^x * 1 second between each retry starting with - # 4 seconds, then up to 10 seconds, then 10 seconds afterwards - return retry( - reraise=True, - stop=stop_after_attempt(embeddings.max_retries), - wait=wait_exponential(multiplier=1, min=min_seconds, max=max_seconds), - retry=( - retry_if_exception_type(openai.error.Timeout) - | retry_if_exception_type(openai.error.APIError) - | retry_if_exception_type(openai.error.APIConnectionError) - | retry_if_exception_type(openai.error.RateLimitError) - | retry_if_exception_type(openai.error.ServiceUnavailableError) - ), - before_sleep=before_sleep_log(logger, logging.WARNING), - ) - - -def embed_with_retry(embeddings: OpenAIEmbeddings, **kwargs: Any) -> Any: - """Use tenacity to retry the embedding call.""" - retry_decorator = _create_retry_decorator(embeddings) - - @retry_decorator - def _embed_with_retry(**kwargs: Any) -> Any: - return embeddings.client.create(**kwargs) - - return _embed_with_retry(**kwargs) diff --git a/nextpy/ai/models/embedding/tensorflowhub.py b/nextpy/ai/models/embedding/tensorflowhub.py deleted file mode 100644 index 3ae5665f..00000000 --- a/nextpy/ai/models/embedding/tensorflowhub.py +++ /dev/null @@ -1,80 +0,0 @@ -# This file has been modified by the Nextpy Team in 2023 using AI tools and automation scripts. -# We have rigorously tested these modifications to ensure reliability and performance. Based on successful test results, we are confident in the quality and stability of these changes. - -"""Wrapper around TensorflowHub embedding models.""" -from typing import Any, List - -from pydantic import BaseModel, Extra - -from nextpy.ai.models.embedding.base import Embeddings - -DEFAULT_MODEL_URL = "https://tfhub.dev/google/universal-sentence-encoder-multilingual/3" - - -class TensorflowHubEmbeddings(BaseModel, Embeddings): - """Wrapper around tensorflow_hub embedding models. - - To use, you should have the ``tensorflow_text`` python package installed. - - Example: - .. code-block:: python - - from nextpy.ai.models.embeddings import TensorflowHubEmbeddings - url = "https://tfhub.dev/google/universal-sentence-encoder-multilingual/3" - tf = TensorflowHubEmbeddings(model_url=url) - """ - - embed: Any #: :meta private: - model_url: str = DEFAULT_MODEL_URL - """Model name to use.""" - - def __init__(self, **kwargs: Any): - """Initialize the tensorflow_hub and tensorflow_text.""" - super().__init__(**kwargs) - try: - import tensorflow_hub - except ImportError: - raise ImportError( - "Could not import tensorflow-hub python package. " - "Please install it with `pip install tensorflow-hub``." - ) - try: - import tensorflow_text # noqa - except ImportError: - raise ImportError( - "Could not import tensorflow_text python package. " - "Please install it with `pip install tensorflow_text``." - ) - - self.embed = tensorflow_hub.load(self.model_url) - - class Config: - """Configuration for this pydantic object.""" - - extra = Extra.forbid - - def embed_documents(self, texts: List[str]) -> List[List[float]]: - """Compute doc embeddings using a TensorflowHub embedding model. - - Args: - texts: The list of texts to embed. - - Returns: - List of embeddings, one for each text. 
- """ - texts = list(map(lambda x: x.replace("\n", " "), texts)) - embeddings = self.embed(texts).numpy() - return embeddings.tolist() - - def embed_query(self, text: str) -> List[float]: - """Compute query embeddings using a TensorflowHub embedding model. - - Args: - text: The text to embed. - - Returns: - Embeddings for the text. - """ - text = text.replace("\n", " ") - embedding = self.embed([text]).numpy()[0] - return embedding.tolist() diff --git a/nextpy/ai/models/image/Readme.md b/nextpy/ai/models/image/Readme.md deleted file mode 100644 index c678ac69..00000000 --- a/nextpy/ai/models/image/Readme.md +++ /dev/null @@ -1,63 +0,0 @@ -# OpenAI DALL-E Image Generation - -This is a simple Python interface for generating images using OpenAI's DALL-E model. - -## Prerequisites - -Ensure you have the `openai` Python library installed. If not, you can install it using pip: - -```bash -pip install openai -``` -# Usage Dalle - -```python - -# Define your API key and any other settings -api_key = 'your-api-key-here' -image_model = 'your-image-model-here' # Optional -number_of_results = 5 # Optional, default is 1 - -# Create an instance of the OpenAiDalle class -dalle = OpenAiDalle(api_key, image_model, number_of_results) - -# Define a prompt and image size -prompt = 'A beautiful sunset over the mountains' -size = 512 # Optional, default is 512 - -# Generate an image -response = dalle.generate_image(prompt, size) - -# Print the response -print(response) -``` -# Usage - -```python - -# Define your API key and any other settings -api_key = 'your-api-key-here' -image_model = 'your-image-model-here' # Optional -number_of_results = 5 # Optional, default is 1 -client_id = 'your-client-id-here' # Optional -client_version = 'your-client-version-here' # Optional - -# Create an instance of the StableDiffusion class -image_llm = StableDiffusion(api_key, image_model, number_of_results, client_id, client_version) - -# Define a prompt and image size -prompt = 'A beautiful sunset over the mountains' -size = 512 # Optional, default is 512 - -# Define other settings -style_preset = 'enhance' # Optional, default is 'enhance' -cfg_scale = 7 # Optional, default is 7 -steps = 50 # Optional, default is 50 -seed = 0 # Optional, default is 0 - -# Generate an image -response = image_llm.generate_image(prompt, size, style_preset, cfg_scale, steps, seed) - -# Print the response -print(response) -``` \ No newline at end of file diff --git a/nextpy/ai/models/image/_base.py b/nextpy/ai/models/image/_base.py deleted file mode 100644 index 9a92ac96..00000000 --- a/nextpy/ai/models/image/_base.py +++ /dev/null @@ -1,14 +0,0 @@ -# This file has been modified by the Nextpy Team in 2023 using AI tools and automation scripts. -# We have rigorously tested these modifications to ensure reliability and performance. Based on successful test results, we are confident in the quality and stability of these changes. - -from abc import ABC, abstractmethod - - -class BaseImageModel(ABC): - @abstractmethod - def get_image_model(self): - pass - - @abstractmethod - def generate_image(self, prompt: str, size: int = 512, num: int = 2): - pass diff --git a/nextpy/ai/models/image/openai_dalle.py b/nextpy/ai/models/image/openai_dalle.py deleted file mode 100644 index 6abed644..00000000 --- a/nextpy/ai/models/image/openai_dalle.py +++ /dev/null @@ -1,44 +0,0 @@ -# This file has been modified by the Nextpy Team in 2023 using AI tools and automation scripts. -# We have rigorously tested these modifications to ensure reliability and performance. 
Based on successful test results, we are confident in the quality and stability of these changes. - -import os - -import openai - -from ._base import BaseImageModel - - -class OpenAiDalle(BaseImageModel): - def __init__(self, api_key, image_model=None, number_of_results=1): - """Args: - api_key (str): The OpenAI API key. - image_model (str): The image model. - number_of_results (int): The number of results. - """ - self.number_of_results = number_of_results - self.api_key = api_key - self.image_model = image_model - openai.api_key = api_key - openai.api_base = os.getenv("OPENAI_API_BASE", "https://api.openai.com/v1") - - def get_image_model(self): - """Returns: - str: The image model. - """ - return self.image_model - - def generate_image(self, prompt: str, size: int = 512): - """Call the OpenAI image API. - - Args: - prompt (str): The prompt. - size (int): The image width and height in pixels. - - Returns: - dict: The response. - """ - response = openai.Image.create( - prompt=prompt, n=self.number_of_results, size=f"{size}x{size}" - ) - return response diff --git a/nextpy/ai/models/image/stable_diffusion.py b/nextpy/ai/models/image/stable_diffusion.py deleted file mode 100644 index c012da33..00000000 --- a/nextpy/ai/models/image/stable_diffusion.py +++ /dev/null @@ -1,101 +0,0 @@ -# This file has been modified by the Nextpy Team in 2023 using AI tools and automation scripts. -# We have rigorously tested these modifications to ensure reliability and performance. Based on successful test results, we are confident in the quality and stability of these changes. - -import base64 -import os - -import requests - -from ._base import BaseImageModel - - -class StableDiffusion(BaseImageModel): - def __init__( - self, - api_key, - image_model=None, - number_of_results=1, - client_id=None, - client_version=None, - ): - """Args: - api_key (str): The Stability API key. - image_model (str): The image model. - number_of_results (int): The number of results. - client_id (str): Client ID. - client_version (str): Client version. - """ - self.api_key = api_key - self.image_model = image_model or "stable-diffusion-xl-beta-v2-2-2" - self.number_of_results = number_of_results - self.api_host = os.getenv("API_HOST", "https://api.stability.ai") - self.url = f"{self.api_host}/v1/generation/{self.image_model}/text-to-image" - self.client_id = client_id - self.client_version = client_version - - def get_image_model(self): - """Returns: - str: The image model. - """ - return self.image_model - - def generate_image( - self, - prompt: str, - size: int = 512, - style_preset="enhance", - cfg_scale=7, - steps=50, - seed=0, - ): - """Call the Stability image API. - - Args: - prompt (str): The prompt. - size (int): The image width and height in pixels. - style_preset (str): The style preset. - cfg_scale (int): The config scale. - steps (int): The number of diffusion steps. - seed (int): The seed for random noise. - - Returns: - dict: The response.
- """ - body = { - "width": size, - "height": size, - "steps": steps, - "seed": seed, - "cfg_scale": cfg_scale, - "samples": self.number_of_results, - "style_preset": style_preset, - "text_prompts": [{"text": prompt, "weight": 1}], - } - headers = { - "Accept": "application/json", - "Content-Type": "application/json", - "Authorization": f"Bearer {self.api_key}", - } - - # Add client ID and version headers if provided - if self.client_id is not None: - headers["Stability-Client-ID"] = self.client_id - if self.client_version is not None: - headers["Stability-Client-Version"] = self.client_version - - response = requests.post( - self.url, - headers=headers, - json=body, - ) - - if response.status_code != 200: - raise Exception("Non-200 response: " + str(response.text)) - - data = response.json() - - for _i, image in enumerate(data["artifacts"]): - with open(f"./out/txt2img_{image['seed']}.png", "wb") as f: - f.write(base64.b64decode(image["base64"])) - - return data diff --git a/nextpy/ai/models/llm/__init__.py b/nextpy/ai/models/llm/__init__.py deleted file mode 100644 index 658f5989..00000000 --- a/nextpy/ai/models/llm/__init__.py +++ /dev/null @@ -1,4 +0,0 @@ -# This file has been modified by the Nextpy Team in 2023 using AI tools and automation scripts. -# We have rigorously tested these modifications to ensure reliability and performance. Based on successful test results, we are confident in the quality and stability of these changes. - -from .llm_client import Azure, OpenAI diff --git a/nextpy/ai/models/llm/llm_client.py b/nextpy/ai/models/llm/llm_client.py deleted file mode 100644 index a73ab524..00000000 --- a/nextpy/ai/models/llm/llm_client.py +++ /dev/null @@ -1,59 +0,0 @@ -# This file has been modified by the Nextpy Team in 2023 using AI tools and automation scripts. -# We have rigorously tested these modifications to ensure reliability and performance. Based on successful test results, we are confident in the quality and stability of these changes. 
- -import os -from abc import ABC, abstractmethod - -from litellm import completion - - -class LLMClient(ABC): - def __init__(self, api_key): - self.api_key = api_key - self.chat = self.Chat(self) - - class Chat(ABC): - def __init__(self, parent): - self.api_key = parent.api_key - self.completions = self.Completions(self) - - class Completions(ABC): - def __init__(self, parent): - self.api_key = parent.api_key - - @abstractmethod - def create(self, model, messages): - pass - - -class OpenAI(LLMClient): - class Chat(LLMClient.Chat): - class Completions(LLMClient.Chat.Completions): - def create(self, model, messages): - os.environ["OPENAI_API_KEY"] = self.api_key - response = completion(model=model, messages=messages) - return response - - -class Azure(LLMClient): - class Chat(LLMClient.Chat): - class Completions(LLMClient.Chat.Completions): - def create(self, model, messages): - os.environ["AZURE_API_KEY"] = self.api_key - os.environ["AZURE_API_BASE"] = "your-azure-api-base" - os.environ["AZURE_API_VERSION"] = "your-azure-api-version" - response = completion(model=model, messages=messages) - return response - - -# Usage for OpenAI -# openai_client = OpenAI(api_key="sk-") # Replace with your API key -# openai_response = openai_client.chat.completions.create( -# model="gpt-3.5-turbo", -# messages=[ -# {"role": "system", "content": "You are a helpful assistant."}, -# {"role": "user", "content": "Hello!"} -# ] -# ) -# # Print the OpenAI response -# print(openai_response["choices"][0]["message"]) diff --git "a/nextpy/ai/prompt_on_the_outside.\360\237\226\212\357\270\217" "b/nextpy/ai/prompt_on_the_outside.\360\237\226\212\357\270\217" deleted file mode 100644 index 615b0f0a..00000000 --- "a/nextpy/ai/prompt_on_the_outside.\360\237\226\212\357\270\217" +++ /dev/null @@ -1,13 +0,0 @@ -{{#system~}} -You are a helpful assistant -{{~/system}} - -{{~#geneach 'conversation' stop=False}} -{{#user~}} -{{set 'this.user_text' (await 'user_text') hidden=False}} -{{~/user}} - -{{#assistant~}} -{{gen 'this.ai_text' temperature=0 max_tokens=300}} -{{~/assistant}} -{{~/geneach}} diff --git a/nextpy/ai/rag/document_loaders/airtable/requirements.txt b/nextpy/ai/rag/document_loaders/airtable/requirements.txt deleted file mode 100644 index 83c39582..00000000 --- a/nextpy/ai/rag/document_loaders/airtable/requirements.txt +++ /dev/null @@ -1 +0,0 @@ -pyairtable \ No newline at end of file diff --git a/nextpy/ai/rag/document_loaders/apify/actor/requirements.txt b/nextpy/ai/rag/document_loaders/apify/actor/requirements.txt deleted file mode 100644 index 5a3a1cbf..00000000 --- a/nextpy/ai/rag/document_loaders/apify/actor/requirements.txt +++ /dev/null @@ -1 +0,0 @@ -apify-client diff --git a/nextpy/ai/rag/document_loaders/apify/dataset/requirements.txt b/nextpy/ai/rag/document_loaders/apify/dataset/requirements.txt deleted file mode 100644 index 5a3a1cbf..00000000 --- a/nextpy/ai/rag/document_loaders/apify/dataset/requirements.txt +++ /dev/null @@ -1 +0,0 @@ -apify-client diff --git a/nextpy/ai/rag/document_loaders/asana/requirements.txt b/nextpy/ai/rag/document_loaders/asana/requirements.txt deleted file mode 100644 index d7cf09d4..00000000 --- a/nextpy/ai/rag/document_loaders/asana/requirements.txt +++ /dev/null @@ -1 +0,0 @@ -asana diff --git a/nextpy/ai/rag/document_loaders/azcognitive_search/requirements.txt b/nextpy/ai/rag/document_loaders/azcognitive_search/requirements.txt deleted file mode 100644 index 9dbd6a12..00000000 --- a/nextpy/ai/rag/document_loaders/azcognitive_search/requirements.txt +++ /dev/null @@ 
-1,2 +0,0 @@ -azure-search-documents -azure-identity \ No newline at end of file diff --git a/nextpy/ai/rag/document_loaders/azstorage_blob/requirements.txt b/nextpy/ai/rag/document_loaders/azstorage_blob/requirements.txt deleted file mode 100644 index fa3619d2..00000000 --- a/nextpy/ai/rag/document_loaders/azstorage_blob/requirements.txt +++ /dev/null @@ -1,2 +0,0 @@ -azure-storage-blob -azure-identity \ No newline at end of file diff --git a/nextpy/ai/rag/document_loaders/bilibili/requirements.txt b/nextpy/ai/rag/document_loaders/bilibili/requirements.txt deleted file mode 100644 index 376ce433..00000000 --- a/nextpy/ai/rag/document_loaders/bilibili/requirements.txt +++ /dev/null @@ -1,2 +0,0 @@ -bilibili_api -requests \ No newline at end of file diff --git a/nextpy/ai/rag/document_loaders/boarddocs/requirements.txt b/nextpy/ai/rag/document_loaders/boarddocs/requirements.txt deleted file mode 100644 index af9477ef..00000000 --- a/nextpy/ai/rag/document_loaders/boarddocs/requirements.txt +++ /dev/null @@ -1,3 +0,0 @@ -bs4 -html2text -requests \ No newline at end of file diff --git a/nextpy/ai/rag/document_loaders/chatgpt_plugin/requirements.txt b/nextpy/ai/rag/document_loaders/chatgpt_plugin/requirements.txt deleted file mode 100644 index e69de29b..00000000 diff --git a/nextpy/ai/rag/document_loaders/chroma/requirements.txt b/nextpy/ai/rag/document_loaders/chroma/requirements.txt deleted file mode 100644 index 6dee1ba4..00000000 --- a/nextpy/ai/rag/document_loaders/chroma/requirements.txt +++ /dev/null @@ -1 +0,0 @@ -chromadb \ No newline at end of file diff --git a/nextpy/ai/rag/document_loaders/confluence/requirements.txt b/nextpy/ai/rag/document_loaders/confluence/requirements.txt deleted file mode 100644 index 4996a3e3..00000000 --- a/nextpy/ai/rag/document_loaders/confluence/requirements.txt +++ /dev/null @@ -1,9 +0,0 @@ -atlassian-python-api -html2text -pytesseract -pdf2image -Pillow -docx2txt -xlrd -svglib -retrying \ No newline at end of file diff --git a/nextpy/ai/rag/document_loaders/couchdb/requirements.txt b/nextpy/ai/rag/document_loaders/couchdb/requirements.txt deleted file mode 100644 index a9f1fb1f..00000000 --- a/nextpy/ai/rag/document_loaders/couchdb/requirements.txt +++ /dev/null @@ -1 +0,0 @@ -couchdb3 diff --git a/nextpy/ai/rag/document_loaders/deeplake/requirements.txt b/nextpy/ai/rag/document_loaders/deeplake/requirements.txt deleted file mode 100644 index bd1ea014..00000000 --- a/nextpy/ai/rag/document_loaders/deeplake/requirements.txt +++ /dev/null @@ -1 +0,0 @@ -deeplake \ No newline at end of file diff --git a/nextpy/ai/rag/document_loaders/discord/requirements.txt b/nextpy/ai/rag/document_loaders/discord/requirements.txt deleted file mode 100644 index 503dba90..00000000 --- a/nextpy/ai/rag/document_loaders/discord/requirements.txt +++ /dev/null @@ -1 +0,0 @@ -discord.py \ No newline at end of file diff --git a/nextpy/ai/rag/document_loaders/docugami/requirements.txt b/nextpy/ai/rag/document_loaders/docugami/requirements.txt deleted file mode 100644 index dd7c9377..00000000 --- a/nextpy/ai/rag/document_loaders/docugami/requirements.txt +++ /dev/null @@ -1,3 +0,0 @@ -lxml -requests -typing \ No newline at end of file diff --git a/nextpy/ai/rag/document_loaders/elasticsearch/requirements.txt b/nextpy/ai/rag/document_loaders/elasticsearch/requirements.txt deleted file mode 100644 index 79228389..00000000 --- a/nextpy/ai/rag/document_loaders/elasticsearch/requirements.txt +++ /dev/null @@ -1 +0,0 @@ -httpx \ No newline at end of file diff --git 
a/nextpy/ai/rag/document_loaders/faiss/requirements.txt b/nextpy/ai/rag/document_loaders/faiss/requirements.txt deleted file mode 100644 index f4193d23..00000000 --- a/nextpy/ai/rag/document_loaders/faiss/requirements.txt +++ /dev/null @@ -1 +0,0 @@ -faiss \ No newline at end of file diff --git a/nextpy/ai/rag/document_loaders/feedly_rss/requirements.txt b/nextpy/ai/rag/document_loaders/feedly_rss/requirements.txt deleted file mode 100644 index 42628943..00000000 --- a/nextpy/ai/rag/document_loaders/feedly_rss/requirements.txt +++ /dev/null @@ -1 +0,0 @@ -feedly-client \ No newline at end of file diff --git a/nextpy/ai/rag/document_loaders/feishu_docs/requirements.txt b/nextpy/ai/rag/document_loaders/feishu_docs/requirements.txt deleted file mode 100644 index fc75559e..00000000 --- a/nextpy/ai/rag/document_loaders/feishu_docs/requirements.txt +++ /dev/null @@ -1,2 +0,0 @@ -openams -requests \ No newline at end of file diff --git a/nextpy/ai/rag/document_loaders/file/audio/requirements.txt b/nextpy/ai/rag/document_loaders/file/audio/requirements.txt deleted file mode 100644 index 36719d37..00000000 --- a/nextpy/ai/rag/document_loaders/file/audio/requirements.txt +++ /dev/null @@ -1,2 +0,0 @@ -openai-whisper -pydub \ No newline at end of file diff --git a/nextpy/ai/rag/document_loaders/file/audio_gladia/requirements.txt b/nextpy/ai/rag/document_loaders/file/audio_gladia/requirements.txt deleted file mode 100644 index 36719d37..00000000 --- a/nextpy/ai/rag/document_loaders/file/audio_gladia/requirements.txt +++ /dev/null @@ -1,2 +0,0 @@ -openai-whisper -pydub \ No newline at end of file diff --git a/nextpy/ai/rag/document_loaders/file/cjk_pdf/requirements.txt b/nextpy/ai/rag/document_loaders/file/cjk_pdf/requirements.txt deleted file mode 100644 index 698b6805..00000000 --- a/nextpy/ai/rag/document_loaders/file/cjk_pdf/requirements.txt +++ /dev/null @@ -1 +0,0 @@ -pdfminer.six \ No newline at end of file diff --git a/nextpy/ai/rag/document_loaders/file/deepdoctection/requirements.txt b/nextpy/ai/rag/document_loaders/file/deepdoctection/requirements.txt deleted file mode 100644 index 4b422009..00000000 --- a/nextpy/ai/rag/document_loaders/file/deepdoctection/requirements.txt +++ /dev/null @@ -1,2 +0,0 @@ -deepdoctection[pt] -torch \ No newline at end of file diff --git a/nextpy/ai/rag/document_loaders/file/docx/requirements.txt b/nextpy/ai/rag/document_loaders/file/docx/requirements.txt deleted file mode 100644 index a5866142..00000000 --- a/nextpy/ai/rag/document_loaders/file/docx/requirements.txt +++ /dev/null @@ -1 +0,0 @@ -docx2txt \ No newline at end of file diff --git a/nextpy/ai/rag/document_loaders/file/epub/requirements.txt b/nextpy/ai/rag/document_loaders/file/epub/requirements.txt deleted file mode 100644 index dc7adf05..00000000 --- a/nextpy/ai/rag/document_loaders/file/epub/requirements.txt +++ /dev/null @@ -1,2 +0,0 @@ -ebooklib -html2text \ No newline at end of file diff --git a/nextpy/ai/rag/document_loaders/file/flat_pdf/requirements.txt b/nextpy/ai/rag/document_loaders/file/flat_pdf/requirements.txt deleted file mode 100644 index 4a34ddfa..00000000 --- a/nextpy/ai/rag/document_loaders/file/flat_pdf/requirements.txt +++ /dev/null @@ -1 +0,0 @@ -PyMuPDF==1.21.1 \ No newline at end of file diff --git a/nextpy/ai/rag/document_loaders/file/image/requirements.txt b/nextpy/ai/rag/document_loaders/file/image/requirements.txt deleted file mode 100644 index 66a8a119..00000000 --- a/nextpy/ai/rag/document_loaders/file/image/requirements.txt +++ /dev/null @@ -1,6 +0,0 @@ -transformers 
-Pillow -torch -torchvision -sentencepiece -pytesseract \ No newline at end of file diff --git a/nextpy/ai/rag/document_loaders/file/image_blip/requirements.txt b/nextpy/ai/rag/document_loaders/file/image_blip/requirements.txt deleted file mode 100644 index 752103bf..00000000 --- a/nextpy/ai/rag/document_loaders/file/image_blip/requirements.txt +++ /dev/null @@ -1,4 +0,0 @@ -torch -transformers -sentencepiece -Pillow diff --git a/nextpy/ai/rag/document_loaders/file/image_blip2/requirements.txt b/nextpy/ai/rag/document_loaders/file/image_blip2/requirements.txt deleted file mode 100644 index 752103bf..00000000 --- a/nextpy/ai/rag/document_loaders/file/image_blip2/requirements.txt +++ /dev/null @@ -1,4 +0,0 @@ -torch -transformers -sentencepiece -Pillow diff --git a/nextpy/ai/rag/document_loaders/file/image_deplot/requirements.txt b/nextpy/ai/rag/document_loaders/file/image_deplot/requirements.txt deleted file mode 100644 index 752103bf..00000000 --- a/nextpy/ai/rag/document_loaders/file/image_deplot/requirements.txt +++ /dev/null @@ -1,4 +0,0 @@ -torch -transformers -sentencepiece -Pillow diff --git a/nextpy/ai/rag/document_loaders/file/ipynb/requirements.txt b/nextpy/ai/rag/document_loaders/file/ipynb/requirements.txt deleted file mode 100644 index b8b380fe..00000000 --- a/nextpy/ai/rag/document_loaders/file/ipynb/requirements.txt +++ /dev/null @@ -1 +0,0 @@ -nbconvert \ No newline at end of file diff --git a/nextpy/ai/rag/document_loaders/file/json/requirements.txt b/nextpy/ai/rag/document_loaders/file/json/requirements.txt deleted file mode 100644 index e69de29b..00000000 diff --git a/nextpy/ai/rag/document_loaders/file/mbox/requirements.txt b/nextpy/ai/rag/document_loaders/file/mbox/requirements.txt deleted file mode 100644 index 041f722c..00000000 --- a/nextpy/ai/rag/document_loaders/file/mbox/requirements.txt +++ /dev/null @@ -1 +0,0 @@ -beautifulsoup4 \ No newline at end of file diff --git a/nextpy/ai/rag/document_loaders/file/pandas_csv/requirements.txt b/nextpy/ai/rag/document_loaders/file/pandas_csv/requirements.txt deleted file mode 100644 index 1411a4a0..00000000 --- a/nextpy/ai/rag/document_loaders/file/pandas_csv/requirements.txt +++ /dev/null @@ -1 +0,0 @@ -pandas \ No newline at end of file diff --git a/nextpy/ai/rag/document_loaders/file/pandas_excel/requirements.txt b/nextpy/ai/rag/document_loaders/file/pandas_excel/requirements.txt deleted file mode 100644 index 1411a4a0..00000000 --- a/nextpy/ai/rag/document_loaders/file/pandas_excel/requirements.txt +++ /dev/null @@ -1 +0,0 @@ -pandas \ No newline at end of file diff --git a/nextpy/ai/rag/document_loaders/file/pdf/requirements.txt b/nextpy/ai/rag/document_loaders/file/pdf/requirements.txt deleted file mode 100644 index 1a69c480..00000000 --- a/nextpy/ai/rag/document_loaders/file/pdf/requirements.txt +++ /dev/null @@ -1 +0,0 @@ -pypdf diff --git a/nextpy/ai/rag/document_loaders/file/pdf_miner/requirements.txt b/nextpy/ai/rag/document_loaders/file/pdf_miner/requirements.txt deleted file mode 100644 index 48060604..00000000 --- a/nextpy/ai/rag/document_loaders/file/pdf_miner/requirements.txt +++ /dev/null @@ -1 +0,0 @@ -pdfminer.six diff --git a/nextpy/ai/rag/document_loaders/file/pptx/requirements.txt b/nextpy/ai/rag/document_loaders/file/pptx/requirements.txt deleted file mode 100644 index f2834fb2..00000000 --- a/nextpy/ai/rag/document_loaders/file/pptx/requirements.txt +++ /dev/null @@ -1,5 +0,0 @@ -transformers -Pillow -torch -torchvision -python-pptx \ No newline at end of file diff --git 
a/nextpy/ai/rag/document_loaders/file/pymu_pdf/requirements.txt b/nextpy/ai/rag/document_loaders/file/pymu_pdf/requirements.txt deleted file mode 100644 index 2d431b0f..00000000 --- a/nextpy/ai/rag/document_loaders/file/pymu_pdf/requirements.txt +++ /dev/null @@ -1 +0,0 @@ -PyMuPDF \ No newline at end of file diff --git a/nextpy/ai/rag/document_loaders/file/rdf/requirements.txt b/nextpy/ai/rag/document_loaders/file/rdf/requirements.txt deleted file mode 100644 index fad8467e..00000000 --- a/nextpy/ai/rag/document_loaders/file/rdf/requirements.txt +++ /dev/null @@ -1 +0,0 @@ -rdflib~=6.2.0 \ No newline at end of file diff --git a/nextpy/ai/rag/document_loaders/file/unstructured/requirements.txt b/nextpy/ai/rag/document_loaders/file/unstructured/requirements.txt deleted file mode 100644 index 9e290371..00000000 --- a/nextpy/ai/rag/document_loaders/file/unstructured/requirements.txt +++ /dev/null @@ -1,2 +0,0 @@ -unstructured -nltk \ No newline at end of file diff --git a/nextpy/ai/rag/document_loaders/firebase_realtimedb/requirements.txt b/nextpy/ai/rag/document_loaders/firebase_realtimedb/requirements.txt deleted file mode 100644 index 4720fc6f..00000000 --- a/nextpy/ai/rag/document_loaders/firebase_realtimedb/requirements.txt +++ /dev/null @@ -1 +0,0 @@ -firebase-admin \ No newline at end of file diff --git a/nextpy/ai/rag/document_loaders/firestore/requirements.txt b/nextpy/ai/rag/document_loaders/firestore/requirements.txt deleted file mode 100644 index aacb83a9..00000000 --- a/nextpy/ai/rag/document_loaders/firestore/requirements.txt +++ /dev/null @@ -1 +0,0 @@ -google-cloud-firestore diff --git a/nextpy/ai/rag/document_loaders/github_repo/requirements.txt b/nextpy/ai/rag/document_loaders/github_repo/requirements.txt deleted file mode 100644 index 79228389..00000000 --- a/nextpy/ai/rag/document_loaders/github_repo/requirements.txt +++ /dev/null @@ -1 +0,0 @@ -httpx \ No newline at end of file diff --git a/nextpy/ai/rag/document_loaders/github_repo_issues/requirements.txt b/nextpy/ai/rag/document_loaders/github_repo_issues/requirements.txt deleted file mode 100644 index 79228389..00000000 --- a/nextpy/ai/rag/document_loaders/github_repo_issues/requirements.txt +++ /dev/null @@ -1 +0,0 @@ -httpx \ No newline at end of file diff --git a/nextpy/ai/rag/document_loaders/gmail/requirements.txt b/nextpy/ai/rag/document_loaders/gmail/requirements.txt deleted file mode 100644 index fcf4511e..00000000 --- a/nextpy/ai/rag/document_loaders/gmail/requirements.txt +++ /dev/null @@ -1,4 +0,0 @@ -google-api-python-client -google-auth-httplib2 -google-auth-oauthlib -beautifulsoup4 \ No newline at end of file diff --git a/nextpy/ai/rag/document_loaders/google_calendar/requirements.txt b/nextpy/ai/rag/document_loaders/google_calendar/requirements.txt deleted file mode 100644 index ee8b5257..00000000 --- a/nextpy/ai/rag/document_loaders/google_calendar/requirements.txt +++ /dev/null @@ -1,3 +0,0 @@ -google-api-python-client -google-auth-httplib2 -google-auth-oauthlib \ No newline at end of file diff --git a/nextpy/ai/rag/document_loaders/google_docs/requirements.txt b/nextpy/ai/rag/document_loaders/google_docs/requirements.txt deleted file mode 100644 index ee8b5257..00000000 --- a/nextpy/ai/rag/document_loaders/google_docs/requirements.txt +++ /dev/null @@ -1,3 +0,0 @@ -google-api-python-client -google-auth-httplib2 -google-auth-oauthlib \ No newline at end of file diff --git a/nextpy/ai/rag/document_loaders/google_drive/requirements.txt b/nextpy/ai/rag/document_loaders/google_drive/requirements.txt 
deleted file mode 100644 index ba868485..00000000 --- a/nextpy/ai/rag/document_loaders/google_drive/requirements.txt +++ /dev/null @@ -1,4 +0,0 @@ -google-api-python-client -google-auth-httplib2 -google-auth-oauthlib -PyDrive \ No newline at end of file diff --git a/nextpy/ai/rag/document_loaders/google_keep/requirements.txt b/nextpy/ai/rag/document_loaders/google_keep/requirements.txt deleted file mode 100644 index f5436632..00000000 --- a/nextpy/ai/rag/document_loaders/google_keep/requirements.txt +++ /dev/null @@ -1 +0,0 @@ -gkeepapi diff --git a/nextpy/ai/rag/document_loaders/google_sheets/requirements.txt b/nextpy/ai/rag/document_loaders/google_sheets/requirements.txt deleted file mode 100644 index ee8b5257..00000000 --- a/nextpy/ai/rag/document_loaders/google_sheets/requirements.txt +++ /dev/null @@ -1,3 +0,0 @@ -google-api-python-client -google-auth-httplib2 -google-auth-oauthlib \ No newline at end of file diff --git a/nextpy/ai/rag/document_loaders/graphdb_cypher/requirements.txt b/nextpy/ai/rag/document_loaders/graphdb_cypher/requirements.txt deleted file mode 100644 index 68fec45c..00000000 --- a/nextpy/ai/rag/document_loaders/graphdb_cypher/requirements.txt +++ /dev/null @@ -1 +0,0 @@ -neo4j diff --git a/nextpy/ai/rag/document_loaders/graphql/requirements.txt b/nextpy/ai/rag/document_loaders/graphql/requirements.txt deleted file mode 100644 index 21fdd175..00000000 --- a/nextpy/ai/rag/document_loaders/graphql/requirements.txt +++ /dev/null @@ -1,2 +0,0 @@ -gql -requests_toolbelt \ No newline at end of file diff --git a/nextpy/ai/rag/document_loaders/hatena_blog/requirements.txt b/nextpy/ai/rag/document_loaders/hatena_blog/requirements.txt deleted file mode 100644 index da1564b3..00000000 --- a/nextpy/ai/rag/document_loaders/hatena_blog/requirements.txt +++ /dev/null @@ -1,3 +0,0 @@ -requests -beautifulsoup4 -lxml \ No newline at end of file diff --git a/nextpy/ai/rag/document_loaders/hubspot/requirements.txt b/nextpy/ai/rag/document_loaders/hubspot/requirements.txt deleted file mode 100644 index ef8e3ebc..00000000 --- a/nextpy/ai/rag/document_loaders/hubspot/requirements.txt +++ /dev/null @@ -1 +0,0 @@ -hubspot-api-client \ No newline at end of file diff --git a/nextpy/ai/rag/document_loaders/huggingface/fs/requirements.txt b/nextpy/ai/rag/document_loaders/huggingface/fs/requirements.txt deleted file mode 100644 index 29e43968..00000000 --- a/nextpy/ai/rag/document_loaders/huggingface/fs/requirements.txt +++ /dev/null @@ -1 +0,0 @@ -huggingface-hub \ No newline at end of file diff --git a/nextpy/ai/rag/document_loaders/intercom/requirements.txt b/nextpy/ai/rag/document_loaders/intercom/requirements.txt deleted file mode 100644 index 2f1f891a..00000000 --- a/nextpy/ai/rag/document_loaders/intercom/requirements.txt +++ /dev/null @@ -1,2 +0,0 @@ -beautifulsoup4==4.11.1 -requests==2.28.1 \ No newline at end of file diff --git a/nextpy/ai/rag/document_loaders/jira/requirements.txt b/nextpy/ai/rag/document_loaders/jira/requirements.txt deleted file mode 100644 index 9cf40eaa..00000000 --- a/nextpy/ai/rag/document_loaders/jira/requirements.txt +++ /dev/null @@ -1 +0,0 @@ -jira diff --git a/nextpy/ai/rag/document_loaders/jsondata/requirements.txt b/nextpy/ai/rag/document_loaders/jsondata/requirements.txt deleted file mode 100644 index e69de29b..00000000 diff --git a/nextpy/ai/rag/document_loaders/kaltura/esearch/requirements.txt b/nextpy/ai/rag/document_loaders/kaltura/esearch/requirements.txt deleted file mode 100644 index 40818090..00000000 --- 
a/nextpy/ai/rag/document_loaders/kaltura/esearch/requirements.txt +++ /dev/null @@ -1 +0,0 @@ -KalturaApiClient~=19.3.0 \ No newline at end of file diff --git a/nextpy/ai/rag/document_loaders/kibela/requirements.txt b/nextpy/ai/rag/document_loaders/kibela/requirements.txt deleted file mode 100644 index 11388188..00000000 --- a/nextpy/ai/rag/document_loaders/kibela/requirements.txt +++ /dev/null @@ -1 +0,0 @@ -gql diff --git a/nextpy/ai/rag/document_loaders/mangoapps_guides/requirements.txt b/nextpy/ai/rag/document_loaders/mangoapps_guides/requirements.txt deleted file mode 100644 index 6ddd8a01..00000000 --- a/nextpy/ai/rag/document_loaders/mangoapps_guides/requirements.txt +++ /dev/null @@ -1,2 +0,0 @@ -beautifulsoup4>=4.11.1 -requests>=2.28.1 \ No newline at end of file diff --git a/nextpy/ai/rag/document_loaders/maps/requirements.txt b/nextpy/ai/rag/document_loaders/maps/requirements.txt deleted file mode 100644 index 721f6444..00000000 --- a/nextpy/ai/rag/document_loaders/maps/requirements.txt +++ /dev/null @@ -1,2 +0,0 @@ -osmxtract -geopy \ No newline at end of file diff --git a/nextpy/ai/rag/document_loaders/metal/requirements.txt b/nextpy/ai/rag/document_loaders/metal/requirements.txt deleted file mode 100644 index 66b852b0..00000000 --- a/nextpy/ai/rag/document_loaders/metal/requirements.txt +++ /dev/null @@ -1 +0,0 @@ -metal_sdk diff --git a/nextpy/ai/rag/document_loaders/milvus/requirements.txt b/nextpy/ai/rag/document_loaders/milvus/requirements.txt deleted file mode 100644 index de2c40e3..00000000 --- a/nextpy/ai/rag/document_loaders/milvus/requirements.txt +++ /dev/null @@ -1 +0,0 @@ -pymilvus \ No newline at end of file diff --git a/nextpy/ai/rag/document_loaders/mondaydotcom/requirements.txt b/nextpy/ai/rag/document_loaders/mondaydotcom/requirements.txt deleted file mode 100644 index 663bd1f6..00000000 --- a/nextpy/ai/rag/document_loaders/mondaydotcom/requirements.txt +++ /dev/null @@ -1 +0,0 @@ -requests \ No newline at end of file diff --git a/nextpy/ai/rag/document_loaders/mongo/requirements.txt b/nextpy/ai/rag/document_loaders/mongo/requirements.txt deleted file mode 100644 index 8c7d698b..00000000 --- a/nextpy/ai/rag/document_loaders/mongo/requirements.txt +++ /dev/null @@ -1 +0,0 @@ -pymongo \ No newline at end of file diff --git a/nextpy/ai/rag/document_loaders/opendal_reader/azblob/requirements.txt b/nextpy/ai/rag/document_loaders/opendal_reader/azblob/requirements.txt deleted file mode 100644 index e55fe80f..00000000 --- a/nextpy/ai/rag/document_loaders/opendal_reader/azblob/requirements.txt +++ /dev/null @@ -1 +0,0 @@ -opendal==0.30.3 diff --git a/nextpy/ai/rag/document_loaders/opendal_reader/gcs/requirements.txt b/nextpy/ai/rag/document_loaders/opendal_reader/gcs/requirements.txt deleted file mode 100644 index e55fe80f..00000000 --- a/nextpy/ai/rag/document_loaders/opendal_reader/gcs/requirements.txt +++ /dev/null @@ -1 +0,0 @@ -opendal==0.30.3 diff --git a/nextpy/ai/rag/document_loaders/opendal_reader/requirements.txt b/nextpy/ai/rag/document_loaders/opendal_reader/requirements.txt deleted file mode 100644 index e55fe80f..00000000 --- a/nextpy/ai/rag/document_loaders/opendal_reader/requirements.txt +++ /dev/null @@ -1 +0,0 @@ -opendal==0.30.3 diff --git a/nextpy/ai/rag/document_loaders/opendal_reader/s3/requirements.txt b/nextpy/ai/rag/document_loaders/opendal_reader/s3/requirements.txt deleted file mode 100644 index e55fe80f..00000000 --- a/nextpy/ai/rag/document_loaders/opendal_reader/s3/requirements.txt +++ /dev/null @@ -1 +0,0 @@ -opendal==0.30.3 diff --git 
a/nextpy/ai/rag/document_loaders/outlook_localcalendar/requirements.txt b/nextpy/ai/rag/document_loaders/outlook_localcalendar/requirements.txt
deleted file mode 100644
index 10c9d322..00000000
--- a/nextpy/ai/rag/document_loaders/outlook_localcalendar/requirements.txt
+++ /dev/null
@@ -1 +0,0 @@
-pywin32
diff --git a/nextpy/ai/rag/document_loaders/pandas_ai/requirements.txt b/nextpy/ai/rag/document_loaders/pandas_ai/requirements.txt
deleted file mode 100644
index b758d929..00000000
--- a/nextpy/ai/rag/document_loaders/pandas_ai/requirements.txt
+++ /dev/null
@@ -1 +0,0 @@
-pandasai
\ No newline at end of file
diff --git a/nextpy/ai/rag/document_loaders/papers/arxiv/requirements.txt b/nextpy/ai/rag/document_loaders/papers/arxiv/requirements.txt
deleted file mode 100644
index 164782d5..00000000
--- a/nextpy/ai/rag/document_loaders/papers/arxiv/requirements.txt
+++ /dev/null
@@ -1 +0,0 @@
-arxiv
\ No newline at end of file
diff --git a/nextpy/ai/rag/document_loaders/pinecone/requirements.txt b/nextpy/ai/rag/document_loaders/pinecone/requirements.txt
deleted file mode 100644
index 8bf0a1e2..00000000
--- a/nextpy/ai/rag/document_loaders/pinecone/requirements.txt
+++ /dev/null
@@ -1 +0,0 @@
-pinecone-client
\ No newline at end of file
diff --git a/nextpy/ai/rag/document_loaders/qdrant/requirements.txt b/nextpy/ai/rag/document_loaders/qdrant/requirements.txt
deleted file mode 100644
index 2f03c119..00000000
--- a/nextpy/ai/rag/document_loaders/qdrant/requirements.txt
+++ /dev/null
@@ -1 +0,0 @@
-qdrant_client
\ No newline at end of file
diff --git a/nextpy/ai/rag/document_loaders/reddit/requirements.txt b/nextpy/ai/rag/document_loaders/reddit/requirements.txt
deleted file mode 100644
index c1400b24..00000000
--- a/nextpy/ai/rag/document_loaders/reddit/requirements.txt
+++ /dev/null
@@ -1,5 +0,0 @@
-praw~=7.6
-prawcore~=2.3
-requests~=2.28
-update-checker~=0.18
-websocket-client~=1.5
\ No newline at end of file
diff --git a/nextpy/ai/rag/document_loaders/remote_depth/requirements.txt b/nextpy/ai/rag/document_loaders/remote_depth/requirements.txt
deleted file mode 100644
index ecfeee74..00000000
--- a/nextpy/ai/rag/document_loaders/remote_depth/requirements.txt
+++ /dev/null
@@ -1,2 +0,0 @@
-tqdm~=4.64
-beautifulsoup4~=4.11
\ No newline at end of file
diff --git a/nextpy/ai/rag/document_loaders/s3/requirements.txt b/nextpy/ai/rag/document_loaders/s3/requirements.txt
deleted file mode 100644
index 1db657b6..00000000
--- a/nextpy/ai/rag/document_loaders/s3/requirements.txt
+++ /dev/null
@@ -1 +0,0 @@
-boto3
\ No newline at end of file
diff --git a/nextpy/ai/rag/document_loaders/singlestore/requirements.txt b/nextpy/ai/rag/document_loaders/singlestore/requirements.txt
deleted file mode 100644
index 9e7dd9db..00000000
--- a/nextpy/ai/rag/document_loaders/singlestore/requirements.txt
+++ /dev/null
@@ -1 +0,0 @@
-pymysql
\ No newline at end of file
diff --git a/nextpy/ai/rag/document_loaders/slack/requirements.txt b/nextpy/ai/rag/document_loaders/slack/requirements.txt
deleted file mode 100644
index bb964f6e..00000000
--- a/nextpy/ai/rag/document_loaders/slack/requirements.txt
+++ /dev/null
@@ -1 +0,0 @@
-slack_sdk
\ No newline at end of file
diff --git a/nextpy/ai/rag/document_loaders/snscrape_twitter/requirements.txt b/nextpy/ai/rag/document_loaders/snscrape_twitter/requirements.txt
deleted file mode 100644
index 2b358070..00000000
--- a/nextpy/ai/rag/document_loaders/snscrape_twitter/requirements.txt
+++ /dev/null
@@ -1 +0,0 @@
-git+https://github.com/JustAnotherArchivist/snscrape.git
\ No newline at end of file
diff --git a/nextpy/ai/rag/document_loaders/spotify/requirements.txt b/nextpy/ai/rag/document_loaders/spotify/requirements.txt
deleted file mode 100644
index e54be75e..00000000
--- a/nextpy/ai/rag/document_loaders/spotify/requirements.txt
+++ /dev/null
@@ -1 +0,0 @@
-spotipy
\ No newline at end of file
diff --git a/nextpy/ai/rag/document_loaders/stackoverflow/requirements.txt b/nextpy/ai/rag/document_loaders/stackoverflow/requirements.txt
deleted file mode 100644
index e26aef2e..00000000
--- a/nextpy/ai/rag/document_loaders/stackoverflow/requirements.txt
+++ /dev/null
@@ -1,2 +0,0 @@
-requests
-openams
diff --git a/nextpy/ai/rag/document_loaders/steamship/requirements.txt b/nextpy/ai/rag/document_loaders/steamship/requirements.txt
deleted file mode 100644
index 8c194cfc..00000000
--- a/nextpy/ai/rag/document_loaders/steamship/requirements.txt
+++ /dev/null
@@ -1 +0,0 @@
-steamship
\ No newline at end of file
diff --git a/nextpy/ai/rag/document_loaders/trello/requirements.txt b/nextpy/ai/rag/document_loaders/trello/requirements.txt
deleted file mode 100644
index 55e4c9a2..00000000
--- a/nextpy/ai/rag/document_loaders/trello/requirements.txt
+++ /dev/null
@@ -1 +0,0 @@
-py-trello
diff --git a/nextpy/ai/rag/document_loaders/twitter/requirements.txt b/nextpy/ai/rag/document_loaders/twitter/requirements.txt
deleted file mode 100644
index 69ae13e6..00000000
--- a/nextpy/ai/rag/document_loaders/twitter/requirements.txt
+++ /dev/null
@@ -1 +0,0 @@
-tweepy
\ No newline at end of file
diff --git a/nextpy/ai/rag/document_loaders/weather/requirements.txt b/nextpy/ai/rag/document_loaders/weather/requirements.txt
deleted file mode 100644
index 2486d926..00000000
--- a/nextpy/ai/rag/document_loaders/weather/requirements.txt
+++ /dev/null
@@ -1 +0,0 @@
-pyowm
\ No newline at end of file
diff --git a/nextpy/ai/rag/document_loaders/weaviate/requirements.txt b/nextpy/ai/rag/document_loaders/weaviate/requirements.txt
deleted file mode 100644
index cc9bbba0..00000000
--- a/nextpy/ai/rag/document_loaders/weaviate/requirements.txt
+++ /dev/null
@@ -1 +0,0 @@
-weaviate-client
\ No newline at end of file
diff --git a/nextpy/ai/rag/document_loaders/web/async_web/requirements.txt b/nextpy/ai/rag/document_loaders/web/async_web/requirements.txt
deleted file mode 100644
index 2687b17e..00000000
--- a/nextpy/ai/rag/document_loaders/web/async_web/requirements.txt
+++ /dev/null
@@ -1,2 +0,0 @@
-html2text
-aiohttp
diff --git a/nextpy/ai/rag/document_loaders/web/beautiful_soup_web/requirements.txt b/nextpy/ai/rag/document_loaders/web/beautiful_soup_web/requirements.txt
deleted file mode 100644
index 013a6eb4..00000000
--- a/nextpy/ai/rag/document_loaders/web/beautiful_soup_web/requirements.txt
+++ /dev/null
@@ -1,3 +0,0 @@
-beautifulsoup4
-requests
-urllib3
\ No newline at end of file
diff --git a/nextpy/ai/rag/document_loaders/web/knowledge_base/requirements.txt b/nextpy/ai/rag/document_loaders/web/knowledge_base/requirements.txt
deleted file mode 100644
index df3e475a..00000000
--- a/nextpy/ai/rag/document_loaders/web/knowledge_base/requirements.txt
+++ /dev/null
@@ -1 +0,0 @@
-playwright~=1.30
\ No newline at end of file
diff --git a/nextpy/ai/rag/document_loaders/web/readability_web/requirements.txt b/nextpy/ai/rag/document_loaders/web/readability_web/requirements.txt
deleted file mode 100644
index 25182483..00000000
--- a/nextpy/ai/rag/document_loaders/web/readability_web/requirements.txt
+++ /dev/null
@@ -1 +0,0 @@
-playwright==1.30.0
\ No newline at end of file
diff --git a/nextpy/ai/rag/document_loaders/web/simple_web/requirements.txt b/nextpy/ai/rag/document_loaders/web/simple_web/requirements.txt
deleted file mode 100644
index 44b9834f..00000000
--- a/nextpy/ai/rag/document_loaders/web/simple_web/requirements.txt
+++ /dev/null
@@ -1 +0,0 @@
-html2text
\ No newline at end of file
diff --git a/nextpy/ai/rag/document_loaders/web/sitemap/requirements.txt b/nextpy/ai/rag/document_loaders/web/sitemap/requirements.txt
deleted file mode 100644
index e69de29b..00000000
diff --git a/nextpy/ai/rag/document_loaders/web/trafilatura_web/requirements.txt b/nextpy/ai/rag/document_loaders/web/trafilatura_web/requirements.txt
deleted file mode 100644
index 51695268..00000000
--- a/nextpy/ai/rag/document_loaders/web/trafilatura_web/requirements.txt
+++ /dev/null
@@ -1 +0,0 @@
-trafilatura~=1.4
\ No newline at end of file
diff --git a/nextpy/ai/rag/document_loaders/web/unstructured_web/requirements.txt b/nextpy/ai/rag/document_loaders/web/unstructured_web/requirements.txt
deleted file mode 100644
index 7cd8616d..00000000
--- a/nextpy/ai/rag/document_loaders/web/unstructured_web/requirements.txt
+++ /dev/null
@@ -1 +0,0 @@
-unstructured
\ No newline at end of file
diff --git a/nextpy/ai/rag/document_loaders/whatsapp/requirements.txt b/nextpy/ai/rag/document_loaders/whatsapp/requirements.txt
deleted file mode 100644
index ecb7a9e7..00000000
--- a/nextpy/ai/rag/document_loaders/whatsapp/requirements.txt
+++ /dev/null
@@ -1,2 +0,0 @@
-pandas
-chat-miner
\ No newline at end of file
diff --git a/nextpy/ai/rag/document_loaders/wikipedia/requirements.txt b/nextpy/ai/rag/document_loaders/wikipedia/requirements.txt
deleted file mode 100644
index ecd92cae..00000000
--- a/nextpy/ai/rag/document_loaders/wikipedia/requirements.txt
+++ /dev/null
@@ -1 +0,0 @@
-wikipedia~=1.4
\ No newline at end of file
diff --git a/nextpy/ai/rag/document_loaders/wordlift/requirements.txt b/nextpy/ai/rag/document_loaders/wordlift/requirements.txt
deleted file mode 100644
index 5904f7cf..00000000
--- a/nextpy/ai/rag/document_loaders/wordlift/requirements.txt
+++ /dev/null
@@ -1,2 +0,0 @@
-langchain
-graphql-core
\ No newline at end of file
diff --git a/nextpy/ai/rag/document_loaders/wordpress/requirements.txt b/nextpy/ai/rag/document_loaders/wordpress/requirements.txt
deleted file mode 100644
index 2f1f891a..00000000
--- a/nextpy/ai/rag/document_loaders/wordpress/requirements.txt
+++ /dev/null
@@ -1,2 +0,0 @@
-beautifulsoup4==4.11.1
-requests==2.28.1
\ No newline at end of file
diff --git a/nextpy/ai/rag/document_loaders/youtube_transcript/requirements.txt b/nextpy/ai/rag/document_loaders/youtube_transcript/requirements.txt
deleted file mode 100644
index d7a3749d..00000000
--- a/nextpy/ai/rag/document_loaders/youtube_transcript/requirements.txt
+++ /dev/null
@@ -1 +0,0 @@
-youtube_transcript_api~=0.5.0
\ No newline at end of file
diff --git a/nextpy/ai/rag/document_loaders/zendesk/requirements.txt b/nextpy/ai/rag/document_loaders/zendesk/requirements.txt
deleted file mode 100644
index 1f3e778b..00000000
--- a/nextpy/ai/rag/document_loaders/zendesk/requirements.txt
+++ /dev/null
@@ -1,2 +0,0 @@
-beautifulsoup4
-requests
diff --git a/nextpy/ai/rag/document_loaders/zulip/requirements.txt b/nextpy/ai/rag/document_loaders/zulip/requirements.txt
deleted file mode 100644
index e17e9a44..00000000
--- a/nextpy/ai/rag/document_loaders/zulip/requirements.txt
+++ /dev/null
@@ -1 +0,0 @@
-zulip
diff --git a/nextpy/ai/scripts/anonymize.py b/nextpy/ai/scripts/anonymize.py
deleted file mode 100644
index 21fbbfcb..00000000
--- a/nextpy/ai/scripts/anonymize.py +++ /dev/null @@ -1,95 +0,0 @@ -# This file has been modified by the Nextpy Team in 2023 using AI tools and automation scripts. -# We have rigorously tested these modifications to ensure reliability and performance. Based on successful test results, we are confident in the quality and stability of these changes. - -import re - -import spacy - - -class PIIScrubber: - """Class for scrubbing personally identifiable information (PII) from text.""" - - PATTERNS = { - "email": r"\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Z|a-z]{2,7}\b", - "ssn": r"\d{3}-?\d{2}-?\d{4}", - "credit_card": r"\d{4}-?\d{4}-?\d{4}-?\d{4}", - "phone_number": r"\b(\+\d{1,2}\s)?\(?\d{3}\)?[\s.-]?\d{3}[\s.-]?\d{4}\b", - "ip": r"\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}", - "date_of_birth": r"\b(0[1-9]|1[0-2])[- /.](0[1-9]|[12][0-9]|3[01])[- /.](19|20)\d\d\b", - "vehicle_identification_number": r"\b([A-HJ-NPR-Z0-9]{3})([A-HJ-NPR-Z0-9]{5})(\d{2})([A-HJ-NPR-Z0-9]{8})\b", - } - - def __init__(self, verbose=False): - """Initialize the PIIScrubber with given verbosity. - - Args: - verbose (bool): If True, print out internal states. Default is False. - """ - self.verbose = verbose - self.nlp = spacy.load("en_core_web_sm") - - def scrub(self, text): - """Detect and log PII in the given text based on regex patterns and SpaCy NER. - - Args: - text (str): The input text to scrub. - - Returns: - dict: A dictionary with the start and end indices of the detected PII as keys and the corresponding replacements as values. - """ - replacements = {} - for label, pattern in self.PATTERNS.items(): - for match in re.finditer(pattern, text): - replacements[match.span()] = f"[REDACTED {label.upper()}]" - if self.verbose: - print(f"Potential {label} detected: {match.group()}") - - doc = self.nlp(text) - for ent in doc.ents: - if ent.label_ in ["PERSON", "ORG"]: - replacements[ent.start_char, ent.end_char] = "[REDACTED NAME]" - if self.verbose: - print(f"Potential {ent.label_} entity detected: {ent.text}") - - return replacements - - @staticmethod - def anonymize_text(text, replacements): - """Anonymize PII in the given text based on the provided replacements. - - Args: - text (str): The input text to anonymize. - replacements (dict): A dictionary with the start and end indices of the PII to replace as keys and the corresponding replacements as values. - - Returns: - str: The anonymized text. - """ - replacements = sorted(replacements.items(), key=lambda x: x[0][0], reverse=True) - for (start, end), replacement in replacements: - text = text[:start] + replacement + text[end:] - - return text - - def run(self, text): - """Detect, log, and anonymize PII in the given text. - - Args: - text (str): The input text to scrub. - - Returns: - str: The anonymized text. - """ - if not isinstance(text, str): - raise TypeError("Text must be a string") - - try: - replacements = self.scrub(text) - anonymized_text = self.anonymize_text(text, replacements) - if text != anonymized_text and self.verbose: - print(f"Original: {text}\nAnonymized: {anonymized_text}") - - except Exception as e: - print(f"Error processing text: {e}") - raise e - - return anonymized_text diff --git a/nextpy/ai/scripts/awslambda.py b/nextpy/ai/scripts/awslambda.py deleted file mode 100644 index 3015bbcf..00000000 --- a/nextpy/ai/scripts/awslambda.py +++ /dev/null @@ -1,71 +0,0 @@ -# This file has been modified by the Nextpy Team in 2023 using AI tools and automation scripts. -# We have rigorously tested these modifications to ensure reliability and performance. 
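For context, a minimal usage sketch of the PIIScrubber removed above. It assumes spacy is installed along with the en_core_web_sm model (python -m spacy download en_core_web_sm); the sample text is invented:

    scrubber = PIIScrubber(verbose=True)
    redacted = scrubber.run("Reach Jane Doe at jane.doe@example.com or 555-123-4567.")
    # Roughly: "Reach [REDACTED NAME] at [REDACTED EMAIL] or [REDACTED PHONE_NUMBER]."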
Based on successful test results, we are confident in the quality and stability of these changes. - -# Directly taken from Langchain Github Repo - - -"""Util that calls Lambda.""" -import json -from typing import Any, Dict, Optional - -from pydantic import BaseModel, Extra, root_validator - - -class LambdaWrapper(BaseModel): - """Wrapper for AWS Lambda SDK. - - Docs for using: - - 1. pip install boto3 - 2. Create a lambda function using the AWS Console or CLI - 3. Run `aws configure` and enter your AWS credentials - - """ - - lambda_client: Any #: :meta private: - function_name: Optional[str] = None - awslambda_tool_name: Optional[str] = None - awslambda_tool_description: Optional[str] = None - - class Config: - """Configuration for this pydantic object.""" - - extra = Extra.forbid - - @root_validator() - def validate_environment(cls, values: Dict) -> Dict: - """Validate that python package exists in environment.""" - try: - import boto3 - - except ImportError: - raise ImportError( - "boto3 is not installed. Please install it with `pip install boto3`" - ) - - values["lambda_client"] = boto3.client("lambda") - values["function_name"] = values["function_name"] - - return values - - def run(self, query: str) -> str: - """Invoke Lambda function and parse result.""" - res = self.lambda_client.invoke( - FunctionName=self.function_name, - InvocationType="RequestResponse", - Payload=json.dumps({"body": query}), - ) - - try: - payload_stream = res["Payload"] - payload_string = payload_stream.read().decode("utf-8") - answer = json.loads(payload_string)["body"] - - except StopIteration: - return "Failed to parse response from Lambda" - - if answer is None or answer == "": - # We don't want to return the assumption alone if answer is empty - return "Request failed." - else: - return f"Result: {answer}" diff --git a/nextpy/ai/scripts/bash.py b/nextpy/ai/scripts/bash.py deleted file mode 100644 index f4f99d50..00000000 --- a/nextpy/ai/scripts/bash.py +++ /dev/null @@ -1,123 +0,0 @@ -# This file has been modified by the Nextpy Team in 2023 using AI tools and automation scripts. -# We have rigorously tested these modifications to ensure reliability and performance. Based on successful test results, we are confident in the quality and stability of these changes. - -# Directly taken from Langchain Github Repo - -"""Wrapper around subprocess to run commands.""" -from __future__ import annotations - -import platform -import re -import subprocess -from typing import TYPE_CHECKING, List, Union -from uuid import uuid4 - -if TYPE_CHECKING: - import pexpect - - -def _lazy_import_pexpect() -> pexpect: - """Import pexpect only when needed.""" - if platform.system() == "Windows": - raise ValueError("Persistent bash processes are not yet supported on Windows.") - try: - import pexpect - - except ImportError: - raise ImportError( - "pexpect required for persistent bash processes." - " To install, run `pip install pexpect`." 
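For context, a minimal usage sketch of the LambdaWrapper removed above, assuming boto3 is installed, AWS credentials are configured via `aws configure`, and "echo-fn" is a hypothetical Lambda function whose response payload is JSON containing a "body" field:

    wrapper = LambdaWrapper(function_name="echo-fn")  # hypothetical function name
    print(wrapper.run("ping"))  # "Result: ..." on success, "Request failed." if the answer is empty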
- ) - return pexpect - - -class BashProcess: - """Executes bash commands and returns the output.""" - - def __init__( - self, - strip_newlines: bool = False, - return_err_output: bool = False, - persistent: bool = False, - ): - """Initialize with stripping newlines.""" - self.strip_newlines = strip_newlines - self.return_err_output = return_err_output - self.prompt = "" - self.process = None - if persistent: - self.prompt = str(uuid4()) - self.process = self._initialize_persistent_process(self.prompt) - - @staticmethod - def _initialize_persistent_process(prompt: str) -> pexpect.spawn: - # Start bash in a clean environment - # Doesn't work on windows - pexpect = _lazy_import_pexpect() - process = pexpect.spawn( - "env", ["-i", "bash", "--norc", "--noprofile"], encoding="utf-8" - ) - # Set the custom prompt - process.sendline("PS1=" + prompt) - - process.expect_exact(prompt, timeout=10) - return process - - def run(self, commands: Union[str, List[str]]) -> str: - """Run commands and return final output.""" - if isinstance(commands, str): - commands = [commands] - commands = ";".join(commands) - if self.process is not None: - return self.run_persistent( - commands, - ) - else: - return self.run(commands) - - def run(self, command: str) -> str: - """Run commands and return final output.""" - try: - output = subprocess.run( - command, - shell=True, - check=True, - stdout=subprocess.PIPE, - stderr=subprocess.STDOUT, - ).stdout.decode() - except subprocess.CalledProcessError as error: - if self.return_err_output: - return error.stdout.decode() - return str(error) - if self.strip_newlines: - output = output.strip() - return output - - def process_output(self, output: str, command: str) -> str: - # Remove the command from the output using a regular expression - pattern = re.escape(command) + r"\s*\n" - output = re.sub(pattern, "", output, count=1) - return output.strip() - - def run_persistent(self, command: str) -> str: - """Run commands and return final output.""" - pexpect = _lazy_import_pexpect() - if self.process is None: - raise ValueError("Process not initialized") - self.process.sendline(command) - - # Clear the output with an empty string - self.process.expect(self.prompt, timeout=10) - self.process.sendline("") - - try: - self.process.expect([self.prompt, pexpect.EOF], timeout=10) - except pexpect.TIMEOUT: - return f"Timeout error while executing command {command}" - if self.process.after == pexpect.EOF: - return f"Exited with error status: {self.process.exitstatus}" - output = self.process.before - output = self.process_output(output, command) - if self.strip_newlines: - return output.strip() - return output diff --git a/nextpy/ai/scripts/bibtex.py b/nextpy/ai/scripts/bibtex.py deleted file mode 100644 index 27516b56..00000000 --- a/nextpy/ai/scripts/bibtex.py +++ /dev/null @@ -1,92 +0,0 @@ -# This file has been modified by the Nextpy Team in 2023 using AI tools and automation scripts. -# We have rigorously tested these modifications to ensure reliability and performance. Based on successful test results, we are confident in the quality and stability of these changes. 
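For context, a minimal usage sketch of the BashProcess removed above. Note that the second `run` definition shadows the first at class-creation time, so the call below goes through the subprocess-based variant:

    bash = BashProcess(strip_newlines=True)
    print(bash.run("echo hello"))  # -> "hello"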
- -# Directly taken from Langchain Github Repo - -"""Util that calls bibtexparser.""" -import logging -from typing import Any, Dict, List, Mapping - -from pydantic import BaseModel, Extra, root_validator - -logger = logging.getLogger(__name__) - -OPTIONAL_FIELDS = [ - "annotate", - "booktitle", - "editor", - "howpublished", - "journal", - "keywords", - "note", - "organization", - "publisher", - "school", - "series", - "type", - "doi", - "issn", - "isbn", -] - - -class BibtexparserWrapper(BaseModel): - """Wrapper around bibtexparser. - - To use, you should have the ``bibtexparser`` python package installed. - https://bibtexparser.readthedocs.io/en/master/ - - This wrapper will use bibtexparser to load a collection of references from - a bibtex file and fetch document summaries. - """ - - class Config: - """Configuration for this pydantic object.""" - - extra = Extra.forbid - - @root_validator() - def validate_environment(cls, values: Dict) -> Dict: - """Validate that the python package exists in environment.""" - try: - import bibtexparser # noqa - except ImportError: - raise ImportError( - "Could not import bibtexparser python package. " - "Please install it with `pip install bibtexparser`." - ) - - return values - - def load_bibtex_entries(self, path: str) -> List[Dict[str, Any]]: - """Load bibtex entries from the bibtex file at the given path.""" - import bibtexparser - - with open(path) as file: - entries = bibtexparser.load(file).entries - return entries - - def get_metadata( - self, entry: Mapping[str, Any], load_extra: bool = False - ) -> Dict[str, Any]: - """Get metadata for the given entry.""" - publication = entry.get("journal") or entry.get("booktitle") - if "url" in entry: - url = entry["url"] - elif "doi" in entry: - url = f'https://doi.org/{entry["doi"]}' - else: - url = None - meta = { - "id": entry.get("ID"), - "published_year": entry.get("year"), - "title": entry.get("title"), - "publication": publication, - "authors": entry.get("author"), - "abstract": entry.get("abstract"), - "url": url, - } - if load_extra: - for field in OPTIONAL_FIELDS: - meta[field] = entry.get(field) - return {k: v for k, v in meta.items() if v is not None} diff --git a/nextpy/ai/scripts/bingsearch.py b/nextpy/ai/scripts/bingsearch.py deleted file mode 100644 index fb590772..00000000 --- a/nextpy/ai/scripts/bingsearch.py +++ /dev/null @@ -1,105 +0,0 @@ -# This file has been modified by the Nextpy Team in 2023 using AI tools and automation scripts. -# We have rigorously tested these modifications to ensure reliability and performance. Based on successful test results, we are confident in the quality and stability of these changes. - -# Directly taken from Langchain Github Repo - -"""Util that calls Bing Search. - -In order to set this up, follow instructions at: -https://levelup.gitconnected.com/api-tutorial-how-to-use-bing-web-search-api-in-python-4165d5592a7e -""" -from typing import Dict, List - -import requests -from pydantic import BaseModel, Extra, root_validator - -from nextpy.utils.data_ops import get_from_dict_or_env - - -class BingSearchAPIWrapper(BaseModel): - """Wrapper for Bing Search API. 
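For context, a minimal usage sketch of the BibtexparserWrapper removed above, assuming bibtexparser is installed; "refs.bib" is a hypothetical bibliography file:

    wrapper = BibtexparserWrapper()
    for entry in wrapper.load_bibtex_entries("refs.bib"):  # hypothetical path
        print(wrapper.get_metadata(entry, load_extra=False))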
- - In order to set this up, follow instructions at: - https://levelup.gitconnected.com/api-tutorial-how-to-use-bing-web-search-api-in-python-4165d5592a7e - """ - - bing_subscription_key: str - bing_search_url: str - k: int = 10 - - class Config: - """Configuration for this pydantic object.""" - - extra = Extra.forbid - - def _bing_search_results(self, search_term: str, count: int) -> List[dict]: - headers = {"Ocp-Apim-Subscription-Key": self.bing_subscription_key} - params = { - "q": search_term, - "count": count, - "textDecorations": True, - "textFormat": "HTML", - } - response = requests.get( - self.bing_search_url, headers=headers, params=params # type: ignore - ) - response.raise_for_status() - search_results = response.json() - return search_results["webPages"]["value"] - - @root_validator(pre=True) - def validate_environment(cls, values: Dict) -> Dict: - """Validate that api key and endpoint exists in environment.""" - bing_subscription_key = get_from_dict_or_env( - values, "bing_subscription_key", "BING_SUBSCRIPTION_KEY" - ) - values["bing_subscription_key"] = bing_subscription_key - - bing_search_url = get_from_dict_or_env( - values, - "bing_search_url", - "BING_SEARCH_URL", - # default="https://api.bing.microsoft.com/v7.0/search", - ) - - values["bing_search_url"] = bing_search_url - - return values - - def run(self, query: str) -> str: - """Run query through BingSearch and parse result.""" - snippets = [] - results = self._bing_search_results(query, count=self.k) - if len(results) == 0: - return "No good Bing Search Result was found" - for result in results: - snippets.append(result["snippet"]) - - return " ".join(snippets) - - def results(self, query: str, num_results: int) -> List[Dict]: - """Run query through BingSearch and return metadata. - - Args: - query: The query to search for. - num_results: The number of results to return. - - Returns: - A list of dictionaries with the following keys: - snippet - The description of the result. - title - The title of the result. - link - The link to the result. - """ - metadata_results = [] - results = self._bing_search_results(query, count=num_results) - if len(results) == 0: - return [{"Result": "No good Bing Search Result was found"}] - for result in results: - metadata_result = { - "snippet": result["snippet"], - "title": result["name"], - "link": result["url"], - } - metadata_results.append(metadata_result) - - return metadata_results diff --git a/nextpy/ai/scripts/bravesearch.py b/nextpy/ai/scripts/bravesearch.py deleted file mode 100644 index 9da4a8c3..00000000 --- a/nextpy/ai/scripts/bravesearch.py +++ /dev/null @@ -1,45 +0,0 @@ -# This file has been modified by the Nextpy Team in 2023 using AI tools and automation scripts. -# We have rigorously tested these modifications to ensure reliability and performance. Based on successful test results, we are confident in the quality and stability of these changes. 
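For context, a minimal usage sketch of the BingSearchAPIWrapper removed above, assuming the BING_SUBSCRIPTION_KEY and BING_SEARCH_URL environment variables are set as described in its docstring; the query is an example:

    search = BingSearchAPIWrapper()
    print(search.run("nextpy framework"))          # concatenated result snippets
    print(search.results("nextpy framework", 3))   # [{"snippet", "title", "link"}, ...]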
- -# Code directly taken from LangChain Github Repo - -import json - -import requests -from pydantic import BaseModel, Field - - -class BraveSearchWrapper(BaseModel): - api_key: str - search_kwargs: dict = Field(default_factory=dict) - - def run(self, query: str) -> str: - headers = { - "X-Subscription-Token": self.api_key, - "Accept": "application/json", - } - base_url = "https://api.search.brave.com/res/v1/web/search" - req = requests.PreparedRequest() - params = {**self.search_kwargs, **{"q": query}} - req.prepare_url(base_url, params) - if req.url is None: - raise ValueError("prepared url is None, this should not happen") - - response = requests.get(req.url, headers=headers) - - if not response.ok: - raise Exception(f"HTTP error {response.status_code}") - - parsed_response = response.json() - web_search_results = parsed_response.get("web", {}).get("results", []) - final_results = [] - if isinstance(web_search_results, list): - for item in web_search_results: - final_results.append( - { - "title": item.get("title"), - "link": item.get("url"), - "snippet": item.get("description"), - } - ) - return json.dumps(final_results) diff --git a/nextpy/ai/scripts/ducksearch.py b/nextpy/ai/scripts/ducksearch.py deleted file mode 100644 index 9ad20326..00000000 --- a/nextpy/ai/scripts/ducksearch.py +++ /dev/null @@ -1,107 +0,0 @@ -# This file has been modified by the Nextpy Team in 2023 using AI tools and automation scripts. -# We have rigorously tested these modifications to ensure reliability and performance. Based on successful test results, we are confident in the quality and stability of these changes. - -# Code directly taken from LangChain Github Repo - -"""Util that calls DuckDuckGo Search. - -No setup required. Free. -https://pypi.org/project/duckduckgo-search/ -""" -from typing import Dict, List, Optional - -from pydantic import BaseModel, Extra -from pydantic.class_validators import root_validator - - -class DuckDuckGoSearchAPIWrapper(BaseModel): - """Wrapper for DuckDuckGo Search API. - - Free and does not require any setup - """ - - k: int = 10 - region: Optional[str] = "wt-wt" - safesearch: str = "moderate" - time: Optional[str] = "y" - max_results: int = 5 - - class Config: - """Configuration for this pydantic object.""" - - extra = Extra.forbid - - @root_validator() - def validate_environment(cls, values: Dict) -> Dict: - """Validate that python package exists in environment.""" - try: - from duckduckgo_search import DDGS # noqa: F401 - except ImportError: - raise ValueError( - "Could not import duckduckgo-search python package. " - "Please install it with `pip install duckduckgo-search`." - ) - return values - - def get_snippets(self, query: str) -> List[str]: - """Run query through DuckDuckGo and return concatenated results.""" - from duckduckgo_search import DDGS - - with DDGS() as ddgs: - results = ddgs.text( - query, - region=self.region, - safesearch=self.safesearch, - timelimit=self.time, - ) - if results is None or next(results, None) is None: - return ["No good DuckDuckGo Search Result was found"] - snippets = [] - for i, res in enumerate(results, 1): - snippets.append(res["body"]) - if i == self.max_results: - break - return snippets - - def run(self, query: str) -> str: - snippets = self.get_snippets(query) - return " ".join(snippets) - - def results(self, query: str, num_results: int) -> List[Dict[str, str]]: - """Run query through DuckDuckGo and return metadata. - - Args: - query: The query to search for. - num_results: The number of results to return. 
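For context, a minimal usage sketch of the BraveSearchWrapper removed above; the API key shown is a placeholder:

    search = BraveSearchWrapper(api_key="BSA...")  # hypothetical Brave Search API key
    print(search.run("nextpy framework"))  # JSON list of {"title", "link", "snippet"}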
- - Returns: - A list of dictionaries with the following keys: - snippet - The description of the result. - title - The title of the result. - link - The link to the result. - """ - from duckduckgo_search import DDGS - - with DDGS() as ddgs: - results = ddgs.text( - query, - region=self.region, - safesearch=self.safesearch, - timelimit=self.time, - ) - if results is None or next(results, None) is None: - return [{"Result": "No good DuckDuckGo Search Result was found"}] - - def to_metadata(result: Dict) -> Dict[str, str]: - return { - "snippet": result["body"], - "title": result["title"], - "link": result["href"], - } - - formatted_results = [] - for i, res in enumerate(results, 1): - formatted_results.append(to_metadata(res)) - if i == num_results: - break - return formatted_results diff --git a/nextpy/ai/scripts/googleplaces.py b/nextpy/ai/scripts/googleplaces.py deleted file mode 100644 index bc756242..00000000 --- a/nextpy/ai/scripts/googleplaces.py +++ /dev/null @@ -1,116 +0,0 @@ -# This file has been modified by the Nextpy Team in 2023 using AI tools and automation scripts. -# We have rigorously tested these modifications to ensure reliability and performance. Based on successful test results, we are confident in the quality and stability of these changes. - -# Code taken directly from LangChain Github Repo - -"""Chain that calls Google Places API.""" - -import logging -from typing import Any, Dict, Optional - -from pydantic import BaseModel, Extra, root_validator - -from nextpy.utils.data_ops import get_from_dict_or_env - - -class GooglePlacesAPIWrapper(BaseModel): - """Wrapper around Google Places API. - - To use, you should have the ``googlemaps`` python package installed, - **an API key for the google maps platform**, - and the enviroment variable ''GPLACES_API_KEY'' - set with your API key , or pass 'gplaces_api_key' - as a named parameter to the constructor. - - By default, this will return the all the results on the input query. - You can use the top_k_results argument to limit the number of results. - - Example: - .. code-block:: python - - - from langchain import GooglePlacesAPIWrapper - gplaceapi = GooglePlacesAPIWrapper() - """ - - gplaces_api_key: Optional[str] = None - google_map_client: Any #: :meta private: - top_k_results: Optional[int] = None - - class Config: - """Configuration for this pydantic object.""" - - extra = Extra.forbid - arbitrary_types_allowed = True - - @root_validator() - def validate_environment(cls, values: Dict) -> Dict: - """Validate that api key is in your environment variable.""" - gplaces_api_key = get_from_dict_or_env( - values, "gplaces_api_key", "GPLACES_API_KEY" - ) - values["gplaces_api_key"] = gplaces_api_key - try: - import googlemaps - - values["google_map_client"] = googlemaps.Client(gplaces_api_key) - except ImportError: - raise ImportError( - "Could not import googlemaps python package. " - "Please install it with `pip install googlemaps`." 
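For context, a minimal usage sketch of the DuckDuckGoSearchAPIWrapper removed above; it assumes only that the duckduckgo-search package is installed:

    search = DuckDuckGoSearchAPIWrapper(max_results=3)
    print(search.run("nextpy framework"))
    print(search.results("nextpy framework", num_results=3))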
- ) - return values - - def run(self, query: str) -> str: - """Run Places search and get k number of places that exists that match.""" - search_results = self.google_map_client.places(query)["results"] - num_to_return = len(search_results) - - places = [] - - if num_to_return == 0: - return "Google Places did not find any places that match the description" - - num_to_return = ( - num_to_return - if self.top_k_results is None - else min(num_to_return, self.top_k_results) - ) - - for i in range(num_to_return): - result = search_results[i] - details = self.fetch_place_details(result["place_id"]) - - if details is not None: - places.append(details) - - return "\n".join([f"{i+1}. {item}" for i, item in enumerate(places)]) - - def fetch_place_details(self, place_id: str) -> Optional[str]: - try: - place_details = self.google_map_client.place(place_id) - formatted_details = self.format_place_details(place_details) - return formatted_details - except Exception as e: - logging.error(f"An Error occurred while fetching place details: {e}") - return None - - def format_place_details(self, place_details: Dict[str, Any]) -> Optional[str]: - try: - name = place_details.get("result", {}).get("name", "Unkown") - address = place_details.get("result", {}).get( - "formatted_address", "Unknown" - ) - phone_number = place_details.get("result", {}).get( - "formatted_phone_number", "Unknown" - ) - website = place_details.get("result", {}).get("website", "Unknown") - - formatted_details = ( - f"{name}\nAddress: {address}\n" - f"Phone: {phone_number}\nWebsite: {website}\n\n" - ) - return formatted_details - except Exception as e: - logging.error(f"An error occurred while formatting place details: {e}") - return None diff --git a/nextpy/ai/scripts/googlesearch.py b/nextpy/ai/scripts/googlesearch.py deleted file mode 100644 index e44c87af..00000000 --- a/nextpy/ai/scripts/googlesearch.py +++ /dev/null @@ -1,134 +0,0 @@ -# This file has been modified by the Nextpy Team in 2023 using AI tools and automation scripts. -# We have rigorously tested these modifications to ensure reliability and performance. Based on successful test results, we are confident in the quality and stability of these changes. - -# COde directly taken from LangChain Github Repo - -"""Util that calls Google Search.""" -from typing import Any, Dict, List, Optional - -from pydantic import BaseModel, Extra, root_validator - -from nextpy.utils.data_ops import get_from_dict_or_env - - -class GoogleSearchAPIWrapper(BaseModel): - """Wrapper for Google Search API. - - Adapted from: Instructions adapted from https://stackoverflow.com/questions/ - 37083058/ - programmatically-searching-google-in-python-using-custom-search - - TODO: DOCS for using it - 1. Install google-api-python-client - - If you don't already have a Google account, sign up. - - If you have never created a Google APIs Console project, - read the Managing Projects page and create a project in the Google API Console. - - Install the library using pip install google-api-python-client - The current version of the library is 2.70.0 at this time - - 2. To create an API key: - - Navigate to the APIs & Services→Credentials panel in Cloud Console. - - Select Create credentials, then select API key from the drop-down menu. - - The API key created dialog box displays your newly created key. - - You now have an API_KEY - - 3. Setup Custom Search Engine so you can search the entire web - - Create a custom search engine in this link. - - In Sites to search, add any valid URL (i.e. www.stackoverflow.com). 
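For context, a minimal usage sketch of the GooglePlacesAPIWrapper removed above, assuming the googlemaps package is installed and GPLACES_API_KEY is set in the environment; the query is invented:

    places = GooglePlacesAPIWrapper(top_k_results=3)
    print(places.run("coffee shops near Trafalgar Square"))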
- - That’s all you have to fill up, the rest doesn’t matter. - In the left-side menu, click Edit search engine → {your search engine name} - → Setup Set Search the entire web to ON. Remove the URL you added from - the list of Sites to search. - - Under Search engine ID you’ll find the search-engine-ID. - - 4. Enable the Custom Search API - - Navigate to the APIs & Services→Dashboard panel in Cloud Console. - - Click Enable APIs and Services. - - Search for Custom Search API and click on it. - - Click Enable. - URL for it: https://console.cloud.google.com/apis/library/customsearch.googleapis - .com - """ - - search_engine: Any #: :meta private: - google_api_key: Optional[str] = None - google_cse_id: Optional[str] = None - k: int = 10 - siterestrict: bool = False - - class Config: - """Configuration for this pydantic object.""" - - extra = Extra.forbid - - def _google_search_results(self, search_term: str, **kwargs: Any) -> List[dict]: - cse = self.search_engine.cse() - if self.siterestrict: - cse = cse.siterestrict() - res = cse.list(q=search_term, cx=self.google_cse_id, **kwargs).execute() - return res.get("items", []) - - @root_validator() - def validate_environment(cls, values: Dict) -> Dict: - """Validate that api key and python package exists in environment.""" - google_api_key = get_from_dict_or_env( - values, "google_api_key", "GOOGLE_API_KEY" - ) - values["google_api_key"] = google_api_key - - google_cse_id = get_from_dict_or_env(values, "google_cse_id", "GOOGLE_CSE_ID") - values["google_cse_id"] = google_cse_id - - try: - from googleapiclient.discovery import build - - except ImportError: - raise ImportError( - "google-api-python-client is not installed. " - "Please install it with `pip install google-api-python-client`" - ) - - service = build("customsearch", "v1", developerKey=google_api_key) - values["search_engine"] = service - - return values - - def run(self, query: str) -> str: - """Run query through GoogleSearch and parse result.""" - snippets = [] - results = self._google_search_results(query, num=self.k) - if len(results) == 0: - return "No good Google Search Result was found" - for result in results: - if "snippet" in result: - snippets.append(result["snippet"]) - - return " ".join(snippets) - - def results(self, query: str, num_results: int) -> List[Dict]: - """Run query through GoogleSearch and return metadata. - - Args: - query: The query to search for. - num_results: The number of results to return. - - Returns: - A list of dictionaries with the following keys: - snippet - The description of the result. - title - The title of the result. - link - The link to the result. - """ - metadata_results = [] - results = self._google_search_results(query, num=num_results) - if len(results) == 0: - return [{"Result": "No good Google Search Result was found"}] - for result in results: - metadata_result = { - "title": result["title"], - "link": result["link"], - } - if "snippet" in result: - metadata_result["snippet"] = result["snippet"] - metadata_results.append(metadata_result) - - return metadata_results diff --git a/nextpy/ai/scripts/googleserper.py b/nextpy/ai/scripts/googleserper.py deleted file mode 100644 index b5b7a5bf..00000000 --- a/nextpy/ai/scripts/googleserper.py +++ /dev/null @@ -1,199 +0,0 @@ -# This file has been modified by the Nextpy Team in 2023 using AI tools and automation scripts. -# We have rigorously tested these modifications to ensure reliability and performance. 
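For context, a minimal usage sketch of the GoogleSearchAPIWrapper removed above, assuming google-api-python-client is installed and GOOGLE_API_KEY plus GOOGLE_CSE_ID are set per the setup steps in its docstring:

    search = GoogleSearchAPIWrapper(k=5)
    print(search.run("nextpy framework"))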
Based on successful test results, we are confident in the quality and stability of these changes. - -# Code taken directly from Langchain Github Repo - -"""Util that calls Google Search using the Serper.dev API.""" -from typing import Any, Dict, List, Optional - -import aiohttp -import requests -from pydantic.class_validators import root_validator -from pydantic.main import BaseModel -from typing_extensions import Literal - -from nextpy.utils.data_ops import get_from_dict_or_env - - -class GoogleSerperAPIWrapper(BaseModel): - """Wrapper around the Serper.dev Google Search API. - - You can create a free API key at https://serper.dev. - - To use, you should have the environment variable ``SERPER_API_KEY`` - set with your API key, or pass `serper_api_key` as a named parameter - to the constructor. - - Example: - .. code-block:: python - - from langchain import GoogleSerperAPIWrapper - google_serper = GoogleSerperAPIWrapper() - """ - - k: int = 10 - gl: str = "us" - hl: str = "en" - # "places" and "images" is available from Serper but not implemented in the - # parser of run(). They can be used in results() - type: Literal["news", "search", "places", "images"] = "search" - result_key_for_type = { - "news": "news", - "places": "places", - "images": "images", - "search": "organic", - } - - tbs: Optional[str] = None - serper_api_key: Optional[str] = None - aiosession: Optional[aiohttp.ClientSession] = None - - class Config: - """Configuration for this pydantic object.""" - - arbitrary_types_allowed = True - - @root_validator() - def validate_environment(cls, values: Dict) -> Dict: - """Validate that api key exists in environment.""" - serper_api_key = get_from_dict_or_env( - values, "serper_api_key", "SERPER_API_KEY" - ) - values["serper_api_key"] = serper_api_key - - return values - - def results(self, query: str, **kwargs: Any) -> Dict: - """Run query through GoogleSearch.""" - return self._google_serper_api_results( - query, - gl=self.gl, - hl=self.hl, - num=self.k, - tbs=self.tbs, - search_type=self.type, - **kwargs, - ) - - def run(self, query: str, **kwargs: Any) -> str: - """Run query through GoogleSearch and parse result.""" - results = self._google_serper_api_results( - query, - gl=self.gl, - hl=self.hl, - num=self.k, - tbs=self.tbs, - search_type=self.type, - **kwargs, - ) - - return self._parse_results(results) - - async def aresults(self, query: str, **kwargs: Any) -> Dict: - """Run query through GoogleSearch.""" - results = await self._async_google_serper_search_results( - query, - gl=self.gl, - hl=self.hl, - num=self.k, - search_type=self.type, - tbs=self.tbs, - **kwargs, - ) - return results - - async def arun(self, query: str, **kwargs: Any) -> str: - """Run query through GoogleSearch and parse result async.""" - results = await self._async_google_serper_search_results( - query, - gl=self.gl, - hl=self.hl, - num=self.k, - search_type=self.type, - tbs=self.tbs, - **kwargs, - ) - - return self._parse_results(results) - - def _parse_snippets(self, results: dict) -> List[str]: - snippets = [] - - if results.get("answerBox"): - answer_box = results.get("answerBox", {}) - if answer_box.get("answer"): - return [answer_box.get("answer")] - elif answer_box.get("snippet"): - return [answer_box.get("snippet").replace("\n", " ")] - elif answer_box.get("snippetHighlighted"): - return answer_box.get("snippetHighlighted") - - if results.get("knowledgeGraph"): - kg = results.get("knowledgeGraph", {}) - title = kg.get("title") - entity_type = kg.get("type") - if entity_type: - 
snippets.append(f"{title}: {entity_type}.") - description = kg.get("description") - if description: - snippets.append(description) - for attribute, value in kg.get("attributes", {}).items(): - snippets.append(f"{title} {attribute}: {value}.") - - for result in results[self.result_key_for_type[self.type]][: self.k]: - if "snippet" in result: - snippets.append(result["snippet"]) - for attribute, value in result.get("attributes", {}).items(): - snippets.append(f"{attribute}: {value}.") - - if len(snippets) == 0: - return ["No good Google Search Result was found"] - return snippets - - def _parse_results(self, results: dict) -> str: - return " ".join(self._parse_snippets(results)) - - def _google_serper_api_results( - self, search_term: str, search_type: str = "search", **kwargs: Any - ) -> dict: - headers = { - "X-API-KEY": self.serper_api_key or "", - "Content-Type": "application/json", - } - params = { - "q": search_term, - **{key: value for key, value in kwargs.items() if value is not None}, - } - response = requests.post( - f"https://google.serper.dev/{search_type}", headers=headers, params=params - ) - response.raise_for_status() - search_results = response.json() - return search_results - - async def _async_google_serper_search_results( - self, search_term: str, search_type: str = "search", **kwargs: Any - ) -> dict: - headers = { - "X-API-KEY": self.serper_api_key or "", - "Content-Type": "application/json", - } - url = f"https://google.serper.dev/{search_type}" - params = { - "q": search_term, - **{key: value for key, value in kwargs.items() if value is not None}, - } - - if not self.aiosession: - async with aiohttp.ClientSession() as session: - async with session.post( - url, params=params, headers=headers, raise_for_status=False - ) as response: - search_results = await response.json() - else: - async with self.aiosession.post( - url, params=params, headers=headers, raise_for_status=True - ) as response: - search_results = await response.json() - - return search_results diff --git a/nextpy/ai/scripts/graphql.py b/nextpy/ai/scripts/graphql.py deleted file mode 100644 index 2841ab6b..00000000 --- a/nextpy/ai/scripts/graphql.py +++ /dev/null @@ -1,59 +0,0 @@ -# This file has been modified by the Nextpy Team in 2023 using AI tools and automation scripts. -# We have rigorously tested these modifications to ensure reliability and performance. Based on successful test results, we are confident in the quality and stability of these changes. - -# Code taken from Langchain Github Repo - -import json -from typing import Any, Callable, Dict, Optional - -from pydantic import BaseModel, Extra, root_validator - - -class GraphQLAPIWrapper(BaseModel): - """Wrapper around GraphQL API. - - To use, you should have the ``gql`` python package installed. - This wrapper will use the GraphQL API to conduct queries. - """ - - custom_headers: Optional[Dict[str, str]] = None - graphql_endpoint: str - gql_client: Any #: :meta private: - gql_function: Callable[[str], Any] #: :meta private: - - class Config: - """Configuration for this pydantic object.""" - - extra = Extra.forbid - - @root_validator(pre=True) - def validate_environment(cls, values: Dict) -> Dict: - """Validate that the python package exists in the environment.""" - try: - from gql import Client, gql - from gql.transport.requests import RequestsHTTPTransport - except ImportError as e: - raise ImportError( - "Could not import gql python package. " - f"Try installing it with `pip install gql`. 
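For context, a minimal usage sketch of the GoogleSerperAPIWrapper removed above, assuming SERPER_API_KEY is set in the environment (a free key is available at https://serper.dev):

    search = GoogleSerperAPIWrapper(type="news", k=5)
    print(search.run("nextpy framework"))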
Received error: {e}" - ) - headers = values.get("custom_headers") - transport = RequestsHTTPTransport( - url=values["graphql_endpoint"], - headers=headers, - ) - client = Client(transport=transport, fetch_schema_from_transport=True) - values["gql_client"] = client - values["gql_function"] = gql - return values - - def run(self, query: str) -> str: - """Run a GraphQL query and get the results.""" - result = self._execute_query(query) - return json.dumps(result, indent=2) - - def _execute_query(self, query: str) -> Dict[str, Any]: - """Execute a GraphQL query and return the results.""" - document_node = self.gql_function(query) - result = self.gql_client.execute(document_node) - return result diff --git a/nextpy/ai/scripts/math.py b/nextpy/ai/scripts/math.py deleted file mode 100644 index 48d78b11..00000000 --- a/nextpy/ai/scripts/math.py +++ /dev/null @@ -1,28 +0,0 @@ -# This file has been modified by the Nextpy Team in 2023 using AI tools and automation scripts. -# We have rigorously tested these modifications to ensure reliability and performance. Based on successful test results, we are confident in the quality and stability of these changes. - -import sys -from io import StringIO -from typing import Dict, Optional - -from pydantic import BaseModel, Field - - -class PythonREPL(BaseModel): - """Simulates a standalone Python REPL.""" - - globals: Optional[Dict] = Field(default_factory=dict, alias="_globals") - locals: Optional[Dict] = Field(default_factory=dict, alias="_locals") - - def run(self, command: str) -> str: - """Run command with own globals/locals and returns anything printed.""" - old_stdout = sys.stdout - sys.stdout = mystdout = StringIO() - try: - exec(command, self.globals, self.locals) - sys.stdout = old_stdout - output = mystdout.getvalue() - except Exception as e: - sys.stdout = old_stdout - output = repr(e) - return output diff --git a/nextpy/ai/scripts/openweatherMap.py b/nextpy/ai/scripts/openweatherMap.py deleted file mode 100644 index 038f4327..00000000 --- a/nextpy/ai/scripts/openweatherMap.py +++ /dev/null @@ -1,83 +0,0 @@ -# This file has been modified by the Nextpy Team in 2023 using AI tools and automation scripts. -# We have rigorously tested these modifications to ensure reliability and performance. Based on successful test results, we are confident in the quality and stability of these changes. - -# Code taken d=from Langchain Github repo and edited to some extent. - -"""Util that calls OpenWeatherMap using PyOWM.""" -from typing import Any, Dict, Optional - -from pydantic import Extra, root_validator -from pydantic.main import BaseModel - -from nextpy.utils.data_ops import get_from_dict_or_env - - -class OpenWeatherMapAPIWrapper(BaseModel): - """Wrapper for OpenWeatherMap API using PyOWM. - - Docs for using: - - 1. Go to OpenWeatherMap and sign up for an API key - 2. Save your API KEY into OPENWEATHERMAP_API_KEY env variable - 3. pip install pyowm - """ - - owm: Any - openweathermap_api_key: Optional[str] = None - - class Config: - """Configuration for this pydantic object.""" - - extra = Extra.forbid - - @root_validator(pre=True) - def validate_environment(cls, values: Dict) -> Dict: - """Validate that api key exists in environment.""" - openweathermap_api_key = get_from_dict_or_env( - values, "openweathermap_api_key", "OPENWEATHERMAP_API_KEY" - ) - - try: - import pyowm - - except ImportError: - raise ImportError( - "pyowm is not installed. 
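For context, a minimal usage sketch of the PythonREPL helper removed above; it needs only pydantic and the standard library:

    repl = PythonREPL()
    print(repl.run("x = 2 ** 10\nprint(x)"))  # -> "1024"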
Please install it with `pip install pyowm`" - ) - - owm = pyowm.OWM(openweathermap_api_key) - values["owm"] = owm - - return values - - def _format_weather_info(self, location: str, w: Any) -> str: - detailed_status = w.detailed_status - wind = w.wind() - humidity = w.humidity - temperature = w.temperature("celsius") - rain = w.rain - heat_index = w.heat_index - clouds = w.clouds - - return ( - f"In {location}, the current weather is as follows:\n" - f"Detailed status: {detailed_status}\n" - f"Wind speed: {wind['speed']} m/s, direction: {wind['deg']}°\n" - f"Humidity: {humidity}%\n" - f"Temperature: \n" - f" - Current: {temperature['temp']}°C\n" - f" - High: {temperature['temp_max']}°C\n" - f" - Low: {temperature['temp_min']}°C\n" - f" - Feels like: {temperature['feels_like']}°C\n" - f"Rain: {rain}\n" - f"Heat index: {heat_index}\n" - f"Cloud cover: {clouds}%" - ) - - def run(self, location: str) -> str: - """Get the current weather information for a specified location.""" - mgr = self.owm.weather_manager() - observation = mgr.weather_at_place(location) - w = observation.weather - - return self._format_weather_info(location, w) diff --git a/nextpy/ai/scripts/sceneexplain.py b/nextpy/ai/scripts/sceneexplain.py deleted file mode 100644 index f072b79e..00000000 --- a/nextpy/ai/scripts/sceneexplain.py +++ /dev/null @@ -1,82 +0,0 @@ -# This file has been modified by the Nextpy Team in 2023 using AI tools and automation scripts. -# We have rigorously tested these modifications to ensure reliability and performance. Based on successful test results, we are confident in the quality and stability of these changes. - -# New code implementation for the following api different from langchain approach - -"""Util that calls SceneXplain. - -In order to set this up, you need API key for the SceneXplain API. -You can obtain a key by following the steps below. -- Sign up for a free account at https://scenex.jina.ai/. -- Navigate to the API Access page (https://scenex.jina.ai/api) and create a new API key. -""" -import base64 -import http -import json -from typing import Dict - -from pydantic import BaseModel, BaseSettings, Field, root_validator - -from nextpy.utils.data_ops import get_from_dict_or_env - - -def _image_to_data_uri(file_path): - with open(file_path, "rb") as image_file: - encoded_image = base64.b64encode(image_file.read()).decode("utf-8") - return f"data:image/jpeg;base64,{encoded_image}" - - -class SceneXplainAPIWrapper(BaseSettings, BaseModel): - """Wrapper for SceneXplain API. - - In order to set this up, you need API key for the SceneXplain API. - You can obtain a key by following the steps below. - - Sign up for a free account at https://scenex.jina.ai/. - - Navigate to the API Access page (https://scenex.jina.ai/api) - and create a new API key. 
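For context, a minimal usage sketch of the OpenWeatherMapAPIWrapper removed above, assuming pyowm is installed and OPENWEATHERMAP_API_KEY is set; the location string is an example:

    weather = OpenWeatherMapAPIWrapper()
    print(weather.run("London,GB"))  # multi-line weather summary for the location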
- """ - - scenex_api_key: str = Field(..., env="SCENEX_API_KEY") - scenex_api_url: str = "us-central1-causal-diffusion.cloudfunctions.net" - - def _describe_image(self, image: str) -> str: - local_image_path = image - data = { - "data": [ - {"image": _image_to_data_uri(local_image_path), "features": []}, - ] - } - - headers = { - "x-api-key": f"token {self.scenex_api_key}", - "content-type": "application/json", - } - - connection = http.client.HTTPSConnection( - "us-central1-causal-diffusion.cloudfunctions.net" - ) - connection.request("POST", "/describe", json.dumps(data), headers) - response = connection.getresponse() - response_data = response.read().decode("utf-8") - response_data = json.loads(response_data) - output = response_data["result"][0]["text"] - connection.close() - return output - - @root_validator(pre=True) - def validate_environment(cls, values: Dict) -> Dict: - """Validate that api key exists in environment.""" - scenex_api_key = get_from_dict_or_env( - values, "scenex_api_key", "SCENEX_API_KEY" - ) - values["scenex_api_key"] = scenex_api_key - - return values - - def run(self, image: str) -> str: - """Run SceneXplain image explainer.""" - description = self._describe_image(image) - if not description: - return "No description found." - - return description diff --git a/nextpy/ai/scripts/serpapi.py b/nextpy/ai/scripts/serpapi.py deleted file mode 100644 index 2645441a..00000000 --- a/nextpy/ai/scripts/serpapi.py +++ /dev/null @@ -1,158 +0,0 @@ -# This file has been modified by the Nextpy Team in 2023 using AI tools and automation scripts. -# We have rigorously tested these modifications to ensure reliability and performance. Based on successful test results, we are confident in the quality and stability of these changes. - -# Code taken directly from Langhchain Github Code - -"""Chain that calls SerpAPI. - -Heavily borrowed from https://github.com/ofirpress/self-ask -""" -import os -import sys -from typing import Any, Dict, Optional, Tuple - -import aiohttp -from pydantic import BaseModel, Extra, Field, root_validator - -from nextpy.utils.data_ops import get_from_dict_or_env - - -class HiddenPrints: - """Context manager to hide prints.""" - - def __enter__(self) -> None: - """Open file to pipe stdout to.""" - self._original_stdout = sys.stdout - sys.stdout = open(os.devnull, "w") - - def __exit__(self, *_: Any) -> None: - """Close file that stdout was piped to.""" - sys.stdout.close() - sys.stdout = self._original_stdout - - -class SerpAPIWrapper(BaseModel): - """Wrapper around SerpAPI. - - To use, you should have the ``google-search-results`` python package installed, - and the environment variable ``SERPAPI_API_KEY`` set with your API key, or pass - `serpapi_api_key` as a named parameter to the constructor. - - Example: - .. 
code-block:: python - - from langchain import SerpAPIWrapper - serpapi = SerpAPIWrapper() - """ - - search_engine: Any #: :meta private: - params: dict = Field( - default={ - "engine": "google", - "google_domain": "google.com", - "gl": "us", - "hl": "en", - } - ) - serpapi_api_key: Optional[str] = None - aiosession: Optional[aiohttp.ClientSession] = None - - class Config: - """Configuration for this pydantic object.""" - - extra = Extra.forbid - arbitrary_types_allowed = True - - @root_validator() - def validate_environment(cls, values: Dict) -> Dict: - """Validate that api key and python package exists in environment.""" - serpapi_api_key = get_from_dict_or_env( - values, "serpapi_api_key", "SERPAPI_API_KEY" - ) - values["serpapi_api_key"] = serpapi_api_key - try: - from serpapi import GoogleSearch - - values["search_engine"] = GoogleSearch - except ImportError: - raise ValueError( - "Could not import serpapi python package. " - "Please install it with `pip install google-search-results`." - ) - return values - - async def arun(self, query: str, **kwargs: Any) -> str: - """Run query through SerpAPI and parse result async.""" - return self._process_response(await self.aresults(query)) - - def run(self, query: str, **kwargs: Any) -> str: - """Run query through SerpAPI and parse result.""" - return self._process_response(self.results(query)) - - def results(self, query: str) -> dict: - """Run query through SerpAPI and return the raw result.""" - params = self.get_params(query) - with HiddenPrints(): - search = self.search_engine(params) - res = search.get_dict() - return res - - async def aresults(self, query: str) -> dict: - """Use aiohttp to run query through SerpAPI and return the results async.""" - - def construct_url_and_params() -> Tuple[str, Dict[str, str]]: - params = self.get_params(query) - params["source"] = "python" - if self.serpapi_api_key: - params["serp_api_key"] = self.serpapi_api_key - params["output"] = "json" - url = "https://serpapi.com/search" - return url, params - - url, params = construct_url_and_params() - if not self.aiosession: - async with aiohttp.ClientSession() as session: - async with session.get(url, params=params) as response: - res = await response.json() - else: - async with self.aiosession.get(url, params=params) as response: - res = await response.json() - - return res - - def get_params(self, query: str) -> Dict[str, str]: - """Get parameters for SerpAPI.""" - _params = { - "api_key": self.serpapi_api_key, - "q": query, - } - params = {**self.params, **_params} - return params - - @staticmethod - def _process_response(res: dict) -> str: - """Process response from SerpAPI.""" - if "error" in res: - raise ValueError(f"Got error from SerpAPI: {res['error']}") - if "answer_box" in res and type(res["answer_box"]) == list: - res["answer_box"] = res["answer_box"][0] - if "answer_box" in res and "answer" in res["answer_box"]: - toret = res["answer_box"]["answer"] - elif "answer_box" in res and "snippet" in res["answer_box"]: - toret = res["answer_box"]["snippet"] - elif "answer_box" in res and "snippet_highlighted_words" in res["answer_box"]: - toret = res["answer_box"]["snippet_highlighted_words"][0] - elif "sports_results" in res and "game_spotlight" in res["sports_results"]: - toret = res["sports_results"]["game_spotlight"] - elif "shopping_results" in res and "title" in res["shopping_results"][0]: - toret = res["shopping_results"][:3] - elif "knowledge_graph" in res and "description" in res["knowledge_graph"]: - toret = 
res["knowledge_graph"]["description"] - elif "snippet" in res["organic_results"][0]: - toret = res["organic_results"][0]["snippet"] - elif "link" in res["organic_results"][0]: - toret = res["organic_results"][0]["link"] - - else: - toret = "No good search result found" - return toret diff --git a/nextpy/ai/scripts/spark_sql_database.py b/nextpy/ai/scripts/spark_sql_database.py deleted file mode 100644 index edd2acb9..00000000 --- a/nextpy/ai/scripts/spark_sql_database.py +++ /dev/null @@ -1,177 +0,0 @@ -# This file has been modified by the Nextpy Team in 2023 using AI tools and automation scripts. -# We have rigorously tested these modifications to ensure reliability and performance. Based on successful test results, we are confident in the quality and stability of these changes. - -from __future__ import annotations - -from typing import TYPE_CHECKING, Any, Iterable, List, Optional - -if TYPE_CHECKING: - from pyspark.sql import DataFrame, Row, SparkSession - - -class SparkSQL: - def __init__( - self, - spark_session: Optional[SparkSession] = None, - catalog: Optional[str] = None, - schema: Optional[str] = None, - ignore_tables: Optional[List[str]] = None, - include_tables: Optional[List[str]] = None, - sample_rows_in_table_info: int = 3, - ): - try: - from pyspark.sql import SparkSession - except ImportError: - raise ValueError( - "pyspark is not installed. Please install it with `pip install pyspark`" - ) - - self._spark = ( - spark_session if spark_session else SparkSession.builder.getOrCreate() - ) - if catalog is not None: - self._spark.catalog.setCurrentCatalog(catalog) - if schema is not None: - self._spark.catalog.setCurrentDatabase(schema) - - self._all_tables = set(self._get_all_table_names()) - self._include_tables = set(include_tables) if include_tables else set() - if self._include_tables: - missing_tables = self._include_tables - self._all_tables - if missing_tables: - raise ValueError( - f"include_tables {missing_tables} not found in database" - ) - self._ignore_tables = set(ignore_tables) if ignore_tables else set() - if self._ignore_tables: - missing_tables = self._ignore_tables - self._all_tables - if missing_tables: - raise ValueError( - f"ignore_tables {missing_tables} not found in database" - ) - usable_tables = self.get_usable_table_names() - self._usable_tables = set(usable_tables) if usable_tables else self._all_tables - - if not isinstance(sample_rows_in_table_info, int): - raise TypeError("sample_rows_in_table_info must be an integer") - - self._sample_rows_in_table_info = sample_rows_in_table_info - - @classmethod - def from_uri( - cls, database_uri: str, engine_args: Optional[dict] = None, **kwargs: Any - ) -> SparkSQL: - """Creating a remote Spark Session via Spark connect. - For example: SparkSQL.from_uri("sc://localhost:15002"). - """ - try: - from pyspark.sql import SparkSession - except ImportError: - raise ValueError( - "pyspark is not installed. Please install it with `pip install pyspark`" - ) - - spark = SparkSession.builder.remote(database_uri).getOrCreate() - return cls(spark, **kwargs) - - def get_usable_table_names(self) -> Iterable[str]: - """Get names of tables available.""" - if self._include_tables: - return self._include_tables - # sorting the result can help LLM understanding it. 
- return sorted(self._all_tables - self._ignore_tables) - - def _get_all_table_names(self) -> Iterable[str]: - rows = self._spark.sql("SHOW TABLES").select("tableName").collect() - return list(map(lambda row: row.tableName, rows)) - - def _get_create_table_stmt(self, table: str) -> str: - statement = ( - self._spark.sql(f"SHOW CREATE TABLE {table}").collect()[0].createtab_stmt - ) - # Ignore the data source provider and options to reduce the number of tokens. - using_clause_index = statement.find("USING") - return statement[:using_clause_index] + ";" - - def get_table_info(self, table_names: Optional[List[str]] = None) -> str: - all_table_names = self.get_usable_table_names() - if table_names is not None: - missing_tables = set(table_names).difference(all_table_names) - if missing_tables: - raise ValueError(f"table_names {missing_tables} not found in database") - all_table_names = table_names - tables = [] - for table_name in all_table_names: - table_info = self._get_create_table_stmt(table_name) - if self._sample_rows_in_table_info: - table_info += "\n\n/*" - table_info += f"\n{self._get_sample_spark_rows(table_name)}\n" - table_info += "*/" - tables.append(table_info) - final_str = "\n\n".join(tables) - return final_str - - def _get_sample_spark_rows(self, table: str) -> str: - query = f"SELECT * FROM {table} LIMIT {self._sample_rows_in_table_info}" - df = self._spark.sql(query) - columns_str = "\t".join(list(map(lambda f: f.name, df.schema.fields))) - try: - sample_rows = self._get_dataframe_results(df) - # save the sample rows in string format - sample_rows_str = "\n".join(["\t".join(row) for row in sample_rows]) - except Exception: - sample_rows_str = "" - - return ( - f"{self._sample_rows_in_table_info} rows from {table} table:\n" - f"{columns_str}\n" - f"{sample_rows_str}" - ) - - def _convert_row_as_tuple(self, row: Row) -> tuple: - return tuple(map(str, row.asDict().values())) - - def _get_dataframe_results(self, df: DataFrame) -> list: - return list(map(self._convert_row_as_tuple, df.collect())) - - def run(self, command: str, fetch: str = "all") -> str: - df = self._spark.sql(command) - if fetch == "one": - df = df.limit(1) - return str(self._get_dataframe_results(df)) - - def get_table_info_no_throw(self, table_names: Optional[List[str]] = None) -> str: - """Get information about specified tables. - - Follows best practices as specified in: Rajkumar et al, 2022 - (https://arxiv.org/abs/2204.00498) - - If `sample_rows_in_table_info`, the specified number of sample rows will be - appended to each table description. This can increase performance as - demonstrated in the paper. - """ - try: - return self.get_table_info(table_names) - except ValueError as e: - """Format the error message""" - return f"Error: {e}" - - def run_no_throw(self, command: str, fetch: str = "all") -> str: - """Execute a SQL command and return a string representing the results. - - If the statement returns rows, a string of the results is returned. - If the statement returns no rows, an empty string is returned. - - If the statement throws an error, the error message is returned. - """ - try: - from pyspark.errors import PySparkException - except ImportError: - raise ValueError( - "pyspark is not installed. 
Please install it with `pip install pyspark`" - ) - try: - return self.run(command, fetch) - except PySparkException as e: - """Format the error message""" - return f"Error: {e}" diff --git a/nextpy/ai/scripts/sql_database.py b/nextpy/ai/scripts/sql_database.py deleted file mode 100644 index 43eb5e68..00000000 --- a/nextpy/ai/scripts/sql_database.py +++ /dev/null @@ -1,446 +0,0 @@ -# This file has been modified by the Nextpy Team in 2023 using AI tools and automation scripts. -# We have rigorously tested these modifications to ensure reliability and performance. Based on successful test results, we are confident in the quality and stability of these changes. - -"""SQLAlchemy wrapper around a database.""" -from __future__ import annotations - -import warnings -from typing import Any, Iterable, List, Optional - -import sqlalchemy -from sqlalchemy import MetaData, Table, create_engine, inspect, select, text -from sqlalchemy.engine import Engine -from sqlalchemy.exc import ProgrammingError, SQLAlchemyError -from sqlalchemy.schema import CreateTable - -from nextpy.utils.data_ops import get_from_dict_or_env - - -def _format_index(index: sqlalchemy.engine.interfaces.ReflectedIndex) -> str: - return ( - f'Name: {index["name"]}, Unique: {index["unique"]},' - f' Columns: {str(index["column_names"])}' - ) - - -def truncate_word(content: Any, *, length: int, suffix: str = "...") -> str: - """Truncate a string to a certain number of words, based on the max string - length. - """ - if not isinstance(content, str) or length <= 0: - return content - - if len(content) <= length: - return content - - return content[: length - len(suffix)].rsplit(" ", 1)[0] + suffix - - -class SQLDatabase: - """SQLAlchemy wrapper around a database.""" - - def __init__( - self, - engine: Engine, - schema: Optional[str] = None, - metadata: Optional[MetaData] = None, - ignore_tables: Optional[List[str]] = None, - include_tables: Optional[List[str]] = None, - sample_rows_in_table_info: int = 3, - indexes_in_table_info: bool = False, - custom_table_info: Optional[dict] = None, - view_support: bool = False, - max_string_length: int = 300, - ): - """Create engine from database URI.""" - self._engine = engine - self._schema = schema - if include_tables and ignore_tables: - raise ValueError("Cannot specify both include_tables and ignore_tables") - - self._inspector = inspect(self._engine) - - # including view support by adding the views as well as tables to the all - # tables list if view_support is True - self._all_tables = set( - self._inspector.get_table_names(schema=schema) - + (self._inspector.get_view_names(schema=schema) if view_support else []) - ) - - self._include_tables = set(include_tables) if include_tables else set() - if self._include_tables: - missing_tables = self._include_tables - self._all_tables - if missing_tables: - raise ValueError( - f"include_tables {missing_tables} not found in database" - ) - self._ignore_tables = set(ignore_tables) if ignore_tables else set() - if self._ignore_tables: - missing_tables = self._ignore_tables - self._all_tables - if missing_tables: - raise ValueError( - f"ignore_tables {missing_tables} not found in database" - ) - usable_tables = self.get_usable_table_names() - self._usable_tables = set(usable_tables) if usable_tables else self._all_tables - - if not isinstance(sample_rows_in_table_info, int): - raise TypeError("sample_rows_in_table_info must be an integer") - - self._sample_rows_in_table_info = sample_rows_in_table_info - self._indexes_in_table_info = indexes_in_table_info - 
- self._custom_table_info = custom_table_info - if self._custom_table_info: - if not isinstance(self._custom_table_info, dict): - raise TypeError( - "table_info must be a dictionary with table names as keys and the " - "desired table info as values" - ) - # only keep the tables that are also present in the database - intersection = set(self._custom_table_info).intersection(self._all_tables) - self._custom_table_info = dict( - (table, self._custom_table_info[table]) - for table in self._custom_table_info - if table in intersection - ) - - self._max_string_length = max_string_length - - self._metadata = metadata or MetaData() - # including view support if view_support = true - self._metadata.reflect( - views=view_support, - bind=self._engine, - only=list(self._usable_tables), - schema=self._schema, - ) - - @classmethod - def from_uri( - cls, database_uri: str, engine_args: Optional[dict] = None, **kwargs: Any - ) -> SQLDatabase: - """Construct a SQLAlchemy engine from URI.""" - _engine_args = engine_args or {} - return cls(create_engine(database_uri, **_engine_args), **kwargs) - - @classmethod - def from_databricks( - cls, - catalog: str, - schema: str, - host: Optional[str] = None, - api_token: Optional[str] = None, - warehouse_id: Optional[str] = None, - cluster_id: Optional[str] = None, - engine_args: Optional[dict] = None, - **kwargs: Any, - ) -> SQLDatabase: - """Class method to create an SQLDatabase instance from a Databricks connection. - This method requires the 'databricks-sql-connector' package. If not installed, - it can be added using `pip install databricks-sql-connector`. - - Args: - catalog (str): The catalog name in the Databricks database. - schema (str): The schema name in the catalog. - host (Optional[str]): The Databricks workspace hostname, excluding - 'https://' part. If not provided, it attempts to fetch from the - environment variable 'DATABRICKS_HOST'. If still unavailable and if - running in a Databricks notebook, it defaults to the current workspace - hostname. Defaults to None. - api_token (Optional[str]): The Databricks personal access token for - accessing the Databricks SQL warehouse or the cluster. If not provided, - it attempts to fetch from 'DATABRICKS_TOKEN'. If still unavailable - and running in a Databricks notebook, a temporary token for the current - user is generated. Defaults to None. - warehouse_id (Optional[str]): The warehouse ID in the Databricks SQL. If - provided, the method configures the connection to use this warehouse. - Cannot be used with 'cluster_id'. Defaults to None. - cluster_id (Optional[str]): The cluster ID in the Databricks Runtime. If - provided, the method configures the connection to use this cluster. - Cannot be used with 'warehouse_id'. If running in a Databricks notebook - and both 'warehouse_id' and 'cluster_id' are None, it uses the ID of the - cluster the notebook is attached to. Defaults to None. - engine_args (Optional[dict]): The arguments to be used when connecting - Databricks. Defaults to None. - **kwargs (Any): Additional keyword arguments for the `from_uri` method. - - Returns: - SQLDatabase: An instance of SQLDatabase configured with the provided - Databricks connection details. - - Raises: - ValueError: If 'databricks-sql-connector' is not found, or if both - 'warehouse_id' and 'cluster_id' are provided, or if neither - 'warehouse_id' nor 'cluster_id' are provided and it's not executing - inside a Databricks notebook. 
- """ - try: - from databricks import sql # noqa: F401 - except ImportError: - raise ValueError( - "databricks-sql-connector package not found, please install with" - " `pip install databricks-sql-connector`" - ) - context = None - try: - from dbruntime.databricks_repl_context import get_context - - context = get_context() - except ImportError: - pass - - default_host = context.browserHostName if context else None - if host is None: - host = get_from_dict_or_env("host", "DATABRICKS_HOST", default_host) - - default_api_token = context.apiToken if context else None - if api_token is None: - api_token = get_from_dict_or_env( - "api_token", "DATABRICKS_TOKEN", default_api_token - ) - - if warehouse_id is None and cluster_id is None: - if context: - cluster_id = context.clusterId - else: - raise ValueError( - "Need to provide either 'warehouse_id' or 'cluster_id'." - ) - - if warehouse_id and cluster_id: - raise ValueError("Can't have both 'warehouse_id' or 'cluster_id'.") - - if warehouse_id: - http_path = f"/sql/1.0/warehouses/{warehouse_id}" - else: - http_path = f"/sql/protocolv1/o/0/{cluster_id}" - - uri = ( - f"databricks://token:{api_token}@{host}?" - f"http_path={http_path}&catalog={catalog}&schema={schema}" - ) - return cls.from_uri(database_uri=uri, engine_args=engine_args, **kwargs) - - @classmethod - def from_cnosdb( - cls, - url: str = "127.0.0.1:8902", - user: str = "root", - password: str = "", - tenant: str = "cnosdb", - database: str = "public", - ) -> SQLDatabase: - """Class method to create an SQLDatabase instance from a CnosDB connection. - This method requires the 'cnos-connector' package. If not installed, it - can be added using `pip install cnos-connector`. - - Args: - url (str): The HTTP connection host name and port number of the CnosDB - service, excluding "http://" or "https://", with a default value - of "127.0.0.1:8902". - user (str): The username used to connect to the CnosDB service, with a - default value of "root". - password (str): The password of the user connecting to the CnosDB service, - with a default value of "". - tenant (str): The name of the tenant used to connect to the CnosDB service, - with a default value of "cnosdb". - database (str): The name of the database in the CnosDB tenant. - - Returns: - SQLDatabase: An instance of SQLDatabase configured with the provided - CnosDB connection details. - """ - try: - from cnosdb_connector import make_cnosdb_langchain_uri - - uri = make_cnosdb_langchain_uri(url, user, password, tenant, database) - return cls.from_uri(database_uri=uri) - except ImportError: - raise ValueError( - "cnos-connector package not found, please install with" - " `pip install cnos-connector`" - ) - - @property - def dialect(self) -> str: - """Return string representation of dialect to use.""" - return self._engine.dialect.name - - def get_usable_table_names(self) -> Iterable[str]: - """Get names of tables available.""" - if self._include_tables: - return sorted(self._include_tables) - return sorted(self._all_tables - self._ignore_tables) - - def get_table_names(self) -> Iterable[str]: - """Get names of tables available.""" - warnings.warn( - "This method is deprecated - please use `get_usable_table_names`." - ) - return self.get_usable_table_names() - - @property - def table_info(self) -> str: - """Information about all tables in the database.""" - return self.get_table_info() - - def get_table_info(self, table_names: Optional[List[str]] = None) -> str: - """Get information about specified tables. 
- - Follows best practices as specified in: Rajkumar et al, 2022 - (https://arxiv.org/abs/2204.00498) - - If `sample_rows_in_table_info`, the specified number of sample rows will be - appended to each table description. This can increase performance as - demonstrated in the paper. - """ - all_table_names = self.get_usable_table_names() - if table_names is not None: - missing_tables = set(table_names).difference(all_table_names) - if missing_tables: - raise ValueError(f"table_names {missing_tables} not found in database") - all_table_names = table_names - - meta_tables = [ - tbl - for tbl in self._metadata.sorted_tables - if tbl.name in set(all_table_names) - and not (self.dialect == "sqlite" and tbl.name.startswith("sqlite_")) - ] - - tables = [] - for table in meta_tables: - if self._custom_table_info and table.name in self._custom_table_info: - tables.append(self._custom_table_info[table.name]) - continue - - # add create table command - create_table = str(CreateTable(table).compile(self._engine)) - table_info = f"{create_table.rstrip()}" - has_extra_info = ( - self._indexes_in_table_info or self._sample_rows_in_table_info - ) - if has_extra_info: - table_info += "\n\n/*" - if self._indexes_in_table_info: - table_info += f"\n{self._get_table_indexes(table)}\n" - if self._sample_rows_in_table_info: - table_info += f"\n{self._get_sample_rows(table)}\n" - if has_extra_info: - table_info += "*/" - tables.append(table_info) - tables.sort() - final_str = "\n\n".join(tables) - return final_str - - def _get_table_indexes(self, table: Table) -> str: - indexes = self._inspector.get_indexes(table.name) - indexes_formatted = "\n".join(map(_format_index, indexes)) - return f"Table Indexes:\n{indexes_formatted}" - - def _get_sample_rows(self, table: Table) -> str: - # build the select command - command = select(table).limit(self._sample_rows_in_table_info) - - # save the columns in string format - columns_str = "\t".join([col.name for col in table.columns]) - - try: - # get the sample rows - with self._engine.connect() as connection: - sample_rows_result = connection.execute(command) # type: ignore - # shorten values in the sample rows - sample_rows = list( - map(lambda ls: [str(i)[:100] for i in ls], sample_rows_result) - ) - - # save the sample rows in string format - sample_rows_str = "\n".join(["\t".join(row) for row in sample_rows]) - - # in some dialects when there are no rows in the table a - # 'ProgrammingError' is returned - except ProgrammingError: - sample_rows_str = "" - - return ( - f"{self._sample_rows_in_table_info} rows from {table.name} table:\n" - f"{columns_str}\n" - f"{sample_rows_str}" - ) - - def run(self, command: str, fetch: str = "all") -> str: - """Execute a SQL command and return a string representing the results. - - If the statement returns rows, a string of the results is returned. - If the statement returns no rows, an empty string is returned. 
- - """ - with self._engine.begin() as connection: - if self._schema is not None: - if self.dialect == "snowflake": - connection.exec_driver_sql( - f"ALTER SESSION SET search_path='{self._schema}'" - ) - elif self.dialect == "bigquery": - connection.exec_driver_sql(f"SET @@dataset_id='{self._schema}'") - else: - connection.exec_driver_sql(f"SET search_path TO {self._schema}") - cursor = connection.execute(text(command)) - if cursor.returns_rows: - if fetch == "all": - result = cursor.fetchall() - elif fetch == "one": - result = cursor.fetchone() # type: ignore - else: - raise ValueError("Fetch parameter must be either 'one' or 'all'") - - # Convert columns values to string to avoid issues with sqlalchmey - # trunacating text - if isinstance(result, list): - return str( - [ - tuple( - truncate_word(c, length=self._max_string_length) - for c in r - ) - for r in result - ] - ) - - return str( - tuple( - truncate_word(c, length=self._max_string_length) for c in result - ) - ) - return "" - - def get_table_info_no_throw(self, table_names: Optional[List[str]] = None) -> str: - """Get information about specified tables. - - Follows best practices as specified in: Rajkumar et al, 2022 - (https://arxiv.org/abs/2204.00498) - - If `sample_rows_in_table_info`, the specified number of sample rows will be - appended to each table description. This can increase performance as - demonstrated in the paper. - """ - try: - return self.get_table_info(table_names) - except ValueError as e: - """Format the error message""" - return f"Error: {e}" - - def run_no_throw(self, command: str, fetch: str = "all") -> str: - """Execute a SQL command and return a string representing the results. - - If the statement returns rows, a string of the results is returned. - If the statement returns no rows, an empty string is returned. - - If the statement throws an error, the error message is returned. - """ - try: - return self.run(command, fetch) - except SQLAlchemyError as e: - """Format the error message""" - return f"Error: {e}" diff --git a/nextpy/ai/scripts/twilio.py b/nextpy/ai/scripts/twilio.py deleted file mode 100644 index f4b2f47f..00000000 --- a/nextpy/ai/scripts/twilio.py +++ /dev/null @@ -1,86 +0,0 @@ -# This file has been modified by the Nextpy Team in 2023 using AI tools and automation scripts. -# We have rigorously tested these modifications to ensure reliability and performance. Based on successful test results, we are confident in the quality and stability of these changes. - -"""Util that calls Twilio.""" -from typing import Any, Dict, Optional - -from pydantic import BaseModel, Extra, root_validator - -from nextpy.utils.data_ops import get_from_dict_or_env - - -class TwilioAPIWrapper(BaseModel): - """Sms Client using Twilio. - - To use, you should have the ``twilio`` python package installed, - and the environment variables ``TWILIO_ACCOUNT_SID``, ``TWILIO_AUTH_TOKEN``, and - ``TWILIO_FROM_NUMBER``, or pass `account_sid`, `auth_token`, and `from_number` as - named parameters to the constructor. - - Example: - .. 
code-block:: python - - from langchain.utilities.twilio import TwilioAPIWrapper - twilio = TwilioAPIWrapper( - account_sid="ACxxx", - auth_token="xxx", - from_number="+10123456789" - ) - twilio.run('test', '+12484345508') - """ - - client: Any #: :meta private: - account_sid: Optional[str] = None - """Twilio account string identifier.""" - auth_token: Optional[str] = None - """Twilio auth token.""" - from_number: Optional[str] = None - """A Twilio phone number in [E.164](https://www.twilio.com/docs/glossary/what-e164) - format, an - [alphanumeric sender ID](https://www.twilio.com/docs/sms/send-messages#use-an-alphanumeric-sender-id), - or a [Channel Endpoint address](https://www.twilio.com/docs/sms/channels#channel-addresses) - that is enabled for the type of message you want to send. Phone numbers or - [short codes](https://www.twilio.com/docs/sms/api/short-code) purchased from - Twilio also work here. You cannot, for example, spoof messages from a private - cell phone number. If you are using `messaging_service_sid`, this parameter - must be empty. - """ # noqa: E501 - - class Config: - """Configuration for this pydantic object.""" - - extra = Extra.forbid - arbitrary_types_allowed = False - - @root_validator() - def validate_environment(cls, values: Dict) -> Dict: - """Validate that api key and python package exists in environment.""" - try: - from twilio.rest import Client - except ImportError: - raise ImportError( - "Could not import twilio python package. " - "Please install it with `pip install twilio`." - ) - account_sid = get_from_dict_or_env(values, "account_sid", "TWILIO_ACCOUNT_SID") - auth_token = get_from_dict_or_env(values, "auth_token", "TWILIO_AUTH_TOKEN") - values["from_number"] = get_from_dict_or_env( - values, "from_number", "TWILIO_FROM_NUMBER" - ) - values["client"] = Client(account_sid, auth_token) - return values - - def run(self, body: str, to: str) -> str: - """Run body through Twilio and respond with message sid. - - Args: - body: The text of the message you want to send. Can be up to 1,600 - characters in length. - to: The destination phone number in - [E.164](https://www.twilio.com/docs/glossary/what-e164) format for - SMS/MMS or - [Channel user address](https://www.twilio.com/docs/sms/channels#channel-addresses) - for other 3rd-party channels. - """ # noqa: E501 - message = self.client.messages.create(to, from_=self.from_number, body=body) - return message.sid diff --git a/nextpy/ai/scripts/webscrapper.py b/nextpy/ai/scripts/webscrapper.py deleted file mode 100644 index 1b8d1469..00000000 --- a/nextpy/ai/scripts/webscrapper.py +++ /dev/null @@ -1,108 +0,0 @@ -# This file has been modified by the Nextpy Team in 2023 using AI tools and automation scripts. -# We have rigorously tested these modifications to ensure reliability and performance. Based on successful test results, we are confident in the quality and stability of these changes. - -import random -import re -from typing import Type - -import requests -from bs4 import BeautifulSoup -from pydantic import BaseModel, Field - -USER_AGENTS = [ - "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3", - # ... Rest of the user agents... -] - - -class WebScraperSchema(BaseModel): - website_url: str = Field( - ..., - description="Valid website url without any quotes.", - ) - - -class WebScraperTool(BaseTool): - """Web Scraper tool. - - Attributes: - name : The name. - description : The description. - args_schema : The args schema. 
- """ - - name = "WebScraperTool" - description = "Used to scrape website urls and extract text content" - args_schema: Type[WebScraperSchema] = WebScraperSchema - - def run(self, tool_input: str, **kwargs: Any) -> Any: - """Execute the Web Scraper tool. - - Args: - tool_input : The website url to scrape. - - Returns: - The text content of the website. - """ - content = self.extract_with_bs4(tool_input) - max_length = len(" ".join(content.split(" ")[:600])) - return content[:max_length] - - def extract_with_bs4(self, url): - headers = {"User-Agent": random.choice(USER_AGENTS)} - try: - response = requests.get(url, headers=headers, timeout=10) - if response.status_code == 200: - soup = BeautifulSoup(response.text, "html.parser") - for tag in soup( - [ - "script", - "style", - "nav", - "footer", - "head", - "link", - "meta", - "noscript", - ] - ): - tag.decompose() - - main_content_areas = soup.find_all( - ["main", "article", "section", "div"] - ) - if main_content_areas: - main_content = max(main_content_areas, key=lambda x: len(x.text)) - content_tags = ["p", "h1", "h2", "h3", "h4", "h5", "h6"] - content = " ".join( - [ - tag.text.strip() - for tag in main_content.find_all(content_tags) - ] - ) - else: - content = " ".join( - [ - tag.text.strip() - for tag in soup.find_all( - ["p", "h1", "h2", "h3", "h4", "h5", "h6"] - ) - ] - ) - - content = re.sub(r"\t", " ", content) - content = re.sub(r"\s+", " ", content) - return content - elif response.status_code == 404: - return f"Error: 404. Url is invalid or does not exist. Try with valid url..." - else: - logger.error( - f"Error while extracting text from HTML (bs4): {response.status_code}" - ) - return f"Error while extracting text from HTML (bs4): {response.status_code}" - - except Exception as e: - logger.error( - f"Unknown error while extracting text from HTML (bs4): {str(e)}" - ) - return "" diff --git a/nextpy/ai/scripts/wikipedia.py b/nextpy/ai/scripts/wikipedia.py deleted file mode 100644 index 309bdfe5..00000000 --- a/nextpy/ai/scripts/wikipedia.py +++ /dev/null @@ -1,76 +0,0 @@ -# This file has been modified by the Nextpy Team in 2023 using AI tools and automation scripts. -# We have rigorously tested these modifications to ensure reliability and performance. Based on successful test results, we are confident in the quality and stability of these changes. - -# Code taken from langchain Github Repo and removed load method as it involved Documents from langchain - -"""Util that calls Wikipedia.""" -import logging -from typing import Any, Dict, Optional - -from pydantic import BaseModel, Extra, root_validator - -logger = logging.getLogger(__name__) - -WIKIPEDIA_MAX_QUERY_LENGTH = 300 - - -class WikipediaAPIWrapper(BaseModel): - """Wrapper around WikipediaAPI. - - To use, you should have the ``wikipedia`` python package installed. - This wrapper will use the Wikipedia API to conduct searches and - fetch page summaries. By default, it will return the page summaries - of the top-k results. - It limits the Document content by doc_content_chars_max. 
- """ - - wiki_client: Any #: :meta private: - top_k_results: int = 3 - lang: str = "en" - load_all_available_meta: bool = False - doc_content_chars_max: int = 4000 - - class Config: - """Configuration for this pydantic object.""" - - extra = Extra.forbid - - @root_validator() - def validate_environment(cls, values: Dict) -> Dict: - """Validate that the python package exists in environment.""" - try: - import wikipedia - - wikipedia.set_lang(values["lang"]) - values["wiki_client"] = wikipedia - except ImportError: - raise ImportError( - "Could not import wikipedia python package. " - "Please install it with `pip install wikipedia`." - ) - return values - - def run(self, query: str) -> str: - """Run Wikipedia search and get page summaries.""" - page_titles = self.wiki_client.search(query[:WIKIPEDIA_MAX_QUERY_LENGTH]) - summaries = [] - for page_title in page_titles[: self.top_k_results]: - if wiki_page := self._fetch_page(page_title): - if summary := self._formatted_page_summary(page_title, wiki_page): - summaries.append(summary) - if not summaries: - return "No good Wikipedia Search Result was found" - return "\n\n".join(summaries)[: self.doc_content_chars_max] - - @staticmethod - def _formatted_page_summary(page_title: str, wiki_page: Any) -> Optional[str]: - return f"Page: {page_title}\nSummary: {wiki_page.summary}" - - def _fetch_page(self, page: str) -> Optional[str]: - try: - return self.wiki_client.page(title=page, auto_suggest=False) - except ( - self.wiki_client.exceptions.PageError, - self.wiki_client.exceptions.DisambiguationError, - ): - return None diff --git a/nextpy/ai/scripts/wolframalpha.py b/nextpy/ai/scripts/wolframalpha.py deleted file mode 100644 index 28e88eb6..00000000 --- a/nextpy/ai/scripts/wolframalpha.py +++ /dev/null @@ -1,69 +0,0 @@ -# This file has been modified by the Nextpy Team in 2023 using AI tools and automation scripts. -# We have rigorously tested these modifications to ensure reliability and performance. Based on successful test results, we are confident in the quality and stability of these changes. - -# Code taken directly from langchain gihub repo - -"""Util that calls WolframAlpha.""" -from typing import Any, Dict, Optional - -from pydantic import BaseModel, Extra, root_validator - -from nextpy.utils.data_ops import get_from_dict_or_env - - -class WolframAlphaAPIWrapper(BaseModel): - """Wrapper for Wolfram Alpha. - - Docs for using: - - 1. Go to wolfram alpha and sign up for a developer account - 2. Create an app and get your APP ID - 3. Save your APP ID into WOLFRAM_ALPHA_APPID env variable - 4. pip install wolframalpha - - """ - - wolfram_client: Any #: :meta private: - wolfram_alpha_appid: Optional[str] = None - - class Config: - """Configuration for this pydantic object.""" - - extra = Extra.forbid - - @root_validator() - def validate_environment(cls, values: Dict) -> Dict: - """Validate that api key and python package exists in environment.""" - wolfram_alpha_appid = get_from_dict_or_env( - values, "wolfram_alpha_appid", "WOLFRAM_ALPHA_APPID" - ) - values["wolfram_alpha_appid"] = wolfram_alpha_appid - - try: - import wolframalpha - - except ImportError: - raise ImportError( - "wolframalpha is not installed. 
" - "Please install it with `pip install wolframalpha`" - ) - client = wolframalpha.Client(wolfram_alpha_appid) - values["wolfram_client"] = client - - return values - - def run(self, query: str) -> str: - """Run query through WolframAlpha and parse result.""" - res = self.wolfram_client.query(query) - - try: - assumption = next(res.pods).text - answer = next(res.results).text - except StopIteration: - return "Wolfram Alpha wasn't able to answer it" - - if answer is None or answer == "": - # We don't want to return the assumption alone if answer is empty - return "No good Wolfram Alpha Result was found" - else: - return f"Assumption: {assumption} \nAnswer: {answer}" diff --git a/nextpy/ai/scripts/youtubeSearch.py b/nextpy/ai/scripts/youtubeSearch.py deleted file mode 100644 index e8c13283..00000000 --- a/nextpy/ai/scripts/youtubeSearch.py +++ /dev/null @@ -1,41 +0,0 @@ -# This file has been modified by the Nextpy Team in 2023 using AI tools and automation scripts. -# We have rigorously tested these modifications to ensure reliability and performance. Based on successful test results, we are confident in the quality and stability of these changes. - -import json - -from nextpy.ai.tools.basetool import BaseTool - - -class YouTubeSearchTool(BaseTool): - """Tool that queries YouTube.""" - - name = "youtube_search" - description = ( - "search for youtube videos associated with a person. " - "the input to this tool should be a comma separated list, " - "the first part contains a person name and the second a " - "number that is the maximum number of video results " - "to return aka num_results. the second part is optional" - ) - - def _search(self, person: str, num_results: int) -> str: - from youtube_search import YoutubeSearch - - results = YoutubeSearch(person, num_results).to_json() - data = json.loads(results) - url_suffix_list = [video["url_suffix"] for video in data["videos"]] - return str(url_suffix_list) - - def run( - self, - query: str, - ) -> str: - """Use the tool.""" - values = query.split(",") - person = values[0] - num_results = int(values[1]) if len(values) > 1 else 2 - return self._search(person, num_results) - - async def _arun(self, query: str) -> str: - """Use the tool asynchronously.""" - raise NotImplementedError("YouTubeSearchTool does not yet support async") diff --git a/nextpy/ai/scripts/youtubeTranscript.py b/nextpy/ai/scripts/youtubeTranscript.py deleted file mode 100644 index aeae6331..00000000 --- a/nextpy/ai/scripts/youtubeTranscript.py +++ /dev/null @@ -1,60 +0,0 @@ -# This file has been modified by the Nextpy Team in 2023 using AI tools and automation scripts. -# We have rigorously tested these modifications to ensure reliability and performance. Based on successful test results, we are confident in the quality and stability of these changes. 
- -import re -from typing import Any, List, Optional - -from nextpy.ai.schema import Document, DocumentNode - - -class YoutubeTranscriptReader: - """Youtube Transcript reader.""" - - @staticmethod - def _extract_video_id(yt_link) -> Optional[str]: - # regular expressions to match the different syntax of YouTube links - patterns = [ - r"^https?://(?:www\.)?youtube\.com/watch\?v=([\w-]+)", - r"^https?://(?:www\.)?youtube\.com/embed/([\w-]+)", - r"^https?://youtu\.be/([\w-]+)", - ] # youtu.be does not use www - - for pattern in patterns: - match = re.search(pattern, yt_link) - if match: - return match.group(1) - - # return None if no match is found - return None - - def load_data( - self, - ytlinks: List[str], - languages: Optional[List[str]] = ["en"], - **load_kwargs: Any - ) -> List[DocumentNode]: - """Load transcripts for the given YouTube links. - - Args: - ytlinks (List[str]): List of youtube links \ - for which transcripts are to be read. - - """ - from youtube_transcript_api import YouTubeTranscriptApi - - results = [] - for link in ytlinks: - video_id = self._extract_video_id(link) - srt = YouTubeTranscriptApi.get_transcript(video_id, languages=languages) - transcript = "" - for chunk in srt: - transcript = transcript + chunk["text"] + "\n" - results.append( - DocumentNode(text=transcript, extra_info={"video_id": video_id}) - ) - return results - - def load_langchain_documents(self, **load_kwargs: Any) -> List[Document]: - """Load data in LangChain document format.""" - docs = self.load_data(**load_kwargs) - return [d.to_langchain_format() for d in docs] diff --git a/nextpy/ai/skills/__init__.py b/nextpy/ai/skills/__init__.py new file mode 100644 index 00000000..e617184d --- /dev/null +++ b/nextpy/ai/skills/__init__.py @@ -0,0 +1 @@ +# init file for skills diff --git a/nextpy/ai/skills/base.py b/nextpy/ai/skills/base.py new file mode 100644 index 00000000..4342acee --- /dev/null +++ b/nextpy/ai/skills/base.py @@ -0,0 +1,20 @@ +# base class for all skills +from typing import Callable, Optional, Type +from abc import ABC +from pydantic import BaseModel + + +class BaseSkill(ABC, BaseModel): + + name: str + # The unique name of the skill that clearly communicates its purpose. + description: str + # Used to tell the model how/when/why to use the skill. You can provide few-shot examples as a part of the description. + func: Callable = None + # Function which implements the skill and takes in the input. + args_schema: Optional[Type[BaseModel]] = None + # Pydantic model class to validate and parse the skill's input arguments. + return_direct: bool = False + # Whether to return the skill's output directly. Setting this to True means that after the skill is called, the AgentExecutor will stop looping. + verbose: bool = False + # Whether to log the skill's progress. diff --git a/nextpy/ai/skills/skill_manager.py b/nextpy/ai/skills/skill_manager.py new file mode 100644 index 00000000..a20d22c3 --- /dev/null +++ b/nextpy/ai/skills/skill_manager.py @@ -0,0 +1 @@ +# manager to retrieve and register skills diff --git a/nextpy/ai/tests/agent/test_base_agent.py b/nextpy/ai/tests/agent/test_base_agent.py index bf72706e..489ff7f1 100644 --- a/nextpy/ai/tests/agent/test_base_agent.py +++ b/nextpy/ai/tests/agent/test_base_agent.py @@ -1,4 +1,4 @@ -# This file has been modified by the Nextpy Team in 2023 using AI tools and automation scripts. +# This file has been modified by the Nextpy Team in 2023 using AI tools and automation scripts. # We have rigorously tested these modifications to ensure reliability and performance.
Based on successful test results, we are confident in the quality and stability of these changes. from unittest.mock import MagicMock @@ -7,11 +7,11 @@ from nextpy.ai.agent.base_agent import AgentState, BaseAgent from nextpy.ai.memory.base import BaseMemory -from nextpy.ai.tools.basetool import BaseTool +from nextpy.ai.skills.base import BaseSkill -class MockBaseTool(BaseTool): - # Assuming BaseTool does not have any mandatory methods +class MockBaseSkill(BaseSkill): + # Assuming BaseSkill does not have any mandatory methods pass @@ -64,15 +64,17 @@ def clear(self) -> None: @pytest.fixture def base_agent_obj(): - """Return a BaseAgent object with mock base tools and memory. This is a context manager to allow unit tests to run.""" - tools = [ - MockBaseTool(name="MockTool1", description="Mock description for tool 1"), - MockBaseTool(name="MockTool2", description="Mock description for tool 2"), + """Return a BaseAgent object with mock base skills and memory. This is a context manager to allow unit tests to run.""" + skills = [ + MockBaseSkill(name="Mockskill1", + description="Mock description for skill 1"), + MockBaseSkill(name="Mockskill2", + description="Mock description for skill 2"), ] memory = MockMemory() agent = BaseAgent( rag=MagicMock(), - tools=tools, + skills=skills, llm=MagicMock(), prompt_template="Test Prompt", input_variables={"knowledge_variable": "knowledge_variable"}, @@ -85,42 +87,43 @@ def base_agent_obj(): yield agent # use yield to ensure cleanup after tests have run -def test_init_with_tools(base_agent_obj): - """Tests init with tools. This is a test to make sure we don't accidentally get the tools from the Agent object after it has been initialized. +def test_init_with_skills(base_agent_obj): + """Tests init with skills. Verifies the agent exposes exactly the skills it was initialized with. Args: base_agent_obj: An instance of BaseAgent. """ - assert len(base_agent_obj.tools) == 2 + assert len(base_agent_obj.skills) == 2 assert base_agent_obj.state == AgentState.IDLE # assert base_agent_obj.get_knowledge_variable == "Test" -def test_add_tool(base_agent_obj): - """Tests adding a tool to the base agent. This is a convenience method to make sure we don't accidentally add tools that are already in the list. +def test_add_skill(base_agent_obj): + """Tests adding a skill to the base agent. Verifies the new skill is appended to the agent's skill list. Args: base_agent_obj: An instance of BaseAgent. """ - new_tool = MockBaseTool(name="MockTool3", description="Mock description for tool 3") - base_agent_obj.add_tool(new_tool) - assert len(base_agent_obj.tools) == 3 - assert new_tool in base_agent_obj.tools + new_skill = MockBaseSkill( + name="Mockskill3", description="Mock description for skill 3") + base_agent_obj.add_skill(new_skill) + assert len(base_agent_obj.skills) == 3 + assert new_skill in base_agent_obj.skills -def test_remove_tool(base_agent_obj): - """Remove a tool from the base agent. Checks that it is removed and no more tools are added. +def test_remove_skill(base_agent_obj): + """Remove a skill from the base agent. Checks that it is removed and no more skills are added. Args: base_agent_obj: An instance of BaseAgent. """ - tool = base_agent_obj.tools[0] - base_agent_obj.remove_tool(tool) - assert len(base_agent_obj.tools) == 1 - assert tool not in base_agent_obj.tools + skill = base_agent_obj.skills[0] + base_agent_obj.remove_skill(skill) + assert len(base_agent_obj.skills) == 1 + assert skill not in base_agent_obj.skills # @patch('llms.agent.base_agent.engine') diff --git a/nextpy/ai/tokenizers/__init__.py b/nextpy/ai/tokenizers/__init__.py deleted file mode 100644 index b01f2ff9..00000000 --- a/nextpy/ai/tokenizers/__init__.py +++ /dev/null @@ -1,6 +0,0 @@ -# This file has been modified by the Nextpy Team in 2023 using AI tools and automation scripts. -# We have rigorously tested these modifications to ensure reliability and performance. Based on successful test results, we are confident in the quality and stability of these changes. - -from nextpy.ai.tokenizers.base import BaseTokenizer -from nextpy.ai.tokenizers.openai_tokenizer import OpenAiTokenizer -from nextpy.ai.tokenizers.simple_tokenizer import SimpleTokenizer diff --git a/nextpy/ai/tokenizers/base.py b/nextpy/ai/tokenizers/base.py deleted file mode 100644 index 098ccf24..00000000 --- a/nextpy/ai/tokenizers/base.py +++ /dev/null @@ -1,34 +0,0 @@ -# This file has been modified by the Nextpy Team in 2023 using AI tools and automation scripts. -# We have rigorously tested these modifications to ensure reliability and performance. Based on successful test results, we are confident in the quality and stability of these changes. - -from __future__ import annotations -from attr import define, field, Factory -from abc import ABC, abstractmethod - - -@define(frozen=True) -class BaseTokenizer(ABC): - """Abstract base class for a tokenizer.""" - - # Define RESPONSE_STOP_SEQUENCE as a class attribute - RESPONSE_STOP_SEQUENCE = "<|Response|>" - - stop_sequences: list[str] = field( - default=Factory(lambda: [BaseTokenizer.RESPONSE_STOP_SEQUENCE]), kw_only=True - ) - - @property - @abstractmethod - def max_tokens(self) -> int: - """Abstract property to get the maximum number of tokens.""" - pass - - def count_tokens_left(self, text: str | list[str]) -> int: - """Calculate the number of tokens left within the max_tokens limit.""" - remaining = self.max_tokens - self.count_tokens(text) - return max(0, remaining) - - @abstractmethod - def count_tokens(self, text: str | list[str]) -> int: - """Abstract method to count the number of tokens in the given text.""" - pass diff --git a/nextpy/ai/tokenizers/openai_tokenizer.py b/nextpy/ai/tokenizers/openai_tokenizer.py deleted file mode 100644 index 3282bce3..00000000 --- a/nextpy/ai/tokenizers/openai_tokenizer.py +++ /dev/null @@ -1,122 +0,0 @@ -# This file has been modified by the Nextpy Team in 2023 using AI tools and automation scripts. -# We have rigorously tested these modifications to ensure reliability and performance. Based on successful test results, we are confident in the quality and stability of these changes.
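The removed BaseTokenizer above leaves max_tokens and count_tokens abstract and derives count_tokens_left from them. A minimal sketch of a concrete subclass, assuming the pre-deletion import path and a toy whitespace token count (hypothetical, for illustration only):

.. code-block:: python

    # Toy concrete subclass of the removed BaseTokenizer.
    from attr import define

    from nextpy.ai.tokenizers import BaseTokenizer


    @define(frozen=True)
    class WhitespaceTokenizer(BaseTokenizer):
        @property
        def max_tokens(self) -> int:
            return 1024

        def count_tokens(self, text: str) -> int:
            # One token per whitespace-separated word.
            return len(text.split())


    tok = WhitespaceTokenizer()
    assert tok.count_tokens("hello world") == 2
    assert tok.count_tokens_left("hello world") == 1022  # max(0, 1024 - 2)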
- -from __future__ import annotations -import logging -from typing import Optional, Union, List, Dict -from attr import define, field -import tiktoken -from nextpy.ai.tokenizers import BaseTokenizer - - -@define(frozen=True) -class OpenAiTokenizer(BaseTokenizer): - DEFAULT_OPENAI_GPT_3_COMPLETION_MODEL = "text-davinci-003" - DEFAULT_OPENAI_GPT_3_CHAT_MODEL = "gpt-3.5-turbo" - DEFAULT_OPENAI_GPT_4_MODEL = "gpt-4" - DEFAULT_ENCODING = "cl100k_base" - DEFAULT_MAX_TOKENS = 2049 - TOKEN_OFFSET = 8 - - MODEL_PREFIXES_TO_MAX_TOKENS = { - "gpt-4-1106": 128000, - "gpt-4-32k": 32768, - "gpt-4": 8192, - "gpt-3.5-turbo-16k": 16384, - "gpt-3.5-turbo": 4096, - "gpt-35-turbo-16k": 16384, - "gpt-35-turbo": 4096, - "text-davinci-003": 4097, - "text-davinci-002": 4097, - "code-davinci-002": 8001, - "text-embedding-ada-002": 8191, - "text-embedding-ada-001": 2046, - } - - EMBEDDING_MODELS = ["text-embedding-ada-002", "text-embedding-ada-001"] - - model: str = field(kw_only=True) - - @property - def encoding(self) -> tiktoken.Encoding: - try: - return tiktoken.encoding_for_model(self.model) - except KeyError: - return tiktoken.get_encoding(self.DEFAULT_ENCODING) - - @property - def max_tokens(self) -> int: - tokens = self.MODEL_PREFIXES_TO_MAX_TOKENS.get( - self.model, self.DEFAULT_MAX_TOKENS - ) - offset = 0 if self.model in self.EMBEDDING_MODELS else self.TOKEN_OFFSET - return tokens - offset - - def count_tokens( - self, text: Union[str, List[Dict[str, str]]], model: Optional[str] = None - ) -> int: - """ - Count the number of tokens in the given text. Handles the special case of ChatML. - """ - if isinstance(text, list): - return self._count_tokens_for_chatml(text, model or self.model) - else: - return len( - self.encoding.encode(text, allowed_special=set(self.stop_sequences)) - ) - - def _count_tokens_for_chatml( - self, messages: List[Dict[str, str]], model: str - ) -> int: - """ - Count the number of tokens for ChatML specific models. - """ - try: - encoding = tiktoken.encoding_for_model(model) - except KeyError: - logging.warning( - f"Model '{model}' not found. Using default cl100k_base encoding." - ) - encoding = tiktoken.get_encoding("cl100k_base") - - if model in { - "gpt-3.5-turbo-0613", - "gpt-3.5-turbo-16k-0613", - "gpt-4-0314", - "gpt-4-32k-0314", - "gpt-4-0613", - "gpt-4-32k-0613", - }: - tokens_per_message = 3 - tokens_per_name = 1 - elif model == "gpt-3.5-turbo-0301": - tokens_per_message = 4 - tokens_per_name = -1 - elif "gpt-3.5-turbo" in model or "gpt-35-turbo" in model: - logging.info( - "gpt-3.5-turbo may update over time. Returning num tokens assuming gpt-3.5-turbo-0613." - ) - return self.count_tokens(messages, model="gpt-3.5-turbo-0613") - elif "gpt-4" in model: - logging.info( - "gpt-4 may update over time. Returning num tokens assuming gpt-4-0613." - ) - return self.count_tokens(messages, model="gpt-4-0613") - else: - raise NotImplementedError( - f"token_count() is not implemented for model {model}. " - "See https://github.com/openai/openai-python/blob/main/chatml.md for " - "information on how messages are converted to tokens." 
- ) - - num_tokens = 0 - for message in messages: - num_tokens += tokens_per_message - for key, value in message.items(): - num_tokens += len(encoding.encode(value)) - if key == "name": - num_tokens += tokens_per_name - - # Every reply is primed with assistant - num_tokens += 3 - return num_tokens diff --git a/nextpy/ai/tokenizers/simple_tokenizer.py b/nextpy/ai/tokenizers/simple_tokenizer.py deleted file mode 100644 index 667bd1ad..00000000 --- a/nextpy/ai/tokenizers/simple_tokenizer.py +++ /dev/null @@ -1,33 +0,0 @@ -# This file has been modified by the Nextpy Team in 2023 using AI tools and automation scripts. -# We have rigorously tested these modifications to ensure reliability and performance. Based on successful test results, we are confident in the quality and stability of these changes. - -from attr import define, field -from nextpy.ai.tokenizers import BaseTokenizer - - -@define(frozen=True) -class SimpleTokenizer(BaseTokenizer): - """ - A simple tokenizer that divides the input text into tokens based on the number of characters per token. - """ - - characters_per_token: int = field(kw_only=True) - max_tokens: int = field(kw_only=True) - - def count_tokens(self, text: str) -> int: - """ - Count the number of tokens in the given text based on the predefined number of characters per token. - - Args: - text (str): The input text to be tokenized. - - Returns: - int: The number of tokens in the input text. - """ - if self.characters_per_token <= 0: - raise ValueError("characters_per_token must be a positive integer") - - num_tokens = ( - len(text) + self.characters_per_token - 1 - ) // self.characters_per_token - return num_tokens diff --git a/nextpy/ai/tokenizers/transformer_tokenizer.py b/nextpy/ai/tokenizers/transformer_tokenizer.py deleted file mode 100644 index 2095b615..00000000 --- a/nextpy/ai/tokenizers/transformer_tokenizer.py +++ /dev/null @@ -1,46 +0,0 @@ -# This file has been modified by the Nextpy Team in 2023 using AI tools and automation scripts. -# We have rigorously tested these modifications to ensure reliability and performance. Based on successful test results, we are confident in the quality and stability of these changes. - -from __future__ import annotations -from typing import TYPE_CHECKING -from os import environ - -# Conditional import for type checking -if TYPE_CHECKING: - from transformers import PreTrainedTokenizerBase - -# Setting environment variable to control transformers verbosity -environ["TRANSFORMERS_VERBOSITY"] = "error" - -from attr import define, field, Factory -from nextpy.ai.tokenizers import BaseTokenizer - - -@define(frozen=True) -class HuggingFaceTokenizer(BaseTokenizer): - """ - Tokenizer class that wraps around a Hugging Face PreTrainedTokenizerBase - to conform to the BaseTokenizer interface. - """ - - tokenizer: PreTrainedTokenizerBase = field(kw_only=True) - max_tokens: int = field( - default=Factory(lambda self: self.tokenizer.model_max_length, takes_self=True), - kw_only=True, - ) - - def count_tokens(self, text: str) -> int: - """ - Counts the number of tokens in the given text using the Hugging Face tokenizer. - - Args: - text (str): The input text to tokenize. - - Returns: - int: The number of tokens in the input text. 
- """ - try: - return len(self.tokenizer.encode(text)) - except Exception as e: - # Log the exception or handle it as per your requirement - raise RuntimeError(f"Error during tokenization: {e}") diff --git a/nextpy/ai/tools/__init__.py b/nextpy/ai/tools/__init__.py deleted file mode 100644 index 847433fd..00000000 --- a/nextpy/ai/tools/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -# This file has been modified by the Nextpy Team in 2023 using AI tools and automation scripts. -# We have rigorously tested these modifications to ensure reliability and performance. Based on successful test results, we are confident in the quality and stability of these changes. - diff --git a/nextpy/ai/tools/basetool.py b/nextpy/ai/tools/basetool.py deleted file mode 100644 index 4089e890..00000000 --- a/nextpy/ai/tools/basetool.py +++ /dev/null @@ -1,203 +0,0 @@ -# This file has been modified by the Nextpy Team in 2023 using AI tools and automation scripts. -# We have rigorously tested these modifications to ensure reliability and performance. Based on successful test results, we are confident in the quality and stability of these changes. - -from __future__ import annotations - -"""Base implementation for tools or skills.""" - -import warnings -from abc import ABC, abstractmethod -from inspect import signature -from typing import Any, Callable, Dict, Optional, Tuple, Type, Union - -from pydantic import ( - BaseModel, - create_model, - root_validator, - validate_arguments, -) - - -def _get_filtered_args( - inferred_model: Type[BaseModel], - func: Callable, -) -> dict: - """Get the arguments from a function's signature.""" - schema = inferred_model.schema()["properties"] - valid_keys = signature(func).parameters - return {k: schema[k] for k in valid_keys if k != "run_manager"} - - -def _create_subset_model( - name: str, model: BaseModel, field_names: list -) -> Type[BaseModel]: - """Create a pydantic model with only a subset of model's fields.""" - fields = {} - for field_name in field_names: - field = model.__fields__[field_name] - fields[field_name] = (field.type_, field.field_info) - return create_model(name, **fields) # type: ignore - - -def create_schema_from_function( - model_name: str, - func: Callable, -) -> Type[BaseModel]: - """model_name: Name to assign to the generated pydandic schema - func: Function to generate the schema from. - """ - validated = validate_arguments(func, config=_SchemaConfig) # type: ignore - inferred_model = validated.model # type: ignore - if "run_manager" in inferred_model.__fields__: - del inferred_model.__fields__["run_manager"] - # Pydantic adds placeholder virtual fields we need to strip - valid_properties = _get_filtered_args(inferred_model, func) - return _create_subset_model( - f"{model_name}Schema", inferred_model, list(valid_properties) - ) - - -class BaseTool(ABC, BaseModel): - # Interface llms tools must implement. - - name: str - # The unique name of the tool that clearly communicates its purpose. - description: str - # Used to tell the model how/when/why to use the tool.You can provide few-shot examples as a part of the description. - func: Callable = None - # Function which acts as a tool and takes in input - args_schema: Optional[Type[BaseModel]] = None - # Pydantic model class to validate and parse the tool's input arguments - return_direct: bool = False - # Whether to return the tool's output directly. Setting this to True means that after the tool is called, the AgentExecutor will stop looping. - verbose: bool = False - # Whether to log the tool's progress. 
- - @property - def is_single_input(self) -> bool: - """Whether the tool only accepts a single input.""" - keys = {k for k in self.args if k != "kwargs"} - return len(keys) == 1 - - @property - def args(self) -> dict: - if self.args_schema is not None: - return self.args_schema.schema()["properties"] - else: - schema = create_schema_from_function(self.name, self.run) - return schema.schema()["properties"] - - def _parse_input( - self, - tool_input: Union[str, Dict], - ) -> Union[str, Dict[str, Any]]: - input_args = self.args_schema - if isinstance(tool_input, str): - if input_args is not None: - key_ = next(iter(input_args.__fields__.keys())) - input_args.validate({key_: tool_input}) - return tool_input - else: - if input_args is not None: - result = input_args.parse_obj(tool_input) - return {k: v for k, v in result.dict().items() if k in tool_input} - return tool_input - - @root_validator() - def raise_deprecation(cls, values: Dict) -> Dict: - """Raise deprecation warning if callback_manager is used.""" - if values.get("callback_manager") is not None: - warnings.warn( - "callback_manager is deprecated. Please use callbacks instead.", - DeprecationWarning, - ) - values["callbacks"] = values.pop("callback_manager", None) - return values - - @abstractmethod - def run( - self, - *args: Any, - **kwargs: Any, - ) -> Any: - """Use the tool.""" - - def _to_args_and_kwargs(self, tool_input: Union[str, Dict]) -> Tuple[Tuple, Dict]: - if isinstance(tool_input, str): - return (tool_input,), {} - else: - return (), tool_input - - def run( - self, - tool_input: Union[str, Dict], - verbose: Optional[bool] = None, - **kwargs: Any, - ) -> Any: - """Parses the output and checks if the input is string and handles exceptions.""" - parsed_input = self._parse_input(tool_input) - - observation = self.run(parsed_input) - - return observation - - # if isinstance(parsed_input, str): - # raise Exception("Tool input should be string") - # else: - # return parsed_input - - -class ExceptionTool(BaseTool): - name = "_Exception" - description = "Exception tool" - - def run( - self, - query: str, - ) -> str: - return query - - -class InvalidTool(BaseTool): - """Tool that is run when invalid tool name is encountered by agent.""" - - name = "invalid_tool" - description = "Called when tool name is invalid." - - def run( - self, - tool_name: str, - ) -> str: - """Use the tool.""" - return f"{tool_name} is not a valid tool, try another one." - - -class Tool(BaseTool): - """Tool that takes in function or coroutine directly.""" - - class Config: - arbitrary_types_allowed = True - - def args(self) -> dict: - """The tool's input arguments.""" - if self.args_schema is not None: - return self.args_schema.schema()["properties"] - # For backwards compatibility, if the function signature is ambiguous, - # assume it takes a single string input. 
- return {"tool_input": {"type": "string"}} - - def run(self, tool_input, **kwargs: Any) -> Any: - """Actually calls the tool and gives output.""" - try: - return self.func(tool_input, **kwargs) - except Exception as e: - return e - - def __call__(self, tool_input: str, **kwargs) -> str: - """Make tool callable.""" - # try: - # parsed_input = self.run(tool_input , **kwargs) - # except Exception as e: - # return e - # final_result = self.run(tool_input=parsed_input, **kwargs) - # return final_result diff --git a/nextpy/ai/tools/toolkits/SQL.py b/nextpy/ai/tools/toolkits/SQL.py deleted file mode 100644 index cc67fb60..00000000 --- a/nextpy/ai/tools/toolkits/SQL.py +++ /dev/null @@ -1,66 +0,0 @@ -# This file has been modified by the Nextpy Team in 2023 using AI tools and automation scripts. -# We have rigorously tested these modifications to ensure reliability and performance. Based on successful test results, we are confident in the quality and stability of these changes. - -from typing import List - -from pydantic import Field - -from nextpy.ai.scripts.sql_database import SQLDatabase -from nextpy.ai.tools.basetool import BaseTool -from nextpy.ai.tools.toolkits.base import BaseToolkit -from nextpy.ai.tools.toolkits.SQLDb.tool import ( - InfoSQLDatabaseTool, - ListSQLDatabaseTool, - # QuerySQLCheckerTool, - QuerySQLDataBaseTool, -) - - -class SQLDatabaseToolkit(BaseToolkit): - """Toolkit for interacting with SQL databases.""" - - db: SQLDatabase = Field(exclude=True) - - @property - def dialect(self) -> str: - """Return string representation of SQL dialect to use.""" - return self.db.dialect - - class Config: - """Configuration for this pydantic object.""" - - arbitrary_types_allowed = True - - def get_tools(self) -> List[BaseTool]: - """Get the tools in the toolkit.""" - list_sql_database_tool = ListSQLDatabaseTool(db=self.db) - info_sql_database_tool_description = ( - "Input to this tool is a comma-separated list of tables, output is the " - "schema and sample rows for those tables. " - "Be sure that the tables actually exist by calling " - f"{list_sql_database_tool.name} first! " - "Example Input: 'table1, table2, table3'" - ) - info_sql_database_tool = InfoSQLDatabaseTool( - db=self.db, description=info_sql_database_tool_description - ) - query_sql_database_tool_description = ( - "Input to this tool is a detailed and correct SQL query, output is a " - "result from the database. If the query is not correct, an error message " - "will be returned. If an error is returned, rewrite the query, check the " - "query, and try again. If you encounter an issue with Unknown column " - f"'xxxx' in 'field list', using {info_sql_database_tool.name} " - "to query the correct table fields." - ) - query_sql_database_tool = QuerySQLDataBaseTool( - db=self.db, description=query_sql_database_tool_description - ) - # query_sql_checker_tool = QuerySQLCheckerTool( - # db=self.db, llm=self.llm, description=query_sql_checker_tool_description - # ) - return [ - query_sql_database_tool, - info_sql_database_tool, - list_sql_database_tool - # query_sql_checker_tool, - ] diff --git a/nextpy/ai/tools/toolkits/SQLDb/__init__.py b/nextpy/ai/tools/toolkits/SQLDb/__init__.py deleted file mode 100644 index 847433fd..00000000 --- a/nextpy/ai/tools/toolkits/SQLDb/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -# This file has been modified by the Nextpy Team in 2023 using AI tools and automation scripts. -# We have rigorously tested these modifications to ensure reliability and performance. 
Based on successful test results, we are confident in the quality and stability of these changes. - diff --git a/nextpy/ai/tools/toolkits/SQLDb/prompt.py b/nextpy/ai/tools/toolkits/SQLDb/prompt.py deleted file mode 100644 index df5ee451..00000000 --- a/nextpy/ai/tools/toolkits/SQLDb/prompt.py +++ /dev/null @@ -1,16 +0,0 @@ -# This file has been modified by the Nextpy Team in 2023 using AI tools and automation scripts. -# We have rigorously tested these modifications to ensure reliability and performance. Based on successful test results, we are confident in the quality and stability of these changes. - -QUERY_CHECKER = """ -{query} -Double check the {dialect} query above for common mistakes, including: -- Using NOT IN with NULL values -- Using UNION when UNION ALL should have been used -- Using BETWEEN for exclusive ranges -- Data type mismatch in predicates -- Properly quoting identifiers -- Using the correct number of arguments for functions -- Casting to the correct data type -- Using the proper columns for joins - -If there are any of the above mistakes, rewrite the query. If there are no mistakes, just reproduce the original query.""" diff --git a/nextpy/ai/tools/toolkits/SQLDb/tool.py b/nextpy/ai/tools/toolkits/SQLDb/tool.py deleted file mode 100644 index 1ae959dd..00000000 --- a/nextpy/ai/tools/toolkits/SQLDb/tool.py +++ /dev/null @@ -1,122 +0,0 @@ -# This file has been modified by the Nextpy Team in 2023 using AI tools and automation scripts. -# We have rigorously tested these modifications to ensure reliability and performance. Based on successful test results, we are confident in the quality and stability of these changes. - -"""Tools for interacting with a SQL database.""" - -from pydantic import BaseModel, Extra, Field - -from nextpy.ai.tools.basetool import BaseTool - -# from langchain.chains.llm import LLMChain  # Used in the QuerySQLCheckerTool commented out below -from nextpy.ai.scripts.sql_database import SQLDatabase - - -class BaseSQLDatabaseTool(BaseModel): - """Base tool for interacting with a SQL database.""" - - db: SQLDatabase = Field(exclude=True) - - # Override BaseTool.Config to appease mypy - # See https://github.com/pydantic/pydantic/issues/4173 - class Config(BaseTool.Config): - """Configuration for this pydantic object.""" - - arbitrary_types_allowed = True - extra = Extra.forbid - - -class QuerySQLDataBaseTool(BaseSQLDatabaseTool, BaseTool): - """Tool for querying a SQL database.""" - - name = "sql_db_query" - description = """ - Input to this tool is a detailed and correct SQL query, output is a result from the database. - If the query is not correct, an error message will be returned. - If an error is returned, rewrite the query, check the query, and try again. - """ - - def run(self, query: str) -> str: - """Execute the query, return the results or an error message.""" - return self.db.run_no_throw(query) - - async def arun(self, query: str) -> str: - raise NotImplementedError("QuerySqlDbTool does not support async") - - -class InfoSQLDatabaseTool(BaseSQLDatabaseTool, BaseTool): - """Tool for getting metadata about a SQL database.""" - - name = "sql_db_schema" - description = """ - Input to this tool is a comma-separated list of tables, output is the schema and sample rows for those tables.
- - Example Input: "table1, table2, table3" - """ - - def run(self, table_names: str) -> str: - """Get the schema for tables in a comma-separated list.""" - return self.db.get_table_info_no_throw(table_names.split(", ")) - - async def arun(self, table_names: str) -> str: - raise NotImplementedError("SchemaSqlDbTool does not support async") - - -class ListSQLDatabaseTool(BaseSQLDatabaseTool, BaseTool): - """Tool for getting table names.""" - - name = "sql_db_list_tables" - description = "Input is an empty string, output is a comma-separated list of tables in the database." - - def run(self, tool_input: str = "") -> str: - """Return a comma-separated list of usable table names.""" - return ", ".join(self.db.get_usable_table_names()) - - async def arun(self, tool_input: str = "") -> str: - raise NotImplementedError("ListTablesSqlDbTool does not support async") - - -# This has a dependency on chains which has to be resolved -''' -class QuerySQLCheckerTool(BaseSQLDatabaseTool, BaseTool): - """Use an LLM to check if a query is correct. - Adapted from https://www.patterns.app/blog/2023/01/18/crunchbot-sql-analyst-gpt/""" - - template: str = QUERY_CHECKER - llm: BaseLLM - llm_chain: LLMChain = Field(init=False) - name = "sql_db_query_checker" - description = """ - Use this tool to double check if your query is correct before executing it. - Always use this tool before executing a query with query_sql_db! - """ - - @root_validator(pre=True) - def initialize_llm_chain(cls, values: Dict[str, Any]) -> Dict[str, Any]: - if "llm_chain" not in values: - values["llm_chain"] = LLMChain( - llm=values.get("llm"), - prompt=PromptTemplate( - template=QUERY_CHECKER, input_variables=["query", "dialect"] - ), - ) - - if values["llm_chain"].prompt.input_variables != ["query", "dialect"]: - raise ValueError( - "LLM chain for QueryCheckerTool must have input variables ['query', 'dialect']" - ) - - return values - - def _run( - self, - query: str, - ) -> str: - """Use the LLM to check the query.""" - return self.llm_chain.predict(query=query, dialect=self.db.dialect) - - async def _arun( - self, - query: str, - ) -> str: - return await self.llm_chain.apredict(query=query, dialect=self.db.dialect) -''' diff --git a/nextpy/ai/tools/toolkits/Spark_SQLDb/__init__.py b/nextpy/ai/tools/toolkits/Spark_SQLDb/__init__.py deleted file mode 100644 index 847433fd..00000000 --- a/nextpy/ai/tools/toolkits/Spark_SQLDb/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -# This file has been modified by the Nextpy Team in 2023 using AI tools and automation scripts. -# We have rigorously tested these modifications to ensure reliability and performance. Based on successful test results, we are confident in the quality and stability of these changes. - diff --git a/nextpy/ai/tools/toolkits/Spark_SQLDb/prompt.py b/nextpy/ai/tools/toolkits/Spark_SQLDb/prompt.py deleted file mode 100644 index dcd01b6e..00000000 --- a/nextpy/ai/tools/toolkits/Spark_SQLDb/prompt.py +++ /dev/null @@ -1,17 +0,0 @@ -# This file has been modified by the Nextpy Team in 2023 using AI tools and automation scripts. -# We have rigorously tested these modifications to ensure reliability and performance. Based on successful test results, we are confident in the quality and stability of these changes.
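For reference, a minimal sketch of how the removed SQLDatabaseToolkit was wired together. The sqlite URI and the from_uri constructor are illustrative assumptions about the SQLDatabase helper; adjust to the actual nextpy API if it differs.

from nextpy.ai.scripts.sql_database import SQLDatabase
from nextpy.ai.tools.toolkits.SQL import SQLDatabaseToolkit

# Assumed constructor name; the real SQLDatabase API may differ.
db = SQLDatabase.from_uri("sqlite:///example.db")
toolkit = SQLDatabaseToolkit(db=db)

# get_tools() returns the query, info, and list tools, in that order.
query_tool, info_tool, list_tool = toolkit.get_tools()
print(list_tool.run(""))                        # "table1, table2, ..."
print(info_tool.run("table1, table2"))          # schema and sample rows
print(query_tool.run("SELECT COUNT(*) FROM table1"))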
- -# flake8: noqa -QUERY_CHECKER = """ -{query} -Double check the Spark SQL query above for common mistakes, including: -- Using NOT IN with NULL values -- Using UNION when UNION ALL should have been used -- Using BETWEEN for exclusive ranges -- Data type mismatch in predicates -- Properly quoting identifiers -- Using the correct number of arguments for functions -- Casting to the correct data type -- Using the proper columns for joins - -If there are any of the above mistakes, rewrite the query. If there are no mistakes, just reproduce the original query.""" diff --git a/nextpy/ai/tools/toolkits/Spark_SQLDb/tool.py b/nextpy/ai/tools/toolkits/Spark_SQLDb/tool.py deleted file mode 100644 index 5c9d2535..00000000 --- a/nextpy/ai/tools/toolkits/Spark_SQLDb/tool.py +++ /dev/null @@ -1,140 +0,0 @@ -# This file has been modified by the Nextpy Team in 2023 using AI tools and automation scripts. -# We have rigorously tested these modifications to ensure reliability and performance. Based on successful test results, we are confident in the quality and stability of these changes. - -"""Tools for interacting with Spark SQL.""" - -from pydantic import BaseModel, Extra, Field - -from nextpy.ai.scripts.spark_sql_database import SparkSQL -from nextpy.ai.tools.basetool import BaseTool - - -class BaseSparkSQLTool(BaseModel): - """Base tool for interacting with Spark SQL.""" - - db: SparkSQL = Field(exclude=True) - - # Override BaseTool.Config to appease mypy - # See https://github.com/pydantic/pydantic/issues/4173 - class Config(BaseTool.Config): - """Configuration for this pydantic object.""" - - arbitrary_types_allowed = True - extra = Extra.forbid - - -class QuerySparkSQLTool(BaseSparkSQLTool, BaseTool): - """Tool for querying Spark SQL.""" - - name = "query_sql_db" - description = """ - Input to this tool is a detailed and correct SQL query, output is a result from Spark SQL. - If the query is not correct, an error message will be returned. - If an error is returned, rewrite the query, check the query, and try again. - """ - - def run( - self, - query: str, - ) -> str: - """Execute the query, return the results or an error message.""" - return self.db.run_no_throw(query) - - async def arun( - self, - query: str, - ) -> str: - raise NotImplementedError("QuerySqlDbTool does not support async") - - -class InfoSparkSQLTool(BaseSparkSQLTool, BaseTool): - """Tool for getting metadata about Spark SQL.""" - - name = "schema_sql_db" - description = """ - Input to this tool is a comma-separated list of tables, output is the schema and sample rows for those tables. - Be sure that the tables actually exist by calling list_tables_sql_db first! - - Example Input: "table1, table2, table3" - """ - - def run( - self, - table_names: str, - ) -> str: - """Get the schema for tables in a comma-separated list.""" - return self.db.get_table_info_no_throw(table_names.split(", ")) - - async def arun( - self, - table_names: str, - ) -> str: - raise NotImplementedError("SchemaSqlDbTool does not support async") - - -class ListSparkSQLTool(BaseSparkSQLTool, BaseTool): - """Tool for getting table names.""" - - name = "list_tables_sql_db" - description = "Input is an empty string, output is a comma-separated list of tables in Spark SQL."
- - def run( - self, - tool_input: str = "", - ) -> str: - """Return a comma-separated list of usable table names.""" - return ", ".join(self.db.get_usable_table_names()) - - async def arun( - self, - tool_input: str = "", - ) -> str: - raise NotImplementedError("ListTablesSqlDbTool does not support async") - - -# QueryCheckerTool depends on LLMChain, which still needs to be wired in -''' -class QueryCheckerTool(BaseSparkSQLTool, BaseTool): - """Use an LLM to check if a query is correct. - Adapted from https://www.patterns.app/blog/2023/01/18/crunchbot-sql-analyst-gpt/""" - - template: str = QUERY_CHECKER - llm: BaseLanguageModel - llm_chain: LLMChain = Field(init=False) - name = "query_checker_sql_db" - description = """ - Use this tool to double check if your query is correct before executing it. - Always use this tool before executing a query with query_sql_db! - """ - - @root_validator(pre=True) - def initialize_llm_chain(cls, values: Dict[str, Any]) -> Dict[str, Any]: - if "llm_chain" not in values: - values["llm_chain"] = LLMChain( - llm=values.get("llm"), - prompt=PromptTemplate( - template=QUERY_CHECKER, input_variables=["query"] - ), - ) - - if values["llm_chain"].prompt.input_variables != ["query"]: - raise ValueError( - "LLM chain for QueryCheckerTool needs to use ['query'] as input_variables " - "for the embedded prompt" - ) - - return values - - def _run( - self, - query: str, - ) -> str: - """Use the LLM to check the query.""" - return self.llm_chain.predict(query=query) - - async def _arun( - self, - query: str, - ) -> str: - return await self.llm_chain.apredict(query=query) -''' diff --git a/nextpy/ai/tools/toolkits/__init__.py b/nextpy/ai/tools/toolkits/__init__.py deleted file mode 100644 index 847433fd..00000000 --- a/nextpy/ai/tools/toolkits/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -# This file has been modified by the Nextpy Team in 2023 using AI tools and automation scripts. -# We have rigorously tested these modifications to ensure reliability and performance. Based on successful test results, we are confident in the quality and stability of these changes. - diff --git a/nextpy/ai/tools/toolkits/base.py b/nextpy/ai/tools/toolkits/base.py deleted file mode 100644 index 8b55611b..00000000 --- a/nextpy/ai/tools/toolkits/base.py +++ /dev/null @@ -1,18 +0,0 @@ -# This file has been modified by the Nextpy Team in 2023 using AI tools and automation scripts. -# We have rigorously tested these modifications to ensure reliability and performance. Based on successful test results, we are confident in the quality and stability of these changes. - -"""Toolkits for agents.""" -from abc import abstractmethod -from typing import List - -from pydantic import BaseModel - -from nextpy.ai.tools.basetool import BaseTool - - -class BaseToolkit(BaseModel): - """Class responsible for defining a collection of related tools.""" - - @abstractmethod - def get_tools(self) -> List[BaseTool]: - """Get the tools in the toolkit.""" diff --git a/nextpy/ai/tools/toolkits/file_toolkit/_file.py b/nextpy/ai/tools/toolkits/file_toolkit/_file.py deleted file mode 100644 index adb87c2e..00000000 --- a/nextpy/ai/tools/toolkits/file_toolkit/_file.py +++ /dev/null @@ -1,64 +0,0 @@ -# This file has been modified by the Nextpy Team in 2023 using AI tools and automation scripts. -# We have rigorously tested these modifications to ensure reliability and performance. Based on successful test results, we are confident in the quality and stability of these changes.
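BaseToolkit above asks subclasses for nothing beyond get_tools. A minimal sketch of a custom toolkit built on that contract; EchoTool is a hypothetical tool shown only to illustrate the pattern, mirroring how ExceptionTool in basetool.py defines name, description, and run:

from typing import List

from nextpy.ai.tools.basetool import BaseTool
from nextpy.ai.tools.toolkits.base import BaseToolkit


class EchoTool(BaseTool):
    # Hypothetical tool used only to demonstrate the BaseToolkit contract.
    name = "echo"
    description = "Returns its input unchanged."

    def run(self, query: str) -> str:
        return query


class EchoToolkit(BaseToolkit):
    """Groups related tools behind a single get_tools() entry point."""

    def get_tools(self) -> List[BaseTool]:
        return [EchoTool()]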
- -"""Toolkit for interacting with the local filesystem.""" -from __future__ import annotations - -from typing import List, Optional - -from pydantic import root_validator - -from nextpy.ai.tools.basetool import BaseTool -from nextpy.ai.tools.toolkits.base import BaseToolkit -from nextpy.ai.tools.toolkits.file_toolkit.file.copy import CopyFileTool -from nextpy.ai.tools.toolkits.file_toolkit.file.delete import DeleteFileTool -from nextpy.ai.tools.toolkits.file_toolkit.file.listdir import ListDirectoryTool -from nextpy.ai.tools.toolkits.file_toolkit.file.move import MoveFileTool -from nextpy.ai.tools.toolkits.file_toolkit.file.read import ReadFileTool -from nextpy.ai.tools.toolkits.file_toolkit.file.search import FileSearchTool -from nextpy.ai.tools.toolkits.file_toolkit.file.write import WriteFileTool - -_FILE_TOOLS = { - tool_cls.__fields__["name"].default: tool_cls - for tool_cls in [ - CopyFileTool, - DeleteFileTool, - FileSearchTool, - MoveFileTool, - ReadFileTool, - WriteFileTool, - ListDirectoryTool, - ] -} - - -class FileManagementToolkit(BaseToolkit): - """Toolkit for interacting with a Local Files.""" - - root_dir: Optional[str] = None - """If specified, all file operations are made relative to root_dir.""" - selected_tools: Optional[List[str]] = None - """If provided, only provide the selected tools. Defaults to all.""" - - @root_validator - def validate_tools(cls, values: dict) -> dict: - selected_tools = values.get("selected_tools") or [] - for tool_name in selected_tools: - if tool_name not in _FILE_TOOLS: - raise ValueError( - f"File Tool of name {tool_name} not supported." - f" Permitted tools: {list(_FILE_TOOLS)}" - ) - return values - - def get_tools(self) -> List[BaseTool]: - """Get the tools in the toolkit.""" - allowed_tools = self.selected_tools or _FILE_TOOLS.keys() - tools: List[BaseTool] = [] - for tool in allowed_tools: - tool_cls = _FILE_TOOLS[tool] - tools.append(tool_cls(root_dir=self.root_dir)) # type: ignore - return tools - - -__all__ = ["FileManagementToolkit"] diff --git a/nextpy/ai/tools/toolkits/file_toolkit/file.py b/nextpy/ai/tools/toolkits/file_toolkit/file.py deleted file mode 100644 index adb87c2e..00000000 --- a/nextpy/ai/tools/toolkits/file_toolkit/file.py +++ /dev/null @@ -1,64 +0,0 @@ -# This file has been modified by the Nextpy Team in 2023 using AI tools and automation scripts. -# We have rigorously tested these modifications to ensure reliability and performance. Based on successful test results, we are confident in the quality and stability of these changes. 
- -"""Toolkit for interacting with the local filesystem.""" -from __future__ import annotations - -from typing import List, Optional - -from pydantic import root_validator - -from nextpy.ai.tools.basetool import BaseTool -from nextpy.ai.tools.toolkits.base import BaseToolkit -from nextpy.ai.tools.toolkits.file_toolkit.file.copy import CopyFileTool -from nextpy.ai.tools.toolkits.file_toolkit.file.delete import DeleteFileTool -from nextpy.ai.tools.toolkits.file_toolkit.file.listdir import ListDirectoryTool -from nextpy.ai.tools.toolkits.file_toolkit.file.move import MoveFileTool -from nextpy.ai.tools.toolkits.file_toolkit.file.read import ReadFileTool -from nextpy.ai.tools.toolkits.file_toolkit.file.search import FileSearchTool -from nextpy.ai.tools.toolkits.file_toolkit.file.write import WriteFileTool - -_FILE_TOOLS = { - tool_cls.__fields__["name"].default: tool_cls - for tool_cls in [ - CopyFileTool, - DeleteFileTool, - FileSearchTool, - MoveFileTool, - ReadFileTool, - WriteFileTool, - ListDirectoryTool, - ] -} - - -class FileManagementToolkit(BaseToolkit): - """Toolkit for interacting with a Local Files.""" - - root_dir: Optional[str] = None - """If specified, all file operations are made relative to root_dir.""" - selected_tools: Optional[List[str]] = None - """If provided, only provide the selected tools. Defaults to all.""" - - @root_validator - def validate_tools(cls, values: dict) -> dict: - selected_tools = values.get("selected_tools") or [] - for tool_name in selected_tools: - if tool_name not in _FILE_TOOLS: - raise ValueError( - f"File Tool of name {tool_name} not supported." - f" Permitted tools: {list(_FILE_TOOLS)}" - ) - return values - - def get_tools(self) -> List[BaseTool]: - """Get the tools in the toolkit.""" - allowed_tools = self.selected_tools or _FILE_TOOLS.keys() - tools: List[BaseTool] = [] - for tool in allowed_tools: - tool_cls = _FILE_TOOLS[tool] - tools.append(tool_cls(root_dir=self.root_dir)) # type: ignore - return tools - - -__all__ = ["FileManagementToolkit"] diff --git a/nextpy/ai/tools/toolkits/file_toolkit/file/__init__.py b/nextpy/ai/tools/toolkits/file_toolkit/file/__init__.py deleted file mode 100644 index 847433fd..00000000 --- a/nextpy/ai/tools/toolkits/file_toolkit/file/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -# This file has been modified by the Nextpy Team in 2023 using AI tools and automation scripts. -# We have rigorously tested these modifications to ensure reliability and performance. Based on successful test results, we are confident in the quality and stability of these changes. - diff --git a/nextpy/ai/tools/toolkits/file_toolkit/file/copy.py b/nextpy/ai/tools/toolkits/file_toolkit/file/copy.py deleted file mode 100644 index 9bd12e7e..00000000 --- a/nextpy/ai/tools/toolkits/file_toolkit/file/copy.py +++ /dev/null @@ -1,54 +0,0 @@ -# This file has been modified by the Nextpy Team in 2023 using AI tools and automation scripts. -# We have rigorously tested these modifications to ensure reliability and performance. Based on successful test results, we are confident in the quality and stability of these changes. 
- -import shutil -from typing import Type - -from pydantic import BaseModel, Field - -from nextpy.ai.tools.basetool import BaseTool -from nextpy.ai.tools.toolkits.file_toolkit.file.utils import ( - INVALID_PATH_TEMPLATE, - BaseFileToolMixin, - FileValidationError, -) - - -class FileCopyInput(BaseModel): - """Input for CopyFileTool.""" - - source_path: str = Field(..., description="Path of the file to copy") - destination_path: str = Field(..., description="Path to save the copied file") - - -class CopyFileTool(BaseFileToolMixin, BaseTool): - name: str = "copy_file" - args_schema: Type[BaseModel] = FileCopyInput - description: str = "Create a copy of a file in a specified location" - - def run( - self, - source_path: str, - destination_path: str, - ) -> str: - try: - source_path_ = self.get_relative_path(source_path) - except FileValidationError: - return INVALID_PATH_TEMPLATE.format( - arg_name="source_path", value=source_path - ) - try: - destination_path_ = self.get_relative_path(destination_path) - except FileValidationError: - return INVALID_PATH_TEMPLATE.format( - arg_name="destination_path", value=destination_path - ) - try: - shutil.copy2(source_path_, destination_path_, follow_symlinks=False) - return f"File copied successfully from {source_path} to {destination_path}." - except Exception as e: - return "Error: " + str(e) - - async def _arun(self, source_path: str, destination_path: str) -> str: - # TODO: Add aiofiles method - raise NotImplementedError diff --git a/nextpy/ai/tools/toolkits/file_toolkit/file/delete.py b/nextpy/ai/tools/toolkits/file_toolkit/file/delete.py deleted file mode 100644 index b617de77..00000000 --- a/nextpy/ai/tools/toolkits/file_toolkit/file/delete.py +++ /dev/null @@ -1,49 +0,0 @@ -# This file has been modified by the Nextpy Team in 2023 using AI tools and automation scripts. -# We have rigorously tested these modifications to ensure reliability and performance. Based on successful test results, we are confident in the quality and stability of these changes. - -import os -from typing import Type - -from pydantic import BaseModel, Field - -from nextpy.ai.tools.basetool import BaseTool -from nextpy.ai.tools.toolkits.file_toolkit.file.utils import ( - INVALID_PATH_TEMPLATE, - BaseFileToolMixin, - FileValidationError, -) - - -class FileDeleteInput(BaseModel): - """Input for DeleteFileTool.""" - - file_path: str = Field(..., description="Path of the file to delete") - - -class DeleteFileTool(BaseFileToolMixin, BaseTool): - name: str = "file_delete" - args_schema: Type[BaseModel] = FileDeleteInput - description: str = "Delete a file" - - def run( - self, - file_path: str, - ) -> str: - try: - file_path_ = self.get_relative_path(file_path) - except FileValidationError: - return INVALID_PATH_TEMPLATE.format(arg_name="file_path", value=file_path) - if not file_path_.exists(): - return f"Error: no such file or directory: {file_path}" - try: - os.remove(file_path_) - return f"File deleted successfully: {file_path}." - except Exception as e: - return "Error: " + str(e) - - async def _arun( - self, - file_path: str, - ) -> str: - # TODO: Add aiofiles method - raise NotImplementedError diff --git a/nextpy/ai/tools/toolkits/file_toolkit/file/listdir.py b/nextpy/ai/tools/toolkits/file_toolkit/file/listdir.py deleted file mode 100644 index b8af5b03..00000000 --- a/nextpy/ai/tools/toolkits/file_toolkit/file/listdir.py +++ /dev/null @@ -1,50 +0,0 @@ -# This file has been modified by the Nextpy Team in 2023 using AI tools and automation scripts. 
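Note the error-handling convention these file tools share: failures come back as returned strings rather than raised exceptions, so an agent loop can feed them straight back to the model as observations. A minimal sketch with CopyFileTool (paths illustrative):

from nextpy.ai.tools.toolkits.file_toolkit.file.copy import CopyFileTool

tool = CopyFileTool(root_dir="/tmp/agent_sandbox")  # illustrative sandbox
result = tool.run(source_path="a.txt", destination_path="b.txt")
# Success: "File copied successfully from a.txt to b.txt."
# Failure (missing file, path outside root_dir): an "Error: ..." string.
print(result)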
-# We have rigorously tested these modifications to ensure reliability and performance. Based on successful test results, we are confident in the quality and stability of these changes. - -import os -from typing import Type - -from pydantic import BaseModel, Field - -from nextpy.ai.tools.basetool import BaseTool -from nextpy.ai.tools.toolkits.file_toolkit.file.utils import ( - INVALID_PATH_TEMPLATE, - BaseFileToolMixin, - FileValidationError, -) - - -class DirectoryListingInput(BaseModel): - """Input for ListDirectoryTool.""" - - dir_path: str = Field(default=".", description="Subdirectory to list.") - - -class ListDirectoryTool(BaseFileToolMixin, BaseTool): - name: str = "list_directory" - args_schema: Type[BaseModel] = DirectoryListingInput - description: str = "List files and directories in a specified folder" - - def run( - self, - dir_path: str = ".", - ) -> str: - try: - dir_path_ = self.get_relative_path(dir_path) - except FileValidationError: - return INVALID_PATH_TEMPLATE.format(arg_name="dir_path", value=dir_path) - try: - entries = os.listdir(dir_path_) - if entries: - return "\n".join(entries) - else: - return f"No files found in directory {dir_path}" - except Exception as e: - return "Error: " + str(e) - - async def _arun( - self, - dir_path: str, - ) -> str: - # TODO: Add aiofiles method - raise NotImplementedError diff --git a/nextpy/ai/tools/toolkits/file_toolkit/file/move.py b/nextpy/ai/tools/toolkits/file_toolkit/file/move.py deleted file mode 100644 index 97cbb29c..00000000 --- a/nextpy/ai/tools/toolkits/file_toolkit/file/move.py +++ /dev/null @@ -1,61 +0,0 @@ -# This file has been modified by the Nextpy Team in 2023 using AI tools and automation scripts. -# We have rigorously tested these modifications to ensure reliability and performance. Based on successful test results, we are confident in the quality and stability of these changes. - -import shutil -from typing import Type - -from pydantic import BaseModel, Field - -from nextpy.ai.tools.basetool import BaseTool -from nextpy.ai.tools.toolkits.file_toolkit.file.utils import ( - INVALID_PATH_TEMPLATE, - BaseFileToolMixin, - FileValidationError, -) - - -class FileMoveInput(BaseModel): - """Input for MoveFileTool.""" - - source_path: str = Field(..., description="Path of the file to move") - destination_path: str = Field(..., description="New path for the moved file") - - -class MoveFileTool(BaseFileToolMixin, BaseTool): - name: str = "move_file" - args_schema: Type[BaseModel] = FileMoveInput - description: str = "Move or rename a file from one location to another" - - def run( - self, - source_path: str, - destination_path: str, - ) -> str: - try: - source_path_ = self.get_relative_path(source_path) - except FileValidationError: - return INVALID_PATH_TEMPLATE.format( - arg_name="source_path", value=source_path - ) - try: - destination_path_ = self.get_relative_path(destination_path) - except FileValidationError: - return INVALID_PATH_TEMPLATE.format( - arg_name="destination_path", value=destination_path - ) - if not source_path_.exists(): - return f"Error: no such file or directory {source_path}" - try: - # shutil.move expects str args in 3.8 - shutil.move(str(source_path_), destination_path_) - return f"File moved successfully from {source_path} to {destination_path}."
- except Exception as e: - return "Error: " + str(e) - - async def _arun( - self, - source_path: str, - destination_path: str, - ) -> str: - # TODO: Add aiofiles method - raise NotImplementedError diff --git a/nextpy/ai/tools/toolkits/file_toolkit/file/read.py b/nextpy/ai/tools/toolkits/file_toolkit/file/read.py deleted file mode 100644 index 0310b0fa..00000000 --- a/nextpy/ai/tools/toolkits/file_toolkit/file/read.py +++ /dev/null @@ -1,49 +0,0 @@ -# This file has been modified by the Nextpy Team in 2023 using AI tools and automation scripts. -# We have rigorously tested these modifications to ensure reliability and performance. Based on successful test results, we are confident in the quality and stability of these changes. - -from typing import Type - -from pydantic import BaseModel, Field - -from nextpy.ai.tools.basetool import BaseTool -from nextpy.ai.tools.toolkits.file_toolkit.file.utils import ( - INVALID_PATH_TEMPLATE, - BaseFileToolMixin, - FileValidationError, -) - - -class ReadFileInput(BaseModel): - """Input for ReadFileTool.""" - - file_path: str = Field(..., description="name of file") - - -class ReadFileTool(BaseFileToolMixin, BaseTool): - name: str = "read_file" - args_schema: Type[BaseModel] = ReadFileInput - description: str = "Read file from disk" - - def run( - self, - file_path: str, - ) -> str: - try: - read_path = self.get_relative_path(file_path) - except FileValidationError: - return INVALID_PATH_TEMPLATE.format(arg_name="file_path", value=file_path) - if not read_path.exists(): - return f"Error: no such file or directory: {file_path}" - try: - with read_path.open("r", encoding="utf-8") as f: - content = f.read() - return content - except Exception as e: - return "Error: " + str(e) - - async def _arun( - self, - file_path: str, - ) -> str: - # TODO: Add aiofiles method - raise NotImplementedError diff --git a/nextpy/ai/tools/toolkits/file_toolkit/file/search.py b/nextpy/ai/tools/toolkits/file_toolkit/file/search.py deleted file mode 100644 index 1845dd71..00000000 --- a/nextpy/ai/tools/toolkits/file_toolkit/file/search.py +++ /dev/null @@ -1,67 +0,0 @@ -# This file has been modified by the Nextpy Team in 2023 using AI tools and automation scripts. -# We have rigorously tested these modifications to ensure reliability and performance. Based on successful test results, we are confident in the quality and stability of these changes. 
- -import fnmatch -import os -from typing import Type - -from pydantic import BaseModel, Field - -from nextpy.ai.tools.basetool import BaseTool -from nextpy.ai.tools.toolkits.file_toolkit.file.utils import ( - INVALID_PATH_TEMPLATE, - BaseFileToolMixin, - FileValidationError, -) - - -class FileSearchInput(BaseModel): - """Input for FileSearchTool.""" - - dir_path: str = Field( - default=".", - description="Subdirectory to search in.", - ) - pattern: str = Field( - ..., - description="Unix shell glob pattern, where * matches everything.", - ) - - -class FileSearchTool(BaseFileToolMixin, BaseTool): - name: str = "file_search" - args_schema: Type[BaseModel] = FileSearchInput - description: str = ( - "Recursively search for files in a subdirectory that match the glob pattern" - ) - - def run( - self, - pattern: str, - dir_path: str = ".", - ) -> str: - try: - dir_path_ = self.get_relative_path(dir_path) - except FileValidationError: - return INVALID_PATH_TEMPLATE.format(arg_name="dir_path", value=dir_path) - matches = [] - try: - for root, _, filenames in os.walk(dir_path_): - for filename in fnmatch.filter(filenames, pattern): - absolute_path = os.path.join(root, filename) - relative_path = os.path.relpath(absolute_path, dir_path_) - matches.append(relative_path) - if matches: - return "\n".join(matches) - else: - return f"No files found for pattern {pattern} in directory {dir_path}" - except Exception as e: - return "Error: " + str(e) - - async def _arun( - self, - dir_path: str, - pattern: str, - ) -> str: - # TODO: Add aiofiles method - raise NotImplementedError diff --git a/nextpy/ai/tools/toolkits/file_toolkit/file/utils.py b/nextpy/ai/tools/toolkits/file_toolkit/file/utils.py deleted file mode 100644 index f1da5e7a..00000000 --- a/nextpy/ai/tools/toolkits/file_toolkit/file/utils.py +++ /dev/null @@ -1,57 +0,0 @@ -# This file has been modified by the Nextpy Team in 2023 using AI tools and automation scripts. -# We have rigorously tested these modifications to ensure reliability and performance. Based on successful test results, we are confident in the quality and stability of these changes. - -import sys -from pathlib import Path -from typing import Optional - -from pydantic import BaseModel - - -def is_relative_to(path: Path, root: Path) -> bool: - """Check if path is relative to root.""" - if sys.version_info >= (3, 9): - # Path.is_relative_to is available from Python 3.9, so no try/except is needed. - return path.is_relative_to(root) - try: - path.relative_to(root) - return True - except ValueError: - return False - - -INVALID_PATH_TEMPLATE = ( - "Error: Access denied to {arg_name}: {value}." - " Permission granted exclusively to the current working directory" -) - - -class FileValidationError(ValueError): - """Error for paths outside the root directory.""" - - -class BaseFileToolMixin(BaseModel): - """Mixin for file system tools.""" - - root_dir: Optional[str] = None - """The final path will be chosen relative to root_dir if specified.""" - - def get_relative_path(self, file_path: str) -> Path: - """Get the relative path, returning an error if unsupported.""" - if self.root_dir is None: - return Path(file_path) - return get_validated_relative_path(Path(self.root_dir), file_path) - - -def get_validated_relative_path(root: Path, user_path: str) -> Path: - """Resolve a relative path, raising an error if not within the root directory.""" - # Note, this still permits symlinks from outside that point within the root. - # Further validation would be needed if those are to be disallowed.
- root = root.resolve() - full_path = (root / user_path).resolve() - - if not is_relative_to(full_path, root): - raise FileValidationError( - f"Path {user_path} is outside of the allowed directory {root}" - ) - return full_path diff --git a/nextpy/ai/tools/toolkits/file_toolkit/file/write.py b/nextpy/ai/tools/toolkits/file_toolkit/file/write.py deleted file mode 100644 index 2bfaac8d..00000000 --- a/nextpy/ai/tools/toolkits/file_toolkit/file/write.py +++ /dev/null @@ -1,57 +0,0 @@ -# This file has been modified by the Nextpy Team in 2023 using AI tools and automation scripts. -# We have rigorously tested these modifications to ensure reliability and performance. Based on successful test results, we are confident in the quality and stability of these changes. - -from typing import Type - -from pydantic import BaseModel, Field - -from nextpy.ai.tools.basetool import BaseTool -from nextpy.ai.tools.toolkits.file_toolkit.file.utils import ( - INVALID_PATH_TEMPLATE, - BaseFileToolMixin, - FileValidationError, -) - - -class WriteFileInput(BaseModel): - """Input for WriteFileTool.""" - - file_path: str = Field(..., description="name of file") - text: str = Field(..., description="text to write to file") - append: bool = Field( - default=False, description="Whether to append to an existing file." - ) - - -class WriteFileTool(BaseFileToolMixin, BaseTool): - name: str = "write_file" - args_schema: Type[BaseModel] = WriteFileInput - description: str = "Write file to disk" - - def run( - self, - file_path: str, - text: str, - append: bool = False, - ) -> str: - try: - write_path = self.get_relative_path(file_path) - except FileValidationError: - return INVALID_PATH_TEMPLATE.format(arg_name="file_path", value=file_path) - try: - write_path.parent.mkdir(exist_ok=True, parents=False) - mode = "a" if append else "w" - with write_path.open(mode, encoding="utf-8") as f: - f.write(text) - return f"File written successfully to {file_path}." - except Exception as e: - return "Error: " + str(e) - - async def _arun( - self, - file_path: str, - text: str, - append: bool = False, - ) -> str: - # TODO: Add aiofiles method - raise NotImplementedError diff --git a/nextpy/ai/tools/toolkits/gmail_toolkit/gmail.py b/nextpy/ai/tools/toolkits/gmail_toolkit/gmail.py deleted file mode 100644 index beaa81d1..00000000 --- a/nextpy/ai/tools/toolkits/gmail_toolkit/gmail.py +++ /dev/null @@ -1,51 +0,0 @@ -# This file has been modified by the Nextpy Team in 2023 using AI tools and automation scripts. -# We have rigorously tested these modifications to ensure reliability and performance. Based on successful test results, we are confident in the quality and stability of these changes. 
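The confinement logic above resolves both the root and the combined path before comparing them, so ".." tricks are caught; a quick sketch of both outcomes (the root path is illustrative):

from pathlib import Path

from nextpy.ai.tools.toolkits.file_toolkit.file.utils import (
    FileValidationError,
    get_validated_relative_path,
)

root = Path("/tmp/agent_sandbox")  # illustrative root
print(get_validated_relative_path(root, "notes/today.txt"))  # resolves inside root

try:
    get_validated_relative_path(root, "../../etc/passwd")
except FileValidationError as e:
    print("rejected:", e)  # escapes the root, so it is refused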
- -from __future__ import annotations - -from typing import TYPE_CHECKING, List - -from pydantic import Field - -from nextpy.ai.tools.basetool import BaseTool -from nextpy.ai.tools.toolkits.base import BaseToolkit -from nextpy.ai.tools.toolkits.gmail_toolkit.gmail.create_draft import GmailCreateDraft -from nextpy.ai.tools.toolkits.gmail_toolkit.gmail.get_message import GmailGetMessage -from nextpy.ai.tools.toolkits.gmail_toolkit.gmail.get_thread import GmailGetThread -from nextpy.ai.tools.toolkits.gmail_toolkit.gmail.search import GmailSearch -from nextpy.ai.tools.toolkits.gmail_toolkit.gmail.send_message import GmailSendMessage -from nextpy.ai.tools.toolkits.gmail_toolkit.gmail.utils import build_resource_service - -if TYPE_CHECKING: - # This is for linting and IDE typehints - from googleapiclient.discovery import Resource -else: - try: - # We do this so pydantic can resolve the types when instantiating - from googleapiclient.discovery import Resource - except ImportError: - pass - - -SCOPES = ["https://mail.google.com/"] - - -class GmailToolkit(BaseToolkit): - """Toolkit for interacting with Gmail.""" - - api_resource: Resource = Field(default_factory=build_resource_service) - - class Config: - """Pydantic config.""" - - arbitrary_types_allowed = True - - def get_tools(self) -> List[BaseTool]: - """Get the tools in the toolkit.""" - return [ - GmailCreateDraft(api_resource=self.api_resource), - GmailSendMessage(api_resource=self.api_resource), - GmailSearch(api_resource=self.api_resource), - GmailGetMessage(api_resource=self.api_resource), - GmailGetThread(api_resource=self.api_resource), - ] diff --git a/nextpy/ai/tools/toolkits/gmail_toolkit/gmail/base.py b/nextpy/ai/tools/toolkits/gmail_toolkit/gmail/base.py deleted file mode 100644 index 46a97707..00000000 --- a/nextpy/ai/tools/toolkits/gmail_toolkit/gmail/base.py +++ /dev/null @@ -1,30 +0,0 @@ -# This file has been modified by the Nextpy Team in 2023 using AI tools and automation scripts. -# We have rigorously tested these modifications to ensure reliability and performance. Based on successful test results, we are confident in the quality and stability of these changes. - -"""Base class for Gmail tools.""" -from __future__ import annotations - -from typing import TYPE_CHECKING - -from pydantic import Field - -from nextpy.ai.tools.basetool import BaseTool -from nextpy.ai.tools.toolkits.gmail_toolkit.gmail.utils import build_resource_service - -if TYPE_CHECKING: - # This is for linting and IDE typehints - from googleapiclient.discovery import Resource -else: - try: - # We do this so pydantic can resolve the types when instantiating - from googleapiclient.discovery import Resource - except ImportError: - pass - - -class GmailBaseTool(BaseTool): - api_resource: Resource = Field(default_factory=build_resource_service) - - @classmethod - def from_api_resource(cls, api_resource: Resource) -> "GmailBaseTool": - return cls(api_resource=api_resource) diff --git a/nextpy/ai/tools/toolkits/gmail_toolkit/gmail/create_draft.py b/nextpy/ai/tools/toolkits/gmail_toolkit/gmail/create_draft.py deleted file mode 100644 index 216aef0a..00000000 --- a/nextpy/ai/tools/toolkits/gmail_toolkit/gmail/create_draft.py +++ /dev/null @@ -1,94 +0,0 @@ -# This file has been modified by the Nextpy Team in 2023 using AI tools and automation scripts. -# We have rigorously tested these modifications to ensure reliability and performance. Based on successful test results, we are confident in the quality and stability of these changes.
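A sketch of how the removed GmailToolkit was assembled end to end. It needs a credentials.json from the Google OAuth quickstart; the file names are the defaults defined in the utils module further below:

from nextpy.ai.tools.toolkits.gmail_toolkit.gmail import GmailToolkit
from nextpy.ai.tools.toolkits.gmail_toolkit.gmail.utils import (
    build_resource_service,
    get_gmail_credentials,
)

# Runs the OAuth flow on first use and caches the token in token.json.
creds = get_gmail_credentials(client_secrets_file="credentials.json")
api_resource = build_resource_service(credentials=creds)

toolkit = GmailToolkit(api_resource=api_resource)
for tool in toolkit.get_tools():
    print(tool.name)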
- -import base64 -from email.message import EmailMessage -from typing import List, Optional, Type - -from pydantic import BaseModel, Field - -from nextpy.ai.tools.toolkits.gmail_toolkit.gmail.base import GmailBaseTool - - -class CreateDraftSchema(BaseModel): - message: str = Field( - ..., - description="The message to include in the draft.", - ) - to: List[str] = Field( - ..., - description="The list of recipients.", - ) - subject: str = Field( - ..., - description="The subject of the message.", - ) - cc: Optional[List[str]] = Field( - None, - description="The list of CC recipients.", - ) - bcc: Optional[List[str]] = Field( - None, - description="The list of BCC recipients.", - ) - - -class GmailCreateDraft(GmailBaseTool): - name: str = "create_gmail_draft" - description: str = ( - "Use this tool to create a draft email with the provided message fields." - ) - args_schema: Type[CreateDraftSchema] = CreateDraftSchema - - def _prepare_draft_message( - self, - message: str, - to: List[str], - subject: str, - cc: Optional[List[str]] = None, - bcc: Optional[List[str]] = None, - ) -> dict: - draft_message = EmailMessage() - draft_message.set_content(message) - - draft_message["To"] = ", ".join(to) - draft_message["Subject"] = subject - if cc is not None: - draft_message["Cc"] = ", ".join(cc) - - if bcc is not None: - draft_message["Bcc"] = ", ".join(bcc) - - encoded_message = base64.urlsafe_b64encode(draft_message.as_bytes()).decode() - return {"message": {"raw": encoded_message}} - - def run( - self, - message: str, - to: List[str], - subject: str, - cc: Optional[List[str]] = None, - bcc: Optional[List[str]] = None, - ) -> str: - try: - create_message = self._prepare_draft_message(message, to, subject, cc, bcc) - draft = ( - self.api_resource.users() - .drafts() - .create(userId="me", body=create_message) - .execute() - ) - output = f'Draft created. Draft Id: {draft["id"]}' - return output - except Exception as e: - raise Exception(f"An error occurred: {e}") - - async def _arun( - self, - message: str, - to: List[str], - subject: str, - cc: Optional[List[str]] = None, - bcc: Optional[List[str]] = None, - ) -> str: - raise NotImplementedError(f"The tool {self.name} does not support async yet.") diff --git a/nextpy/ai/tools/toolkits/gmail_toolkit/gmail/get_message.py b/nextpy/ai/tools/toolkits/gmail_toolkit/gmail/get_message.py deleted file mode 100644 index d5536b56..00000000 --- a/nextpy/ai/tools/toolkits/gmail_toolkit/gmail/get_message.py +++ /dev/null @@ -1,65 +0,0 @@ -# This file has been modified by the Nextpy Team in 2023 using AI tools and automation scripts. -# We have rigorously tested these modifications to ensure reliability and performance. Based on successful test results, we are confident in the quality and stability of these changes. - -import base64 -import email -from typing import Dict, Type - -from pydantic import BaseModel, Field - -from nextpy.ai.tools.toolkits.gmail_toolkit.gmail.base import GmailBaseTool -from nextpy.ai.tools.toolkits.gmail_toolkit.gmail.utils import clean_email_body - - -class SearchArgsSchema(BaseModel): - message_id: str = Field( - ..., - description="The unique ID of the email message, retrieved from a search.", - ) - - -class GmailGetMessage(GmailBaseTool): - name: str = "get_gmail_message" - description: str = ( - "Use this tool to fetch an email by message ID." - " Returns the thread ID, snippet, body, subject, and sender."
- ) - args_schema: Type[SearchArgsSchema] = SearchArgsSchema - - def run( - self, - message_id: str, - ) -> Dict: - """Run the tool.""" - query = ( - self.api_resource.users() - .messages() - .get(userId="me", format="raw", id=message_id) - ) - message_data = query.execute() - raw_message = base64.urlsafe_b64decode(message_data["raw"]) - - email_msg = email.message_from_bytes(raw_message) - - subject = email_msg["Subject"] - sender = email_msg["From"] - - message_body = email_msg.get_payload() - - body = clean_email_body(message_body) - - return { - "id": message_id, - "threadId": message_data["threadId"], - "snippet": message_data["snippet"], - "body": body, - "subject": subject, - "sender": sender, - } - - async def _arun( - self, - message_id: str, - ) -> Dict: - """Run the tool.""" - raise NotImplementedError diff --git a/nextpy/ai/tools/toolkits/gmail_toolkit/gmail/get_thread.py b/nextpy/ai/tools/toolkits/gmail_toolkit/gmail/get_thread.py deleted file mode 100644 index 8994f2ab..00000000 --- a/nextpy/ai/tools/toolkits/gmail_toolkit/gmail/get_thread.py +++ /dev/null @@ -1,52 +0,0 @@ -# This file has been modified by the Nextpy Team in 2023 using AI tools and automation scripts. -# We have rigorously tested these modifications to ensure reliability and performance. Based on successful test results, we are confident in the quality and stability of these changes. - -from typing import Dict, Type - -from pydantic import BaseModel, Field - -from nextpy.ai.tools.toolkits.gmail_toolkit.gmail.base import GmailBaseTool - - -class GetThreadSchema(BaseModel): - # From https://support.google.com/mail/answer/7190?hl=en - thread_id: str = Field( - ..., - description="The thread ID.", - ) - - -class GmailGetThread(GmailBaseTool): - name: str = "get_gmail_thread" - description: str = ( - "Use this tool to fetch an entire email thread by thread ID." - " The input must be a valid thread ID." - " The output is the thread data with message IDs and snippets." - ) - args_schema: Type[GetThreadSchema] = GetThreadSchema - - def run( - self, - thread_id: str, - ) -> Dict: - """Run the tool.""" - query = self.api_resource.users().threads().get(userId="me", id=thread_id) - thread_data = query.execute() - if not isinstance(thread_data, dict): - raise ValueError("The output of the query must be a dict.") - messages = thread_data["messages"] - thread_data["messages"] = [] - keys_to_keep = ["id", "snippet"] - # TODO: Parse body. - for message in messages: - thread_data["messages"].append( - {k: message[k] for k in keys_to_keep if k in message} - ) - return thread_data - - async def _arun( - self, - thread_id: str, - ) -> Dict: - """Run the tool.""" - raise NotImplementedError diff --git a/nextpy/ai/tools/toolkits/gmail_toolkit/gmail/search.py b/nextpy/ai/tools/toolkits/gmail_toolkit/gmail/search.py deleted file mode 100644 index cdd9371e..00000000 --- a/nextpy/ai/tools/toolkits/gmail_toolkit/gmail/search.py +++ /dev/null @@ -1,137 +0,0 @@ -# This file has been modified by the Nextpy Team in 2023 using AI tools and automation scripts. -# We have rigorously tested these modifications to ensure reliability and performance. Based on successful test results, we are confident in the quality and stability of these changes.
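GmailGetMessage above requests messages in "raw" format, so decoding is plain standard library; a standalone sketch where the payload is a synthetic stand-in for message_data["raw"]:

import base64
import email

# Stand-in for the base64url payload returned by
# users().messages().get(userId="me", format="raw", id=...).
raw_payload = base64.urlsafe_b64encode(
    b"Subject: Hi\r\nFrom: a@example.com\r\n\r\nHello!"
).decode()

email_msg = email.message_from_bytes(base64.urlsafe_b64decode(raw_payload))
print(email_msg["Subject"], email_msg["From"])  # Hi a@example.com
print(email_msg.get_payload())                  # Hello!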
- -import base64 -import email -from enum import Enum -from typing import Any, Dict, List, Type - -from pydantic import BaseModel, Field - -from nextpy.ai.tools.toolkits.gmail_toolkit.gmail.base import GmailBaseTool -from nextpy.ai.tools.toolkits.gmail_toolkit.gmail.utils import clean_email_body - - -class Resource(str, Enum): - """Enumerator of Resources to search.""" - - THREADS = "threads" - MESSAGES = "messages" - - -class SearchArgsSchema(BaseModel): - # From https://support.google.com/mail/answer/7190?hl=en - query: str = Field( - ..., - description="The Gmail query. Example filters include from:sender," - " to:recipient, subject:subject, -filtered_term," - " in:folder, is:important|read|starred, after:year/mo/date, " - "before:year/mo/date, label:label_name" - ' "exact phrase".' - " Search newer/older than using d (day), m (month), and y (year): " - "newer_than:2d, older_than:1y." - " Attachments with extension example: filename:pdf. Multiple term" - " matching example: from:amy OR from:david.", - ) - resource: Resource = Field( - default=Resource.MESSAGES, - description="Whether to search for threads or messages.", - ) - max_results: int = Field( - default=10, - description="The maximum number of results to return.", - ) - - -class GmailSearch(GmailBaseTool): - name: str = "search_gmail" - description: str = ( - "Use this tool to search for email messages or threads." - " The input must be a valid Gmail query." - " The output is a JSON list of the requested resource." - ) - args_schema: Type[SearchArgsSchema] = SearchArgsSchema - - def _parse_threads(self, threads: List[Dict[str, Any]]) -> List[Dict[str, Any]]: - # Add the thread message snippets to the thread results - results = [] - for thread in threads: - thread_id = thread["id"] - thread_data = ( - self.api_resource.users() - .threads() - .get(userId="me", id=thread_id) - .execute() - ) - messages = thread_data["messages"] - thread["messages"] = [] - for message in messages: - snippet = message["snippet"] - thread["messages"].append({"snippet": snippet, "id": message["id"]}) - results.append(thread) - - return results - - def _parse_messages(self, messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]: - results = [] - for message in messages: - message_id = message["id"] - message_data = ( - self.api_resource.users() - .messages() - .get(userId="me", format="raw", id=message_id) - .execute() - ) - - raw_message = base64.urlsafe_b64decode(message_data["raw"]) - - email_msg = email.message_from_bytes(raw_message) - - subject = email_msg["Subject"] - sender = email_msg["From"] - - message_body = email_msg.get_payload() - - body = clean_email_body(message_body) - - results.append( - { - "id": message["id"], - "threadId": message_data["threadId"], - "snippet": message_data["snippet"], - "body": body, - "subject": subject, - "sender": sender, - } - ) - return results - - def run( - self, - query: str, - resource: Resource = Resource.MESSAGES, - max_results: int = 10, - ) -> List[Dict[str, Any]]: - """Run the tool.""" - # Query the endpoint that matches the requested resource; the response - # keys its results under resource.value ("messages" or "threads"). - if resource == Resource.THREADS: - endpoint = self.api_resource.users().threads() - elif resource == Resource.MESSAGES: - endpoint = self.api_resource.users().messages() - else: - raise NotImplementedError(f"Resource of type {resource} not implemented.") - results = ( - endpoint.list(userId="me", q=query, maxResults=max_results) - .execute() - .get(resource.value, []) - ) - if resource == Resource.THREADS: - return self._parse_threads(results) - return self._parse_messages(results) - - async def _arun( - self, - query: str, - resource: Resource = Resource.MESSAGES, - max_results: int = 10, - ) -> List[Dict[str,
Any]]: - """Run the tool.""" - raise NotImplementedError diff --git a/nextpy/ai/tools/toolkits/gmail_toolkit/gmail/send_message.py b/nextpy/ai/tools/toolkits/gmail_toolkit/gmail/send_message.py deleted file mode 100644 index 4f43db28..00000000 --- a/nextpy/ai/tools/toolkits/gmail_toolkit/gmail/send_message.py +++ /dev/null @@ -1,97 +0,0 @@ -# This file has been modified by the Nextpy Team in 2023 using AI tools and automation scripts. -# We have rigorously tested these modifications to ensure reliability and performance. Based on successful test results, we are confident in the quality and stability of these changes. - -"""Send Gmail messages.""" -import base64 -from email.mime.multipart import MIMEMultipart -from email.mime.text import MIMEText -from typing import Any, Dict, List, Optional, Union - -from pydantic import BaseModel, Field - -from nextpy.ai.tools.toolkits.gmail_toolkit.gmail.base import GmailBaseTool - - -class SendMessageSchema(BaseModel): - message: str = Field( - ..., - description="The message to send.", - ) - to: Union[str, List[str]] = Field( - ..., - description="The list of recipients.", - ) - subject: str = Field( - ..., - description="The subject of the message.", - ) - cc: Optional[Union[str, List[str]]] = Field( - None, - description="The list of CC recipients.", - ) - bcc: Optional[Union[str, List[str]]] = Field( - None, - description="The list of BCC recipients.", - ) - - -class GmailSendMessage(GmailBaseTool): - name: str = "send_gmail_message" - description: str = ( - "Use this tool to send email messages." - " The input is the message and the recipients." - ) - - def _prepare_message( - self, - message: str, - to: Union[str, List[str]], - subject: str, - cc: Optional[Union[str, List[str]]] = None, - bcc: Optional[Union[str, List[str]]] = None, - ) -> Dict[str, Any]: - """Create a message for an email.""" - mime_message = MIMEMultipart() - mime_message.attach(MIMEText(message, "html")) - - mime_message["To"] = ", ".join(to if isinstance(to, list) else [to]) - mime_message["Subject"] = subject - if cc is not None: - mime_message["Cc"] = ", ".join(cc if isinstance(cc, list) else [cc]) - - if bcc is not None: - mime_message["Bcc"] = ", ".join(bcc if isinstance(bcc, list) else [bcc]) - - encoded_message = base64.urlsafe_b64encode(mime_message.as_bytes()).decode() - return {"raw": encoded_message} - - def run( - self, - message: str, - to: Union[str, List[str]], - subject: str, - cc: Optional[Union[str, List[str]]] = None, - bcc: Optional[Union[str, List[str]]] = None, - ) -> str: - """Run the tool.""" - try: - create_message = self._prepare_message(message, to, subject, cc=cc, bcc=bcc) - send_message = ( - self.api_resource.users() - .messages() - .send(userId="me", body=create_message) - ) - sent_message = send_message.execute() - return f'Message sent.
Message Id: {sent_message["id"]}' - except Exception as error: - raise Exception(f"An error occurred: {error}") - - async def _arun( - self, - message: str, - to: Union[str, List[str]], - subject: str, - cc: Optional[Union[str, List[str]]] = None, - bcc: Optional[Union[str, List[str]]] = None, - ) -> str: - """Run the tool asynchronously.""" - raise NotImplementedError(f"The tool {self.name} does not support async yet.") diff --git a/nextpy/ai/tools/toolkits/gmail_toolkit/gmail/utils.py b/nextpy/ai/tools/toolkits/gmail_toolkit/gmail/utils.py deleted file mode 100644 index f2f5c505..00000000 --- a/nextpy/ai/tools/toolkits/gmail_toolkit/gmail/utils.py +++ /dev/null @@ -1,135 +0,0 @@ -# This file has been modified by the Nextpy Team in 2023 using AI tools and automation scripts. -# We have rigorously tested these modifications to ensure reliability and performance. Based on successful test results, we are confident in the quality and stability of these changes. - -"""Gmail tool utils.""" -from __future__ import annotations - -import logging -import os -from typing import TYPE_CHECKING, List, Optional, Tuple - -if TYPE_CHECKING: - from google.auth.transport.requests import Request - from google.oauth2.credentials import Credentials - from google_auth_oauthlib.flow import InstalledAppFlow - from googleapiclient.discovery import Resource - from googleapiclient.discovery import build as build_resource - -logger = logging.getLogger(__name__) - - -def import_google() -> Tuple[Request, Credentials]: - """Import google libraries. - - Returns: - Tuple[Request, Credentials]: Request and Credentials classes. - """ - # google-auth-httplib2 - try: - from google.auth.transport.requests import Request # noqa: F401 - from google.oauth2.credentials import Credentials # noqa: F401 - except ImportError: - raise ImportError( - "You need to install google-auth-httplib2 to use this toolkit. " - "Try running pip install --upgrade google-auth-httplib2" - ) - return Request, Credentials - - -def import_installed_app_flow() -> InstalledAppFlow: - """Import InstalledAppFlow class. - - Returns: - InstalledAppFlow: InstalledAppFlow class. - """ - try: - from google_auth_oauthlib.flow import InstalledAppFlow - except ImportError: - raise ImportError( - "You need to install google-auth-oauthlib to use this toolkit. " - "Try running pip install --upgrade google-auth-oauthlib" - ) - return InstalledAppFlow - - -def import_googleapiclient_resource_builder() -> build_resource: - """Import googleapiclient.discovery.build function. - - Returns: - build_resource: googleapiclient.discovery.build function. - """ - try: - from googleapiclient.discovery import build - except ImportError: - raise ImportError( - "You need to install googleapiclient to use this toolkit.
" - "Try running pip install --upgrade google-api-python-client" - ) - return build - - -DEFAULT_SCOPES = ["https://mail.google.com/"] -DEFAULT_CREDS_TOKEN_FILE = "token.json" -DEFAULT_CLIENT_SECRETS_FILE = "credentials.json" - - -def get_gmail_credentials( - token_file: Optional[str] = None, - client_secrets_file: Optional[str] = None, - scopes: Optional[List[str]] = None, -) -> Credentials: - """Get credentials.""" - # From https://developers.google.com/gmail/api/quickstart/python - Request, Credentials = import_google() - InstalledAppFlow = import_installed_app_flow() - creds = None - scopes = scopes or DEFAULT_SCOPES - token_file = token_file or DEFAULT_CREDS_TOKEN_FILE - client_secrets_file = client_secrets_file or DEFAULT_CLIENT_SECRETS_FILE - # The file token.json stores the user's access and refresh tokens, and is - # created automatically when the authorization flow completes for the first - # time. - if os.path.exists(token_file): - creds = Credentials.from_authorized_user_file(token_file, scopes) - # If there are no (valid) credentials available, let the user log in. - if not creds or not creds.valid: - if creds and creds.expired and creds.refresh_token: - creds.refresh(Request()) - else: - # https://developers.google.com/gmail/api/quickstart/python#authorize_credentials_for_a_desktop_application # noqa - flow = InstalledAppFlow.from_client_secrets_file( - client_secrets_file, scopes - ) - creds = flow.run_local_server(port=0) - # Save the credentials for the next run - with open(token_file, "w") as token: - token.write(creds.to_json()) - return creds - - -def build_resource_service( - credentials: Optional[Credentials] = None, - service_name: str = "gmail", - service_version: str = "v1", -) -> Resource: - """Build a Gmail service.""" - credentials = credentials or get_gmail_credentials() - builder = import_googleapiclient_resource_builder() - return builder(service_name, service_version, credentials=credentials) - - -def clean_email_body(body: str) -> str: - """Clean email body.""" - try: - from bs4 import BeautifulSoup - - try: - soup = BeautifulSoup(str(body), "html.parser") - body = soup.get_text() - return str(body) - except Exception as e: - logger.error(e) - return str(body) - except ImportError: - logger.warning("BeautifulSoup not installed. Skipping cleaning.") - return str(body) diff --git a/nextpy/ai/tools/toolkits/google_calendar_toolkit/google_calendar.py b/nextpy/ai/tools/toolkits/google_calendar_toolkit/google_calendar.py deleted file mode 100644 index d039567f..00000000 --- a/nextpy/ai/tools/toolkits/google_calendar_toolkit/google_calendar.py +++ /dev/null @@ -1,47 +0,0 @@ -# This file has been modified by the Nextpy Team in 2023 using AI tools and automation scripts. -# We have rigorously tested these modifications to ensure reliability and performance. Based on successful test results, we are confident in the quality and stability of these changes. - -"""Google Calendar toolkit.""" - -# Copyright 2018 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -from typing import List - -from nextpy.ai.tools.basetool import BaseTool -from nextpy.ai.tools.toolkits.base import BaseToolkit -from nextpy.ai.tools.toolkits.google_calendar_toolkit.google_calendar.base import ( - CreateEvent, - GetDate, - LoadData, -) - -SCOPES = ["https://www.googleapis.com/auth/calendar"] - - -class GoogleCalendarToolkit(BaseToolkit): - """Google Calendar toolkit. - - Currently a simple wrapper around the data loader. - TODO: add more methods to the Google Calendar toolkit. - - """ - - def get_tools(self) -> List[BaseTool]: - """Get the tools in the toolkit.""" - return [ - LoadData(), - CreateEvent(), - GetDate(), - ] diff --git a/nextpy/ai/tools/toolkits/google_calendar_toolkit/google_calendar/base.py b/nextpy/ai/tools/toolkits/google_calendar_toolkit/google_calendar/base.py deleted file mode 100644 index 9a2a9612..00000000 --- a/nextpy/ai/tools/toolkits/google_calendar_toolkit/google_calendar/base.py +++ /dev/null @@ -1,272 +0,0 @@ -# This file has been modified by the Nextpy Team in 2023 using AI tools and automation scripts. -# We have rigorously tested these modifications to ensure reliability and performance. Based on successful test results, we are confident in the quality and stability of these changes. - -import datetime -import os -from typing import Any, List, Optional, Type, Union - -from pydantic import BaseModel, Field - -from nextpy.ai.schema import Document -from nextpy.ai.tools.basetool import BaseTool - -SCOPES = ["https://www.googleapis.com/auth/calendar"] - - -class GoogleCalendar(BaseTool): - """Google Calendar tool spec. - - Currently a simple wrapper around the data loader. - TODO: add more methods to the Google Calendar tool. - - """ - - -def _get_credentials(self) -> Any: - """Get valid user credentials from storage. - - The file token.json stores the user's access and refresh tokens, and is - created automatically when the authorization flow completes for the first - time. - - Returns: - Credentials, the obtained credential. - """ - from google.auth.transport.requests import Request - from google.oauth2.credentials import Credentials - from google_auth_oauthlib.flow import InstalledAppFlow - - creds = None - if os.path.exists("token.json"): - creds = Credentials.from_authorized_user_file("token.json", SCOPES) - # If there are no (valid) credentials available, let the user log in. - if not creds or not creds.valid: - if creds and creds.expired and creds.refresh_token: - creds.refresh(Request()) - else: - flow = InstalledAppFlow.from_client_secrets_file("credentials.json", SCOPES) - creds = flow.run_local_server(port=8080) - # Save the credentials for the next run - with open("token.json", "w") as token: - token.write(creds.to_json()) - - return creds - - -class LoadDataArgsSchema(BaseModel): - number_of_results: Optional[int] = Field( - ..., - description=" Information about the parameter. ", - ) - start_date: Optional[Union[str, datetime.date]] = Field( - ..., - description=" Information about the parameter. ", - ) - - -class LoadData(GoogleCalendar): - name: str = "Load Data" - description: str = "Load data from user's calendar." - args_schema: Type[LoadDataArgsSchema] = LoadDataArgsSchema - - def load_data( - self, - number_of_results: Optional[int] = 100, - start_date: Optional[Union[str, datetime.date]] = None, - ) -> List[Document]: - """Load data from user's calendar. - - Args: - number_of_results (Optional[int]): the number of events to return. Defaults to 100. 
- start_date (Optional[Union[str, datetime.date]]): the start date to return events from in date isoformat. Defaults to today. - """ - from googleapiclient.discovery import build - - credentials = _get_credentials() - service = build("calendar", "v3", credentials=credentials) - - if start_date is None: - start_date = datetime.date.today() - elif isinstance(start_date, str): - start_date = datetime.date.fromisoformat(start_date) - - start_datetime = datetime.datetime.combine(start_date, datetime.time.min) - start_datetime_utc = start_datetime.strftime("%Y-%m-%dT%H:%M:%S.%fZ") - - events_result = ( - service.events() - .list( - calendarId="primary", - timeMin=start_datetime_utc, - maxResults=number_of_results, - singleEvents=True, - orderBy="startTime", - ) - .execute() - ) - - events = events_result.get("items", []) - - if not events: - return [] - - results = [] - for event in events: - if "dateTime" in event["start"]: - start_time = event["start"]["dateTime"] - else: - start_time = event["start"]["date"] - - if "dateTime" in event["end"]: - end_time = event["end"]["dateTime"] - else: - end_time = event["end"]["date"] - - event_string = f"Status: {event['status']}, " - event_string += f"Summary: {event['summary']}, " - event_string += f"Start time: {start_time}, " - event_string += f"End time: {end_time}, " - - organizer = event.get("organizer", {}) - display_name = organizer.get("displayName", "N/A") - email = organizer.get("email", "N/A") - if display_name != "N/A": - event_string += f"Organizer: {display_name} ({email})" - else: - event_string += f"Organizer: {email}" - - results.append(Document(text=event_string)) - - return results - - def run( - self, - number_of_results: Optional[int] = 100, - start_date: Optional[Union[str, datetime.date]] = None, - ) -> str: - try: - return self.load_data( - self, number_of_results=number_of_results, start_date=start_date - ) - except Exception as e: - raise Exception(f"An error occurred: {e}") - - -class CreateEventArgsSchema(BaseModel): - title: Optional[str] = Field( - ..., - description=" Information about the parameter. ", - ) - description: Optional[str] = Field( - ..., - description=" Information about the parameter. ", - ) - location: Optional[str] = Field( - ..., - description=" Information about the parameter. ", - ) - start_datetime: Optional[Union[str, datetime.datetime]] = Field( - ..., - description=" Information about the parameter. ", - ) - end_datetime: Optional[Union[str, datetime.datetime]] = Field( - ..., - description=" Information about the parameter. ", - ) - attendees: Optional[List[str]] = Field( - ..., - description=" Information about the parameter. ", - ) - - -class CreateEvent(GoogleCalendar): - name: str = "Create Event" - description: str = "Create an event on the users calendar." - args_schema: Type[CreateEventArgsSchema] = CreateEventArgsSchema - - def create_event( - self, - title: Optional[str] = None, - description: Optional[str] = None, - location: Optional[str] = None, - start_datetime: Optional[Union[str, datetime.datetime]] = None, - end_datetime: Optional[Union[str, datetime.datetime]] = None, - attendees: Optional[List[str]] = None, - ) -> str: - """Create an event on the users calendar. 
-
-        Args:
-            title (Optional[str]): The title for the event
-            description (Optional[str]): The description for the event
-            location (Optional[str]): The location for the event
-            start_datetime Optional[Union[str, datetime.datetime]]: The start datetime for the event
-            end_datetime Optional[Union[str, datetime.datetime]]: The end datetime for the event
-            attendees Optional[List[str]]: A list of email addresses to invite to the event
-        """
-        from googleapiclient.discovery import build
-
-        credentials = _get_credentials()
-        service = build("calendar", "v3", credentials=credentials)
-
-        attendees_list = []
-        for attendee in attendees or []:
-            attendees_list.append({"email": attendee})
-        start_time = (
-            datetime.datetime.strptime(start_datetime, "%Y-%m-%dT%H:%M:%S")
-            .astimezone()
-            .strftime("%Y-%m-%dT%H:%M:%S.%f%z")
-        )
-        end_time = (
-            datetime.datetime.strptime(end_datetime, "%Y-%m-%dT%H:%M:%S")
-            .astimezone()
-            .strftime("%Y-%m-%dT%H:%M:%S.%f%z")
-        )
-
-        event = {
-            "summary": title,
-            "location": location,
-            "description": description,
-            "start": {
-                "dateTime": start_time,
-            },
-            "end": {
-                "dateTime": end_time,
-            },
-            "attendees": attendees_list,
-        }
-        event = service.events().insert(calendarId="primary", body=event).execute()
-        return "Your calendar event has been created successfully! You can move on to the next step."
-
-    def run(
-        self,
-        title: Optional[str] = None,
-        description: Optional[str] = None,
-        location: Optional[str] = None,
-        start_datetime: Optional[Union[str, datetime.datetime]] = None,
-        end_datetime: Optional[Union[str, datetime.datetime]] = None,
-        attendees: Optional[List[str]] = None,
-    ) -> str:
-        try:
-            return self.create_event(
-                title=title,
-                description=description,
-                location=location,
-                start_datetime=start_datetime,
-                end_datetime=end_datetime,
-                attendees=attendees,
-            )
-        except Exception as e:
-            raise Exception(f"An error occurred: {e}")
-
-
-class GetDate(GoogleCalendar):
-    name: str = "Get date"
-    description: str = "A function to return today's date. Call this before any other functions if you are unaware of the date."
-
-    def get_date(self):
-        """A function to return today's date. Call this before any other functions if you are unaware of the date."""
-        return datetime.date.today()
-
-    def run(self):
-        return self.get_date()
diff --git a/nextpy/ai/tools/toolkits/json_toolkit/json/tool.py b/nextpy/ai/tools/toolkits/json_toolkit/json/tool.py
deleted file mode 100644
index 048b513b..00000000
--- a/nextpy/ai/tools/toolkits/json_toolkit/json/tool.py
+++ /dev/null
@@ -1,127 +0,0 @@
-# This file has been modified by the Nextpy Team in 2023 using AI tools and automation scripts.
-# We have rigorously tested these modifications to ensure reliability and performance. Based on successful test results, we are confident in the quality and stability of these changes.
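# Editor's usage sketch (hedged): the intended call pattern for the Google
# Calendar tools deleted above. As removed, the code needs two repairs first:
# _get_credentials(self) is a module-level function taking a stray self, and
# run() forwards an extra self to load_data(); the sketch calls load_data()
# directly and assumes a valid credentials.json for the OAuth flow.
from nextpy.ai.tools.toolkits.google_calendar_toolkit.google_calendar.base import (
    GetDate,
    LoadData,
)

today = GetDate().run()  # datetime.date.today()
# Each returned Document wraps one "Status/Summary/Start/End/Organizer" string.
for doc in LoadData().load_data(number_of_results=5, start_date=today):
    print(doc.text)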
- -"""Tools for working with JSON specs.""" -from __future__ import annotations - -import json -import re -from pathlib import Path -from typing import Dict, List, Union - -from pydantic import BaseModel - -from nextpy.ai.tools.basetool import BaseTool - - -def _parse_input(text: str) -> List[Union[str, int]]: - """Parse input of the form data["key1"][0]["key2"] into a list of keys.""" - _res = re.findall(r"\[.*?]", text) - # strip the brackets and quotes, convert to int if possible - res = [i[1:-1].replace('"', "") for i in _res] - res = [int(i) if i.isdigit() else i for i in res] - return res - - -class JsonSpec(BaseModel): - """Base class for JSON spec.""" - - dict_: Dict - max_value_length: int = 200 - - @classmethod - def from_file(cls, path: Path) -> JsonSpec: - """Create a JsonSpec from a file.""" - if not path.exists(): - raise FileNotFoundError(f"File not found: {path}") - dict_ = json.loads(path.read_text()) - return cls(dict_=dict_) - - def keys(self, text: str) -> str: - """Return the keys of the dict at the given path. - - Args: - text: Python representation of the path to the dict (e.g. data["key1"][0]["key2"]). - """ - try: - items = _parse_input(text) - val = self.dict_ - for i in items: - if i: - val = val[i] - if not isinstance(val, dict): - raise ValueError( - f"Value at path `{text}` is not a dict, get the value directly." - ) - return str(list(val.keys())) - except Exception as e: - return repr(e) - - def value(self, text: str) -> str: - """Return the value of the dict at the given path. - - Args: - text: Python representation of the path to the dict (e.g. data["key1"][0]["key2"]). - """ - try: - items = _parse_input(text) - val = self.dict_ - for i in items: - val = val[i] - - if isinstance(val, dict) and len(str(val)) > self.max_value_length: - return "Value is a large dictionary, should explore its keys directly" - str_val = str(val) - if len(str_val) > self.max_value_length: - str_val = str_val[: self.max_value_length] + "..." - return str_val - except Exception as e: - return repr(e) - - -class JsonListKeysTool(BaseTool): - """Tool for listing keys in a JSON spec.""" - - name = "json_spec_list_keys" - description = """ - Can be used to list all keys at a given path. - Before calling this you should be SURE that the path to this exists. - The input is a text representation of the path to the dict in Python syntax (e.g. data["key1"][0]["key2"]). - """ - spec: JsonSpec - - def run( - self, - tool_input: str, - ) -> str: - return self.spec.keys(tool_input) - - async def _arun( - self, - tool_input: str, - ) -> str: - return self.run(tool_input) - - -class JsonGetValueTool(BaseTool): - """Tool for getting a value in a JSON spec.""" - - name = "json_spec_get_value" - description = """ - Can be used to see value in string format at a given path. - Before calling this you should be SURE that the path to this exists. - The input is a text representation of the path to the dict in Python syntax (e.g. data["key1"][0]["key2"]). 
- """ - spec: JsonSpec - - def run( - self, - tool_input: str, - ) -> str: - return self.spec.value(tool_input) - - async def _arun( - self, - tool_input: str, - ) -> str: - return self.run(tool_input) diff --git a/nextpy/ai/tools/toolkits/json_toolkit/json_python.py b/nextpy/ai/tools/toolkits/json_toolkit/json_python.py deleted file mode 100644 index f56e6db5..00000000 --- a/nextpy/ai/tools/toolkits/json_toolkit/json_python.py +++ /dev/null @@ -1,28 +0,0 @@ -# This file has been modified by the Nextpy Team in 2023 using AI tools and automation scripts. -# We have rigorously tested these modifications to ensure reliability and performance. Based on successful test results, we are confident in the quality and stability of these changes. - -"""Toolkit for interacting with a JSON spec.""" -from __future__ import annotations - -from typing import List - -from nextpy.ai.tools.basetool import BaseTool -from nextpy.ai.tools.toolkits.base import BaseToolkit -from nextpy.ai.tools.toolkits.json_toolkit.json.tool import ( - JsonGetValueTool, - JsonListKeysTool, - JsonSpec, -) - - -class JsonToolkit(BaseToolkit): - """Toolkit for interacting with a JSON spec.""" - - spec: JsonSpec - - def get_tools(self) -> List[BaseTool]: - """Get the tools in the toolkit.""" - return [ - JsonListKeysTool(spec=self.spec), - JsonGetValueTool(spec=self.spec), - ] diff --git a/nextpy/ai/tools/toolkits/notion_toolkit/notion.py b/nextpy/ai/tools/toolkits/notion_toolkit/notion.py deleted file mode 100644 index eb4e0b78..00000000 --- a/nextpy/ai/tools/toolkits/notion_toolkit/notion.py +++ /dev/null @@ -1,30 +0,0 @@ -# This file has been modified by the Nextpy Team in 2023 using AI tools and automation scripts. -# We have rigorously tested these modifications to ensure reliability and performance. Based on successful test results, we are confident in the quality and stability of these changes. - -"""Notion tool spec.""" - -from typing import List, Optional - -from nextpy.ai.tools.basetool import BaseTool -from nextpy.ai.tools.toolkits.base import BaseToolkit -from nextpy.ai.tools.toolkits.notion_toolkit.notion.base import LoadData, SearchData - -SEARCH_URL = "https://api.notion.com/v1/search" - - -class NotionToolkit(BaseToolkit): - """Notion tool spec. - - Currently a simple wrapper around the data loader. - TODO: add more methods to the Notion spec. - - """ - - integration_token: Optional[str] = None - - def get_tools(self) -> List[BaseTool]: - """Get the tools in the toolkit.""" - return [ - LoadData(integration_token=self.integration_token), - SearchData(integration_token=self.integration_token), - ] diff --git a/nextpy/ai/tools/toolkits/notion_toolkit/notion/base.py b/nextpy/ai/tools/toolkits/notion_toolkit/notion/base.py deleted file mode 100644 index 1b4efb15..00000000 --- a/nextpy/ai/tools/toolkits/notion_toolkit/notion/base.py +++ /dev/null @@ -1,147 +0,0 @@ -# This file has been modified by the Nextpy Team in 2023 using AI tools and automation scripts. -# We have rigorously tested these modifications to ensure reliability and performance. Based on successful test results, we are confident in the quality and stability of these changes. 
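# Editor's usage sketch (hedged) for the JSON toolkit deleted above: build a
# JsonSpec from an in-memory dict and walk it with the two tools. The sample
# dict is hypothetical; any JSON-like structure works.
from nextpy.ai.tools.toolkits.json_toolkit.json.tool import (
    JsonGetValueTool,
    JsonListKeysTool,
    JsonSpec,
)

spec = JsonSpec(dict_={"info": {"title": "Demo API", "version": "1.0.0"}})
list_keys = JsonListKeysTool(spec=spec)
get_value = JsonGetValueTool(spec=spec)

print(list_keys.run("data"))                   # -> "['info']"
print(get_value.run('data["info"]["title"]'))  # -> "Demo API"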
-
-"""Notion tool spec."""
-
-# from llama_index.tools.tool_spec.base import BaseToolSpec
-from typing import Any, Dict, List, Optional, Type
-
-import requests
-from pydantic import BaseModel, Field
-
-from nextpy.ai.tools.basetool import BaseTool
-from nextpy.ai.tools.toolkits.notion_toolkit.notion.utils import NotionPageReader
-
-SEARCH_URL = "https://api.notion.com/v1/search"
-
-
-class NotionLoadDataSchema(BaseModel):
-    """Notion load data schema."""
-
-    page_ids: Optional[List[str]] = None
-    database_id: Optional[str] = None
-
-
-class NotionSearchDataSchema(BaseModel):
-    """Notion search data schema."""
-
-    query: str
-    direction: Optional[str] = None
-    timestamp: Optional[str] = None
-    value: Optional[str] = None
-    property: Optional[str] = None
-    page_size: int = 100
-
-
-class NotionBase(BaseTool):
-    def __init__(self, integration_token: Optional[str] = None) -> None:
-        """Initialize with parameters."""
-        self.reader = NotionPageReader(integration_token=integration_token)
-
-
-class LoadDataArgsSchema(BaseModel):
-    page_ids: Optional[List[str]] = Field(
-        ..., description="******Provide Description About PageId*******"
-    )
-    database_id: Optional[str] = Field(
-        ..., description="******Description about this Parameter********"
-    )
-
-
-class SearchDataArgsSchema(BaseModel):
-    query: str = Field(..., description="Info about parameter")
-    direction: Optional[str] = Field(..., description="Info about parameter")
-    timestamp: Optional[str] = Field(..., description="Info about parameter")
-    value: Optional[str] = Field(..., description="Info about parameter")
-    property: Optional[str] = Field(..., description="Info about parameter")
-    page_size: int = Field(..., description="Info about parameter")
-
-
-class LoadData(NotionBase):
-    name: str = "Load Data"
-    description: str = "Loads content from a set of page ids or a database id."
-    args_schema: Type[LoadDataArgsSchema] = LoadDataArgsSchema
-
-    def load_data(
-        self, page_ids: Optional[List[str]] = None, database_id: Optional[str] = None
-    ) -> str:
-        """Loads content from a set of page ids or a database id.
-
-        Don't use this endpoint if you don't know the page ids or database id.
-
-        """
-        page_ids = page_ids or []
-        docs = self.reader.load_data(page_ids=page_ids, database_id=database_id)
-        return "\n".join([doc.get_content() for doc in docs])
-
-    def run(
-        self, page_ids: Optional[List[str]] = None, database_id: Optional[str] = None
-    ) -> str:
-        try:
-            return self.load_data(page_ids=page_ids, database_id=database_id)
-        except Exception as e:
-            return repr(e)
-
-
-class SearchData(NotionBase):
-    name: str = "Search Data"
-    description: str = "Search a list of relevant pages. Contains metadata for each page (but not the page content)."
-    args_schema: Type[SearchDataArgsSchema] = SearchDataArgsSchema
-
-    def search_data(
-        self,
-        query: str,
-        direction: Optional[str] = None,
-        timestamp: Optional[str] = None,
-        value: Optional[str] = None,
-        property: Optional[str] = None,
-        page_size: int = 100,
-    ) -> str:
-        """Search a list of relevant pages.
-
-        Contains metadata for each page (but not the page content).
- - """ - payload: Dict[str, Any] = { - "query": query, - "page_size": page_size, - } - if direction is not None or timestamp is not None: - payload["sort"] = {} - if direction is not None: - payload["sort"]["direction"] = direction - if timestamp is not None: - payload["sort"]["timestamp"] = timestamp - - if value is not None or property is not None: - payload["filter"] = {} - if value is not None: - payload["filter"]["value"] = value - if property is not None: - payload["filter"]["property"] = property - - response = requests.post(SEARCH_URL, json=payload, headers=self.reader.headers) - response_json = response.json() - response_results = response_json["results"] - return response_results - - def run( - self, - query: str, - direction: Optional[str] = None, - timestamp: Optional[str] = None, - value: Optional[str] = None, - property: Optional[str] = None, - page_size: int = 100, - ) -> str: - try: - return self.search_data( - query=query, - direction=direction, - timestamp=timestamp, - value=value, - property=property, - page_size=page_size, - ) - except Exception as e: - return e diff --git a/nextpy/ai/tools/toolkits/notion_toolkit/notion/utils.py b/nextpy/ai/tools/toolkits/notion_toolkit/notion/utils.py deleted file mode 100644 index fdb169bc..00000000 --- a/nextpy/ai/tools/toolkits/notion_toolkit/notion/utils.py +++ /dev/null @@ -1,171 +0,0 @@ -# This file has been modified by the Nextpy Team in 2023 using AI tools and automation scripts. -# We have rigorously tested these modifications to ensure reliability and performance. Based on successful test results, we are confident in the quality and stability of these changes. - -"""Notion reader.""" -import logging -import os -from typing import Any, Dict, List, Optional - -import requests # type: ignore - -from nextpy.ai.schema import Document - -INTEGRATION_TOKEN_NAME = "NOTION_INTEGRATION_TOKEN" -BLOCK_CHILD_URL_TMPL = "https://api.notion.com/v1/blocks/{block_id}/children" -DATABASE_URL_TMPL = "https://api.notion.com/v1/databases/{database_id}/query" -SEARCH_URL = "https://api.notion.com/v1/search" - -logger = logging.getLogger(__name__) - - -# TODO: Notion DB reader coming soon! -class NotionPageReader: - """Notion Page reader. - - Reads a set of Notion pages. - - Args: - integration_token (str): Notion integration token. - - """ - - def __init__(self, integration_token: Optional[str] = None) -> None: - """Initialize with parameters.""" - if integration_token is None: - integration_token = os.getenv(INTEGRATION_TOKEN_NAME) - if integration_token is None: - raise ValueError( - "Must specify `integration_token` or set environment " - "variable `NOTION_INTEGRATION_TOKEN`." 
- ) - self.token = integration_token - self.headers = { - "Authorization": "Bearer " + self.token, - "Content-Type": "application/json", - "Notion-Version": "2022-06-28", - } - - def _read_block(self, block_id: str, num_tabs: int = 0) -> str: - """Read a block.""" - done = False - result_lines_arr = [] - cur_block_id = block_id - while not done: - block_url = BLOCK_CHILD_URL_TMPL.format(block_id=cur_block_id) - query_dict: Dict[str, Any] = {} - - res = requests.request( - "GET", block_url, headers=self.headers, json=query_dict - ) - data = res.json() - - for result in data["results"]: - result_type = result["type"] - result_obj = result[result_type] - - cur_result_text_arr = [] - if "rich_text" in result_obj: - for rich_text in result_obj["rich_text"]: - # skip if doesn't have text object - if "text" in rich_text: - text = rich_text["text"]["content"] - prefix = "\t" * num_tabs - cur_result_text_arr.append(prefix + text) - - result_block_id = result["id"] - has_children = result["has_children"] - if has_children: - children_text = self._read_block( - result_block_id, num_tabs=num_tabs + 1 - ) - cur_result_text_arr.append(children_text) - - cur_result_text = "\n".join(cur_result_text_arr) - result_lines_arr.append(cur_result_text) - - if data["next_cursor"] is None: - done = True - break - else: - cur_block_id = data["next_cursor"] - - result_lines = "\n".join(result_lines_arr) - return result_lines - - def read_page(self, page_id: str) -> str: - """Read a page.""" - return self._read_block(page_id) - - def query_database( - self, database_id: str, query_dict: Dict[str, Any] = {} - ) -> List[str]: - """Get all the pages from a Notion database.""" - res = requests.post( - DATABASE_URL_TMPL.format(database_id=database_id), - headers=self.headers, - json=query_dict, - ) - data = res.json() - page_ids = [] - for result in data["results"]: - page_id = result["id"] - page_ids.append(page_id) - - return page_ids - - def search(self, query: str) -> List[str]: - """Search Notion page given a text query.""" - done = False - next_cursor: Optional[str] = None - page_ids = [] - while not done: - query_dict = { - "query": query, - } - if next_cursor is not None: - query_dict["start_cursor"] = next_cursor - res = requests.post(SEARCH_URL, headers=self.headers, json=query_dict) - data = res.json() - for result in data["results"]: - page_id = result["id"] - page_ids.append(page_id) - - if data["next_cursor"] is None: - done = True - break - else: - next_cursor = data["next_cursor"] - return page_ids - - def load_data( - self, page_ids: List[str] = [], database_id: Optional[str] = None - ) -> List[Document]: - """Load data from the input directory. - - Args: - page_ids (List[str]): List of page ids to load. - - Returns: - List[Document]: List of documents. 
- - """ - if not page_ids and not database_id: - raise ValueError("Must specify either `page_ids` or `database_id`.") - docs = [] - if database_id is not None: - # get all the pages in the database - page_ids = self.query_database(database_id) - for page_id in page_ids: - page_text = self.read_page(page_id) - docs.append(Document(text=page_text, metadata={"page_id": page_id})) - else: - for page_id in page_ids: - page_text = self.read_page(page_id) - docs.append(Document(text=page_text, metadata={"page_id": page_id})) - - return docs - - -if __name__ == "__main__": - reader = NotionPageReader() - logger.info(reader.search("What I")) diff --git a/nextpy/ai/tools/toolkits/openapi_toolkit/base.py b/nextpy/ai/tools/toolkits/openapi_toolkit/base.py deleted file mode 100644 index 3ddbc579..00000000 --- a/nextpy/ai/tools/toolkits/openapi_toolkit/base.py +++ /dev/null @@ -1,586 +0,0 @@ -# This file has been modified by the Nextpy Team in 2023 using AI tools and automation scripts. -# We have rigorously tested these modifications to ensure reliability and performance. Based on successful test results, we are confident in the quality and stability of these changes. - -"""Pydantic models for parsing an OpenAPI spec.""" -import logging -from enum import Enum -from typing import Any, Dict, List, Optional, Sequence, Tuple, Type, Union - -from openapi_schema_pydantic import MediaType, Parameter, Reference, RequestBody, Schema -from pydantic import BaseModel, Field - -from nextpy.ai.tools.toolkits.openapi_toolkit.utils import HTTPVerb, OpenAPISpec - -logger = logging.getLogger(__name__) -PRIMITIVE_TYPES = { - "integer": int, - "number": float, - "string": str, - "boolean": bool, - "array": List, - "object": Dict, - "null": None, -} - - -# See https://github.com/OAI/OpenAPI-Specification/blob/main/versions/3.1.0.md#parameterIn -# for more info. -class APIPropertyLocation(Enum): - """The location of the property.""" - - QUERY = "query" - PATH = "path" - HEADER = "header" - COOKIE = "cookie" # Not yet supported - - @classmethod - def from_str(cls, location: str) -> "APIPropertyLocation": - """Parse an APIPropertyLocation.""" - try: - return cls(location) - except ValueError: - raise ValueError( - f"Invalid APIPropertyLocation. Valid values are {cls.__members__}" - ) - - -_SUPPORTED_MEDIA_TYPES = ("application/json",) - -SUPPORTED_LOCATIONS = { - APIPropertyLocation.QUERY, - APIPropertyLocation.PATH, -} -INVALID_LOCATION_TEMPL = ( - 'Unsupported APIPropertyLocation "{location}"' - " for parameter {name}. " - + f"Valid values are {[loc.value for loc in SUPPORTED_LOCATIONS]}" -) - -SCHEMA_TYPE = Union[str, Type, tuple, None, Enum] - - -class APIPropertyBase(BaseModel): - """Base model for an API property.""" - - # The name of the parameter is required and is case-sensitive. - # If "in" is "path", the "name" field must correspond to a template expression - # within the path field in the Paths Object. - # If "in" is "header" and the "name" field is "Accept", "Content-Type", - # or "Authorization", the parameter definition is ignored. - # For all other cases, the "name" corresponds to the parameter - # name used by the "in" property. - name: str = Field(alias="name") - """The name of the property.""" - - required: bool = Field(alias="required") - """Whether the property is required.""" - - type: SCHEMA_TYPE = Field(alias="type") - """The type of the property. 
- - Either a primitive type, a component/parameter type, - or an array or 'object' (dict) of the above.""" - - default: Optional[Any] = Field(alias="default", default=None) - """The default value of the property.""" - - description: Optional[str] = Field(alias="description", default=None) - """The description of the property.""" - - -class APIProperty(APIPropertyBase): - """A model for a property in the query, path, header, or cookie params.""" - - location: APIPropertyLocation = Field(alias="location") - """The path/how it's being passed to the endpoint.""" - - @staticmethod - def _cast_schema_list_type(schema: Schema) -> Optional[Union[str, Tuple[str, ...]]]: - type_ = schema.type - if not isinstance(type_, list): - return type_ - else: - return tuple(type_) - - @staticmethod - def _get_schema_type_for_enum(parameter: Parameter, schema: Schema) -> Enum: - """Get the schema type when the parameter is an enum.""" - param_name = f"{parameter.name}Enum" - return Enum(param_name, {str(v): v for v in schema.enum}) - - @staticmethod - def _get_schema_type_for_array( - schema: Schema, - ) -> Optional[Union[str, Tuple[str, ...]]]: - items = schema.items - if isinstance(items, Schema): - schema_type = APIProperty._cast_schema_list_type(items) - elif isinstance(items, Reference): - ref_name = items.ref.split("/")[-1] - schema_type = ref_name # TODO: Add ref definitions to make his valid - else: - raise ValueError(f"Unsupported array items: {items}") - - if isinstance(schema_type, str): - # TODO: recurse - schema_type = (schema_type,) - - return schema_type - - @staticmethod - def _get_schema_type(parameter: Parameter, schema: Optional[Schema]) -> SCHEMA_TYPE: - if schema is None: - return None - schema_type: SCHEMA_TYPE = APIProperty._cast_schema_list_type(schema) - if schema_type == "array": - schema_type = APIProperty._get_schema_type_for_array(schema) - elif schema_type == "object": - # TODO: Resolve array and object types to components. - raise NotImplementedError("Objects not yet supported") - elif schema_type in PRIMITIVE_TYPES: - if schema.enum: - schema_type = APIProperty._get_schema_type_for_enum(parameter, schema) - else: - # Directly use the primitive type - pass - else: - raise NotImplementedError(f"Unsupported type: {schema_type}") - - return schema_type - - @staticmethod - def _validate_location(location: APIPropertyLocation, name: str) -> None: - if location not in SUPPORTED_LOCATIONS: - raise NotImplementedError( - INVALID_LOCATION_TEMPL.format(location=location, name=name) - ) - - @staticmethod - def _validate_content(content: Optional[Dict[str, MediaType]]) -> None: - if content: - raise ValueError( - "API Properties with media content not supported. 
" - "Media content only supported within APIRequestBodyProperty's" - ) - - @staticmethod - def _get_schema(parameter: Parameter, spec: OpenAPISpec) -> Optional[Schema]: - schema = parameter.param_schema - if isinstance(schema, Reference): - schema = spec.get_referenced_schema(schema) - elif schema is None: - return None - elif not isinstance(schema, Schema): - raise ValueError(f"Error dereferencing schema: {schema}") - - return schema - - @staticmethod - def is_supported_location(location: str) -> bool: - """Return whether the provided location is supported.""" - try: - return APIPropertyLocation.from_str(location) in SUPPORTED_LOCATIONS - except ValueError: - return False - - @classmethod - def from_parameter(cls, parameter: Parameter, spec: OpenAPISpec) -> "APIProperty": - """Instantiate from an OpenAPI Parameter.""" - location = APIPropertyLocation.from_str(parameter.param_in) - cls._validate_location( - location, - parameter.name, - ) - cls._validate_content(parameter.content) - schema = cls._get_schema(parameter, spec) - schema_type = cls._get_schema_type(parameter, schema) - default_val = schema.default if schema is not None else None - return cls( - name=parameter.name, - location=location, - default=default_val, - description=parameter.description, - required=parameter.required, - type=schema_type, - ) - - -class APIRequestBodyProperty(APIPropertyBase): - """A model for a request body property.""" - - properties: List["APIRequestBodyProperty"] = Field(alias="properties") - """The sub-properties of the property.""" - - # This is useful for handling nested property cycles. - # We can define separate types in that case. - references_used: List[str] = Field(alias="references_used") - """The references used by the property.""" - - @classmethod - def _process_object_schema( - cls, schema: Schema, spec: OpenAPISpec, references_used: List[str] - ) -> Tuple[Union[str, List[str], None], List["APIRequestBodyProperty"]]: - properties = [] - required_props = schema.required or [] - if schema.properties is None: - raise ValueError( - f"No properties found when processing object schema: {schema}" - ) - for prop_name, prop_schema in schema.properties.items(): - if isinstance(prop_schema, Reference): - ref_name = prop_schema.ref.split("/")[-1] - if ref_name not in references_used: - references_used.append(ref_name) - prop_schema = spec.get_referenced_schema(prop_schema) - else: - continue - - properties.append( - cls.from_schema( - schema=prop_schema, - name=prop_name, - required=prop_name in required_props, - spec=spec, - references_used=references_used, - ) - ) - return schema.type, properties - - @classmethod - def _process_array_schema( - cls, schema: Schema, name: str, spec: OpenAPISpec, references_used: List[str] - ) -> str: - items = schema.items - if items is not None: - if isinstance(items, Reference): - ref_name = items.ref.split("/")[-1] - if ref_name not in references_used: - references_used.append(ref_name) - items = spec.get_referenced_schema(items) - else: - pass - return f"Array<{ref_name}>" - else: - pass - - if isinstance(items, Schema): - array_type = cls.from_schema( - schema=items, - name=f"{name}Item", - required=True, # TODO: Add required - spec=spec, - references_used=references_used, - ) - return f"Array<{array_type.type}>" - - return "array" - - @classmethod - def from_schema( - cls, - schema: Schema, - name: str, - required: bool, - spec: OpenAPISpec, - references_used: Optional[List[str]] = None, - ) -> "APIRequestBodyProperty": - """Recursively populate from an OpenAPI 
Schema.""" - if references_used is None: - references_used = [] - - schema_type = schema.type - properties: List[APIRequestBodyProperty] = [] - if schema_type == "object" and schema.properties: - schema_type, properties = cls._process_object_schema( - schema, spec, references_used - ) - elif schema_type == "array": - schema_type = cls._process_array_schema(schema, name, spec, references_used) - elif schema_type in PRIMITIVE_TYPES: - # Use the primitive type directly - pass - elif schema_type is None: - # No typing specified/parsed. WIll map to 'any' - pass - else: - raise ValueError(f"Unsupported type: {schema_type}") - - return cls( - name=name, - required=required, - type=schema_type, - default=schema.default, - description=schema.description, - properties=properties, - references_used=references_used, - ) - - -class APIRequestBody(BaseModel): - """A model for a request body.""" - - description: Optional[str] = Field(alias="description") - """The description of the request body.""" - - properties: List[APIRequestBodyProperty] = Field(alias="properties") - - # E.g., application/json - we only support JSON at the moment. - media_type: str = Field(alias="media_type") - """The media type of the request body.""" - - @classmethod - def _process_supported_media_type( - cls, - media_type_obj: MediaType, - spec: OpenAPISpec, - ) -> List[APIRequestBodyProperty]: - """Process the media type of the request body.""" - references_used = [] - schema = media_type_obj.media_type_schema - if isinstance(schema, Reference): - references_used.append(schema.ref.split("/")[-1]) - schema = spec.get_referenced_schema(schema) - if schema is None: - raise ValueError( - f"Could not resolve schema for media type: {media_type_obj}" - ) - api_request_body_properties = [] - required_properties = schema.required or [] - if schema.type == "object" and schema.properties: - for prop_name, prop_schema in schema.properties.items(): - if isinstance(prop_schema, Reference): - prop_schema = spec.get_referenced_schema(prop_schema) - - api_request_body_properties.append( - APIRequestBodyProperty.from_schema( - schema=prop_schema, - name=prop_name, - required=prop_name in required_properties, - spec=spec, - ) - ) - else: - api_request_body_properties.append( - APIRequestBodyProperty( - name="body", - required=True, - type=schema.type, - default=schema.default, - description=schema.description, - properties=[], - references_used=references_used, - ) - ) - - return api_request_body_properties - - @classmethod - def from_request_body( - cls, request_body: RequestBody, spec: OpenAPISpec - ) -> "APIRequestBody": - """Instantiate from an OpenAPI RequestBody.""" - properties = [] - for media_type, media_type_obj in request_body.content.items(): - if media_type not in _SUPPORTED_MEDIA_TYPES: - continue - api_request_body_properties = cls._process_supported_media_type( - media_type_obj, - spec, - ) - properties.extend(api_request_body_properties) - - return cls( - description=request_body.description, - properties=properties, - media_type=media_type, - ) - - -class APIOperation(BaseModel): - """A model for a single API operation.""" - - operation_id: str = Field(alias="operation_id") - """The unique identifier of the operation.""" - - description: Optional[str] = Field(alias="description") - """The description of the operation.""" - - base_url: str = Field(alias="base_url") - """The base URL of the operation.""" - - path: str = Field(alias="path") - """The path of the operation.""" - - method: HTTPVerb = Field(alias="method") - """The HTTP 
method of the operation.""" - - properties: Sequence[APIProperty] = Field(alias="properties") - - # TODO: Add parse in used components to be able to specify what type of - # referenced object it is. - # """The properties of the operation.""" - # components: Dict[str, BaseModel] = Field(alias="components") - - request_body: Optional[APIRequestBody] = Field(alias="request_body") - """The request body of the operation.""" - - @staticmethod - def _get_properties_from_parameters( - parameters: List[Parameter], spec: OpenAPISpec - ) -> List[APIProperty]: - """Get the properties of the operation.""" - properties = [] - for param in parameters: - if APIProperty.is_supported_location(param.param_in): - properties.append(APIProperty.from_parameter(param, spec)) - elif param.required: - raise ValueError( - INVALID_LOCATION_TEMPL.format( - location=param.param_in, name=param.name - ) - ) - else: - logger.warning( - INVALID_LOCATION_TEMPL.format( - location=param.param_in, name=param.name - ) - + " Ignoring optional parameter" - ) - pass - return properties - - @classmethod - def from_openapi_url( - cls, - spec_url: str, - path: str, - method: str, - ) -> "APIOperation": - """Create an APIOperation from an OpenAPI URL.""" - spec = OpenAPISpec.from_url(spec_url) - return cls.from_openapi_spec(spec, path, method) - - @classmethod - def from_openapi_spec( - cls, - spec: OpenAPISpec, - path: str, - method: str, - ) -> "APIOperation": - """Create an APIOperation from an OpenAPI spec.""" - operation = spec.get_operation(path, method) - parameters = spec.get_parameters_for_operation(operation) - properties = cls._get_properties_from_parameters(parameters, spec) - operation_id = OpenAPISpec.get_cleaned_operation_id(operation, path, method) - request_body = spec.get_request_body_for_operation(operation) - api_request_body = ( - APIRequestBody.from_request_body(request_body, spec) - if request_body is not None - else None - ) - description = operation.description or operation.summary - if not description and spec.paths is not None: - description = spec.paths[path].description or spec.paths[path].summary - return cls( - operation_id=operation_id, - description=description, - base_url=spec.base_url, - path=path, - method=method, - properties=properties, - request_body=api_request_body, - ) - - @staticmethod - def ts_type_from_python(type_: SCHEMA_TYPE) -> str: - if type_ is None: - # TODO: Handle Nones better. These often result when - # parsing specs that are < v3 - return "any" - elif isinstance(type_, str): - return { - "str": "string", - "integer": "number", - "float": "number", - "date-time": "string", - }.get(type_, type_) - elif isinstance(type_, tuple): - return f"Array<{APIOperation.ts_type_from_python(type_[0])}>" - elif isinstance(type_, type) and issubclass(type_, Enum): - return " | ".join([f"'{e.value}'" for e in type_]) - else: - return str(type_) - - def _format_nested_properties( - self, properties: List[APIRequestBodyProperty], indent: int = 2 - ) -> str: - """Format nested properties.""" - formatted_props = [] - - for prop in properties: - prop_name = prop.name - prop_type = self.ts_type_from_python(prop.type) - prop_required = "" if prop.required else "?" 
- prop_desc = f"/* {prop.description} */" if prop.description else "" - - if prop.properties: - nested_props = self._format_nested_properties( - prop.properties, indent + 2 - ) - prop_type = f"{{\n{nested_props}\n{' ' * indent}}}" - - formatted_props.append( - f"{prop_desc}\n{' ' * indent}{prop_name}{prop_required}: {prop_type}," - ) - - return "\n".join(formatted_props) - - def to_typescript(self) -> str: - """Get typescript string representation of the operation.""" - operation_name = self.operation_id - params = [] - - if self.request_body: - formatted_request_body_props = self._format_nested_properties( - self.request_body.properties - ) - params.append(formatted_request_body_props) - - for prop in self.properties: - prop_name = prop.name - prop_type = self.ts_type_from_python(prop.type) - prop_required = "" if prop.required else "?" - prop_desc = f"/* {prop.description} */" if prop.description else "" - params.append(f"{prop_desc}\n\t\t{prop_name}{prop_required}: {prop_type},") - - formatted_params = "\n".join(params).strip() - description_str = f"/* {self.description} */" if self.description else "" - typescript_definition = f""" -{description_str} -type {operation_name} = (_: {{ -{formatted_params} -}}) => any; -""" - return typescript_definition.strip() - - @property - def query_params(self) -> List[str]: - return [ - property.name - for property in self.properties - if property.location == APIPropertyLocation.QUERY - ] - - @property - def path_params(self) -> List[str]: - return [ - property.name - for property in self.properties - if property.location == APIPropertyLocation.PATH - ] - - @property - def body_params(self) -> List[str]: - if self.request_body is None: - return [] - return [prop.name for prop in self.request_body.properties] diff --git a/nextpy/ai/tools/toolkits/openapi_toolkit/utils.py b/nextpy/ai/tools/toolkits/openapi_toolkit/utils.py deleted file mode 100644 index e2a22458..00000000 --- a/nextpy/ai/tools/toolkits/openapi_toolkit/utils.py +++ /dev/null @@ -1,288 +0,0 @@ -# This file has been modified by the Nextpy Team in 2023 using AI tools and automation scripts. -# We have rigorously tested these modifications to ensure reliability and performance. Based on successful test results, we are confident in the quality and stability of these changes. - -"""Utility functions for parsing an OpenAPI spec.""" -import copy -import json -import logging -import re -from enum import Enum -from pathlib import Path -from typing import Dict, List, Optional, Union - -import requests -import yaml -from openapi_schema_pydantic import ( - Components, - OpenAPI, - Operation, - Parameter, - PathItem, - Paths, - Reference, - RequestBody, - Schema, -) -from pydantic import ValidationError - -logger = logging.getLogger(__name__) - - -class HTTPVerb(str, Enum): - """Enumerator of the HTTP verbs.""" - - GET = "get" - PUT = "put" - POST = "post" - DELETE = "delete" - OPTIONS = "options" - HEAD = "head" - PATCH = "patch" - TRACE = "trace" - - @classmethod - def from_str(cls, verb: str) -> "HTTPVerb": - """Parse an HTTP verb.""" - try: - return cls(verb) - except ValueError: - raise ValueError(f"Invalid HTTP verb. 
Valid values are {cls.__members__}") - - -class OpenAPISpec(OpenAPI): - """OpenAPI Model that removes misformatted parts of the spec.""" - - @property - def _paths_strict(self) -> Paths: - if not self.paths: - raise ValueError("No paths found in spec") - return self.paths - - def _get_path_strict(self, path: str) -> PathItem: - path_item = self._paths_strict.get(path) - if not path_item: - raise ValueError(f"No path found for {path}") - return path_item - - @property - def _components_strict(self) -> Components: - """Get components or err.""" - if self.components is None: - raise ValueError("No components found in spec. ") - return self.components - - @property - def _parameters_strict(self) -> Dict[str, Union[Parameter, Reference]]: - """Get parameters or err.""" - parameters = self._components_strict.parameters - if parameters is None: - raise ValueError("No parameters found in spec. ") - return parameters - - @property - def _schemas_strict(self) -> Dict[str, Schema]: - """Get the dictionary of schemas or err.""" - schemas = self._components_strict.schemas - if schemas is None: - raise ValueError("No schemas found in spec. ") - return schemas - - @property - def _request_bodies_strict(self) -> Dict[str, Union[RequestBody, Reference]]: - """Get the request body or err.""" - request_bodies = self._components_strict.requestBodies - if request_bodies is None: - raise ValueError("No request body found in spec. ") - return request_bodies - - def _get_referenced_parameter(self, ref: Reference) -> Union[Parameter, Reference]: - """Get a parameter (or nested reference) or err.""" - ref_name = ref.ref.split("/")[-1] - parameters = self._parameters_strict - if ref_name not in parameters: - raise ValueError(f"No parameter found for {ref_name}") - return parameters[ref_name] - - def _get_root_referenced_parameter(self, ref: Reference) -> Parameter: - """Get the root reference or err.""" - parameter = self._get_referenced_parameter(ref) - while isinstance(parameter, Reference): - parameter = self._get_referenced_parameter(parameter) - return parameter - - def get_referenced_schema(self, ref: Reference) -> Schema: - """Get a schema (or nested reference) or err.""" - ref_name = ref.ref.split("/")[-1] - schemas = self._schemas_strict - if ref_name not in schemas: - raise ValueError(f"No schema found for {ref_name}") - return schemas[ref_name] - - def get_schema(self, schema: Union[Reference, Schema]) -> Schema: - if isinstance(schema, Reference): - return self.get_referenced_schema(schema) - return schema - - def _get_root_referenced_schema(self, ref: Reference) -> Schema: - """Get the root reference or err.""" - schema = self.get_referenced_schema(ref) - while isinstance(schema, Reference): - schema = self.get_referenced_schema(schema) - return schema - - def _get_referenced_request_body( - self, ref: Reference - ) -> Optional[Union[Reference, RequestBody]]: - """Get a request body (or nested reference) or err.""" - ref_name = ref.ref.split("/")[-1] - request_bodies = self._request_bodies_strict - if ref_name not in request_bodies: - raise ValueError(f"No request body found for {ref_name}") - return request_bodies[ref_name] - - def _get_root_referenced_request_body( - self, ref: Reference - ) -> Optional[RequestBody]: - """Get the root request Body or err.""" - request_body = self._get_referenced_request_body(ref) - while isinstance(request_body, Reference): - request_body = self._get_referenced_request_body(request_body) - return request_body - - @staticmethod - def _alert_unsupported_spec(obj: dict) 
-> None: - """Alert if the spec is not supported.""" - warning_message = ( - " This may result in degraded performance." - + " Convert your OpenAPI spec to 3.1.* spec" - + " for better support." - ) - swagger_version = obj.get("swagger") - openapi_version = obj.get("openapi") - if isinstance(openapi_version, str): - if openapi_version != "3.1.0": - logger.warning( - f"Attempting to load an OpenAPI {openapi_version}" - f" spec. {warning_message}" - ) - else: - pass - elif isinstance(swagger_version, str): - logger.warning( - f"Attempting to load a Swagger {swagger_version}" - f" spec. {warning_message}" - ) - else: - raise ValueError( - "Attempting to load an unsupported spec:" - f"\n\n{obj}\n{warning_message}" - ) - - @classmethod - def parse_obj(cls, obj: dict) -> "OpenAPISpec": - try: - cls._alert_unsupported_spec(obj) - return super().parse_obj(obj) - except ValidationError as e: - # We are handling possibly misconfigured specs and want to do a best-effort - # job to get a reasonable interface out of it. - new_obj = copy.deepcopy(obj) - for error in e.errors(): - keys = error["loc"] - item = new_obj - for key in keys[:-1]: - item = item[key] - item.pop(keys[-1], None) - return cls.parse_obj(new_obj) - - @classmethod - def from_spec_dict(cls, spec_dict: dict) -> "OpenAPISpec": - """Get an OpenAPI spec from a dict.""" - return cls.parse_obj(spec_dict) - - @classmethod - def from_text(cls, text: str) -> "OpenAPISpec": - """Get an OpenAPI spec from a text.""" - try: - spec_dict = json.loads(text) - except json.JSONDecodeError: - spec_dict = yaml.safe_load(text) - return cls.from_spec_dict(spec_dict) - - @classmethod - def from_file(cls, path: Union[str, Path]) -> "OpenAPISpec": - """Get an OpenAPI spec from a file path.""" - path_ = path if isinstance(path, Path) else Path(path) - if not path_.exists(): - raise FileNotFoundError(f"{path} does not exist") - with path_.open("r") as f: - return cls.from_text(f.read()) - - @classmethod - def from_url(cls, url: str) -> "OpenAPISpec": - """Get an OpenAPI spec from a URL.""" - response = requests.get(url) - return cls.from_text(response.text) - - @property - def base_url(self) -> str: - """Get the base url.""" - return self.servers[0].url - - def get_methods_for_path(self, path: str) -> List[str]: - """Return a list of valid methods for the specified path.""" - path_item = self._get_path_strict(path) - results = [] - for method in HTTPVerb: - operation = getattr(path_item, method.value, None) - if isinstance(operation, Operation): - results.append(method.value) - return results - - def get_parameters_for_path(self, path: str) -> List[Parameter]: - path_item = self._get_path_strict(path) - parameters = [] - if not path_item.parameters: - return [] - for parameter in path_item.parameters: - if isinstance(parameter, Reference): - parameter = self._get_root_referenced_parameter(parameter) - parameters.append(parameter) - return parameters - - def get_operation(self, path: str, method: str) -> Operation: - """Get the operation object for a given path and HTTP method.""" - path_item = self._get_path_strict(path) - operation_obj = getattr(path_item, method, None) - if not isinstance(operation_obj, Operation): - raise ValueError(f"No {method} method found for {path}") - return operation_obj - - def get_parameters_for_operation(self, operation: Operation) -> List[Parameter]: - """Get the components for a given operation.""" - parameters = [] - if operation.parameters: - for parameter in operation.parameters: - if isinstance(parameter, Reference): - 
parameter = self._get_root_referenced_parameter(parameter) - parameters.append(parameter) - return parameters - - def get_request_body_for_operation( - self, operation: Operation - ) -> Optional[RequestBody]: - """Get the request body for a given operation.""" - request_body = operation.requestBody - if isinstance(request_body, Reference): - request_body = self._get_root_referenced_request_body(request_body) - return request_body - - @staticmethod - def get_cleaned_operation_id(operation: Operation, path: str, method: str) -> str: - """Get a cleaned operation id from an operation id.""" - operation_id = operation.operationId - if operation_id is None: - # Replace all punctuation of any kind with underscore - path = re.sub(r"[^a-zA-Z0-9]", "_", path.lstrip("/")) - operation_id = f"{path}_{method}" - return operation_id.replace("-", "_").replace(".", "_").replace("/", "_") diff --git a/nextpy/ai/tools/toolkits/requests_toolkit/example.py b/nextpy/ai/tools/toolkits/requests_toolkit/example.py deleted file mode 100644 index 5269b1c3..00000000 --- a/nextpy/ai/tools/toolkits/requests_toolkit/example.py +++ /dev/null @@ -1,19 +0,0 @@ -# This file has been modified by the Nextpy Team in 2023 using AI tools and automation scripts. -# We have rigorously tested these modifications to ensure reliability and performance. Based on successful test results, we are confident in the quality and stability of these changes. - -from nextpy.ai.tools.toolkits.requests_toolkit.requests.base import ( - RequestsGetTool, - TextRequestsWrapper, -) - -# Instantiate the RequestsGetTool with TextRequestsWrapper as requests_wrapper -get_tool = RequestsGetTool(requests_wrapper=TextRequestsWrapper()) - -# The URL you want to get data from -url = "https://yaml.org" - -# Use the get method -response = get_tool._run(url) - -# Output the response -print(response) diff --git a/nextpy/ai/tools/toolkits/requests_toolkit/request.py b/nextpy/ai/tools/toolkits/requests_toolkit/request.py deleted file mode 100644 index 7a4c47cb..00000000 --- a/nextpy/ai/tools/toolkits/requests_toolkit/request.py +++ /dev/null @@ -1,33 +0,0 @@ -# This file has been modified by the Nextpy Team in 2023 using AI tools and automation scripts. -# We have rigorously tested these modifications to ensure reliability and performance. Based on successful test results, we are confident in the quality and stability of these changes. 
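# Editor's usage sketch (hedged) for the OpenAPI helpers deleted above. The
# spec URL, path, and method are placeholders; any OpenAPI 3.x document with
# a matching GET operation works.
from nextpy.ai.tools.toolkits.openapi_toolkit.base import APIOperation

op = APIOperation.from_openapi_url(
    "https://example.com/openapi.json",
    path="/pets",
    method="get",
)
print(op.to_typescript())  # TypeScript-style signature to embed in a prompt
print(op.query_params)     # names of the operation's query-string parameters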
- -"""Tools for making requests to an API endpoint.""" - -from typing import List - -from nextpy.ai.tools.basetool import BaseTool -from nextpy.ai.tools.toolkits.base import BaseToolkit -from nextpy.ai.tools.toolkits.requests_toolkit.requests.base import ( - RequestsDeleteTool, - RequestsGetTool, - RequestsPatchTool, - RequestsPostTool, - RequestsPutTool, -) -from nextpy.ai.tools.toolkits.requests_toolkit.requests.utils import TextRequestsWrapper - - -class RequestsToolkit(BaseToolkit): - """Base class for requests tools.""" - - requests_wrapper: TextRequestsWrapper - - def get_tools(self) -> List[BaseTool]: - """Get the tools in the toolkit.""" - return [ - RequestsGetTool(requests_wrapper=self.requests_wrapper), - RequestsPostTool(requests_wrapper=self.requests_wrapper), - RequestsPatchTool(requests_wrapper=self.requests_wrapper), - RequestsPutTool(requests_wrapper=self.requests_wrapper), - RequestsDeleteTool(requests_wrapper=self.requests_wrapper), - ] diff --git a/nextpy/ai/tools/toolkits/requests_toolkit/requests/base.py b/nextpy/ai/tools/toolkits/requests_toolkit/requests/base.py deleted file mode 100644 index 024f3734..00000000 --- a/nextpy/ai/tools/toolkits/requests_toolkit/requests/base.py +++ /dev/null @@ -1,112 +0,0 @@ -# This file has been modified by the Nextpy Team in 2023 using AI tools and automation scripts. -# We have rigorously tested these modifications to ensure reliability and performance. Based on successful test results, we are confident in the quality and stability of these changes. - -"""Tools for making requests to an API endpoint.""" -import json -from typing import Any, Dict - -from pydantic import BaseModel - -from nextpy.ai.tools.basetool import BaseTool -from nextpy.ai.tools.toolkits.requests_toolkit.requests.utils import TextRequestsWrapper - - -def _parse_input(text: str) -> Dict[str, Any]: - """Parse the json string into a dict.""" - return json.loads(text) - - -def _clean_url(url: str) -> str: - """Strips quotes from the url.""" - return url.strip("\"'") - - -class BaseRequestsTool(BaseModel): - """Base class for requests tools.""" - - requests_wrapper: TextRequestsWrapper - - -class RequestsGetTool(BaseRequestsTool, BaseTool): - """Tool for making a GET request to an API endpoint.""" - - name = "requests_get" - description = "A portal to the internet. Use this when you need to get specific content from a website. Input should be a url (i.e. https://www.google.com). The output will be the text response of the GET request." - - def _run(self, url: str) -> str: - """Run the tool.""" - return self.requests_wrapper.get(_clean_url(url)) - - -class RequestsPostTool(BaseRequestsTool, BaseTool): - """Tool for making a POST request to an API endpoint.""" - - name = "requests_post" - description = """Use this when you want to POST to a website. - Input should be a json string with two keys: "url" and "data". - The value of "url" should be a string, and the value of "data" should be a dictionary of - key-value pairs you want to POST to the url. - Be careful to always use double quotes for strings in the json string - The output will be the text response of the POST request. 
- """ - - def _run(self, text: str) -> str: - """Run the tool.""" - try: - data = _parse_input(text) - return self.requests_wrapper.post(_clean_url(data["url"]), data["data"]) - except Exception as e: - return repr(e) - - -class RequestsPatchTool(BaseRequestsTool, BaseTool): - """Tool for making a PATCH request to an API endpoint.""" - - name = "requests_patch" - description = """Use this when you want to PATCH to a website. - Input should be a json string with two keys: "url" and "data". - The value of "url" should be a string, and the value of "data" should be a dictionary of - key-value pairs you want to PATCH to the url. - Be careful to always use double quotes for strings in the json string - The output will be the text response of the PATCH request. - """ - - def _run(self, text: str) -> str: - """Run the tool.""" - try: - data = _parse_input(text) - return self.requests_wrapper.patch(_clean_url(data["url"]), data["data"]) - except Exception as e: - return repr(e) - - -class RequestsPutTool(BaseRequestsTool, BaseTool): - """Tool for making a PUT request to an API endpoint.""" - - name = "requests_put" - description = """Use this when you want to PUT to a website. - Input should be a json string with two keys: "url" and "data". - The value of "url" should be a string, and the value of "data" should be a dictionary of - key-value pairs you want to PUT to the url. - Be careful to always use double quotes for strings in the json string. - The output will be the text response of the PUT request. - """ - - def _run(self, text: str) -> str: - """Run the tool.""" - try: - data = _parse_input(text) - return self.requests_wrapper.put(_clean_url(data["url"]), data["data"]) - except Exception as e: - return repr(e) - - -class RequestsDeleteTool(BaseRequestsTool, BaseTool): - """Tool for making a DELETE request to an API endpoint.""" - - name = "requests_delete" - description = "A portal to the internet. Use this when you need to make a DELETE request to a URL. Input should be a specific url, and the output will be the text response of the DELETE request." - - def _run(self, url: str) -> str: - """Run the tool.""" - return self.requests_wrapper.delete(_clean_url(url)) diff --git a/nextpy/ai/tools/toolkits/requests_toolkit/requests/requests.py b/nextpy/ai/tools/toolkits/requests_toolkit/requests/requests.py deleted file mode 100644 index cf86e7f5..00000000 --- a/nextpy/ai/tools/toolkits/requests_toolkit/requests/requests.py +++ /dev/null @@ -1,100 +0,0 @@ -# This file has been modified by the Nextpy Team in 2023 using AI tools and automation scripts. -# We have rigorously tested these modifications to ensure reliability and performance. Based on successful test results, we are confident in the quality and stability of these changes. - -"""Lightweight wrapper around requests library, with async support.""" -from typing import Any, Dict, Optional - -import aiohttp -import requests -from pydantic import BaseModel, Extra - - -class Requests(BaseModel): - """Wrapper around requests to handle auth and async. - - The main purpose of this wrapper is to handle authentication (by saving - headers) and enable easy async methods on the same base object. 
- """ - - headers: Optional[Dict[str, str]] = None - aiosession: Optional[aiohttp.ClientSession] = None - auth: Optional[Any] = None - - class Config: - """Configuration for this pydantic object.""" - - extra = Extra.forbid - arbitrary_types_allowed = True - - def get(self, url: str, **kwargs: Any) -> requests.Response: - """GET the URL and return the text.""" - return requests.get(url, headers=self.headers, auth=self.auth, **kwargs) - - def post(self, url: str, data: Dict[str, Any], **kwargs: Any) -> requests.Response: - """POST to the URL and return the text.""" - return requests.post( - url, json=data, headers=self.headers, auth=self.auth, **kwargs - ) - - def patch(self, url: str, data: Dict[str, Any], **kwargs: Any) -> requests.Response: - """PATCH the URL and return the text.""" - return requests.patch( - url, json=data, headers=self.headers, auth=self.auth, **kwargs - ) - - def put(self, url: str, data: Dict[str, Any], **kwargs: Any) -> requests.Response: - """PUT the URL and return the text.""" - return requests.put( - url, json=data, headers=self.headers, auth=self.auth, **kwargs - ) - - def delete(self, url: str, **kwargs: Any) -> requests.Response: - """DELETE the URL and return the text.""" - return requests.delete(url, headers=self.headers, auth=self.auth, **kwargs) - - -class TextRequestsWrapper(BaseModel): - """Lightweight wrapper around requests library. - - The main purpose of this wrapper is to always return a text output. - """ - - headers: Optional[Dict[str, str]] = None - aiosession: Optional[aiohttp.ClientSession] = None - auth: Optional[Any] = None - - class Config: - """Configuration for this pydantic object.""" - - extra = Extra.forbid - arbitrary_types_allowed = True - - @property - def requests(self) -> Requests: - return Requests( - headers=self.headers, aiosession=self.aiosession, auth=self.auth - ) - - def get(self, url: str, **kwargs: Any) -> str: - """GET the URL and return the text.""" - return self.requests.get(url, **kwargs).text - - def post(self, url: str, data: Dict[str, Any], **kwargs: Any) -> str: - """POST to the URL and return the text.""" - return self.requests.post(url, data, **kwargs).text - - def patch(self, url: str, data: Dict[str, Any], **kwargs: Any) -> str: - """PATCH the URL and return the text.""" - return self.requests.patch(url, data, **kwargs).text - - def put(self, url: str, data: Dict[str, Any], **kwargs: Any) -> str: - """PUT the URL and return the text.""" - return self.requests.put(url, data, **kwargs).text - - def delete(self, url: str, **kwargs: Any) -> str: - """DELETE the URL and return the text.""" - return self.requests.delete(url, **kwargs).text - - -# For backwards compatibility -RequestsWrapper = TextRequestsWrapper diff --git a/nextpy/ai/tools/toolkits/requests_toolkit/requests/tool.py b/nextpy/ai/tools/toolkits/requests_toolkit/requests/tool.py deleted file mode 100644 index 6a90e970..00000000 --- a/nextpy/ai/tools/toolkits/requests_toolkit/requests/tool.py +++ /dev/null @@ -1,112 +0,0 @@ -# This file has been modified by the Nextpy Team in 2023 using AI tools and automation scripts. -# We have rigorously tested these modifications to ensure reliability and performance. Based on successful test results, we are confident in the quality and stability of these changes. 
- -"""Tools for making requests to an API endpoint.""" -import json -from typing import Any, Dict - -from pydantic import BaseModel - -from nextpy.ai.tools.basetool import BaseTool -from nextpy.ai.tools.requests.requests import TextRequestsWrapper - - -def _parse_input(text: str) -> Dict[str, Any]: - """Parse the json string into a dict.""" - return json.loads(text) - - -def _clean_url(url: str) -> str: - """Strips quotes from the url.""" - return url.strip("\"'") - - -class BaseRequestsTool(BaseModel): - """Base class for requests tools.""" - - requests_wrapper: TextRequestsWrapper - - -class RequestsGetTool(BaseRequestsTool, BaseTool): - """Tool for making a GET request to an API endpoint.""" - - name = "requests_get" - description = "A portal to the internet. Use this when you need to get specific content from a website. Input should be a url (i.e. https://www.google.com). The output will be the text response of the GET request." - - def _run(self, url: str) -> str: - """Run the tool.""" - return self.requests_wrapper.get(_clean_url(url)) - - -class RequestsPostTool(BaseRequestsTool, BaseTool): - """Tool for making a POST request to an API endpoint.""" - - name = "requests_post" - description = """Use this when you want to POST to a website. - Input should be a json string with two keys: "url" and "data". - The value of "url" should be a string, and the value of "data" should be a dictionary of - key-value pairs you want to POST to the url. - Be careful to always use double quotes for strings in the json string - The output will be the text response of the POST request. - """ - - def _run(self, text: str) -> str: - """Run the tool.""" - try: - data = _parse_input(text) - return self.requests_wrapper.post(_clean_url(data["url"]), data["data"]) - except Exception as e: - return repr(e) - - -class RequestsPatchTool(BaseRequestsTool, BaseTool): - """Tool for making a PATCH request to an API endpoint.""" - - name = "requests_patch" - description = """Use this when you want to PATCH to a website. - Input should be a json string with two keys: "url" and "data". - The value of "url" should be a string, and the value of "data" should be a dictionary of - key-value pairs you want to PATCH to the url. - Be careful to always use double quotes for strings in the json string - The output will be the text response of the PATCH request. - """ - - def _run(self, text: str) -> str: - """Run the tool.""" - try: - data = _parse_input(text) - return self.requests_wrapper.patch(_clean_url(data["url"]), data["data"]) - except Exception as e: - return repr(e) - - -class RequestsPutTool(BaseRequestsTool, BaseTool): - """Tool for making a PUT request to an API endpoint.""" - - name = "requests_put" - description = """Use this when you want to PUT to a website. - Input should be a json string with two keys: "url" and "data". - The value of "url" should be a string, and the value of "data" should be a dictionary of - key-value pairs you want to PUT to the url. - Be careful to always use double quotes for strings in the json string. - The output will be the text response of the PUT request. - """ - - def _run(self, text: str) -> str: - """Run the tool.""" - try: - data = _parse_input(text) - return self.requests_wrapper.put(_clean_url(data["url"]), data["data"]) - except Exception as e: - return repr(e) - - -class RequestsDeleteTool(BaseRequestsTool, BaseTool): - """Tool for making a DELETE request to an API endpoint.""" - - name = "requests_delete" - description = "A portal to the internet. 
Use this when you need to make a DELETE request to a URL. Input should be a specific url, and the output will be the text response of the DELETE request." - - def _run(self, url: str) -> str: - """Run the tool.""" - return self.requests_wrapper.delete(_clean_url(url)) diff --git a/nextpy/ai/tools/toolkits/requests_toolkit/requests/utils.py b/nextpy/ai/tools/toolkits/requests_toolkit/requests/utils.py deleted file mode 100644 index cf86e7f5..00000000 --- a/nextpy/ai/tools/toolkits/requests_toolkit/requests/utils.py +++ /dev/null @@ -1,100 +0,0 @@ -# This file has been modified by the Nextpy Team in 2023 using AI tools and automation scripts. -# We have rigorously tested these modifications to ensure reliability and performance. Based on successful test results, we are confident in the quality and stability of these changes. - -"""Lightweight wrapper around requests library, with async support.""" -from typing import Any, Dict, Optional - -import aiohttp -import requests -from pydantic import BaseModel, Extra - - -class Requests(BaseModel): - """Wrapper around requests to handle auth and async. - - The main purpose of this wrapper is to handle authentication (by saving - headers) and enable easy async methods on the same base object. - """ - - headers: Optional[Dict[str, str]] = None - aiosession: Optional[aiohttp.ClientSession] = None - auth: Optional[Any] = None - - class Config: - """Configuration for this pydantic object.""" - - extra = Extra.forbid - arbitrary_types_allowed = True - - def get(self, url: str, **kwargs: Any) -> requests.Response: - """GET the URL and return the text.""" - return requests.get(url, headers=self.headers, auth=self.auth, **kwargs) - - def post(self, url: str, data: Dict[str, Any], **kwargs: Any) -> requests.Response: - """POST to the URL and return the text.""" - return requests.post( - url, json=data, headers=self.headers, auth=self.auth, **kwargs - ) - - def patch(self, url: str, data: Dict[str, Any], **kwargs: Any) -> requests.Response: - """PATCH the URL and return the text.""" - return requests.patch( - url, json=data, headers=self.headers, auth=self.auth, **kwargs - ) - - def put(self, url: str, data: Dict[str, Any], **kwargs: Any) -> requests.Response: - """PUT the URL and return the text.""" - return requests.put( - url, json=data, headers=self.headers, auth=self.auth, **kwargs - ) - - def delete(self, url: str, **kwargs: Any) -> requests.Response: - """DELETE the URL and return the text.""" - return requests.delete(url, headers=self.headers, auth=self.auth, **kwargs) - - -class TextRequestsWrapper(BaseModel): - """Lightweight wrapper around requests library. - - The main purpose of this wrapper is to always return a text output. 
- """ - - headers: Optional[Dict[str, str]] = None - aiosession: Optional[aiohttp.ClientSession] = None - auth: Optional[Any] = None - - class Config: - """Configuration for this pydantic object.""" - - extra = Extra.forbid - arbitrary_types_allowed = True - - @property - def requests(self) -> Requests: - return Requests( - headers=self.headers, aiosession=self.aiosession, auth=self.auth - ) - - def get(self, url: str, **kwargs: Any) -> str: - """GET the URL and return the text.""" - return self.requests.get(url, **kwargs).text - - def post(self, url: str, data: Dict[str, Any], **kwargs: Any) -> str: - """POST to the URL and return the text.""" - return self.requests.post(url, data, **kwargs).text - - def patch(self, url: str, data: Dict[str, Any], **kwargs: Any) -> str: - """PATCH the URL and return the text.""" - return self.requests.patch(url, data, **kwargs).text - - def put(self, url: str, data: Dict[str, Any], **kwargs: Any) -> str: - """PUT the URL and return the text.""" - return self.requests.put(url, data, **kwargs).text - - def delete(self, url: str, **kwargs: Any) -> str: - """DELETE the URL and return the text.""" - return self.requests.delete(url, **kwargs).text - - -# For backwards compatibility -RequestsWrapper = TextRequestsWrapper diff --git a/nextpy/ai/tools/toolkits/requests_toolkit/test_tool_requests.py b/nextpy/ai/tools/toolkits/requests_toolkit/test_tool_requests.py deleted file mode 100644 index 8e5dd741..00000000 --- a/nextpy/ai/tools/toolkits/requests_toolkit/test_tool_requests.py +++ /dev/null @@ -1,77 +0,0 @@ -# This file has been modified by the Nextpy Team in 2023 using AI tools and automation scripts. -# We have rigorously tested these modifications to ensure reliability and performance. Based on successful test results, we are confident in the quality and stability of these changes. 
- -from typing import Any, Dict - -import pytest - -from nextpy.ai.tools.toolkits.requests_toolkit.requests.base import ( - RequestsDeleteTool, - RequestsGetTool, - RequestsPatchTool, - RequestsPostTool, - RequestsPutTool, - _parse_input, -) -from nextpy.ai.tools.toolkits.requests_toolkit.requests.utils import TextRequestsWrapper - - -class _MockTextRequestsWrapper(TextRequestsWrapper): - @staticmethod - def get(url: str, **kwargs: Any) -> str: - return "get_response" - - @staticmethod - def post(url: str, data: Dict[str, Any], **kwargs: Any) -> str: - return f"post {str(data)}" - - @staticmethod - def patch(url: str, data: Dict[str, Any], **kwargs: Any) -> str: - return f"patch {str(data)}" - - @staticmethod - def put(url: str, data: Dict[str, Any], **kwargs: Any) -> str: - return f"put {str(data)}" - - @staticmethod - def delete(url: str, **kwargs: Any) -> str: - return "delete_response" - - -@pytest.fixture -def mock_requests_wrapper() -> TextRequestsWrapper: - return _MockTextRequestsWrapper() - - -def test_parse_input() -> None: - input_text = '{"url": "https://example.com", "data": {"key": "value"}}' - expected_output = {"url": "https://example.com", "data": {"key": "value"}} - assert _parse_input(input_text) == expected_output - - -def test_requests_get_tool(mock_requests_wrapper: TextRequestsWrapper) -> None: - tool = RequestsGetTool(requests_wrapper=mock_requests_wrapper) - assert tool.run("https://example.com") == "get_response" - - -def test_requests_post_tool(mock_requests_wrapper: TextRequestsWrapper) -> None: - tool = RequestsPostTool(requests_wrapper=mock_requests_wrapper) - input_text = '{"url": "https://example.com", "data": {"key": "value"}}' - assert tool.run(input_text) == "post {'key': 'value'}" - - -def test_requests_patch_tool(mock_requests_wrapper: TextRequestsWrapper) -> None: - tool = RequestsPatchTool(requests_wrapper=mock_requests_wrapper) - input_text = '{"url": "https://example.com", "data": {"key": "value"}}' - assert tool.run(input_text) == "patch {'key': 'value'}" - - -def test_requests_put_tool(mock_requests_wrapper: TextRequestsWrapper) -> None: - tool = RequestsPutTool(requests_wrapper=mock_requests_wrapper) - input_text = '{"url": "https://example.com", "data": {"key": "value"}}' - assert tool.run(input_text) == "put {'key': 'value'}" - - -def test_requests_delete_tool(mock_requests_wrapper: TextRequestsWrapper) -> None: - tool = RequestsDeleteTool(requests_wrapper=mock_requests_wrapper) - assert tool.run("https://example.com") == "delete_response" diff --git a/nextpy/ai/tools/toolkits/requests_toolkit/test_toolkit_requests.py b/nextpy/ai/tools/toolkits/requests_toolkit/test_toolkit_requests.py deleted file mode 100644 index 85d3f588..00000000 --- a/nextpy/ai/tools/toolkits/requests_toolkit/test_toolkit_requests.py +++ /dev/null @@ -1,165 +0,0 @@ -# This file has been modified by the Nextpy Team in 2023 using AI tools and automation scripts. -# We have rigorously tested these modifications to ensure reliability and performance. Based on successful test results, we are confident in the quality and stability of these changes. 
- -import json - -from nextpy.ai import engine -from nextpy.ai.tools.toolkits.requests_toolkit.request import RequestsToolkit -from nextpy.ai.tools.toolkits.requests_toolkit.requests.utils import TextRequestsWrapper - -# requests_get_tool = RequestsGetTool(requests_wrapper=TextRequestsWrapper()) -# requests_put_tool = RequestsPutTool(requests_wrapper=TextRequestsWrapper()) -# requests_post_tool = RequestsPostTool(requests_wrapper=TextRequestsWrapper()) -# requests_delete_tool = RequestsDeleteTool(requests_wrapper=TextRequestsWrapper()) -# requests_patch_tool = RequestsPatchTool(requests_wrapper=TextRequestsWrapper()) - - -request_tool = RequestsToolkit(requests_wrapper=TextRequestsWrapper()) -request_tools = request_tool.get_tools() -# print(request_tools) - -# requests_get_tool = Tool( -# name = "Request_get", -# func= requests_get_tool.run, -# description="Useful when you have to get content from a URL" -# ) - -# requests_put_tool = Tool( -# name = "Request_put", -# func= requests_put_tool.run, -# description = """Use this when you want to PUT to a website. -# Input should be a json string with two keys: "url" and "data". -# The value of "url" should be a string, and the value of "data" should be a dictionary of -# key-value pairs you want to PUT to the url. -# Be careful to always use double quotes for strings in the json string. -# The output will be the text response of the PUT request. -# """ -# ) - -# requests_delete_tool = Tool( -# name = "Request_delete", -# func= requests_delete_tool.run, -# description="Useful when you have to make delete request to a URL" -# ) - -# requests_patch_tool = Tool( -# name = "Request_patch", -# func= requests_patch_tool.run, -# description="""Use this when you want to PATCH to a website. -# Input should be a json string with two keys: "url" and "data". -# The value of "url" should be a string, and the value of "data" should be a dictionary of -# key-value pairs you want to PATCH to the url. -# Be careful to always use double quotes for strings in the json string -# The output will be the text response of the PATCH request. -# """ -# ) - -# requests_post_tool = Tool( -# name = "Request_post", -# func= requests_post_tool.run, -# description="""Use this when you want to POST to a website. -# Input should be a json string with two keys: "url" and "data". -# The value of "url" should be a string, and the value of "data" should be a dictionary of -# key-value pairs you want to POST to the url. -# Be careful to always use double quotes for strings in the json string -# The output will be the text response of the POST request. -# """ -# ) - -# tools = [requests_get_tool,requests_put_tool,requests_delete_tool,requests_patch_tool, requests_post_tool] - -# we use GPT-4 here, but you could use gpt-3.5-turbo as well -llm = engine.llms.OpenAI(model="gpt-3.5-turbo-16k") - - -def tool_use(query, tools=request_tools): - query = json.loads(query) - return tools[int(query["index"])].run(query["query"]) - - -experts = engine( - template=""" -{{#system~}} -You are a helpful Web assistant. You are given a set of tools to use -{{~#each tools}} -{{this}} -{{/each}} -{{~/system}} - -{{#user~}} -I want a response to the following question: -{{query}} -Think do you need to use the given tool to answer the question. Provide the answer in either <> or <>. 
-{{~/user}} - -{{#assistant~}} -{{gen 'tools_use' temperature=0 max_tokens=300}} -{{~/assistant}} - -{{#user~}} -If the answer is Yes then call the tool using the following format '{"index":[index of the tool to be used in the tools list], "query":[query to be passed]' -If the answer is No, answer to the {{query}} itself. -{{~/user}} - -{{#assistant~}} -{{gen 'action' temperature=0 max_tokens=500}} -{{#if (tools_use)=="Yes"}} -{{(tool_func action)}} -{{/if}} -{{~/assistant}} - -{{#user~}} -Summarise the answer in one sentence -{{~/user}} - -{{#assistant~}} -{{gen 'final_answer' temperature=0 max_tokens=500}} -{{~/assistant}} -""", - llm=llm, - tools=request_tools, - tool_func=tool_use, - stream=False, -) - -# get request (Gets the response provided by the given URL) -out = experts(query='Get the content from the following URL : "https://yaml.org"') -print(out) - -# All the below tools result in "405 bad gateway, Not Allowed" response -# delete request -# out = experts(query='Make a DELETE request to the following URL : "https://yaml.org"') -# print(out) - -# put request -# out = experts(query="""Put to the following URL with the following data { -# "url": "http://yaml.org", -# "data": { -# "key1": "value1", -# "key2": "value2", -# "key3": "value3" -# } -# }""") -# print(out) - -# Post request -# out = experts(query="""Post to the following URL with the following data { -# "url": "http://yaml.org", -# "data": { -# "key1": "value1", -# "key2": "value2", -# "key3": "value3" -# } -# }""") -# print(out) - -# Patch request -# out = experts(query="""Patch to the following URL with the following data { -# "url": "http://yaml.org", -# "data": { -# "key1": "value1", -# "key2": "value2", -# "key3": "value3" -# } -# }""") -# print(out) diff --git a/nextpy/ai/tools/toolkits/slack_toolkit/slack.py b/nextpy/ai/tools/toolkits/slack_toolkit/slack.py deleted file mode 100644 index dd775cfb..00000000 --- a/nextpy/ai/tools/toolkits/slack_toolkit/slack.py +++ /dev/null @@ -1,66 +0,0 @@ -# This file has been modified by the Nextpy Team in 2023 using AI tools and automation scripts. -# We have rigorously tested these modifications to ensure reliability and performance. Based on successful test results, we are confident in the quality and stability of these changes. 
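A usage sketch for the toolkit defined below; the token and date window are placeholders, and the reader is built by the class's own validator:

from datetime import datetime, timedelta

from nextpy.ai.tools.toolkits.slack_toolkit.slack import SlackToolkit

toolkit = SlackToolkit(
    slack_token="xoxb-<placeholder>",
    earliest_date=datetime.now() - timedelta(days=7),
    latest_date=datetime.now(),
)
tools = toolkit.get_tools()  # LoadData, FetchChannel, SendMessage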
- -"""Slack toolkit.""" - -import ssl -from datetime import datetime -from typing import List, Optional - -from pydantic import Field, validator - -from nextpy.ai.tools.basetool import BaseTool -from nextpy.ai.tools.toolkits.base import BaseToolkit -from nextpy.ai.tools.toolkits.slack_toolkit.slack_tool.base import ( - FetchChannel, - LoadData, - SendMessage, -) -from nextpy.ai.tools.toolkits.slack_toolkit.slack_tool.utils import SlackReader - - -class SlackToolkit(BaseToolkit): - """Slack toolkit.""" - - reader: Optional[SlackReader] = Field(None) - slack_token: Optional[str] = Field(None) - earliest_date: Optional[datetime] = Field(None) - latest_date: Optional[datetime] = Field(None) - - class Config: - arbitrary_types_allowed = True - - @validator("reader", pre=True, always=True) - def set_reader(cls, v, values): - # Create the SSLContext object here - ssl_context = ssl.SSLContext() - return SlackReader( - slack_token=values.get("slack_token"), - ssl=ssl_context, - earliest_date=values.get("earliest_date"), - latest_date=values.get("latest_date"), - ) - - def get_tools(self) -> List[BaseTool]: - """Get the tools in the toolkit.""" - return [ - LoadData( - slack_token=self.slack_token, - ssl=self.ssl, - earliest_date=self.earliest_date, - latest_date=self.latest_date, - ), - FetchChannel( - slack_token=self.slack_token, - ssl=self.ssl, - earliest_date=self.earliest_date, - latest_date=self.latest_date, - ), - SendMessage( - slack_token=self.slack_token, - ssl=self.ssl, - earliest_date=self.earliest_date, - latest_date=self.latest_date, - ), - ] - diff --git a/nextpy/ai/tools/toolkits/slack_toolkit/slack/base.py b/nextpy/ai/tools/toolkits/slack_toolkit/slack/base.py deleted file mode 100644 index b670b7eb..00000000 --- a/nextpy/ai/tools/toolkits/slack_toolkit/slack/base.py +++ /dev/null @@ -1,146 +0,0 @@ -# This file has been modified by the Nextpy Team in 2023 using AI tools and automation scripts. -# We have rigorously tested these modifications to ensure reliability and performance. Based on successful test results, we are confident in the quality and stability of these changes. - -import logging -from datetime import datetime -from ssl import SSLContext -from typing import List, Optional, Type - -from pydantic import BaseModel, Field - -from nextpy.ai.schema import Document -from nextpy.ai.tools.basetool import BaseTool -from nextpy.ai.tools.toolkits.slack_toolkit.slack.utils import SlackReader - -logger = logging.getLogger(__name__) - - -class SlackBase(BaseTool): - def __init__( - self, - slack_token: Optional[str] = None, - ssl: Optional[SSLContext] = None, - earliest_date: Optional[datetime] = None, - latest_date: Optional[datetime] = None, - ) -> None: - """Initialize with parameters.""" - self.reader = SlackReader( - slack_token=slack_token, - ssl=ssl, - earliest_date=earliest_date, - latest_date=latest_date, - ) - - -class LoadDataArgsSchema(BaseModel): - channel_ids: List[str] = Field( - ..., - description=" Information about the parameter. ", - ) - reverse_chronological: bool = Field( - ..., - description=" Information about the parameter.", - ) - - -class LoadData(SlackBase): - name: str = "load_data" - description: str = "Load data from the input directory." 
- args_schema: Type[LoadDataArgsSchema] = LoadDataArgsSchema - - def load_data( - self, - channel_ids: List[str], - reverse_chronological: bool = True, - ) -> List[Document]: - """Load data from the input directory.""" - return self.reader.load_data( - channel_ids=channel_ids, - reverse_chronological=reverse_chronological, - ) - - def _run( - self, - channel_ids: List[str], - reverse_chronological: bool = True, - ) -> str: - """Run the tool.""" - try: - return self.load_data( - self, - channel_ids=channel_ids, - reverse_chronological=reverse_chronological, - ) - except Exception as e: - raise Exception(f"An error occurred: {e}") - - -class FetchChannel(SlackBase): - name: str = "fetch_channel" - description: str = "Fetch a list of relevant channels" - - def fetch_channels( - self, - ) -> List[str]: - """Fetch a list of relevant channels.""" - slack_client = self.reader.client - try: - msg_result = slack_client.conversations_list() - logger.info(msg_result) - except Exception as e: - logger.error(e) - raise e - - return msg_result["channels"] - - def _run( - self, - ) -> str: - """Run the tool.""" - return self.fetch_channels() - - -class SendMessageArgsSchema(BaseModel): - channel_id: str = Field( - ..., - description=" Information about the parameter. ", - ) - message: str = Field( - ..., - description=" Information about the parameter. ", - ) - - -class SendMessage(SlackBase): - name: str = "send_message" - description: str = "Send a message to a channel given the channel ID." - args_schema: Type[SendMessageArgsSchema] = SendMessageArgsSchema - - def send_message( - self, - channel_id: str, - message: str, - ) -> None: - """Send a message to a channel given the channel ID.""" - slack_client = self.reader.client - try: - msg_result = slack_client.chat_postMessage( - channel=channel_id, - text=message, - ) - logger.info(msg_result) - except Exception as e: - logger.error(e) - raise e - - def _run( - self, - channel_id: str, - message: str, - ) -> str: - """Run the tool.""" - try: - self.send_message(channel_id=channel_id, message=message) - return "Message Sent" - except Exception as e: - raise Exception(f"An error occurred: {e}") diff --git a/nextpy/ai/tools/toolkits/slack_toolkit/slack/utils.py b/nextpy/ai/tools/toolkits/slack_toolkit/slack/utils.py deleted file mode 100644 index 4e3754b5..00000000 --- a/nextpy/ai/tools/toolkits/slack_toolkit/slack/utils.py +++ /dev/null @@ -1,218 +0,0 @@ -# This file has been modified by the Nextpy Team in 2023 using AI tools and automation scripts. -# We have rigorously tested these modifications to ensure reliability and performance. Based on successful test results, we are confident in the quality and stability of these changes. - -"""Base reader class.""" -from abc import abstractmethod -from typing import Any, List - -from nextpy.ai.schema import Document - - -class BaseReader: - """Utilities for loading data from a directory.""" - - @abstractmethod - def load_data(self, *args: Any, **load_kwargs: Any) -> List[Document]: - """Load data from the input directory.""" - - -"""Slack reader.""" -import logging -import os -import time -from datetime import datetime -from ssl import SSLContext -from typing import List, Optional - -from llama_index.readers.base import BaseReader -from llama_index.schema import Document - -logger = logging.getLogger(__name__) - - -class SlackReader(BaseReader): - """Slack reader. - - Reads conversations from channels. If an earliest_date is provided, an - optional latest_date can also be provided. 
If no latest_date is provided, - we assume the latest date is the current timestamp. - - Args: - slack_token (Optional[str]): Slack token. If not provided, we - assume the environment variable `SLACK_BOT_TOKEN` is set. - ssl (Optional[str]): Custom SSL context. If not provided, it is assumed - there is already an SSL context available. - earliest_date (Optional[datetime]): Earliest date from which - to read conversations. If not provided, we read all messages. - latest_date (Optional[datetime]): Latest date from which to - read conversations. If not provided, defaults to current timestamp - in combination with earliest_date. - """ - - def __init__( - self, - slack_token: Optional[str] = None, - ssl: Optional[SSLContext] = None, - earliest_date: Optional[datetime] = None, - latest_date: Optional[datetime] = None, - ) -> None: - """Initialize with parameters.""" - from slack_sdk import WebClient - - if slack_token is None: - slack_token = os.environ["SLACK_BOT_TOKEN"] - if slack_token is None: - raise ValueError( - "Must specify `slack_token` or set environment " - "variable `SLACK_BOT_TOKEN`." - ) - if ssl is None: - self.client = WebClient(token=slack_token) - else: - self.client = WebClient(token=slack_token, ssl=ssl) - if latest_date is not None and earliest_date is None: - raise ValueError( - "Must specify `earliest_date` if `latest_date` is specified." - ) - if earliest_date is not None: - self.earliest_date_timestamp: Optional[float] = earliest_date.timestamp() - else: - self.earliest_date_timestamp = None - if latest_date is not None: - self.latest_date_timestamp = latest_date.timestamp() - else: - self.latest_date_timestamp = datetime.now().timestamp() - res = self.client.api_test() - if not res["ok"]: - raise ValueError(f"Error initializing Slack API: {res['error']}") - - def _read_message(self, channel_id: str, message_ts: str) -> str: - from slack_sdk.errors import SlackApiError - - """Read a message.""" - - messages_text: List[str] = [] - next_cursor = None - while True: - try: - # https://slack.com/api/conversations.replies - # List all replies to a message, including the message itself. 
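-                # Pagination: each response carries response_metadata.next_cursor;
-                # the loop re-issues conversations_replies with that cursor until
-                # has_more is False, bounded by oldest/latest when a window is set.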
- if self.earliest_date_timestamp is None: - result = self.client.conversations_replies( - channel=channel_id, ts=message_ts, cursor=next_cursor - ) - else: - conversations_replies_kwargs = { - "channel": channel_id, - "ts": message_ts, - "cursor": next_cursor, - "latest": str(self.latest_date_timestamp), - } - if self.earliest_date_timestamp is not None: - conversations_replies_kwargs["oldest"] = str( - self.earliest_date_timestamp - ) - result = self.client.conversations_replies( - **conversations_replies_kwargs # type: ignore - ) - messages = result["messages"] - messages_text.extend(message["text"] for message in messages) - if not result["has_more"]: - break - - next_cursor = result["response_metadata"]["next_cursor"] - except SlackApiError as e: - if e.response["error"] == "ratelimited": - logger.error( - "Rate limit error reached, sleeping for: {} seconds".format( - e.response.headers["retry-after"] - ) - ) - time.sleep(int(e.response.headers["retry-after"])) - else: - logger.error("Error parsing conversation replies: {}".format(e)) - - return "\n\n".join(messages_text) - - def _read_channel(self, channel_id: str, reverse_chronological: bool) -> str: - from slack_sdk.errors import SlackApiError - - """Read a channel.""" - - result_messages: List[str] = [] - next_cursor = None - while True: - try: - # Call the conversations.history method using the WebClient - # conversations.history returns the first 100 messages by default - # These results are paginated, - # see: https://api.slack.com/methods/conversations.history$pagination - conversations_history_kwargs = { - "channel": channel_id, - "cursor": next_cursor, - "latest": str(self.latest_date_timestamp), - } - if self.earliest_date_timestamp is not None: - conversations_history_kwargs["oldest"] = str( - self.earliest_date_timestamp - ) - result = self.client.conversations_history( - **conversations_history_kwargs # type: ignore - ) - conversation_history = result["messages"] - # Print results - logger.info( - "{} messages found in {}".format( - len(conversation_history), channel_id - ) - ) - result_messages.extend( - self._read_message(channel_id, message["ts"]) - for message in conversation_history - ) - if not result["has_more"]: - break - next_cursor = result["response_metadata"]["next_cursor"] - - except SlackApiError as e: - if e.response["error"] == "ratelimited": - logger.error( - "Rate limit error reached, sleeping for: {} seconds".format( - e.response.headers["retry-after"] - ) - ) - time.sleep(int(e.response.headers["retry-after"])) - else: - logger.error("Error parsing conversation replies: {}".format(e)) - - return ( - "\n\n".join(result_messages) - if reverse_chronological - else "\n\n".join(result_messages[::-1]) - ) - - def load_data( - self, channel_ids: List[str], reverse_chronological: bool = True - ) -> List[Document]: - """Load data from the input directory. - - Args: - channel_ids (List[str]): List of channel ids to read. - - Returns: - List[Document]: List of documents. 
- """ - results = [] - for channel_id in channel_ids: - channel_content = self._read_channel( - channel_id, reverse_chronological=reverse_chronological - ) - results.append( - Document(text=channel_content, metadata={"channel": channel_id}) - ) - return results - - -if __name__ == "__main__": - reader = SlackReader() - logger.info(reader.load_data(channel_ids=["C04DC2VUY3F"])) diff --git a/nextpy/ai/tools/toolkits/slack_toolkit/slack_tool/base.py b/nextpy/ai/tools/toolkits/slack_toolkit/slack_tool/base.py deleted file mode 100644 index 162bd671..00000000 --- a/nextpy/ai/tools/toolkits/slack_toolkit/slack_tool/base.py +++ /dev/null @@ -1,152 +0,0 @@ -# This file has been modified by the Nextpy Team in 2023 using AI tools and automation scripts. -# We have rigorously tested these modifications to ensure reliability and performance. Based on successful test results, we are confident in the quality and stability of these changes. - -import logging -from datetime import datetime -from ssl import SSLContext -from typing import List, Optional, Type - -import pydantic -from pydantic import BaseModel, Field - -from nextpy.ai.schema import Document -from nextpy.ai.tools.basetool import BaseTool -from nextpy.ai.tools.toolkits.slack_toolkit.slack_tool.utils import SlackReader - -logger = logging.getLogger(__name__) - - -class SlackBase(BaseTool): - """Slack toolkit.""" - - reader: Optional[SlackReader] = Field(None) - slack_token: Optional[str] = Field(None) - ssl: Optional[SSLContext] = Field(None) - earliest_date: Optional[datetime] = Field(None) - latest_date: Optional[datetime] = Field(None) - - class Config: - arbitrary_types_allowed = True - - @pydantic.validator("reader", pre=True, always=True) - def set_reader(cls, v, values): - return SlackReader( - slack_token=values.get("slack_token"), - ssl=values.get("ssl"), - earliest_date=values.get("earliest_date"), - latest_date=values.get("latest_date"), - ) - - -class LoadDataArgsSchema(BaseModel): - channel_ids: List[str] = Field( - ..., - description="List of IDs for the Slack channels from which to load data", - ) - reverse_chronological: bool = Field( - ..., - description="Signifies whether the loaded data should be ordered in reverse chronological order. By default, it's set to True", - ) - - -class LoadData(SlackBase): - name: str = "load_data" - description: str = "Load data from the input directory." 
- args_schema: Type[LoadDataArgsSchema] = LoadDataArgsSchema - - def load_data( - self, - channel_ids: List[str], - reverse_chronological: bool = True, - ) -> List[Document]: - """Load data from the input directory.""" - return self.reader.load_data( - channel_ids=channel_ids, - reverse_chronological=reverse_chronological, - ) - - def run( - self, - channel_ids: List[str], - reverse_chronological: bool = True, - ) -> str: - """Run the tool.""" - try: - return self.load_data( - self, - channel_ids=channel_ids, - reverse_chronological=reverse_chronological, - ) - except Exception as e: - raise Exception(f"An error occurred: {e}") - - -class FetchChannel(SlackBase): - name: str = "fetch_channel" - description: str = "Fetch a list of relevant channels" - - def fetch_channels( - self, - ) -> List[str]: - """Fetch a list of relevant channels.""" - slack_client = self.reader.client - try: - msg_result = slack_client.conversations_list() - logger.info(msg_result) - except Exception as e: - logger.error(e) - raise e - - return msg_result["channels"] - - def _run( - self, - ) -> str: - """Run the tool.""" - return self.fetch_channels() - - -class SendMessageArgsSchema(BaseModel): - channel_id: str = Field( - ..., - description="ID of the channel to send message to", - ) - message: str = Field( - ..., - description="Content of the message", - ) - - -class SendMessage(SlackBase): - name: str = "send_message" - description: str = "Send a message to a channel given the channel ID." - args_schema: Type[SendMessageArgsSchema] = SendMessageArgsSchema - - def send_message( - self, - channel_id: str, - message: str, - ) -> None: - """Send a message to a channel given the channel ID.""" - slack_client = self.reader.client - try: - msg_result = slack_client.chat_postMessage( - channel=channel_id, - text=message, - ) - logger.info(msg_result) - except Exception as e: - logger.error(e) - raise e - - def _run( - self, - channel_id: str, - message: str, - ) -> str: - """Run the tool.""" - try: - self.send_message(channel_id=channel_id, message=message) - return "Message Sent" - except Exception as e: - raise Exception(f"An error occurred: {e}") diff --git a/nextpy/ai/tools/toolkits/slack_toolkit/slack_tool/utils.py b/nextpy/ai/tools/toolkits/slack_toolkit/slack_tool/utils.py deleted file mode 100644 index 9bff5294..00000000 --- a/nextpy/ai/tools/toolkits/slack_toolkit/slack_tool/utils.py +++ /dev/null @@ -1,220 +0,0 @@ -# This file has been modified by the Nextpy Team in 2023 using AI tools and automation scripts. -# We have rigorously tested these modifications to ensure reliability and performance. Based on successful test results, we are confident in the quality and stability of these changes. - -"""Base reader class.""" -from abc import abstractmethod -from typing import Any, List - -from nextpy.ai.schema import Document - - -class BaseReader: - """Utilities for loading data from a directory.""" - - @abstractmethod - def load_data(self, *args: Any, **load_kwargs: Any) -> List[Document]: - """Load data from the input directory.""" - - -"""Slack reader.""" -import logging -import os -import time -from datetime import datetime -from ssl import SSLContext -from typing import List, Optional - -from llama_index.readers.base import BaseReader -from llama_index.schema import Document - -logger = logging.getLogger(__name__) - -os.environ["SLACK_BOT_TOKEN"] = "Slack Token" - - -class SlackReader(BaseReader): - """Slack reader. - - Reads conversations from channels. 
If an earliest_date is provided, an - optional latest_date can also be provided. If no latest_date is provided, - we assume the latest date is the current timestamp. - - Args: - slack_token (Optional[str]): Slack token. If not provided, we - assume the environment variable `SLACK_BOT_TOKEN` is set. - ssl (Optional[str]): Custom SSL context. If not provided, it is assumed - there is already an SSL context available. - earliest_date (Optional[datetime]): Earliest date from which - to read conversations. If not provided, we read all messages. - latest_date (Optional[datetime]): Latest date from which to - read conversations. If not provided, defaults to current timestamp - in combination with earliest_date. - """ - - def __init__( - self, - slack_token: Optional[str] = None, - ssl: Optional[SSLContext] = None, - earliest_date: Optional[datetime] = None, - latest_date: Optional[datetime] = None, - ) -> None: - """Initialize with parameters.""" - from slack_sdk import WebClient - - if slack_token is None: - slack_token = os.environ["SLACK_BOT_TOKEN"] - if slack_token is None: - raise ValueError( - "Must specify `slack_token` or set environment " - "variable `SLACK_BOT_TOKEN`." - ) - if ssl is None: - self.client = WebClient(token=slack_token) - else: - self.client = WebClient(token=slack_token, ssl=ssl) - if latest_date is not None and earliest_date is None: - raise ValueError( - "Must specify `earliest_date` if `latest_date` is specified." - ) - if earliest_date is not None: - self.earliest_date_timestamp: Optional[float] = earliest_date.timestamp() - else: - self.earliest_date_timestamp = None - if latest_date is not None: - self.latest_date_timestamp = latest_date.timestamp() - else: - self.latest_date_timestamp = datetime.now().timestamp() - res = self.client.api_test() - if not res["ok"]: - raise ValueError(f"Error initializing Slack API: {res['error']}") - - def _read_message(self, channel_id: str, message_ts: str) -> str: - from slack_sdk.errors import SlackApiError - - """Read a message.""" - - messages_text: List[str] = [] - next_cursor = None - while True: - try: - # https://slack.com/api/conversations.replies - # List all replies to a message, including the message itself. 
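-                # On a "ratelimited" SlackApiError, the handler below sleeps for
-                # the server's retry-after interval and the while-loop retries.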
- if self.earliest_date_timestamp is None: - result = self.client.conversations_replies( - channel=channel_id, ts=message_ts, cursor=next_cursor - ) - else: - conversations_replies_kwargs = { - "channel": channel_id, - "ts": message_ts, - "cursor": next_cursor, - "latest": str(self.latest_date_timestamp), - } - if self.earliest_date_timestamp is not None: - conversations_replies_kwargs["oldest"] = str( - self.earliest_date_timestamp - ) - result = self.client.conversations_replies( - **conversations_replies_kwargs # type: ignore - ) - messages = result["messages"] - messages_text.extend(message["text"] for message in messages) - if not result["has_more"]: - break - - next_cursor = result["response_metadata"]["next_cursor"] - except SlackApiError as e: - if e.response["error"] == "ratelimited": - logger.error( - "Rate limit error reached, sleeping for: {} seconds".format( - e.response.headers["retry-after"] - ) - ) - time.sleep(int(e.response.headers["retry-after"])) - else: - logger.error("Error parsing conversation replies: {}".format(e)) - - return "\n\n".join(messages_text) - - def _read_channel(self, channel_id: str, reverse_chronological: bool) -> str: - from slack_sdk.errors import SlackApiError - - """Read a channel.""" - - result_messages: List[str] = [] - next_cursor = None - while True: - try: - # Call the conversations.history method using the WebClient - # conversations.history returns the first 100 messages by default - # These results are paginated, - # see: https://api.slack.com/methods/conversations.history$pagination - conversations_history_kwargs = { - "channel": channel_id, - "cursor": next_cursor, - "latest": str(self.latest_date_timestamp), - } - if self.earliest_date_timestamp is not None: - conversations_history_kwargs["oldest"] = str( - self.earliest_date_timestamp - ) - result = self.client.conversations_history( - **conversations_history_kwargs # type: ignore - ) - conversation_history = result["messages"] - # Print results - logger.info( - "{} messages found in {}".format( - len(conversation_history), channel_id - ) - ) - result_messages.extend( - self._read_message(channel_id, message["ts"]) - for message in conversation_history - ) - if not result["has_more"]: - break - next_cursor = result["response_metadata"]["next_cursor"] - - except SlackApiError as e: - if e.response["error"] == "ratelimited": - logger.error( - "Rate limit error reached, sleeping for: {} seconds".format( - e.response.headers["retry-after"] - ) - ) - time.sleep(int(e.response.headers["retry-after"])) - else: - logger.error("Error parsing conversation replies: {}".format(e)) - - return ( - "\n\n".join(result_messages) - if reverse_chronological - else "\n\n".join(result_messages[::-1]) - ) - - def load_data( - self, channel_ids: List[str], reverse_chronological: bool = True - ) -> List[Document]: - """Load data from the input directory. - - Args: - channel_ids (List[str]): List of channel ids to read. - - Returns: - List[Document]: List of documents. 
- """ - results = [] - for channel_id in channel_ids: - channel_content = self._read_channel( - channel_id, reverse_chronological=reverse_chronological - ) - results.append( - Document(text=channel_content, metadata={"channel": channel_id}) - ) - return results - - -if __name__ == "__main__": - reader = SlackReader() - logger.info(reader.load_data(channel_ids=["C04DC2VUY3F"])) diff --git a/nextpy/ai/tools/toolkits/zapier_toolkit/zapier.py b/nextpy/ai/tools/toolkits/zapier_toolkit/zapier.py deleted file mode 100644 index 8bfcc2d0..00000000 --- a/nextpy/ai/tools/toolkits/zapier_toolkit/zapier.py +++ /dev/null @@ -1,27 +0,0 @@ -# This file has been modified by the Nextpy Team in 2023 using AI tools and automation scripts. -# We have rigorously tested these modifications to ensure reliability and performance. Based on successful test results, we are confident in the quality and stability of these changes. - -"""Notion tool spec.""" - -from typing import List - -from nextpy.ai.tools.basetool import BaseTool -from nextpy.ai.tools.toolkits.base import BaseToolkit -from nextpy.ai.tools.toolkits.notion_toolkit.notion.base import LoadData, SearchData - -SEARCH_URL = "https://api.notion.com/v1/search" - - -class ZapierToolkit(BaseToolkit): - api_key: str = None - oauth_access_token: str = None - - def get_tools(self) -> List[BaseTool]: - """Get the tools in the toolkit.""" - return [ - LoadData(api_key=self.api_key, oauth_access_token=self.oauth_access_token), - SearchData( - integration_token=self.api_key, - oauth_access_token=self.oauth_access_token, - ), - ] diff --git a/nextpy/ai/tools/toolkits/zapier_toolkit/zapier/base.py b/nextpy/ai/tools/toolkits/zapier_toolkit/zapier/base.py deleted file mode 100644 index 2c5c8e1d..00000000 --- a/nextpy/ai/tools/toolkits/zapier_toolkit/zapier/base.py +++ /dev/null @@ -1,101 +0,0 @@ -# This file has been modified by the Nextpy Team in 2023 using AI tools and automation scripts. -# We have rigorously tested these modifications to ensure reliability and performance. Based on successful test results, we are confident in the quality and stability of these changes. - -"""Zapier tool spec.""" - -import json -from typing import Optional, Type - -import requests -from pydantic import BaseModel, Field - -from nextpy.ai.tools.basetool import BaseTool - -ACTION_URL_TMPL = "https://nla.zapier.com/api/v1/dynamic/exposed/{action_id}/execute/" - - -class NaturalLanguageSchema(BaseModel): - id: str = Field(..., description="Info about the Parameter") - kwargs = Field(..., description="Info about the Paramter") - - -class BaseZapier(BaseTool): - """Zapier tool spec.""" - - def __init__( - self, api_key: Optional[str] = None, oauth_access_token: Optional[str] = None - ) -> None: - """Initialize with parameters.""" - if api_key: - self._headers = {"x-api-key": api_key} - elif oauth_access_token: - self._headers = {"Authorization": f"Bearer {oauth_access_token}"} - else: - raise ValueError("Must provide either api_key or oauth_access_token") - - # Get the exposed actions from Zapier - actions = json.loads(self.list_actions()) - if "results" not in actions: - raise ValueError( - "No Zapier actions exposed, visit https://nla.zapier.com/dev/actions/ to expose actions." 
-            )
-        results = actions["results"]
-        # Track the names of the dynamically registered actions
-        self.spec_functions: list = []
-
-        # Register the actions as Tools
-        for action in results:
-            params = action["params"]
-
-            def function_action(id=action["id"], **kwargs):
-                return self.natural_language_query(id, **kwargs)
-
-            action_name = action["description"].split(": ")[1].replace(" ", "_")
-            function_action.__name__ = action_name
-            function_action.__doc__ = f"""
-            This is a Zapier Natural Language Action function wrapper.
-
-            The 'instructions' key is REQUIRED for all function calls.
-            The 'instructions' key is a natural language string describing the action to be taken.
-            The following are all of the valid arguments you can provide: {params}
-
-            Ignore the id field; it is provided for you.
-            If the returned error field is not null, interpret the error and try to fix it, or inform the user of how they might fix it.
-            """
-            setattr(self, action_name, function_action)
-            self.spec_functions.append(action_name)
-
-    def list_actions(self):
-        """Return the raw JSON listing of the actions exposed for this account."""
-        response = requests.get(
-            "https://nla.zapier.com/api/v1/dynamic/exposed/", headers=self._headers
-        )
-        return response.text
-
-    def natural_language_query(self, id: str, **kwargs):
-        """POST the given kwargs to the action's execute endpoint."""
-        response = requests.post(
-            ACTION_URL_TMPL.format(action_id=id),
-            headers=self._headers,
-            data=json.dumps(kwargs),
-        )
-        return response.text
-
-
-class ListAction(BaseZapier):
-    name: str = "ListAction"
-    description: str = "List the Zapier actions currently exposed for this account."
-
-    def run(self):
-        try:
-            return self.list_actions()
-        except Exception as e:
-            return e
-
-
-class NaturalLanguageQuery(BaseZapier):
-    name: str = "NaturalLanguageQuery"
-    description: str = "Execute an exposed Zapier action from natural-language instructions."
-    args_schema: Type[NaturalLanguageSchema] = NaturalLanguageSchema
-
-    def run(self, id: str, **kwargs):
-        try:
-            return self.natural_language_query(id=id, **kwargs)
-        except Exception as e:
-            return e
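A closing usage sketch under stated assumptions: a valid NLA API key with at least one action exposed, and illustrative ids and instructions:

from nextpy.ai.tools.toolkits.zapier_toolkit.zapier.base import (
    ListAction,
    NaturalLanguageQuery,
)

lister = ListAction(api_key="<nla-api-key>")  # placeholder credential
print(lister.run())  # JSON text describing the exposed actions

query = NaturalLanguageQuery(api_key="<nla-api-key>")
# The id comes from the listing above; 'instructions' is always required.
print(query.run(id="<action-id>", instructions="Send 'hello' to #general"))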