From c030a5219c75e2359c705257b7a7423c84787ef7 Mon Sep 17 00:00:00 2001 From: codebanesr Date: Thu, 28 Sep 2023 03:18:21 +0300 Subject: [PATCH 1/2] Adding code to store swagger file in the database --- llm-server/.gitignore | 3 +- llm-server/app.py | 95 ++-------------- .../swagger_api.py => _swagger/controller.py} | 77 ++++--------- llm-server/routes/_swagger/service.py | 44 ++++++++ llm-server/routes/root_service.py | 102 ++++++++++++++++++ .../workflow/generate_openapi_payload.py | 5 +- .../routes/workflow/hierarchical_planner.py | 6 +- .../routes/workflow/load_openapi_spec.py | 47 -------- .../workflow/typings/run_workflow_input.py | 8 +- .../routes/workflow/workflow_service.py | 15 ++- llm-server/utils/detect_multiple_intents.py | 12 +-- 11 files changed, 199 insertions(+), 215 deletions(-) rename llm-server/routes/{swagger_controller/swagger_api.py => _swagger/controller.py} (60%) create mode 100644 llm-server/routes/_swagger/service.py create mode 100644 llm-server/routes/root_service.py delete mode 100644 llm-server/routes/workflow/load_openapi_spec.py diff --git a/llm-server/.gitignore b/llm-server/.gitignore index 6a6bf8353..24ef88e78 100644 --- a/llm-server/.gitignore +++ b/llm-server/.gitignore @@ -148,4 +148,5 @@ cython_debug/ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore # and can be added to the global gitignore or merged into this file. For a more nuclear # option (not recommended) you can uncomment the following to ignore the entire idea folder. -.idea/ \ No newline at end of file +.idea/ +.env.local \ No newline at end of file diff --git a/llm-server/app.py b/llm-server/app.py index 445291429..dd9e76e9c 100644 --- a/llm-server/app.py +++ b/llm-server/app.py @@ -1,108 +1,29 @@ import logging -import requests -import traceback -from flask import Flask, request -from langchain.chains.openai_functions import create_structured_output_chain -from langchain.chat_models import ChatOpenAI -from langchain.prompts import ChatPromptTemplate - -from langchain.utilities.openapi import OpenAPISpec -from utils.base import try_to_match_and_call_api_endpoint -from models.models import AiResponseFormat +from flask import Flask, request, jsonify, Response from routes.workflow.workflow_controller import workflow -from routes.swagger_controller.swagger_api import swagger_workflow -import json +from routes._swagger.controller import _swagger from typing import Any, Tuple -from prompts.base import api_base_prompt, non_api_base_prompt -from routes.workflow.workflow_service import run_workflow -from routes.workflow.typings.run_workflow_input import WorkflowData -from utils.detect_multiple_intents import hasSingleIntent, hasMultipleIntents -import os -from dotenv import load_dotenv - -load_dotenv() -shared_folder = os.getenv("SHARED_FOLDER", "/app/shared_data/") logging.basicConfig(level=logging.DEBUG) app = Flask(__name__) app.register_blueprint(workflow, url_prefix="/workflow") -app.register_blueprint(swagger_workflow, url_prefix="/swagger_api") +app.register_blueprint(_swagger, url_prefix="/swagger_api") +from routes.root_service import handle_request ## TODO: Implement caching for the swagger file content (no need to load it everytime) @app.route("/handle", methods=["POST", "OPTIONS"]) -def handle(): +def handle() -> Response: data = request.get_json() - text = data.get("text") - swagger_url = data.get("swagger_url") - base_prompt = data.get("base_prompt") - headers = data.get("headers", {}) - server_base_url = data.get("server_base_url") - - if not base_prompt: - return json.dumps({"error": "base_prompt is required"}), 400 - - if not text: - return json.dumps({"error": "text is required"}), 400 - - if not swagger_url: - return json.dumps({"error": "swagger_url is required"}), 400 - - if swagger_url.startswith("https://"): - pass - else: - swagger_url = shared_folder + swagger_url - - print(f"swagger_url::{swagger_url}") - try: - if hasMultipleIntents(text): - result = run_workflow( - WorkflowData(text, swagger_url, headers, server_base_url) - ) - - return result - except Exception as e: - raise e - - if swagger_url.startswith("https://"): - response = requests.get(swagger_url) - if response.status_code == 200: - swagger_text = response.text - else: - return json.dumps({"error": "Failed to fetch Swagger content"}), 500 - else: - try: - with open(swagger_url, "r") as file: - swagger_text = file.read() - except FileNotFoundError: - return json.dumps({"error": "File not found"}), 404 - - swagger_spec = OpenAPISpec.from_text(swagger_text) - try: - json_output = try_to_match_and_call_api_endpoint(swagger_spec, text, headers) + response = handle_request(data) + return jsonify(response) except Exception as e: - logging.error(f"Failed to call or map API endpoint: {str(e)}") - logging.error("Exception traceback:\n" + traceback.format_exc()) - json_output = None - - llm = ChatOpenAI(model="gpt-3.5-turbo-0613", temperature=0) - - if json_output is None: - prompt_msgs = non_api_base_prompt(base_prompt, text) - - else: - prompt_msgs = api_base_prompt(base_prompt, text, json_output) - - prompt = ChatPromptTemplate(messages=prompt_msgs) - chain = create_structured_output_chain(AiResponseFormat, llm, prompt, verbose=False) - chain_output = chain.run(question=text) - - return json.loads(json.dumps(chain_output.dict())), 200 + return jsonify({"error": str(e)}) @app.errorhandler(500) diff --git a/llm-server/routes/swagger_controller/swagger_api.py b/llm-server/routes/_swagger/controller.py similarity index 60% rename from llm-server/routes/swagger_controller/swagger_api.py rename to llm-server/routes/_swagger/controller.py index 2426d4ee8..46a51319c 100644 --- a/llm-server/routes/swagger_controller/swagger_api.py +++ b/llm-server/routes/_swagger/controller.py @@ -1,20 +1,18 @@ from flask import Flask, request, jsonify, Blueprint, request, Response -from flask_pymongo import PyMongo import json, yaml from bson import ObjectId +import routes._swagger.service as swagger_service from utils.db import Database -from typing import Any -import requests db_instance = Database() mongo = db_instance.get_db() -swagger_workflow = Blueprint("swagger_workflow", __name__) +_swagger = Blueprint("_swagger", __name__) -@swagger_workflow.route("/", methods=["GET"]) -def get_swagger_files() -> Response: +@_swagger.route("/b/", methods=["GET"]) +def get_swagger_files(id: str) -> Response: # Get page and page_size query params page = int(request.args.get("page", 1)) page_size = int(request.args.get("page_size", 10)) @@ -26,7 +24,7 @@ def get_swagger_files() -> Response: # Query for paginated docs files = [ doc.update({"_id": str(doc["_id"])}) or doc - for doc in mongo.swagger_files.find({}, {}).skip(skip).limit(limit) + for doc in mongo.swagger_files.find({"bot_id": id}, {}).skip(skip).limit(limit) ] # Get total docs count @@ -38,48 +36,15 @@ def get_swagger_files() -> Response: return jsonify(data) -@swagger_workflow.route("/", methods=["POST"]) -def add_swagger_file(): - if request.content_type == "application/json": - # JSON file - file_content = request.get_json() - - elif "multipart/form-data" in request.content_type: - # Uploaded file - file = request.files.get("file") - if file is None: - return jsonify({"error": "File upload is required"}), 400 - - if file.filename.endswith(".json"): - try: - file_content = json.load(file) - except json.JSONDecodeError as e: - return ( - jsonify({"error": "Invalid JSON format in the uploaded file"}), - 400, - ) - - elif file.filename.endswith(".yaml") or file.filename.endswith(".yml"): - try: - file_content = yaml.safe_load(file) - except yaml.YAMLError as e: - return ( - jsonify({"error": "Invalid YAML format in the uploaded file"}), - 400, - ) - - else: - return jsonify({"error": "Unsupported content type"}), 400 - - # Insert into MongoDB - mongo.swagger_files.insert_one(file_content) - - return jsonify({"message": "File added successfully"}) - - -@swagger_workflow.route("/", methods=["GET"]) -def get_swagger_file(id: str) -> Response: - file = mongo.swagger_files.find_one({"_id": ObjectId(id)}) +@_swagger.route("/b/", methods=["POST"]) +def add_swagger_file(id) -> Response: + result = swagger_service.add_swagger_file(request, id) + return jsonify(result) + + +@_swagger.route("/<_id>", methods=["GET"]) +def get_swagger_file(_id: str) -> Response: + file = mongo.swagger_files.find_one({"_id": ObjectId(_id)}) if not file: return jsonify({"message": "Swagger file not found"}) @@ -87,7 +52,7 @@ def get_swagger_file(id: str) -> Response: return jsonify(file) -@swagger_workflow.route("/transform/<_id>", methods=["GET"]) +@_swagger.route("/transform/<_id>", methods=["GET"]) def get_transformed_swagger_file(_id: str) -> Response: swagger_json = mongo.swagger_files.aggregate( [ @@ -138,18 +103,18 @@ def get_transformed_swagger_file(_id: str) -> Response: return jsonify(list(swagger_json)) -@swagger_workflow.route("/", methods=["PUT"]) -def update_swagger_file(id: str) -> Response: +@_swagger.route("/<_id>", methods=["PUT"]) +def update_swagger_file(_id: str) -> Response: data = request.get_json() - result = mongo.swagger_files.update_one({"_id": ObjectId(id)}, {"$set": data}) + result = mongo.swagger_files.update_one({"_id": ObjectId(_id)}, {"$set": data}) if result.modified_count == 1: return jsonify({"message": "Swagger file updated successfully"}) return jsonify({"message": "Swagger file not found"}) -@swagger_workflow.route("/", methods=["DELETE"]) -def delete_swagger_file(id: str) -> Response: - result = mongo.swagger_files.delete_one({"_id": ObjectId(id)}) +@_swagger.route("/<_id>", methods=["DELETE"]) +def delete_swagger_file(_id: str) -> Response: + result = mongo.swagger_files.delete_one({"_id": ObjectId(_id)}) if result.deleted_count == 1: return jsonify({"message": "Swagger file deleted successfully"}) return jsonify({"message": "Swagger file not found"}) diff --git a/llm-server/routes/_swagger/service.py b/llm-server/routes/_swagger/service.py new file mode 100644 index 000000000..1165818de --- /dev/null +++ b/llm-server/routes/_swagger/service.py @@ -0,0 +1,44 @@ +import json +import yaml + +from utils.db import Database + +db_instance = Database() +mongo = db_instance.get_db() +from typing import Dict +from flask import Request + + +def add_swagger_file(request: Request, id: str) -> Dict[str, str]: + if request.content_type == "application/json": + # JSON file + file_content = request.get_json() + + elif "multipart/form-data" in request.content_type: + # Uploaded file + file = request.files.get("file") + if file is None: + return {"error": "File upload is required"} + + if file.filename and file.filename.endswith(".json"): + try: + file_content = json.load(file) + except json.JSONDecodeError as e: + return {"error": "Invalid JSON format in uploaded file"} + + elif file.filename and ( + file.filename.endswith(".yaml") or file.filename.endswith(".yml") + ): + try: + file_content = yaml.safe_load(file) + except yaml.YAMLError as e: + return {"error": "Invalid YAML format in uploaded file"} + + else: + return {"error": "Unsupported content type"} + + # Insert into MongoDB + file_content["bot_id"] = id + mongo.swagger_files.insert_one(file_content) + + return {"message": "File added successfully"} diff --git a/llm-server/routes/root_service.py b/llm-server/routes/root_service.py new file mode 100644 index 000000000..596b6592d --- /dev/null +++ b/llm-server/routes/root_service.py @@ -0,0 +1,102 @@ +import requests, traceback, logging +from langchain.chains.openai_functions import create_structured_output_chain +from langchain.chat_models import ChatOpenAI +from langchain.prompts import ChatPromptTemplate + +from langchain.utilities.openapi import OpenAPISpec +from utils.base import try_to_match_and_call_api_endpoint +from models.models import AiResponseFormat +from prompts.base import api_base_prompt, non_api_base_prompt +from routes.workflow.workflow_service import run_workflow +from routes.workflow.typings.run_workflow_input import WorkflowData +from utils.detect_multiple_intents import hasSingleIntent, hasMultipleIntents +import os +from dotenv import load_dotenv +from typing import Dict, Any, cast +from utils.db import Database +import json + +db_instance = Database() +mongo = db_instance.get_db() + + +load_dotenv() +shared_folder = os.getenv("SHARED_FOLDER", "/app/shared_data/") + + +def handle_request(data: Dict[str, Any]) -> Any: + text = data.get("text") + swagger_url = cast(str, data.get("swagger_url")) + base_prompt = data.get("base_prompt") + headers = data.get("headers", {}) + server_base_url = cast(str, data.get("server_base_url")) + + if not base_prompt: + raise Exception("base_prompt is required") + + if not text: + raise Exception("text is required") + + if not swagger_url: + raise Exception("swagger_url is required") + + # Check if swagger file exists in MongoDB + swagger_doc = mongo.swagger_files.find_one({"_id": swagger_url}) + + if swagger_doc: + swagger_doc["_id"] = str(swagger_doc["_id"]) + swagger_text = swagger_doc + else: + if swagger_url.startswith("https://"): + pass + else: + swagger_url = shared_folder + swagger_url + + print(f"swagger_url::{swagger_url}") + + if swagger_url.startswith("https://"): + response = requests.get(swagger_url) + if response.status_code == 200: + swagger_text = response.text + else: + raise Exception("Failed to fetch Swagger content") + else: + try: + with open(swagger_url, "r") as file: + swagger_text = file.read() + except FileNotFoundError: + raise Exception("File not found") + + swagger_json = json.loads(swagger_text) + swagger_json["bot_id"] = swagger_url.replace(shared_folder, "") + mongo.swagger_files.update_one( + {"bot_id": swagger_json["bot_id"]}, {"$set": swagger_json}, True + ) + + try: + if hasMultipleIntents(text): + return run_workflow( + WorkflowData(text, swagger_text, headers, server_base_url) + ) + except Exception as e: + print(e) + + swagger_spec = OpenAPISpec.from_text(swagger_text) + + try: + json_output = try_to_match_and_call_api_endpoint(swagger_spec, text, headers) + except Exception as e: + logging.error(f"Failed to call or map API endpoint: {str(e)}") + logging.error("Exception traceback:\n" + traceback.format_exc()) + json_output = None + + llm = ChatOpenAI(model="gpt-3.5-turbo-0613", temperature=0) + + if json_output is None: + prompt_msgs = non_api_base_prompt(base_prompt, text) + else: + prompt_msgs = api_base_prompt(base_prompt, text, json_output) + + prompt = ChatPromptTemplate(messages=prompt_msgs) + chain = create_structured_output_chain(AiResponseFormat, llm, prompt, verbose=False) + return chain.run(question=text).dict() diff --git a/llm-server/routes/workflow/generate_openapi_payload.py b/llm-server/routes/workflow/generate_openapi_payload.py index e80ebd2b5..6ed9f85cd 100644 --- a/llm-server/routes/workflow/generate_openapi_payload.py +++ b/llm-server/routes/workflow/generate_openapi_payload.py @@ -4,7 +4,6 @@ from langchain.tools.json.tool import JsonSpec from utils.get_llm import get_llm from dotenv import load_dotenv -from routes.workflow.load_openapi_spec import load_openapi_spec from routes.workflow.extractors.extract_body import extractBodyFromSchema from routes.workflow.extractors.extract_param import extractParamsFromSchema from routes.workflow.extractors.hydrate_params import ( @@ -153,7 +152,7 @@ def extract_json_payload(input_string: str) -> Optional[Any]: def generate_openapi_payload( - spec_source: str, text: str, _operation_id: str, prev_api_response: str + swagger_text: str, text: str, _operation_id: str, prev_api_response: str ) -> Dict[str, Any]: """Generates an API request payload based on an OpenAPI spec. Args: @@ -173,7 +172,7 @@ def generate_openapi_payload( """ params: Optional[JsonData] = {} body: Optional[Dict[str, Any]] = {} - spec_dict: Dict[str, Any] = load_openapi_spec(spec_source) + spec_dict: Dict[str, Any] = json.loads(swagger_text) # extracted_feature = extract_feature_from_user_query(text) # Continue with the rest of the code diff --git a/llm-server/routes/workflow/hierarchical_planner.py b/llm-server/routes/workflow/hierarchical_planner.py index 6162b2cf0..8e44da762 100644 --- a/llm-server/routes/workflow/hierarchical_planner.py +++ b/llm-server/routes/workflow/hierarchical_planner.py @@ -1,12 +1,12 @@ from typing import Dict, Any from langchain.agents.agent_toolkits.openapi.spec import reduce_openapi_spec -from routes.workflow.load_openapi_spec import load_openapi_spec from langchain.requests import RequestsWrapper from langchain.llms.openai import OpenAI # from langchain.agents.agent_toolkits.openapi import planner # This is a custom planner, because of issue in langchains current implementation of planner, we will track this from api_caller import planner +import json import os @@ -15,10 +15,10 @@ def create_and_run_openapi_agent( - spec_path: str, user_query: str, headers: Dict[str, str] = {} + swagger_text: str, user_query: str, headers: Dict[str, str] = {} ) -> Any: # Load OpenAPI spec - raw_spec = load_openapi_spec(spec_path) + raw_spec = json.loads(swagger_text) spec = reduce_openapi_spec(raw_spec) # Create RequestsWrapper with auth diff --git a/llm-server/routes/workflow/load_openapi_spec.py b/llm-server/routes/workflow/load_openapi_spec.py deleted file mode 100644 index 625149335..000000000 --- a/llm-server/routes/workflow/load_openapi_spec.py +++ /dev/null @@ -1,47 +0,0 @@ -import yaml -import json -import requests -import os -from typing import Any, Dict, Union - - -def load_openapi_spec(spec_source: Union[str, Dict[str, Any]]) -> Any: - if isinstance(spec_source, str): - if spec_source.startswith(("http://", "https://")): - return load_spec_from_url(spec_source) - else: - return load_spec_from_file(spec_source) - elif isinstance(spec_source, dict): - return spec_source - else: - raise ValueError( - "Unsupported spec_source type. It should be a URL, file path, or dictionary." - ) - - -def load_spec_from_url(url: str) -> Any: - response = requests.get(url) - if response.status_code == 200: - content_type = response.headers.get("content-type", "").lower() - if "json" in content_type: - return json.loads(response.text) - elif "yaml" in content_type: - return yaml.load(response.text, Loader=yaml.FullLoader) - elif "text/plain" in content_type: - return yaml.load(response.text, Loader=yaml.FullLoader) - else: - raise Exception(f"Unsupported content type in response: {content_type}") - else: - raise Exception(f"Failed to fetch OpenAPI spec from URL: {url}") - - -def load_spec_from_file(file_path: str) -> Any: - file_extension = os.path.splitext(file_path)[1].lower() - if file_extension == ".json": - with open(file_path, "r") as file: - return json.load(file) - elif file_extension in (".yaml", ".yml"): - with open(file_path, "r") as file: - return yaml.load(file, Loader=yaml.FullLoader) - else: - raise ValueError(f"Unsupported file format: {file_extension}") diff --git a/llm-server/routes/workflow/typings/run_workflow_input.py b/llm-server/routes/workflow/typings/run_workflow_input.py index 9c2ed50f7..71aa23596 100644 --- a/llm-server/routes/workflow/typings/run_workflow_input.py +++ b/llm-server/routes/workflow/typings/run_workflow_input.py @@ -3,9 +3,13 @@ class WorkflowData: def __init__( - self, text: str, swagger_url: str, headers: Dict[str, str], server_base_url: str + self, + text: str, + swagger_text: str, + headers: Dict[str, str], + server_base_url: str, ) -> None: self.text = text - self.swagger_url = swagger_url + self.swagger_text = swagger_text self.headers = headers self.server_base_url = server_base_url diff --git a/llm-server/routes/workflow/workflow_service.py b/llm-server/routes/workflow/workflow_service.py index d49d83c54..352c6e2bd 100644 --- a/llm-server/routes/workflow/workflow_service.py +++ b/llm-server/routes/workflow/workflow_service.py @@ -2,10 +2,7 @@ from utils.db import Database from utils.vector_db.get_vector_store import get_vector_store from utils.vector_db.store_options import StoreOptions -from routes.workflow.generate_openapi_payload import ( - generate_openapi_payload, - load_openapi_spec, -) +from routes.workflow.generate_openapi_payload import generate_openapi_payload from utils.make_api_call import make_api_request from routes.workflow.typings.run_workflow_input import WorkflowData from langchain.tools.json.tool import JsonSpec @@ -43,7 +40,7 @@ def get_valid_url( def run_workflow(data: WorkflowData) -> Any: text = data.text - swagger_src = data.swagger_url + swagger_text = data.swagger_text headers = data.headers or {} # This will come from the request payload later on when implementing multi-tenancy namespace = "workflows" @@ -68,7 +65,7 @@ def run_workflow(data: WorkflowData) -> Any: record = mongo.workflows.find_one({"_id": first_document_id}) result = run_openapi_operations( - record, swagger_src, text, headers, server_base_url + record, swagger_text, text, headers, server_base_url ) return result @@ -77,13 +74,13 @@ def run_workflow(data: WorkflowData) -> Any: print(f"Error fetching data from namespace '{namespace}': {str(e)}") # Call openapi spec even if an error occurred with Qdrant - result = create_and_run_openapi_agent(swagger_src, text, headers) + result = create_and_run_openapi_agent(swagger_text, text, headers) return {"response": result} def run_openapi_operations( record: Any, - swagger_src: str, + swagger_text: str, text: str, headers: Any, server_base_url: str, @@ -94,7 +91,7 @@ def run_openapi_operations( for step in flow.get("steps"): operation_id = step.get("open_api_operation_id") api_payload = generate_openapi_payload( - swagger_src, text, operation_id, prev_api_response + swagger_text, text, operation_id, prev_api_response ) api_payload["path"] = get_valid_url(api_payload, server_base_url) diff --git a/llm-server/utils/detect_multiple_intents.py b/llm-server/utils/detect_multiple_intents.py index 2449e8272..df8290912 100644 --- a/llm-server/utils/detect_multiple_intents.py +++ b/llm-server/utils/detect_multiple_intents.py @@ -1,8 +1,5 @@ import re -from routes.workflow.generate_openapi_payload import ( - load_openapi_spec, -) from routes.workflow.typings.run_workflow_input import WorkflowData from langchain.tools.json.tool import JsonSpec from typing import List @@ -11,6 +8,7 @@ from langchain.chains import LLMChain from langchain.prompts import PromptTemplate from utils.get_llm import get_llm +import json # use spaCy or BERT for more accurate results @@ -50,13 +48,13 @@ def hasMultipleIntents(user_input: str) -> bool: # print(json.dumps(result, indent=2)) -def getSummaries(spec_source: str): +def getSummaries(swagger_text: str): """Get API endpoint summaries from an OpenAPI spec.""" summaries: List[str] = [] # Load the OpenAPI spec - spec_dict: Optional[Dict[str, Any]] = load_openapi_spec(spec_source) + spec_dict: Optional[Dict[str, Any]] = json.loads(swagger_text) if not spec_dict: raise ValueError("Unable to load OpenAPI spec") @@ -76,8 +74,8 @@ def getSummaries(spec_source: str): return summaries -def hasSingleIntent(spec_source: str, user_requirement: str) -> bool: - summaries = getSummaries(spec_source) +def hasSingleIntent(swagger_text: str, user_requirement: str) -> bool: + summaries = getSummaries(swagger_text) _DEFAULT_TEMPLATE = """ User: Here is a list of API summaries: {summaries} From 3757280ab33176a41bfc9b4d12a5271dd7b30f8f Mon Sep 17 00:00:00 2001 From: Ahmad Hassan <76843311+ah7255703@users.noreply.github.com> Date: Thu, 28 Sep 2023 23:22:27 +0300 Subject: [PATCH 2/2] CI patch --- .github/workflows/build-widget.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/workflows/build-widget.yml b/.github/workflows/build-widget.yml index 309b4c901..93d807afc 100644 --- a/.github/workflows/build-widget.yml +++ b/.github/workflows/build-widget.yml @@ -14,6 +14,9 @@ jobs: with: node-version: 18 + - name: install pnpm + run: npm install -g pnpm + - name: Install dependencies run: cd copilot-widget/ && pnpm install