Skip to content

Commit

Permalink
Fixed textract errors; Added graceful shutdown; changed default models; Fixed startup event deprecation in FastAPI
Browse files Browse the repository at this point in the history
  • Loading branch information
Dicklesworthstone authored Apr 4, 2024
1 parent ee770c1 commit ff85086
Showing 1 changed file with 41 additions and 21 deletions.
62 changes: 41 additions & 21 deletions swiss_army_llama.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
import glob
import json
import os
import signal
import re
import tempfile
import traceback
Expand All @@ -33,6 +34,7 @@
from fastapi.param_functions import Body
from fastapi import FastAPI, HTTPException, Request, UploadFile, File, Depends, Form
from fastapi.responses import JSONResponse, FileResponse, HTMLResponse, Response
from contextlib import asynccontextmanager
from sqlalchemy import select
from sqlalchemy import text as sql_text
from sqlalchemy.exc import SQLAlchemyError
Expand All @@ -46,6 +48,21 @@
# Install uvloop as the event-loop implementation for all asyncio loops
# created after this point (faster drop-in replacement for the default loop).
asyncio.set_event_loop_policy(uvloop.EventLoopPolicy())
# Module-level logger; configuration lives in setup_logger() (defined elsewhere).
logger = setup_logger()

class GracefulExit(BaseException):
    """Raised to request an orderly shutdown of the server process.

    Deliberately derives from BaseException rather than Exception so that
    generic ``except Exception`` handlers (e.g. in libraries or middleware)
    cannot swallow it on its way up to the ``__main__`` guard.
    """

def raise_graceful_exit() -> None:
    """Abort the current control flow by raising :class:`GracefulExit`.

    Intended as a callback (presumably for signal handling — the ``signal``
    module is imported at the top of the file; confirm against the caller).
    """
    raise GracefulExit()

@asynccontextmanager
async def lifespan(app: FastAPI):
    """FastAPI lifespan context: run startup work, then serve until shutdown.

    Replaces the deprecated ``@app.on_event("startup")`` hook.
    """
    await initialize_globals()  # one-time startup initialization
    yield  # application handles requests here; no shutdown work is needed


# Note: the Ramdisk setup and teardown requires sudo; to enable password-less sudo, edit your sudoers file with `sudo visudo`.
# Add the following lines, replacing username with your actual username
# username ALL=(ALL) NOPASSWD: /bin/mount -t tmpfs -o size=*G tmpfs /mnt/ramdisk
Expand All @@ -58,7 +75,7 @@
USE_SECURITY_TOKEN = config("USE_SECURITY_TOKEN", default=False, cast=bool)
else:
USE_SECURITY_TOKEN = False
DEFAULT_MODEL_NAME = config("DEFAULT_MODEL_NAME", default="yarn-mistral-7b-128k", cast=str)
DEFAULT_MODEL_NAME = config("DEFAULT_MODEL_NAME", default="mistral-7b-instruct-v0.2", cast=str)
USE_RAMDISK = config("USE_RAMDISK", default=False, cast=bool)
RAMDISK_PATH = config("RAMDISK_PATH", default="/mnt/ramdisk", cast=str)
BASE_DIRECTORY = os.path.dirname(os.path.abspath(__file__))
Expand All @@ -68,9 +85,9 @@
description_string = """
🇨🇭🎖️🦙 Swiss Army Llama is your One-Stop-Shop to Quickly and Conveniently Integrate Powerful Local LLM Functionality into your Project via a REST API.
"""
# Single application instance. The `lifespan` context manager (defined above)
# handles startup; docs_url="/" serves the Swagger UI at the site root.
# (The earlier duplicate assignment without `lifespan` was stale diff residue:
# it built a throwaway app whose startup hook never ran.)
app = FastAPI(title="Swiss Army Llama", description=description_string, docs_url="/", lifespan=lifespan)  # Set the Swagger UI to root
@app.exception_handler(SQLAlchemyError)
async def sqlalchemy_exception_handler(request: Request, exc: SQLAlchemyError) -> JSONResponse:
logger.exception(exc)
Expand Down Expand Up @@ -100,7 +117,7 @@ async def custom_swagger_ui_html():
### Example Response:
```json
{
"model_names": ["yarn-llama-2-7b-128k", "openchat_v3.2_super", "yarn-mistral-7b-128k", "my_super_custom_model"]
"model_names": ["yarn-llama-2-7b-128k", "Hermes-2-Pro-Mistral-7B", "mistral-7b-instruct-v0.2", "my_super_custom_model"]
}
```""",
response_description="A JSON object containing the list of available model names.")
Expand Down Expand Up @@ -277,7 +294,7 @@ async def add_new_model(model_url: str, token: str = None) -> Dict[str, Any]:
```json
{
"text": "This is a sample text.",
"llm_model_name": "openchat_v3.2_super"
"llm_model_name": "Hermes-2-Pro-Mistral-7B"
}
```
Expand Down Expand Up @@ -332,7 +349,7 @@ async def get_embedding_vector_for_string(request: EmbeddingRequest, req: Reques
```json
{
"text": "This is a sample text.",
"llm_model_name": "openchat_v3.2_super"
"llm_model_name": "Hermes-2-Pro-Mistral-7B"
}
```
Expand Down Expand Up @@ -485,7 +502,7 @@ async def get_token_level_embeddings_matrix_and_combined_feature_vector_for_stri
{
"text1": "This is a sample text.",
"text2": "This is another sample text.",
"llm_model_name": "openchat_v3.2_super",
"llm_model_name": "Hermes-2-Pro-Mistral-7B",
"similarity_measure": "all"
}
```""")
Expand Down Expand Up @@ -562,7 +579,7 @@ async def compute_similarity_between_strings(request: SimilarityRequest, req: Re
```json
{
"query_text": "Find me the most similar string!",
"llm_model_name": "openchat_v3.2_super",
"llm_model_name": "Hermes-2-Pro-Mistral-7B",
"number_of_most_similar_strings_to_return": 5
}
```
Expand Down Expand Up @@ -652,7 +669,7 @@ async def search_stored_embeddings_with_query_string_for_semantic_similarity(req
```json
{
"query_text": "Find me the most similar string!",
"llm_model_name": "openchat_v3.2_super",
"llm_model_name": "Hermes-2-Pro-Mistral-7B",
"similarity_filter_percentage": 0.02,
"number_of_most_similar_strings_to_return": 5
}
Expand Down Expand Up @@ -849,7 +866,7 @@ async def get_all_embedding_vectors_for_document(file: UploadFile = File(...),
```json
{
"input_prompt": "The Kings of France in the 17th Century:",
"llm_model_name": "yarn-mistral-7b-128k",
"llm_model_name": "mistral-7b-instruct-v0.2",
"temperature": 0.95,
"grammar_file_string": "json",
"number_of_tokens_to_generate": 500,
Expand All @@ -865,7 +882,7 @@ async def get_all_embedding_vectors_for_document(file: UploadFile = File(...),
[
{
"input_prompt": "The Kings of France in the 17th Century:",
"llm_model_name": "yarn-mistral-7b-128k",
"llm_model_name": "mistral-7b-instruct-v0.2",
"grammar_file_string": "json",
"number_of_tokens_to_generate": 500,
"number_of_completions_to_generate": 3,
Expand All @@ -875,7 +892,7 @@ async def get_all_embedding_vectors_for_document(file: UploadFile = File(...),
},
{
"input_prompt": "The Kings of France in the 17th Century:",
"llm_model_name": "yarn-mistral-7b-128k",
"llm_model_name": "mistral-7b-instruct-v0.2",
"grammar_file_string": "json",
"number_of_tokens_to_generate": 500,
"number_of_completions_to_generate": 3,
Expand All @@ -885,7 +902,7 @@ async def get_all_embedding_vectors_for_document(file: UploadFile = File(...),
},
{
"input_prompt": "The Kings of France in the 17th Century:",
"llm_model_name": "yarn-mistral-7b-128k",
"llm_model_name": "mistral-7b-instruct-v0.2",
"grammar_file_string": "json",
"number_of_tokens_to_generate": 500,
"number_of_completions_to_generate": 3,
Expand Down Expand Up @@ -1199,12 +1216,15 @@ def show_logs(minutes: int = 5):
def show_logs_default():
    """Return the logs for the default window by delegating to
    ``show_logs_func`` with a 5-minute lookback (mirrors ``show_logs``'s
    default ``minutes=5``)."""
    return show_logs_func(5)



# NOTE: the former `@app.on_event("startup")` handler calling
# initialize_globals() was removed. `on_event` is deprecated in FastAPI,
# and when an explicit `lifespan` is passed to FastAPI() (as done above,
# which also awaits initialize_globals()), the on_event hook is redundant.


if __name__ == "__main__":
    # Run the server under the try block so shutdown paths are actually
    # reachable. (The previous bare `uvicorn.run(...)` call immediately before
    # the `try:` was stale diff residue: it would have blocked here forever,
    # leaving the graceful-shutdown handling below as dead code.)
    try:
        uvicorn.run("swiss_army_llama:app", **option)
    except GracefulExit:
        # Raised by raise_graceful_exit() (e.g. from a signal handler).
        logger.info("Received signal to terminate. Shutting down gracefully...")
        sys.exit(0)
    except KeyboardInterrupt:
        logger.info("Received KeyboardInterrupt. Shutting down gracefully...")
        sys.exit(0)
    except Exception:
        # Anything else is unexpected: log the traceback and exit non-zero.
        logger.exception("Unhandled exception occurred during shutdown.")
        sys.exit(1)

0 comments on commit ff85086

Please sign in to comment.