-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathfrontapi.py
75 lines (60 loc) · 2.1 KB
/
frontapi.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
from fastapi import FastAPI, HTTPException, Request
from fastapi.responses import JSONResponse
from pydantic import BaseModel
import uvicorn
import asyncio
import json
app = FastAPI()
class ChatCompletionRequest(BaseModel):
model: str
messages: list
temperature: float = 1.0
top_p: float = 1.0
n: int = 1
stream: bool = False
stop: list = None
max_tokens: int = None
presence_penalty: float = 0.0
frequency_penalty: float = 0.0
logit_bias: dict = None
user: str = None
@app.post("/v1/chat/completions")
async def chat_completions(request: ChatCompletionRequest):
try:
# Here we would process the request, analyze the prompt,
# inject extra context, and send it to the actual LLM.
# For now, we'll just return a mock response.
messages = request.messages
prompt = messages[-1]['content'] if messages else ""
response = {
"id": "chatcmpl-123",
"object": "chat.completion",
"created": 1677652288,
"model": request.model,
"choices": [{
"index": 0,
"message": {
"role": "assistant",
"content": "This is a mock response from the shared context LLM API."
},
"finish_reason": "stop"
}],
"usage": {
"prompt_tokens": 9,
"completion_tokens": 12,
"total_tokens": 21
}
}
# Here you would save the original prompt and final output for Pinecone, etc.
return JSONResponse(content=response)
except Exception as e:
raise HTTPException(status_code=500, detail=str(e))
@app.post("/v1/completions")
async def completions(request: Request):
# This endpoint is for text completions (non-chat model)
# Implement similar logic as chat_completions
body = await request.json()
# Process the request...
return JSONResponse(content={"text": "Mock completion response"})
if __name__ == "__main__":
uvicorn.run(app, host="0.0.0.0", port=8000)