diff --git a/e2e/basic.spec.ts b/e2e/basic.spec.ts
index 93e3edfb9..f2d0d6ac6 100644
--- a/e2e/basic.spec.ts
+++ b/e2e/basic.spec.ts
@@ -12,7 +12,7 @@ import type {
 } from "../helpers";
 import { createTestDir, runCreateLlama, type AppType } from "./utils";
 
-const templateTypes: TemplateType[] = ["streaming", "simple"];
+const templateTypes: TemplateType[] = ["streaming"];
 const templateFrameworks: TemplateFramework[] = [
   "nextjs",
   "express",
@@ -30,20 +30,8 @@ for (const templateType of templateTypes) {
     for (const templateEngine of templateEngines) {
       for (const templateUI of templateUIs) {
         for (const templatePostInstallAction of templatePostInstallActions) {
-          if (templateFramework === "nextjs" && templateType === "simple") {
-            // nextjs doesn't support simple templates - skip tests
-            continue;
-          }
           const appType: AppType =
-            templateFramework === "express" || templateFramework === "fastapi"
-              ? templateType === "simple"
-                ? "--no-frontend" // simple templates don't have frontends
-                : "--frontend"
-              : "";
-          if (appType === "--no-frontend" && templateUI !== "html") {
-            // if there's no frontend, don't iterate over UIs
-            continue;
-          }
+            templateFramework === "nextjs" ? "" : "--frontend";
           test.describe(`try create-llama ${templateType} ${templateFramework} ${templateEngine} ${templateUI} ${appType} ${templatePostInstallAction}`, async () => {
             let port: number;
             let externalPort: number;
@@ -79,7 +67,6 @@ for (const templateType of templateTypes) {
           });
           test("Frontend should have a title", async ({ page }) => {
             test.skip(templatePostInstallAction !== "runApp");
-            test.skip(appType === "--no-frontend");
             await page.goto(`http://localhost:${port}`);
             await expect(page.getByText("Built by LlamaIndex")).toBeVisible();
           });
@@ -88,7 +75,6 @@ for (const templateType of templateTypes) {
             page,
           }) => {
             test.skip(templatePostInstallAction !== "runApp");
-            test.skip(appType === "--no-frontend");
             await page.goto(`http://localhost:${port}`);
             await page.fill("form input", "hello");
             const [response] = await Promise.all([
@@ -109,14 +95,13 @@ for (const templateType of templateTypes) {
             expect(response.ok()).toBeTruthy();
           });
 
-          test("Backend should response when calling API", async ({
+          test("Backend frameworks should respond when calling non-streaming chat API", async ({
             request,
           }) => {
             test.skip(templatePostInstallAction !== "runApp");
-            test.skip(appType !== "--no-frontend");
-            const backendPort = appType === "" ? port : externalPort;
+            test.skip(templateFramework === "nextjs");
             const response = await request.post(
-              `http://localhost:${backendPort}/api/chat`,
+              `http://localhost:${externalPort}/api/chat/request`,
               {
                 data: {
                   messages: [
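For context, the reworked backend check above is now a plain request-level test against the new non-streaming route. The following standalone sketch shows the same interaction outside the generated suite; the fixed port and the `{ result: ... }` response shape (taken from the FastAPI `_Result` model later in this diff, and assumed to match the Express controller) are illustrative assumptions:

```
import { expect, test } from "@playwright/test";

// Hypothetical fixed port for illustration; the real suite resolves
// externalPort per generated app.
const externalPort = 8000;

test("non-streaming chat endpoint returns an assistant message", async ({
  request,
}) => {
  const response = await request.post(
    `http://localhost:${externalPort}/api/chat/request`,
    {
      data: {
        messages: [{ role: "user", content: "Hello" }],
      },
    },
  );
  expect(response.ok()).toBeTruthy();
  // Assumed response shape: { result: { role: "assistant", content: "..." } }
  const body = await response.json();
  expect(body.result.role).toBe("assistant");
});
```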
diff --git a/helpers/types.ts b/helpers/types.ts
index 0c5a30f05..0bed9a6db 100644
--- a/helpers/types.ts
+++ b/helpers/types.ts
@@ -1,7 +1,7 @@
 import { PackageManager } from "../helpers/get-pkg-manager";
 import { Tool } from "./tools";
 
-export type TemplateType = "simple" | "streaming" | "community" | "llamapack";
+export type TemplateType = "streaming" | "community" | "llamapack";
 export type TemplateFramework = "nextjs" | "express" | "fastapi";
 export type TemplateEngine = "simple" | "context";
 export type TemplateUI = "html" | "shadcn";
diff --git a/questions.ts b/questions.ts
index 6fc06da0d..6cec0a6e8 100644
--- a/questions.ts
+++ b/questions.ts
@@ -380,8 +380,7 @@ export const askQuestions = async (
         name: "template",
         message: "Which template would you like to use?",
         choices: [
-          { title: "Chat without streaming", value: "simple" },
-          { title: "Chat with streaming", value: "streaming" },
+          { title: "Chat", value: "streaming" },
           {
             title: `Community template from ${styledRepo}`,
             value: "community",
@@ -450,13 +449,10 @@ export const askQuestions = async (
     program.framework = getPrefOrDefault("framework");
   } else {
     const choices = [
+      { title: "NextJS", value: "nextjs" },
       { title: "Express", value: "express" },
       { title: "FastAPI (Python)", value: "fastapi" },
     ];
-    if (program.template === "streaming") {
-      // allow NextJS only for streaming template
-      choices.unshift({ title: "NextJS", value: "nextjs" });
-    }
 
     const { framework } = await prompts(
       {
@@ -473,10 +469,7 @@ export const askQuestions = async (
     }
   }
 
-  if (
-    program.template === "streaming" &&
-    (program.framework === "express" || program.framework === "fastapi")
-  ) {
+  if (program.framework === "express" || program.framework === "fastapi") {
     // if a backend-only framework is selected, ask whether we should create a frontend
     // (only for streaming backends)
     if (program.frontend === undefined) {
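With the template gate removed, the framework question always offers all three frameworks. A minimal sketch of the resulting prompt behavior, assuming the question wording (only the choices list appears in this hunk):

```
import prompts from "prompts";

// All three frameworks are now offered regardless of the chosen template.
const choices = [
  { title: "NextJS", value: "nextjs" },
  { title: "Express", value: "express" },
  { title: "FastAPI (Python)", value: "fastapi" },
];

const { framework } = await prompts({
  type: "select",
  name: "framework",
  message: "Which framework would you like to use?", // assumed wording
  choices,
});

console.log(`Selected framework: ${framework}`);
```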
diff --git a/templates/types/simple/express/README-template.md b/templates/types/simple/express/README-template.md
deleted file mode 100644
index a596bc067..000000000
--- a/templates/types/simple/express/README-template.md
+++ /dev/null
@@ -1,56 +0,0 @@
-This is a [LlamaIndex](https://www.llamaindex.ai/) project using [Express](https://expressjs.com/) bootstrapped with [`create-llama`](https://github.com/run-llama/LlamaIndexTS/tree/main/packages/create-llama).
-
-## Getting Started
-
-First, install the dependencies:
-
-```
-npm install
-```
-
-Second, generate the embeddings of the documents in the `./data` directory (if this folder exists - otherwise, skip this step):
-
-```
-npm run generate
-```
-
-Third, run the development server:
-
-```
-npm run dev
-```
-
-Then call the express API endpoint `/api/chat` to see the result:
-
-```
-curl --location 'localhost:8000/api/chat' \
---header 'Content-Type: application/json' \
---data '{ "messages": [{ "role": "user", "content": "Hello" }] }'
-```
-
-You can start editing the API by modifying `src/controllers/chat.controller.ts`. The endpoint auto-updates as you save the file.
-
-## Production
-
-First, build the project:
-
-```
-npm run build
-```
-
-You can then run the production server:
-
-```
-NODE_ENV=production npm run start
-```
-
-> Note that the `NODE_ENV` environment variable is set to `production`. This disables CORS for all origins.
-
-## Learn More
-
-To learn more about LlamaIndex, take a look at the following resources:
-
-- [LlamaIndex Documentation](https://docs.llamaindex.ai) - learn about LlamaIndex (Python features).
-- [LlamaIndexTS Documentation](https://ts.llamaindex.ai) - learn about LlamaIndex (Typescript features).
-
-You can check out [the LlamaIndexTS GitHub repository](https://github.com/run-llama/LlamaIndexTS) - your feedback and contributions are welcome!
diff --git a/templates/types/simple/express/eslintrc.json b/templates/types/simple/express/eslintrc.json
deleted file mode 100644
index cf20cdc7a..000000000
--- a/templates/types/simple/express/eslintrc.json
+++ /dev/null
@@ -1,3 +0,0 @@
-{
-  "extends": "eslint:recommended"
-}
diff --git a/templates/types/simple/express/gitignore b/templates/types/simple/express/gitignore
deleted file mode 100644
index 7d5e30fc2..000000000
--- a/templates/types/simple/express/gitignore
+++ /dev/null
@@ -1,3 +0,0 @@
-# local env files
-.env
-node_modules/
\ No newline at end of file
diff --git a/templates/types/simple/express/index.ts b/templates/types/simple/express/index.ts
deleted file mode 100644
index 150dbf598..000000000
--- a/templates/types/simple/express/index.ts
+++ /dev/null
@@ -1,44 +0,0 @@
-/* eslint-disable turbo/no-undeclared-env-vars */
-import cors from "cors";
-import "dotenv/config";
-import express, { Express, Request, Response } from "express";
-import { initObservability } from "./src/observability";
-import chatRouter from "./src/routes/chat.route";
-
-const app: Express = express();
-const port = parseInt(process.env.PORT || "8000");
-
-const env = process.env["NODE_ENV"];
-const isDevelopment = !env || env === "development";
-const prodCorsOrigin = process.env["PROD_CORS_ORIGIN"];
-
-initObservability();
-
-app.use(express.json());
-
-if (isDevelopment) {
-  console.warn("Running in development mode - allowing CORS for all origins");
-  app.use(cors());
-} else if (prodCorsOrigin) {
-  console.log(
-    `Running in production mode - allowing CORS for domain: ${prodCorsOrigin}`,
-  );
-  const corsOptions = {
-    origin: prodCorsOrigin, // Restrict to production domain
-  };
-  app.use(cors(corsOptions));
-} else {
-  console.warn("Production CORS origin not set, defaulting to no CORS.");
-}
-
-app.use(express.text());
-
-app.get("/", (req: Request, res: Response) => {
-  res.send("LlamaIndex Express Server");
-});
-
-app.use("/api/chat", chatRouter);
-
-app.listen(port, () => {
-  console.log(`⚡️[server]: Server is running at http://localhost:${port}`);
-});
diff --git a/templates/types/simple/express/package.json b/templates/types/simple/express/package.json
deleted file mode 100644
index eef99bac1..000000000
--- a/templates/types/simple/express/package.json
+++ /dev/null
@@ -1,27 +0,0 @@
-{
-  "name": "llama-index-express",
-  "version": "1.0.0",
-  "main": "dist/index.js",
-  "type": "module",
-  "scripts": {
-    "build": "tsup index.ts --format esm --dts",
-    "start": "node dist/index.js",
-    "dev": "concurrently \"tsup index.ts --format esm --dts --watch\" \"nodemon -q dist/index.js\""
-  },
-  "dependencies": {
-    "cors": "^2.8.5",
-    "dotenv": "^16.3.1",
-    "express": "^4.18.2",
-    "llamaindex": "latest"
-  },
-  "devDependencies": {
-    "@types/cors": "^2.8.17",
-    "@types/express": "^4.17.21",
-    "@types/node": "^20.9.5",
-    "concurrently": "^8.2.2",
-    "eslint": "^8.54.0",
-    "nodemon": "^3.0.1",
-    "tsup": "^7.3.0",
-    "typescript": "^5.3.2"
-  }
-}
diff --git a/templates/types/simple/express/src/controllers/engine/chat.ts b/templates/types/simple/express/src/controllers/engine/chat.ts
deleted file mode 100644
index abb02e90c..000000000
--- a/templates/types/simple/express/src/controllers/engine/chat.ts
+++ /dev/null
@@ -1,7 +0,0 @@
-import { LLM, SimpleChatEngine } from "llamaindex";
-
-export async function createChatEngine(llm: LLM) {
-  return new SimpleChatEngine({
-    llm,
-  });
-}
diff --git a/templates/types/simple/express/src/observability/index.ts b/templates/types/simple/express/src/observability/index.ts
deleted file mode 100644
index 2e4ce2b18..000000000
--- a/templates/types/simple/express/src/observability/index.ts
+++ /dev/null
@@ -1 +0,0 @@
-export const initObservability = () => {};
diff --git a/templates/types/simple/express/src/routes/chat.route.ts b/templates/types/simple/express/src/routes/chat.route.ts
deleted file mode 100644
index bdfeb0853..000000000
--- a/templates/types/simple/express/src/routes/chat.route.ts
+++ /dev/null
@@ -1,8 +0,0 @@
-import express from "express";
-import { chat } from "../controllers/chat.controller";
-
-const llmRouter = express.Router();
-
-llmRouter.route("/").post(chat);
-
-export default llmRouter;
diff --git a/templates/types/simple/express/tsconfig.json b/templates/types/simple/express/tsconfig.json
deleted file mode 100644
index bc819cab4..000000000
--- a/templates/types/simple/express/tsconfig.json
+++ /dev/null
@@ -1,10 +0,0 @@
-{
-  "compilerOptions": {
-    "target": "es2016",
-    "esModuleInterop": true,
-    "forceConsistentCasingInFileNames": true,
-    "strict": true,
-    "skipLibCheck": true,
-    "moduleResolution": "node"
-  }
-}
diff --git a/templates/types/simple/fastapi/app/__init__.py b/templates/types/simple/fastapi/app/__init__.py
deleted file mode 100644
index e69de29bb..000000000
diff --git a/templates/types/simple/fastapi/app/api/__init__.py b/templates/types/simple/fastapi/app/api/__init__.py
deleted file mode 100644
index e69de29bb..000000000
diff --git a/templates/types/simple/fastapi/app/api/routers/__init__.py b/templates/types/simple/fastapi/app/api/routers/__init__.py
deleted file mode 100644
index e69de29bb..000000000
diff --git a/templates/types/simple/fastapi/app/api/routers/chat.py b/templates/types/simple/fastapi/app/api/routers/chat.py
deleted file mode 100644
index 8405f2ac5..000000000
--- a/templates/types/simple/fastapi/app/api/routers/chat.py
+++ /dev/null
@@ -1,54 +0,0 @@
-from typing import List
-from pydantic import BaseModel
-from fastapi import APIRouter, Depends, HTTPException, status
-from llama_index.core.chat_engine.types import BaseChatEngine
-from llama_index.core.llms import ChatMessage, MessageRole
-from app.engine import get_chat_engine
-
-chat_router = r = APIRouter()
-
-
-class _Message(BaseModel):
-    role: MessageRole
-    content: str
-
-
-class _ChatData(BaseModel):
-    messages: List[_Message]
-
-
-class _Result(BaseModel):
-    result: _Message
-
-
-@r.post("")
-async def chat(
-    data: _ChatData,
-    chat_engine: BaseChatEngine = Depends(get_chat_engine),
-) -> _Result:
-    # check preconditions and get last message
-    if len(data.messages) == 0:
-        raise HTTPException(
-            status_code=status.HTTP_400_BAD_REQUEST,
-            detail="No messages provided",
-        )
-    lastMessage = data.messages.pop()
-    if lastMessage.role != MessageRole.USER:
-        raise HTTPException(
-            status_code=status.HTTP_400_BAD_REQUEST,
-            detail="Last message must be from user",
-        )
-    # convert messages coming from the request to type ChatMessage
-    messages = [
-        ChatMessage(
-            role=m.role,
-            content=m.content,
-        )
-        for m in data.messages
-    ]
-
-    # query chat engine
-    response = await chat_engine.achat(lastMessage.content, messages)
-    return _Result(
-        result=_Message(role=MessageRole.ASSISTANT, content=response.response)
-    )
diff --git a/templates/types/simple/fastapi/app/engine/__init__.py b/templates/types/simple/fastapi/app/engine/__init__.py
deleted file mode 100644
index fd8bb96a8..000000000
--- a/templates/types/simple/fastapi/app/engine/__init__.py
+++ /dev/null
@@ -1,5 +0,0 @@
-from llama_index.core.chat_engine import SimpleChatEngine
-
-
-def get_chat_engine():
-    return SimpleChatEngine.from_defaults()
diff --git a/templates/types/simple/fastapi/app/settings.py b/templates/types/simple/fastapi/app/settings.py
deleted file mode 100644
index 3f2c5e078..000000000
--- a/templates/types/simple/fastapi/app/settings.py
+++ /dev/null
@@ -1,41 +0,0 @@
-import os
-from typing import Dict
-from llama_index.core.settings import Settings
-from llama_index.llms.openai import OpenAI
-from llama_index.embeddings.openai import OpenAIEmbedding
-
-
-def llm_config_from_env() -> Dict:
-    from llama_index.core.constants import DEFAULT_TEMPERATURE
-
-    model = os.getenv("MODEL")
-    temperature = os.getenv("LLM_TEMPERATURE", DEFAULT_TEMPERATURE)
-    max_tokens = os.getenv("LLM_MAX_TOKENS")
-
-    config = {
-        "model": model,
-        "temperature": float(temperature),
-        "max_tokens": int(max_tokens) if max_tokens is not None else None,
-    }
-    return config
-
-
-def embedding_config_from_env() -> Dict:
-    model = os.getenv("EMBEDDING_MODEL")
-    dimension = os.getenv("EMBEDDING_DIM")
-
-    config = {
-        "model": model,
-        "dimension": int(dimension) if dimension is not None else None,
-    }
-    return config
-
-
-def init_settings():
-    llm_configs = llm_config_from_env()
-    embedding_configs = embedding_config_from_env()
-
-    Settings.llm = OpenAI(**llm_configs)
-    Settings.embed_model = OpenAIEmbedding(**embedding_configs)
-    Settings.chunk_size = int(os.getenv("CHUNK_SIZE", "1024"))
-    Settings.chunk_overlap = int(os.getenv("CHUNK_OVERLAP", "20"))
diff --git a/templates/types/simple/fastapi/gitignore b/templates/types/simple/fastapi/gitignore
deleted file mode 100644
index a6ad564cd..000000000
--- a/templates/types/simple/fastapi/gitignore
+++ /dev/null
@@ -1,3 +0,0 @@
-__pycache__
-storage
-.env
diff --git a/templates/types/simple/fastapi/main.py b/templates/types/simple/fastapi/main.py
deleted file mode 100644
index 41721a8f2..000000000
--- a/templates/types/simple/fastapi/main.py
+++ /dev/null
@@ -1,39 +0,0 @@
-from dotenv import load_dotenv
-
-load_dotenv()
-
-import logging
-import os
-import uvicorn
-from fastapi import FastAPI
-from fastapi.middleware.cors import CORSMiddleware
-from app.api.routers.chat import chat_router
-from app.settings import init_settings
-
-app = FastAPI()
-
-init_settings()
-
-environment = os.getenv("ENVIRONMENT", "dev")  # Default to 'development' if not set
-
-
-if environment == "dev":
-    logger = logging.getLogger("uvicorn")
-    logger.warning("Running in development mode - allowing CORS for all origins")
-    app.add_middleware(
-        CORSMiddleware,
-        allow_origins=["*"],
-        allow_credentials=True,
-        allow_methods=["*"],
-        allow_headers=["*"],
-    )
-
-app.include_router(chat_router, prefix="/api/chat")
-
-
-if __name__ == "__main__":
-    app_host = os.getenv("APP_HOST", "0.0.0.0")
-    app_port = int(os.getenv("APP_PORT", "8000"))
-    reload = True if environment == "dev" else False
-
-    uvicorn.run(app="main:app", host=app_host, port=app_port, reload=reload)
diff --git a/templates/types/simple/fastapi/pyproject.toml b/templates/types/simple/fastapi/pyproject.toml
deleted file mode 100644
index 5dae9d818..000000000
--- a/templates/types/simple/fastapi/pyproject.toml
+++ /dev/null
@@ -1,19 +0,0 @@
-[tool.poetry]
-name = "app"
-version = "0.1.0"
-description = ""
-authors = ["Marcus Schiesser <mail@marcusschiesser.de>"]
-readme = "README.md"
-
-[tool.poetry.dependencies]
-python = "^3.11,<3.12"
-fastapi = "^0.109.1"
-uvicorn = { extras = ["standard"], version = "^0.23.2" }
-python-dotenv = "^1.0.0"
-llama-index = "0.10.15"
-llama-index-core = "0.10.15"
-llama-index-agent-openai = "0.1.5"
-
-[build-system]
-requires = ["poetry-core"]
-build-backend = "poetry.core.masonry.api"
diff --git a/templates/types/simple/fastapi/tests/__init__.py b/templates/types/simple/fastapi/tests/__init__.py
deleted file mode 100644
index e69de29bb..000000000
diff --git a/templates/types/streaming/express/README-template.md b/templates/types/streaming/express/README-template.md
index 452069d2c..62a5f2485 100644
--- a/templates/types/streaming/express/README-template.md
+++ b/templates/types/streaming/express/README-template.md
@@ -20,15 +20,29 @@ Third, run the development server:
 
 ```
 npm run dev
 ```
 
-Then call the express API endpoint `/api/chat` to see the result:
+The example provides two different API endpoints:
+
+1. `/api/chat` - a streaming chat endpoint (found in `src/controllers/chat.controller.ts`)
+2. `/api/chat/request` - a non-streaming chat endpoint (found in `src/controllers/chat-request.controller.ts`)
+
+You can test the streaming endpoint with the following curl request:
 
 ```
 curl --location 'localhost:8000/api/chat' \
---header 'Content-Type: text/plain' \
+--header 'Content-Type: application/json' \
+--data '{ "messages": [{ "role": "user", "content": "Hello" }] }'
+```
+
+And for the non-streaming endpoint run:
+
+```
+curl --location 'localhost:8000/api/chat/request' \
+--header 'Content-Type: application/json' \
 --data '{ "messages": [{ "role": "user", "content": "Hello" }] }'
 ```
 
-You can start editing the API by modifying `src/controllers/chat.controller.ts`. The endpoint auto-updates as you save the file.
+You can start editing the API by modifying `src/controllers/chat.controller.ts` or `src/controllers/chat-request.controller.ts`. The endpoints auto-update as you save the file.
+You can delete the endpoint that you're not using.
 
 ## Production
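To mirror the curl examples above from application code, here is a small TypeScript client sketch for the non-streaming endpoint. The `{ result: { role, content } }` response shape is an assumption based on the FastAPI `_Result` model elsewhere in this diff; the Express controller's final response statement is not shown in this hunk:

```
// Minimal client for the non-streaming endpoint, mirroring the curl example.
// Assumes the server runs on localhost:8000 and returns { result: ChatMessage }.
async function chatOnce(content: string): Promise<string> {
  const response = await fetch("http://localhost:8000/api/chat/request", {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify({ messages: [{ role: "user", content }] }),
  });
  if (!response.ok) {
    throw new Error(`Request failed with status ${response.status}`);
  }
  // The whole assistant message arrives in a single JSON payload.
  const { result } = await response.json();
  return result.content;
}

chatOnce("Hello").then(console.log);
```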
diff --git a/templates/types/simple/express/src/controllers/chat.controller.ts b/templates/types/streaming/express/src/controllers/chat-request.controller.ts
similarity index 94%
rename from templates/types/simple/express/src/controllers/chat.controller.ts
rename to templates/types/streaming/express/src/controllers/chat-request.controller.ts
index 5fdb88255..b81b59997 100644
--- a/templates/types/simple/express/src/controllers/chat.controller.ts
+++ b/templates/types/streaming/express/src/controllers/chat-request.controller.ts
@@ -21,7 +21,7 @@ const convertMessageContent = (
   ];
 };
 
-export const chat = async (req: Request, res: Response) => {
+export const chatRequest = async (req: Request, res: Response) => {
   try {
     const { messages, data }: { messages: ChatMessage[]; data: any } = req.body;
     const userMessage = messages.pop();
@@ -48,7 +48,7 @@ export const chat = async (req: Request, res: Response) => {
     // Calling LlamaIndex's ChatEngine to get a response
     const response = await chatEngine.chat({
       message: userMessageContent,
-      messages,
+      chatHistory: messages,
     });
     const result: ChatMessage = {
       role: "assistant",
diff --git a/templates/types/streaming/express/src/routes/chat.route.ts b/templates/types/streaming/express/src/routes/chat.route.ts
index bdfeb0853..f935980c1 100644
--- a/templates/types/streaming/express/src/routes/chat.route.ts
+++ b/templates/types/streaming/express/src/routes/chat.route.ts
@@ -1,8 +1,10 @@
 import express from "express";
+import { chatRequest } from "../controllers/chat-request.controller";
 import { chat } from "../controllers/chat.controller";
 
 const llmRouter = express.Router();
 
 llmRouter.route("/").post(chat);
+llmRouter.route("/request").post(chatRequest);
 
 export default llmRouter;
diff --git a/templates/types/streaming/fastapi/README-template.md b/templates/types/streaming/fastapi/README-template.md
index 2bea76831..ca7e0b339 100644
--- a/templates/types/streaming/fastapi/README-template.md
+++ b/templates/types/streaming/fastapi/README-template.md
@@ -11,7 +11,7 @@ poetry install
 poetry shell
 ```
 
-By default, we use the OpenAI LLM (though you can customize, see `app/settings.py`). As a result you need to specify an `OPENAI_API_KEY` in an .env file in this directory.
+By default, we use the OpenAI LLM (though you can customize, see `app/settings.py`). As a result, you need to specify an `OPENAI_API_KEY` in an .env file in this directory.
 
 Example `.env` file:
 
@@ -33,7 +33,12 @@ Third, run the development server:
 python main.py
 ```
 
-Then call the API endpoint `/api/chat` to see the result:
+The example provides two different API endpoints:
+
+1. `/api/chat` - a streaming chat endpoint
+2. `/api/chat/request` - a non-streaming chat endpoint
+
+You can test the streaming endpoint with the following curl request:
 
 ```
 curl --location 'localhost:8000/api/chat' \
@@ -41,7 +46,15 @@ curl --location 'localhost:8000/api/chat' \
 --data '{ "messages": [{ "role": "user", "content": "Hello" }] }'
 ```
 
-You can start editing the API by modifying `app/api/routers/chat.py`. The endpoint auto-updates as you save the file.
+And for the non-streaming endpoint run:
+
+```
+curl --location 'localhost:8000/api/chat/request' \
+--header 'Content-Type: application/json' \
+--data '{ "messages": [{ "role": "user", "content": "Hello" }] }'
+```
+
+You can start editing the API endpoints by modifying `app/api/routers/chat.py`. The endpoints auto-update as you save the file. You can delete the endpoint you're not using.
 
 Open [http://localhost:8000/docs](http://localhost:8000/docs) with your browser to see the Swagger UI of the API.
 
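The `chatHistory` rename in the controller above is the behavioral fix in this rename: LlamaIndexTS chat engines accept prior turns under `chatHistory`, so passing them as `messages` left the engine without conversation history. A minimal sketch of the corrected call (the model and history values are placeholders):

```
import { OpenAI, SimpleChatEngine, type ChatMessage } from "llamaindex";

// Same engine the deleted simple template constructed.
const chatEngine = new SimpleChatEngine({ llm: new OpenAI() });

const history: ChatMessage[] = [
  { role: "user", content: "My name is Ada." },
  { role: "assistant", content: "Nice to meet you, Ada!" },
];

const response = await chatEngine.chat({
  message: "What is my name?",
  chatHistory: history, // prior turns must go under chatHistory
});

console.log(response.response);
```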
diff --git a/templates/types/streaming/fastapi/app/api/routers/chat.py b/templates/types/streaming/fastapi/app/api/routers/chat.py
index 278a9a753..2ef7ff1f7 100644
--- a/templates/types/streaming/fastapi/app/api/routers/chat.py
+++ b/templates/types/streaming/fastapi/app/api/routers/chat.py
@@ -5,6 +5,7 @@ from llama_index.core.chat_engine.types import BaseChatEngine
 from llama_index.core.llms import ChatMessage, MessageRole
 
 from app.engine import get_chat_engine
+from typing import List, Tuple
 
 chat_router = r = APIRouter()
 
@@ -18,20 +19,19 @@ class _ChatData(BaseModel):
     messages: List[_Message]
 
 
-@r.post("")
-async def chat(
-    request: Request,
-    data: _ChatData,
-    chat_engine: BaseChatEngine = Depends(get_chat_engine),
-):
+class _Result(BaseModel):
+    result: _Message
+
+
+async def parse_chat_data(data: _ChatData) -> Tuple[str, List[ChatMessage]]:
     # check preconditions and get last message
     if len(data.messages) == 0:
         raise HTTPException(
             status_code=status.HTTP_400_BAD_REQUEST,
             detail="No messages provided",
         )
-    lastMessage = data.messages.pop()
-    if lastMessage.role != MessageRole.USER:
+    last_message = data.messages.pop()
+    if last_message.role != MessageRole.USER:
         raise HTTPException(
             status_code=status.HTTP_400_BAD_REQUEST,
             detail="Last message must be from user",
@@ -44,16 +44,38 @@ async def chat(
         )
         for m in data.messages
     ]
+    return last_message.content, messages
 
-    # query chat engine
-    response = await chat_engine.astream_chat(lastMessage.content, messages)
 
-    # stream response
+# streaming endpoint - delete if not needed
+@r.post("")
+async def chat(
+    request: Request,
+    data: _ChatData,
+    chat_engine: BaseChatEngine = Depends(get_chat_engine),
+):
+    last_message_content, messages = await parse_chat_data(data)
+
+    response = await chat_engine.astream_chat(last_message_content, messages)
+
     async def event_generator():
         async for token in response.async_response_gen():
-            # If client closes connection, stop sending events
             if await request.is_disconnected():
                 break
             yield token
 
     return StreamingResponse(event_generator(), media_type="text/plain")
+
+
+# non-streaming endpoint - delete if not needed
+@r.post("/request")
+async def chat_request(
+    data: _ChatData,
+    chat_engine: BaseChatEngine = Depends(get_chat_engine),
+) -> _Result:
+    last_message_content, messages = await parse_chat_data(data)
+
+    response = await chat_engine.achat(last_message_content, messages)
+    return _Result(
+        result=_Message(role=MessageRole.ASSISTANT, content=response.response)
+    )
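Because `/api/chat` streams raw text tokens (`media_type="text/plain"`), a client reads the response body incrementally instead of awaiting one JSON payload. A hedged TypeScript sketch using the Fetch streaming API (assumes Node 18+ and an ES module for top-level await):

```
// Reads the streaming endpoint token by token and echoes it to stdout.
async function streamChat(content: string): Promise<void> {
  const response = await fetch("http://localhost:8000/api/chat", {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify({ messages: [{ role: "user", content }] }),
  });
  if (!response.body) throw new Error("Response has no body to stream");

  const reader = response.body.getReader();
  const decoder = new TextDecoder();
  for (;;) {
    const { done, value } = await reader.read();
    if (done) break;
    process.stdout.write(decoder.decode(value, { stream: true }));
  }
}

await streamChat("Hello");
```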