diff --git a/e2e/basic.spec.ts b/e2e/basic.spec.ts
index 93e3edfb9..f2d0d6ac6 100644
--- a/e2e/basic.spec.ts
+++ b/e2e/basic.spec.ts
@@ -12,7 +12,7 @@ import type {
 } from "../helpers";
 import { createTestDir, runCreateLlama, type AppType } from "./utils";
 
-const templateTypes: TemplateType[] = ["streaming", "simple"];
+const templateTypes: TemplateType[] = ["streaming"];
 const templateFrameworks: TemplateFramework[] = [
   "nextjs",
   "express",
@@ -30,20 +30,8 @@ for (const templateType of templateTypes) {
     for (const templateEngine of templateEngines) {
       for (const templateUI of templateUIs) {
         for (const templatePostInstallAction of templatePostInstallActions) {
-          if (templateFramework === "nextjs" && templateType === "simple") {
-            // nextjs doesn't support simple templates - skip tests
-            continue;
-          }
           const appType: AppType =
-            templateFramework === "express" || templateFramework === "fastapi"
-              ? templateType === "simple"
-                ? "--no-frontend" // simple templates don't have frontends
-                : "--frontend"
-              : "";
-          if (appType === "--no-frontend" && templateUI !== "html") {
-            // if there's no frontend, don't iterate over UIs
-            continue;
-          }
+            templateFramework === "nextjs" ? "" : "--frontend";
           test.describe(`try create-llama ${templateType} ${templateFramework} ${templateEngine} ${templateUI} ${appType} ${templatePostInstallAction}`, async () => {
             let port: number;
             let externalPort: number;
@@ -79,7 +67,6 @@ for (const templateType of templateTypes) {
           });
           test("Frontend should have a title", async ({ page }) => {
             test.skip(templatePostInstallAction !== "runApp");
-            test.skip(appType === "--no-frontend");
             await page.goto(`http://localhost:${port}`);
             await expect(page.getByText("Built by LlamaIndex")).toBeVisible();
           });
@@ -88,7 +75,6 @@ for (const templateType of templateTypes) {
             page,
           }) => {
             test.skip(templatePostInstallAction !== "runApp");
-            test.skip(appType === "--no-frontend");
             await page.goto(`http://localhost:${port}`);
             await page.fill("form input", "hello");
             const [response] = await Promise.all([
@@ -109,14 +95,13 @@ for (const templateType of templateTypes) {
             expect(response.ok()).toBeTruthy();
           });
 
-          test("Backend should response when calling API", async ({
+          test("Backend frameworks should respond when calling non-streaming chat API", async ({
             request,
           }) => {
             test.skip(templatePostInstallAction !== "runApp");
-            test.skip(appType !== "--no-frontend");
-            const backendPort = appType === "" ? port : externalPort;
+            test.skip(templateFramework === "nextjs");
             const response = await request.post(
-              `http://localhost:${backendPort}/api/chat`,
+              `http://localhost:${externalPort}/api/chat/request`,
               {
                 data: {
                   messages: [
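For context, the reworked backend check above is now a plain request-level test against the new non-streaming route. The following standalone sketch shows the same interaction outside the generated suite; the fixed port and the `{ result: ... }` response shape (taken from the FastAPI `_Result` model later in this diff, and assumed to match the Express controller) are illustrative assumptions:

```
import { expect, test } from "@playwright/test";

// Hypothetical fixed port for illustration; the real suite resolves
// externalPort per generated app.
const externalPort = 8000;

test("non-streaming chat endpoint returns an assistant message", async ({
  request,
}) => {
  const response = await request.post(
    `http://localhost:${externalPort}/api/chat/request`,
    {
      data: {
        messages: [{ role: "user", content: "Hello" }],
      },
    },
  );
  expect(response.ok()).toBeTruthy();
  // Assumed response shape: { result: { role: "assistant", content: "..." } }
  const body = await response.json();
  expect(body.result.role).toBe("assistant");
});
```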
diff --git a/helpers/types.ts b/helpers/types.ts
index 0c5a30f05..0bed9a6db 100644
--- a/helpers/types.ts
+++ b/helpers/types.ts
@@ -1,7 +1,7 @@
 import { PackageManager } from "../helpers/get-pkg-manager";
 import { Tool } from "./tools";
 
-export type TemplateType = "simple" | "streaming" | "community" | "llamapack";
+export type TemplateType = "streaming" | "community" | "llamapack";
 export type TemplateFramework = "nextjs" | "express" | "fastapi";
 export type TemplateEngine = "simple" | "context";
 export type TemplateUI = "html" | "shadcn";
diff --git a/questions.ts b/questions.ts
index 6fc06da0d..6cec0a6e8 100644
--- a/questions.ts
+++ b/questions.ts
@@ -380,8 +380,7 @@ export const askQuestions = async (
         name: "template",
         message: "Which template would you like to use?",
         choices: [
-          { title: "Chat without streaming", value: "simple" },
-          { title: "Chat with streaming", value: "streaming" },
+          { title: "Chat", value: "streaming" },
           {
             title: `Community template from ${styledRepo}`,
             value: "community",
@@ -450,13 +449,10 @@ export const askQuestions = async (
     program.framework = getPrefOrDefault("framework");
   } else {
     const choices = [
+      { title: "NextJS", value: "nextjs" },
       { title: "Express", value: "express" },
       { title: "FastAPI (Python)", value: "fastapi" },
     ];
-    if (program.template === "streaming") {
-      // allow NextJS only for streaming template
-      choices.unshift({ title: "NextJS", value: "nextjs" });
-    }
 
     const { framework } = await prompts(
       {
@@ -473,10 +469,7 @@ export const askQuestions = async (
     }
   }
 
-  if (
-    program.template === "streaming" &&
-    (program.framework === "express" || program.framework === "fastapi")
-  ) {
+  if (program.framework === "express" || program.framework === "fastapi") {
     // if a backend-only framework is selected, ask whether we should create a frontend
     // (only for streaming backends)
     if (program.frontend === undefined) {
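With the template gate removed, the framework question always offers all three frameworks. A minimal sketch of the resulting prompt behavior, assuming the question wording (only the choices list appears in this hunk):

```
import prompts from "prompts";

// All three frameworks are now offered regardless of the chosen template.
const choices = [
  { title: "NextJS", value: "nextjs" },
  { title: "Express", value: "express" },
  { title: "FastAPI (Python)", value: "fastapi" },
];

const { framework } = await prompts({
  type: "select",
  name: "framework",
  message: "Which framework would you like to use?", // assumed wording
  choices,
});

console.log(`Selected framework: ${framework}`);
```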
diff --git a/templates/types/simple/express/README-template.md b/templates/types/simple/express/README-template.md
deleted file mode 100644
index a596bc067..000000000
--- a/templates/types/simple/express/README-template.md
+++ /dev/null
@@ -1,56 +0,0 @@
-This is a [LlamaIndex](https://www.llamaindex.ai/) project using [Express](https://expressjs.com/) bootstrapped with [`create-llama`](https://github.com/run-llama/LlamaIndexTS/tree/main/packages/create-llama).
-
-## Getting Started
-
-First, install the dependencies:
-
-```
-npm install
-```
-
-Second, generate the embeddings of the documents in the `./data` directory (if this folder exists - otherwise, skip this step):
-
-```
-npm run generate
-```
-
-Third, run the development server:
-
-```
-npm run dev
-```
-
-Then call the express API endpoint `/api/chat` to see the result:
-
-```
-curl --location 'localhost:8000/api/chat' \
---header 'Content-Type: application/json' \
---data '{ "messages": [{ "role": "user", "content": "Hello" }] }'
-```
-
-You can start editing the API by modifying `src/controllers/chat.controller.ts`. The endpoint auto-updates as you save the file.
-
-## Production
-
-First, build the project:
-
-```
-npm run build
-```
-
-You can then run the production server:
-
-```
-NODE_ENV=production npm run start
-```
-
-> Note that the `NODE_ENV` environment variable is set to `production`. This disables CORS for all origins.
-
-## Learn More
-
-To learn more about LlamaIndex, take a look at the following resources:
-
-- [LlamaIndex Documentation](https://docs.llamaindex.ai) - learn about LlamaIndex (Python features).
-- [LlamaIndexTS Documentation](https://ts.llamaindex.ai) - learn about LlamaIndex (Typescript features).
-
-You can check out [the LlamaIndexTS GitHub repository](https://github.com/run-llama/LlamaIndexTS) - your feedback and contributions are welcome!
diff --git a/templates/types/simple/express/eslintrc.json b/templates/types/simple/express/eslintrc.json
deleted file mode 100644
index cf20cdc7a..000000000
--- a/templates/types/simple/express/eslintrc.json
+++ /dev/null
@@ -1,3 +0,0 @@
-{
-  "extends": "eslint:recommended"
-}
diff --git a/templates/types/simple/express/gitignore b/templates/types/simple/express/gitignore
deleted file mode 100644
index 7d5e30fc2..000000000
--- a/templates/types/simple/express/gitignore
+++ /dev/null
@@ -1,3 +0,0 @@
-# local env files
-.env
-node_modules/
\ No newline at end of file
diff --git a/templates/types/simple/express/index.ts b/templates/types/simple/express/index.ts
deleted file mode 100644
index 150dbf598..000000000
--- a/templates/types/simple/express/index.ts
+++ /dev/null
@@ -1,44 +0,0 @@
-/* eslint-disable turbo/no-undeclared-env-vars */
-import cors from "cors";
-import "dotenv/config";
-import express, { Express, Request, Response } from "express";
-import { initObservability } from "./src/observability";
-import chatRouter from "./src/routes/chat.route";
-
-const app: Express = express();
-const port = parseInt(process.env.PORT || "8000");
-
-const env = process.env["NODE_ENV"];
-const isDevelopment = !env || env === "development";
-const prodCorsOrigin = process.env["PROD_CORS_ORIGIN"];
-
-initObservability();
-
-app.use(express.json());
-
-if (isDevelopment) {
-  console.warn("Running in development mode - allowing CORS for all origins");
-  app.use(cors());
-} else if (prodCorsOrigin) {
-  console.log(
-    `Running in production mode - allowing CORS for domain: ${prodCorsOrigin}`,
-  );
-  const corsOptions = {
-    origin: prodCorsOrigin, // Restrict to production domain
-  };
-  app.use(cors(corsOptions));
-} else {
-  console.warn("Production CORS origin not set, defaulting to no CORS.");
-}
-
-app.use(express.text());
-
-app.get("/", (req: Request, res: Response) => {
-  res.send("LlamaIndex Express Server");
-});
-
-app.use("/api/chat", chatRouter);
-
-app.listen(port, () => {
-  console.log(`⚡️[server]: Server is running at http://localhost:${port}`);
-});
diff --git a/templates/types/simple/express/package.json b/templates/types/simple/express/package.json
deleted file mode 100644
index eef99bac1..000000000
--- a/templates/types/simple/express/package.json
+++ /dev/null
@@ -1,27 +0,0 @@
-{
-  "name": "llama-index-express",
-  "version": "1.0.0",
-  "main": "dist/index.js",
-  "type": "module",
-  "scripts": {
-    "build": "tsup index.ts --format esm --dts",
-    "start": "node dist/index.js",
-    "dev": "concurrently \"tsup index.ts --format esm --dts --watch\" \"nodemon -q dist/index.js\""
-  },
-  "dependencies": {
-    "cors": "^2.8.5",
-    "dotenv": "^16.3.1",
-    "express": "^4.18.2",
-    "llamaindex": "latest"
-  },
-  "devDependencies": {
-    "@types/cors": "^2.8.17",
-    "@types/express": "^4.17.21",
-    "@types/node": "^20.9.5",
-    "concurrently": "^8.2.2",
-    "eslint": "^8.54.0",
-    "nodemon": "^3.0.1",
-    "tsup": "^7.3.0",
-    "typescript": "^5.3.2"
-  }
-}
diff --git a/templates/types/simple/express/src/controllers/engine/chat.ts b/templates/types/simple/express/src/controllers/engine/chat.ts
deleted file mode 100644
index abb02e90c..000000000
--- a/templates/types/simple/express/src/controllers/engine/chat.ts
+++ /dev/null
@@ -1,7 +0,0 @@
-import { LLM, SimpleChatEngine } from "llamaindex";
-
-export async function createChatEngine(llm: LLM) {
-  return new SimpleChatEngine({
-    llm,
-  });
-}
diff --git a/templates/types/simple/express/src/observability/index.ts b/templates/types/simple/express/src/observability/index.ts
deleted file mode 100644
index 2e4ce2b18..000000000
--- a/templates/types/simple/express/src/observability/index.ts
+++ /dev/null
@@ -1 +0,0 @@
-export const initObservability = () => {};
diff --git a/templates/types/simple/express/src/routes/chat.route.ts b/templates/types/simple/express/src/routes/chat.route.ts
deleted file mode 100644
index bdfeb0853..000000000
--- a/templates/types/simple/express/src/routes/chat.route.ts
+++ /dev/null
@@ -1,8 +0,0 @@
-import express from "express";
-import { chat } from "../controllers/chat.controller";
-
-const llmRouter = express.Router();
-
-llmRouter.route("/").post(chat);
-
-export default llmRouter;
diff --git a/templates/types/simple/express/tsconfig.json b/templates/types/simple/express/tsconfig.json
deleted file mode 100644
index bc819cab4..000000000
--- a/templates/types/simple/express/tsconfig.json
+++ /dev/null
@@ -1,10 +0,0 @@
-{
-  "compilerOptions": {
-    "target": "es2016",
-    "esModuleInterop": true,
-    "forceConsistentCasingInFileNames": true,
-    "strict": true,
-    "skipLibCheck": true,
-    "moduleResolution": "node"
-  }
-}
diff --git a/templates/types/simple/fastapi/app/__init__.py b/templates/types/simple/fastapi/app/__init__.py
deleted file mode 100644
index e69de29bb..000000000
diff --git a/templates/types/simple/fastapi/app/api/__init__.py b/templates/types/simple/fastapi/app/api/__init__.py
deleted file mode 100644
index e69de29bb..000000000
diff --git a/templates/types/simple/fastapi/app/api/routers/__init__.py b/templates/types/simple/fastapi/app/api/routers/__init__.py
deleted file mode 100644
index e69de29bb..000000000
diff --git a/templates/types/simple/fastapi/app/api/routers/chat.py b/templates/types/simple/fastapi/app/api/routers/chat.py
deleted file mode 100644
index 8405f2ac5..000000000
--- a/templates/types/simple/fastapi/app/api/routers/chat.py
+++ /dev/null
@@ -1,54 +0,0 @@
-from typing import List
-from pydantic import BaseModel
-from fastapi import APIRouter, Depends, HTTPException, status
-from llama_index.core.chat_engine.types import BaseChatEngine
-from llama_index.core.llms import ChatMessage, MessageRole
-from app.engine import get_chat_engine
-
-chat_router = r = APIRouter()
-
-
-class _Message(BaseModel):
-    role: MessageRole
-    content: str
-
-
-class _ChatData(BaseModel):
-    messages: List[_Message]
-
-
-class _Result(BaseModel):
-    result: _Message
-
-
-@r.post("")
-async def chat(
-    data: _ChatData,
-    chat_engine: BaseChatEngine = Depends(get_chat_engine),
-) -> _Result:
-    # check preconditions and get last message
-    if len(data.messages) == 0:
-        raise HTTPException(
-            status_code=status.HTTP_400_BAD_REQUEST,
-            detail="No messages provided",
-        )
-    lastMessage = data.messages.pop()
-    if lastMessage.role != MessageRole.USER:
-        raise HTTPException(
-            status_code=status.HTTP_400_BAD_REQUEST,
-            detail="Last message must be from user",
-        )
-    # convert messages coming from the request to type ChatMessage
-    messages = [
-        ChatMessage(
-            role=m.role,
-            content=m.content,
-        )
-        for m in data.messages
-    ]
-
-    # query chat engine
-    response = await chat_engine.achat(lastMessage.content, messages)
-    return _Result(
-        result=_Message(role=MessageRole.ASSISTANT, content=response.response)
-    )
diff --git a/templates/types/simple/fastapi/app/engine/__init__.py b/templates/types/simple/fastapi/app/engine/__init__.py
deleted file mode 100644
index fd8bb96a8..000000000
--- a/templates/types/simple/fastapi/app/engine/__init__.py
+++ /dev/null
@@ -1,5 +0,0 @@
-from llama_index.core.chat_engine import SimpleChatEngine
-
-
-def get_chat_engine():
-    return SimpleChatEngine.from_defaults()
diff --git a/templates/types/simple/fastapi/app/settings.py b/templates/types/simple/fastapi/app/settings.py
deleted file mode 100644
index 3f2c5e078..000000000
--- a/templates/types/simple/fastapi/app/settings.py
+++ /dev/null
@@ -1,41 +0,0 @@
-import os
-from typing import Dict
-from llama_index.core.settings import Settings
-from llama_index.llms.openai import OpenAI
-from llama_index.embeddings.openai import OpenAIEmbedding
-
-
-def llm_config_from_env() -> Dict:
-    from llama_index.core.constants import DEFAULT_TEMPERATURE
-
-    model = os.getenv("MODEL")
-    temperature = os.getenv("LLM_TEMPERATURE", DEFAULT_TEMPERATURE)
-    max_tokens = os.getenv("LLM_MAX_TOKENS")
-
-    config = {
-        "model": model,
-        "temperature": float(temperature),
-        "max_tokens": int(max_tokens) if max_tokens is not None else None,
-    }
-    return config
-
-
-def embedding_config_from_env() -> Dict:
-    model = os.getenv("EMBEDDING_MODEL")
-    dimension = os.getenv("EMBEDDING_DIM")
-
-    config = {
-        "model": model,
-        "dimension": int(dimension) if dimension is not None else None,
-    }
-    return config
-
-
-def init_settings():
-    llm_configs = llm_config_from_env()
-    embedding_configs = embedding_config_from_env()
-
-    Settings.llm = OpenAI(**llm_configs)
-    Settings.embed_model = OpenAIEmbedding(**embedding_configs)
-    Settings.chunk_size = int(os.getenv("CHUNK_SIZE", "1024"))
-    Settings.chunk_overlap = int(os.getenv("CHUNK_OVERLAP", "20"))
diff --git a/templates/types/simple/fastapi/gitignore b/templates/types/simple/fastapi/gitignore
deleted file mode 100644
index a6ad564cd..000000000
--- a/templates/types/simple/fastapi/gitignore
+++ /dev/null
@@ -1,3 +0,0 @@
-__pycache__
-storage
-.env
diff --git a/templates/types/simple/fastapi/main.py b/templates/types/simple/fastapi/main.py
deleted file mode 100644
index 41721a8f2..000000000
--- a/templates/types/simple/fastapi/main.py
+++ /dev/null
@@ -1,39 +0,0 @@
-from dotenv import load_dotenv
-
-load_dotenv()
-
-import logging
-import os
-import uvicorn
-from fastapi import FastAPI
-from fastapi.middleware.cors import CORSMiddleware
-from app.api.routers.chat import chat_router
-from app.settings import init_settings
-
-app = FastAPI()
-
-init_settings()
-
-environment = os.getenv("ENVIRONMENT", "dev")  # Default to 'development' if not set
-
-
-if environment == "dev":
-    logger = logging.getLogger("uvicorn")
-    logger.warning("Running in development mode - allowing CORS for all origins")
-    app.add_middleware(
-        CORSMiddleware,
-        allow_origins=["*"],
-        allow_credentials=True,
-        allow_methods=["*"],
-        allow_headers=["*"],
-    )
-
-app.include_router(chat_router, prefix="/api/chat")
-
-
-if __name__ == "__main__":
-    app_host = os.getenv("APP_HOST", "0.0.0.0")
-    app_port = int(os.getenv("APP_PORT", "8000"))
-    reload = True if environment == "dev" else False
-
-    uvicorn.run(app="main:app", host=app_host, port=app_port, reload=reload)
diff --git a/templates/types/simple/fastapi/pyproject.toml b/templates/types/simple/fastapi/pyproject.toml
deleted file mode 100644
index 5dae9d818..000000000
--- a/templates/types/simple/fastapi/pyproject.toml
+++ /dev/null
@@ -1,19 +0,0 @@
-[tool.poetry]
-name = "app"
-version = "0.1.0"
-description = ""
-authors = ["Marcus Schiesser <mail@marcusschiesser.de>"]
-readme = "README.md"
-
-[tool.poetry.dependencies]
-python = "^3.11,<3.12"
-fastapi = "^0.109.1"
-uvicorn = { extras = ["standard"], version = "^0.23.2" }
-python-dotenv = "^1.0.0"
-llama-index = "0.10.15"
-llama-index-core = "0.10.15"
-llama-index-agent-openai = "0.1.5"
-
-[build-system]
-requires = ["poetry-core"]
-build-backend = "poetry.core.masonry.api"
diff --git a/templates/types/simple/fastapi/tests/__init__.py b/templates/types/simple/fastapi/tests/__init__.py
deleted file mode 100644
index e69de29bb..000000000
diff --git a/templates/types/streaming/express/README-template.md b/templates/types/streaming/express/README-template.md
index 452069d2c..62a5f2485 100644
--- a/templates/types/streaming/express/README-template.md
+++ b/templates/types/streaming/express/README-template.md
@@ -20,15 +20,29 @@ Third, run the development server:
 
 ```
 npm run dev
 ```
 
-Then call the express API endpoint `/api/chat` to see the result:
+The example provides two different API endpoints:
+
+1. `/api/chat` - a streaming chat endpoint (found in `src/controllers/chat.controller.ts`)
+2. `/api/chat/request` - a non-streaming chat endpoint (found in `src/controllers/chat-request.controller.ts`)
+
+You can test the streaming endpoint with the following curl request:
 
 ```
 curl --location 'localhost:8000/api/chat' \
---header 'Content-Type: text/plain' \
+--header 'Content-Type: application/json' \
+--data '{ "messages": [{ "role": "user", "content": "Hello" }] }'
+```
+
+And for the non-streaming endpoint run:
+
+```
+curl --location 'localhost:8000/api/chat/request' \
+--header 'Content-Type: application/json' \
 --data '{ "messages": [{ "role": "user", "content": "Hello" }] }'
 ```
 
-You can start editing the API by modifying `src/controllers/chat.controller.ts`. The endpoint auto-updates as you save the file.
+You can start editing the API by modifying `src/controllers/chat.controller.ts` or `src/controllers/chat-request.controller.ts`. The endpoints auto-update as you save the file.
+You can delete the endpoint that you're not using.
 
 ## Production
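To mirror the curl examples above from application code, here is a small TypeScript client sketch for the non-streaming endpoint. The `{ result: { role, content } }` response shape is an assumption based on the FastAPI `_Result` model elsewhere in this diff; the Express controller's final response statement is not shown in this hunk:

```
// Minimal client for the non-streaming endpoint, mirroring the curl example.
// Assumes the server runs on localhost:8000 and returns { result: ChatMessage }.
async function chatOnce(content: string): Promise<string> {
  const response = await fetch("http://localhost:8000/api/chat/request", {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify({ messages: [{ role: "user", content }] }),
  });
  if (!response.ok) {
    throw new Error(`Request failed with status ${response.status}`);
  }
  // The whole assistant message arrives in a single JSON payload.
  const { result } = await response.json();
  return result.content;
}

chatOnce("Hello").then(console.log);
```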
diff --git a/templates/types/simple/express/src/controllers/chat.controller.ts b/templates/types/streaming/express/src/controllers/chat-request.controller.ts
similarity index 94%
rename from templates/types/simple/express/src/controllers/chat.controller.ts
rename to templates/types/streaming/express/src/controllers/chat-request.controller.ts
index 5fdb88255..b81b59997 100644
--- a/templates/types/simple/express/src/controllers/chat.controller.ts
+++ b/templates/types/streaming/express/src/controllers/chat-request.controller.ts
@@ -21,7 +21,7 @@ const convertMessageContent = (
   ];
 };
 
-export const chat = async (req: Request, res: Response) => {
+export const chatRequest = async (req: Request, res: Response) => {
   try {
     const { messages, data }: { messages: ChatMessage[]; data: any } = req.body;
     const userMessage = messages.pop();
@@ -48,7 +48,7 @@ export const chat = async (req: Request, res: Response) => {
     // Calling LlamaIndex's ChatEngine to get a response
     const response = await chatEngine.chat({
       message: userMessageContent,
-      messages,
+      chatHistory: messages,
     });
     const result: ChatMessage = {
       role: "assistant",
diff --git a/templates/types/streaming/express/src/routes/chat.route.ts b/templates/types/streaming/express/src/routes/chat.route.ts
index bdfeb0853..f935980c1 100644
--- a/templates/types/streaming/express/src/routes/chat.route.ts
+++ b/templates/types/streaming/express/src/routes/chat.route.ts
@@ -1,8 +1,10 @@
 import express from "express";
+import { chatRequest } from "../controllers/chat-request.controller";
 import { chat } from "../controllers/chat.controller";
 
 const llmRouter = express.Router();
 
 llmRouter.route("/").post(chat);
+llmRouter.route("/request").post(chatRequest);
 
 export default llmRouter;
diff --git a/templates/types/streaming/fastapi/README-template.md b/templates/types/streaming/fastapi/README-template.md
index 2bea76831..ca7e0b339 100644
--- a/templates/types/streaming/fastapi/README-template.md
+++ b/templates/types/streaming/fastapi/README-template.md
@@ -11,7 +11,7 @@ poetry install
 poetry shell
 ```
 
-By default, we use the OpenAI LLM (though you can customize, see `app/settings.py`). As a result you need to specify an `OPENAI_API_KEY` in an .env file in this directory.
+By default, we use the OpenAI LLM (though you can customize, see `app/settings.py`). As a result, you need to specify an `OPENAI_API_KEY` in an .env file in this directory.
 
 Example `.env` file:
 
@@ -33,7 +33,12 @@ Third, run the development server:
 python main.py
 ```
 
-Then call the API endpoint `/api/chat` to see the result:
+The example provides two different API endpoints:
+
+1. `/api/chat` - a streaming chat endpoint
+2. `/api/chat/request` - a non-streaming chat endpoint
+
+You can test the streaming endpoint with the following curl request:
 
 ```
 curl --location 'localhost:8000/api/chat' \
@@ -41,7 +46,15 @@ curl --location 'localhost:8000/api/chat' \
 --data '{ "messages": [{ "role": "user", "content": "Hello" }] }'
 ```
 
-You can start editing the API by modifying `app/api/routers/chat.py`. The endpoint auto-updates as you save the file.
+And for the non-streaming endpoint run:
+
+```
+curl --location 'localhost:8000/api/chat/request' \
+--header 'Content-Type: application/json' \
+--data '{ "messages": [{ "role": "user", "content": "Hello" }] }'
+```
+
+You can start editing the API endpoints by modifying `app/api/routers/chat.py`. The endpoints auto-update as you save the file. You can delete the endpoint you're not using.
 
 Open [http://localhost:8000/docs](http://localhost:8000/docs) with your browser to see the Swagger UI of the API.
 
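The `chatHistory` rename in the controller above is the behavioral fix in this rename: LlamaIndexTS chat engines accept prior turns under `chatHistory`, so passing them as `messages` left the engine without conversation history. A minimal sketch of the corrected call (the model and history values are placeholders):

```
import { OpenAI, SimpleChatEngine, type ChatMessage } from "llamaindex";

// Same engine the deleted simple template constructed.
const chatEngine = new SimpleChatEngine({ llm: new OpenAI() });

const history: ChatMessage[] = [
  { role: "user", content: "My name is Ada." },
  { role: "assistant", content: "Nice to meet you, Ada!" },
];

const response = await chatEngine.chat({
  message: "What is my name?",
  chatHistory: history, // prior turns must go under chatHistory
});

console.log(response.response);
```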
diff --git a/templates/types/streaming/fastapi/app/api/routers/chat.py b/templates/types/streaming/fastapi/app/api/routers/chat.py
index 278a9a753..2ef7ff1f7 100644
--- a/templates/types/streaming/fastapi/app/api/routers/chat.py
+++ b/templates/types/streaming/fastapi/app/api/routers/chat.py
@@ -5,6 +5,7 @@ from llama_index.core.chat_engine.types import BaseChatEngine
 from llama_index.core.llms import ChatMessage, MessageRole
 
 from app.engine import get_chat_engine
+from typing import List, Tuple
 
 chat_router = r = APIRouter()
 
@@ -18,20 +19,19 @@ class _ChatData(BaseModel):
     messages: List[_Message]
 
 
-@r.post("")
-async def chat(
-    request: Request,
-    data: _ChatData,
-    chat_engine: BaseChatEngine = Depends(get_chat_engine),
-):
+class _Result(BaseModel):
+    result: _Message
+
+
+async def parse_chat_data(data: _ChatData) -> Tuple[str, List[ChatMessage]]:
     # check preconditions and get last message
     if len(data.messages) == 0:
         raise HTTPException(
             status_code=status.HTTP_400_BAD_REQUEST,
             detail="No messages provided",
         )
-    lastMessage = data.messages.pop()
-    if lastMessage.role != MessageRole.USER:
+    last_message = data.messages.pop()
+    if last_message.role != MessageRole.USER:
         raise HTTPException(
             status_code=status.HTTP_400_BAD_REQUEST,
             detail="Last message must be from user",
@@ -44,16 +44,38 @@ async def chat(
         )
         for m in data.messages
     ]
+    return last_message.content, messages
 
-    # query chat engine
-    response = await chat_engine.astream_chat(lastMessage.content, messages)
 
-    # stream response
+# streaming endpoint - delete if not needed
+@r.post("")
+async def chat(
+    request: Request,
+    data: _ChatData,
+    chat_engine: BaseChatEngine = Depends(get_chat_engine),
+):
+    last_message_content, messages = await parse_chat_data(data)
+
+    response = await chat_engine.astream_chat(last_message_content, messages)
+
     async def event_generator():
         async for token in response.async_response_gen():
-            # If client closes connection, stop sending events
             if await request.is_disconnected():
                 break
             yield token
 
     return StreamingResponse(event_generator(), media_type="text/plain")
+
+
+# non-streaming endpoint - delete if not needed
+@r.post("/request")
+async def chat_request(
+    data: _ChatData,
+    chat_engine: BaseChatEngine = Depends(get_chat_engine),
+) -> _Result:
+    last_message_content, messages = await parse_chat_data(data)
+
+    response = await chat_engine.achat(last_message_content, messages)
+    return _Result(
+        result=_Message(role=MessageRole.ASSISTANT, content=response.response)
+    )
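Because `/api/chat` streams raw text tokens (`media_type="text/plain"`), a client reads the response body incrementally instead of awaiting one JSON payload. A hedged TypeScript sketch using the Fetch streaming API (assumes Node 18+ and an ES module for top-level await):

```
// Reads the streaming endpoint token by token and echoes it to stdout.
async function streamChat(content: string): Promise<void> {
  const response = await fetch("http://localhost:8000/api/chat", {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify({ messages: [{ role: "user", content }] }),
  });
  if (!response.body) throw new Error("Response has no body to stream");

  const reader = response.body.getReader();
  const decoder = new TextDecoder();
  for (;;) {
    const { done, value } = await reader.read();
    if (done) break;
    process.stdout.write(decoder.decode(value, { stream: true }));
  }
}

await streamChat("Hello");
```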