diff --git a/agents-api/agents_api/autogen/Chat.py b/agents-api/agents_api/autogen/Chat.py
index 1f4f5a67a..94832c4cb 100644
--- a/agents-api/agents_api/autogen/Chat.py
+++ b/agents-api/agents_api/autogen/Chat.py
@@ -10,7 +10,7 @@
 from .Common import LogitBias
 from .Docs import DocReference
-from .Entries import ChatMLMessage, InputChatMLMessage
+from .Entries import InputChatMLMessage
 from .Tools import FunctionTool, NamedToolChoice
@@ -23,7 +23,7 @@ class BaseChatOutput(BaseModel):
     """
     The reason the model stopped generating tokens
     """
-    logprobs: Annotated[LogProbResponse | None, Field(...)]
+    logprobs: LogProbResponse | None = None
     """
     The log probabilities of tokens
     """
@@ -33,7 +33,7 @@ class BaseChatResponse(BaseModel):
     model_config = ConfigDict(
         populate_by_name=True,
     )
-    usage: Annotated[CompetionUsage | None, Field(...)]
+    usage: CompetionUsage | None = None
     """
     Usage statistics for the completion request
     """
@@ -61,7 +61,7 @@ class BaseTokenLogProb(BaseModel):
     """
     The log probability of the token
     """
-    bytes: Annotated[list[int] | None, Field(...)]
+    bytes: list[int] | None = None


 class ChatInputData(BaseModel):
@@ -90,7 +90,7 @@ class ChatOutputChunk(BaseChatOutput):
     model_config = ConfigDict(
         populate_by_name=True,
     )
-    delta: ChatMLMessage
+    delta: InputChatMLMessage
     """
     The message generated by the model
     """
@@ -166,7 +166,7 @@ class MultipleChatOutput(BaseChatOutput):
     model_config = ConfigDict(
         populate_by_name=True,
     )
-    messages: list[ChatMLMessage]
+    messages: list[InputChatMLMessage]


 class OpenAISettings(BaseModel):
@@ -199,7 +199,7 @@ class SingleChatOutput(BaseChatOutput):
     model_config = ConfigDict(
         populate_by_name=True,
     )
-    message: ChatMLMessage
+    message: InputChatMLMessage


 class TokenLogProb(BaseTokenLogProb):
diff --git a/agents-api/agents_api/autogen/Entries.py b/agents-api/agents_api/autogen/Entries.py
index cfe3692f2..b9921daa4 100644
--- a/agents-api/agents_api/autogen/Entries.py
+++ b/agents-api/agents_api/autogen/Entries.py
@@ -17,7 +17,7 @@ class BaseEntry(BaseModel):
     )
     role: Literal[
         "user",
-        "agent",
+        "assistant",
         "system",
         "function",
         "function_response",
@@ -67,43 +67,6 @@ class ChatMLImageContentPart(BaseModel):
     """


-class ChatMLMessage(BaseModel):
-    model_config = ConfigDict(
-        populate_by_name=True,
-    )
-    role: Literal[
-        "user",
-        "agent",
-        "system",
-        "function",
-        "function_response",
-        "function_call",
-        "auto",
-    ]
-    """
-    The role of the message
-    """
-    content: str | list[str] | list[ChatMLTextContentPart | ChatMLImageContentPart]
-    """
-    The content parts of the message
-    """
-    name: str | None = None
-    """
-    Name
-    """
-    tool_calls: Annotated[
-        list[ChosenToolCall], Field([], json_schema_extra={"readOnly": True})
-    ]
-    """
-    Tool calls generated by the model.
-    """
-    created_at: Annotated[AwareDatetime, Field(json_schema_extra={"readOnly": True})]
-    """
-    When this resource was created as UTC date-time
-    """
-    id: Annotated[UUID, Field(json_schema_extra={"readOnly": True})]
-
-
 class ChatMLTextContentPart(BaseModel):
     model_config = ConfigDict(
         populate_by_name=True,
     )
@@ -159,7 +122,7 @@ class InputChatMLMessage(BaseModel):
     )
     role: Literal[
         "user",
-        "agent",
+        "assistant",
         "system",
         "function",
         "function_response",
diff --git a/agents-api/agents_api/common/protocol/sessions.py b/agents-api/agents_api/common/protocol/sessions.py
index 73c71b365..80f9049d4 100644
--- a/agents-api/agents_api/common/protocol/sessions.py
+++ b/agents-api/agents_api/common/protocol/sessions.py
@@ -54,7 +54,7 @@ def get_active_agent(self) -> Agent:
         """
         Get the active agent from the session data.
         """
-        requested_agent: UUID | None = self.settings.agent
+        requested_agent: UUID | None = self.settings and self.settings.agent

         if requested_agent:
             assert requested_agent in [agent.id for agent in self.agents], (
@@ -67,7 +67,7 @@
         return self.agents[0]

     def merge_settings(self, chat_input: ChatInput) -> ChatSettings:
-        request_settings = ChatSettings.model_validate(chat_input)
+        request_settings = chat_input.model_dump(exclude_unset=True)

         active_agent = self.get_active_agent()
         default_settings = active_agent.default_settings
@@ -75,7 +75,7 @@
             **{
                 "model": active_agent.model,
                 **default_settings.model_dump(),
-                **request_settings.model_dump(exclude_unset=True),
+                **request_settings,
             }
         )
diff --git a/agents-api/agents_api/common/utils/template.py b/agents-api/agents_api/common/utils/template.py
index eabf6f307..d806ddb6d 100644
--- a/agents-api/agents_api/common/utils/template.py
+++ b/agents-api/agents_api/common/utils/template.py
@@ -40,6 +40,28 @@ async def render_template_string(
     return rendered


+async def render_template_chatml(
+    messages: list[dict], variables: dict, check: bool = False
+) -> list[dict]:
+    # Parse template
+    # FIXME: should template_strings contain a list of ChatMLTextContentPart? Should we handle it somehow?
+    templates = [jinja_env.from_string(msg["content"]) for msg in messages]
+
+    # If check is required, get required vars from template and validate variables
+    if check:
+        for template in templates:
+            schema = to_json_schema(infer(template))
+            validate(instance=variables, schema=schema)
+
+    # Render
+    rendered = [
+        ({**msg, "content": await template.render_async(**variables)})
+        for template, msg in zip(templates, messages)
+    ]
+
+    return rendered
+
+
 async def render_template_parts(
     template_strings: list[dict], variables: dict, check: bool = False
 ) -> list[dict]:
@@ -73,7 +95,7 @@

 async def render_template(
-    template_string: str | list[dict],
+    input: str | list[dict],
     variables: dict,
     check: bool = False,
     skip_vars: list[str] | None = None,
 )
@@ -83,8 +105,15 @@
         for name, val in variables.items()
         if not (skip_vars is not None and isinstance(name, str) and name in skip_vars)
     }
-    if isinstance(template_string, str):
-        return await render_template_string(template_string, variables, check)
-    elif isinstance(template_string, list):
-        return await render_template_parts(template_string, variables, check)
+    match input:
+        case str():
+            future = render_template_string(input, variables, check)
+
+        case [{"content": str()}, *_]:
+            future = render_template_chatml(input, variables, check)
+
+        case _:
+            future = render_template_parts(input, variables, check)
+
+    return await future
diff --git a/agents-api/agents_api/models/docs/search_docs_hybrid.py b/agents-api/agents_api/models/docs/search_docs_hybrid.py
index 52ae76277..0a9cd2815 100644
--- a/agents-api/agents_api/models/docs/search_docs_hybrid.py
+++ b/agents-api/agents_api/models/docs/search_docs_hybrid.py
@@ -18,6 +18,9 @@ def dbsf_normalize(scores: list[float]) -> list[float]:
     Scores scaled using minmax scaler with our custom feature range
     (extremes indicated as 3 standard deviations from the mean)
     """
+    if len(scores) < 2:
+        return scores
+
     sd = stdev(scores)
     if sd == 0:
         return scores
diff --git a/agents-api/agents_api/routers/docs/__init__.py b/agents-api/agents_api/routers/docs/__init__.py
index 0d9fe8b5c..10195be77 100644
--- a/agents-api/agents_api/routers/docs/__init__.py
+++ b/agents-api/agents_api/routers/docs/__init__.py
@@ -1,6 +1,7 @@
 # ruff: noqa: F401
 from .create_doc import create_agent_doc, create_user_doc
 from .delete_doc import delete_agent_doc, delete_user_doc
+from .embed import embed
 from .get_doc import get_doc
 from .list_docs import list_agent_docs, list_user_docs
 from .router import router
diff --git a/agents-api/agents_api/routers/docs/embed.py b/agents-api/agents_api/routers/docs/embed.py
new file mode 100644
index 000000000..1de99bfce
--- /dev/null
+++ b/agents-api/agents_api/routers/docs/embed.py
@@ -0,0 +1,28 @@
+from typing import Annotated
+
+from fastapi import Depends
+from pydantic import UUID4
+
+import agents_api.clients.embed as embedder
+
+from ...autogen.openapi_model import (
+    EmbedQueryRequest,
+    EmbedQueryResponse,
+)
+from ...dependencies.developer_id import get_developer_id
+from .router import router
+
+
+@router.post("/embed", tags=["docs"])
+async def embed(
+    x_developer_id: Annotated[UUID4, Depends(get_developer_id)],
+    data: EmbedQueryRequest,
+) -> EmbedQueryResponse:
+    text_to_embed: str | list[str] = data.text
+    text_to_embed: list[str] = (
+        [text_to_embed] if isinstance(text_to_embed, str) else text_to_embed
+    )
+
+    vectors = await embedder.embed(inputs=text_to_embed)
+
+    return EmbedQueryResponse(vectors=vectors)
diff --git a/agents-api/agents_api/routers/sessions/chat.py b/agents-api/agents_api/routers/sessions/chat.py
index 225e25163..f0023cb93 100644
--- a/agents-api/agents_api/routers/sessions/chat.py
+++ b/agents-api/agents_api/routers/sessions/chat.py
@@ -7,14 +7,16 @@
 from ...autogen.openapi_model import (
     ChatInput,
     ChatResponse,
+    ChunkChatResponse,
     CreateEntryRequest,
     DocReference,
     History,
+    MessageChatResponse,
 )
-from ...clients.embed import embed
-from ...clients.litellm import acompletion
+from ...clients import embed, litellm
 from ...common.protocol.developers import Developer
 from ...common.protocol.sessions import ChatContext
+from ...common.utils.datetime import utcnow
 from ...common.utils.template import render_template
 from ...dependencies.developer_id import get_developer_data
 from ...models.docs.search_docs_hybrid import search_docs_hybrid
@@ -24,28 +26,14 @@
 from .router import router


-@router.post(
-    "/sessions/{session_id}/chat",
-    status_code=HTTP_201_CREATED,
-    tags=["sessions", "chat"],
-)
-async def chat(
-    developer: Annotated[Developer, Depends(get_developer_data)],
+async def get_messages(
+    *,
+    developer: Developer,
     session_id: UUID,
-    data: ChatInput,
-    background_tasks: BackgroundTasks,
-) -> ChatResponse:
-    # First get the chat context
-    chat_context: ChatContext = prepare_chat_context(
-        developer_id=developer.id,
-        session_id=session_id,
-    )
-    assert isinstance(chat_context, ChatContext)
-
-    # Merge the settings and prepare environment
-    chat_context.merge_settings(data)
-    settings: dict = chat_context.settings.model_dump()
-    env: dict = chat_context.get_chat_environment()
+    new_raw_messages: list[dict],
+    chat_context: ChatContext,
+):
+    assert len(new_raw_messages) > 0

     # Get the session history
     history: History = get_history(
@@ -62,10 +50,8 @@
         if entry.id not in {r.head for r in relations}
     ]

-    new_raw_messages = [msg.model_dump() for msg in data.messages]
-
     # Search matching docs
-    [query_embedding, *_] = await embed(
+    [query_embedding, *_] = await embed.embed(
         inputs=[
             f"{msg.get('name') or msg['role']}: {msg['content']}"
             for msg in new_raw_messages
@@ -82,13 +68,46 @@
         query_embedding=query_embedding,
     )

+    return past_messages, doc_references
+
+
+@router.post(
+    "/sessions/{session_id}/chat",
+    status_code=HTTP_201_CREATED,
+    tags=["sessions", "chat"],
+)
+async def chat(
+    developer: Annotated[Developer, Depends(get_developer_data)],
+    session_id: UUID,
+    data: ChatInput,
+    background_tasks: BackgroundTasks,
+) -> ChatResponse:
+    # First get the chat context
+    chat_context: ChatContext = prepare_chat_context(
+        developer_id=developer.id,
+        session_id=session_id,
+    )
+
+    # Merge the settings and prepare environment
+    chat_context.merge_settings(data)
+    settings: dict = chat_context.settings.model_dump()
+    env: dict = chat_context.get_chat_environment()
+    new_raw_messages = [msg.model_dump() for msg in data.messages]
+
+    # Render the messages
+    past_messages, doc_references = await get_messages(
+        developer=developer,
+        session_id=session_id,
+        new_raw_messages=new_raw_messages,
+        chat_context=chat_context,
+    )
+
     env["docs"] = doc_references
-    new_messages = render_template(new_raw_messages, variables=env)
+    new_messages = await render_template(new_raw_messages, variables=env)
     messages = past_messages + new_messages

     # Get the response from the model
-    model_response = await acompletion(
+    model_response = await litellm.acompletion(
         messages=messages,
         **settings,
         user=str(developer.id),
@@ -96,25 +115,27 @@
     )

     # Save the input and the response to the session history
-    new_entries = [CreateEntryRequest(**msg) for msg in new_messages]
-    background_tasks.add_task(
-        create_entries,
-        developer_id=developer.id,
-        session_id=session_id,
-        data=new_entries,
-        mark_session_as_updated=True,
-    )
+    if data.save:
+        new_entries = [
+            CreateEntryRequest(**msg, source="api_request") for msg in new_messages
+        ]
+        background_tasks.add_task(
+            create_entries,
+            developer_id=developer.id,
+            session_id=session_id,
+            data=new_entries,
+            mark_session_as_updated=True,
+        )

     # Return the response
-    response_json = model_response.model_dump()
-    response_json.pop("id", None)
-
-    chat_response: ChatResponse = ChatResponse(
-        **response_json,
+    chat_response_class = ChunkChatResponse if data.stream else MessageChatResponse
+    chat_response: ChatResponse = chat_response_class(
         id=uuid4(),
-        created_at=model_response.created,
+        created_at=utcnow(),
         jobs=[],
         docs=doc_references,
+        usage=model_response.usage.model_dump(),
+        choices=[choice.model_dump() for choice in model_response.choices],
     )

     return chat_response
diff --git a/agents-api/poetry.lock b/agents-api/poetry.lock
index 4892624c7..9e0dcabdf 100644
--- a/agents-api/poetry.lock
+++ b/agents-api/poetry.lock
@@ -2139,19 +2139,19 @@ files = [

 [[package]]
 name = "langchain"
-version = "0.2.12"
+version = "0.2.13"
 description = "Building applications with LLMs through composability"
 optional = false
 python-versions = "<4.0,>=3.8.1"
 files = [
-    {file = "langchain-0.2.12-py3-none-any.whl", hash = "sha256:565d2f5df1c06815d1c684400218ec4ae5e1027887aad343226fad846c54e726"},
-    {file = "langchain-0.2.12.tar.gz", hash = "sha256:fe7bd409c133017446fec54c38a5e7cb14f74e020090d7b5065374badf71e6d1"},
+    {file = "langchain-0.2.13-py3-none-any.whl", hash = "sha256:80f21e48cdada424dd2af9bbf42234fe095744cf181b31eeb63d1da7479e2783"},
+    {file = "langchain-0.2.13.tar.gz", hash = "sha256:947e96ac3153a46aa6a0d8207e5f8b6794084c397f60a01bbf4bba78e6838fee"},
 ]

 [package.dependencies]
 aiohttp = ">=3.8.3,<4.0.0"
 async-timeout = {version = ">=4.0.0,<5.0.0", markers = "python_version < \"3.11\""}
-langchain-core = ">=0.2.27,<0.3.0"
+langchain-core = ">=0.2.30,<0.3.0"
 langchain-text-splitters = ">=0.2.0,<0.3.0"
 langsmith = ">=0.1.17,<0.2.0"
 numpy = {version = ">=1,<2", markers = "python_version < \"3.12\""}
@@ -2163,20 +2163,20 @@ tenacity = ">=8.1.0,<8.4.0 || >8.4.0,<9.0.0"

 [[package]]
 name = "langchain-community"
-version = "0.2.11"
+version = "0.2.12"
 description = "Community contributed LangChain integrations."
 optional = false
 python-versions = "<4.0,>=3.8.1"
 files = [
-    {file = "langchain_community-0.2.11-py3-none-any.whl", hash = "sha256:465c03ba1603975d141533424185e09546ecf09e379c93aee2671bdc9b325cda"},
-    {file = "langchain_community-0.2.11.tar.gz", hash = "sha256:ede261ff8202f1433f004ee90baf89f371cee37cb1abfc16dd0f8392db10b23e"},
+    {file = "langchain_community-0.2.12-py3-none-any.whl", hash = "sha256:50e74473dd2309bdef561760afbbf0c5ea17ed91fc4dfa0d52279dd16d6d34e0"},
+    {file = "langchain_community-0.2.12.tar.gz", hash = "sha256:d671cfc6a4f3b65f49a2e59ab420d0164f109d0a56fc4b4996518205c63b8c7e"},
 ]

 [package.dependencies]
 aiohttp = ">=3.8.3,<4.0.0"
 dataclasses-json = ">=0.5.7,<0.7"
-langchain = ">=0.2.12,<0.3.0"
-langchain-core = ">=0.2.27,<0.3.0"
+langchain = ">=0.2.13,<0.3.0"
+langchain-core = ">=0.2.30,<0.3.0"
 langsmith = ">=0.1.0,<0.2.0"
 numpy = {version = ">=1,<2", markers = "python_version < \"3.12\""}
 PyYAML = ">=5.3"
@@ -2186,13 +2186,13 @@ tenacity = ">=8.1.0,<8.4.0 || >8.4.0,<9.0.0"

 [[package]]
 name = "langchain-core"
-version = "0.2.29"
+version = "0.2.30"
 description = "Building applications with LLMs through composability"
 optional = false
 python-versions = "<4.0,>=3.8.1"
 files = [
-    {file = "langchain_core-0.2.29-py3-none-any.whl", hash = "sha256:846c04a3bb72e409a9b928e0eb3ea1762e1473f2c4fb6df2596fbd7b3ab75973"},
-    {file = "langchain_core-0.2.29.tar.gz", hash = "sha256:491324745a7afee5a7b285c3904edd9dd0c6efa7daf26b92fec6e84a2d2f5d10"},
+    {file = "langchain_core-0.2.30-py3-none-any.whl", hash = "sha256:ea7eccb9566dd51b2b74bd292c4239d843a77cdba8ffae2b5edf7000d70d4194"},
+    {file = "langchain_core-0.2.30.tar.gz", hash = "sha256:552ec586698140062cd299a83bad7e308f925b496d306b62529579c6fb122f7a"},
 ]

 [package.dependencies]
@@ -2904,13 +2904,13 @@ files = [

 [[package]]
 name = "openai"
-version = "1.40.3"
+version = "1.40.6"
 description = "The official Python library for the openai API"
 optional = false
 python-versions = ">=3.7.1"
 files = [
-    {file = "openai-1.40.3-py3-none-any.whl", hash = "sha256:09396cb6e2e15c921a5d872bf92841a60a9425da10dcd962b45fe7c4f48f8395"},
-    {file = "openai-1.40.3.tar.gz", hash = "sha256:f2ffe907618240938c59d7ccc67dd01dc8c50be203c0077240db6758d2f02480"},
+    {file = "openai-1.40.6-py3-none-any.whl", hash = "sha256:b36372124a779381a420a34dd96f762baa748b6bdfaf83a6b9f2745f72ccc1c5"},
+    {file = "openai-1.40.6.tar.gz", hash = "sha256:2239232bcb7f4bd4ce8e02544b5769618582411cf399816d96686d1b6c1e5c8d"},
 ]

 [package.dependencies]
diff --git a/agents-api/tests/fixtures.py b/agents-api/tests/fixtures.py
index f2b76e3e3..fafb351f0 100644
--- a/agents-api/tests/fixtures.py
+++ b/agents-api/tests/fixtures.py
@@ -1,7 +1,9 @@
+from unittest.mock import patch
 from uuid import uuid4

 from cozo_migrate.api import apply, init
 from fastapi.testclient import TestClient
+from litellm.types.utils import Choices, ModelResponse
 from pycozo import Client as CozoClient
 from temporalio.client import WorkflowHandle
 from ward import fixture
@@ -18,6 +20,7 @@
 from agents_api.env import api_key, api_key_header_name
 from agents_api.models.agent.create_agent import create_agent
 from agents_api.models.agent.delete_agent import delete_agent
+from agents_api.models.developer.get_developer import get_developer
 from agents_api.models.docs.create_doc import create_doc
 from agents_api.models.docs.delete_doc import delete_doc
 from agents_api.models.execution.create_execution import create_execution
@@ -31,6 +34,8 @@
 from agents_api.models.user.delete_user import delete_user
 from agents_api.web import app
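+
+# Dimensionality of the mock embedding vectors used by the test fixtures below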
+EMBEDDING_SIZE: int = 1024
+

 @fixture(scope="global")
 def cozo_client(migrations_dir: str = "./migrations"):
@@ -65,6 +70,32 @@ def test_developer_id(cozo_client=cozo_client):
     )


+@fixture(scope="global")
+def test_developer(cozo_client=cozo_client, developer_id=test_developer_id):
+    return get_developer(
+        developer_id=developer_id,
+        client=cozo_client,
+    )
+
+
+@fixture(scope="global")
+def patch_embed_acompletion():
+    mock_model_response = ModelResponse(
+        id="fake_id",
+        choices=[Choices(message={"role": "assistant", "content": "Hello, world!"})],
+        created=0,
+        object="text_completion",
+    )
+
+    with patch("agents_api.clients.embed.embed") as embed, patch(
+        "agents_api.clients.litellm.acompletion"
+    ) as acompletion:
+        embed.return_value = [[1.0] * EMBEDDING_SIZE]
+        acompletion.return_value = mock_model_response
+
+        yield embed, acompletion
+
+
 @fixture(scope="global")
 def test_agent(cozo_client=cozo_client, developer_id=test_developer_id):
     agent = create_agent(
@@ -232,10 +263,12 @@ def test_execution(

     yield execution

-    client.run(f"""
+    client.run(
+        f"""
     ?[execution_id] <- ["{str(execution.id)}"]
     :delete executions {{ execution_id }}
-    """)
+    """
+    )


 @fixture(scope="global")
diff --git a/agents-api/tests/test_agent_queries.py b/agents-api/tests/test_agent_queries.py
index b6d69c287..f074a768a 100644
--- a/agents-api/tests/test_agent_queries.py
+++ b/agents-api/tests/test_agent_queries.py
@@ -108,6 +108,7 @@ def _(client=cozo_client, developer_id=test_developer_id, agent=test_agent):
         data=UpdateAgentRequest(
             name="updated agent",
             about="updated agent about",
+            model="gpt-4o",
             default_settings={"temperature": 1.0},
             metadata={"hello": "world"},
         ),
diff --git a/agents-api/tests/test_agent_routes.py b/agents-api/tests/test_agent_routes.py
index 53951a855..353d6ab95 100644
--- a/agents-api/tests/test_agent_routes.py
+++ b/agents-api/tests/test_agent_routes.py
@@ -139,6 +139,7 @@ def _(make_request=make_request, agent=test_agent):
         name="updated agent",
         about="updated agent about",
         default_settings={"temperature": 1.0},
+        model="gpt-4o",
         metadata={"hello": "world"},
     )
diff --git a/agents-api/tests/test_chat_routes.py b/agents-api/tests/test_chat_routes.py
new file mode 100644
index 000000000..55d94b2a0
--- /dev/null
+++ b/agents-api/tests/test_chat_routes.py
@@ -0,0 +1,97 @@
+# Tests for chat routes
+
+from ward import test
+
+from agents_api.autogen.Sessions import CreateSessionRequest
+from agents_api.clients import embed, litellm
+from agents_api.models.session.create_session import create_session
+from agents_api.models.session.prepare_chat_context import prepare_chat_context
+from agents_api.routers.sessions.chat import get_messages
+from tests.fixtures import (
+    cozo_client,
+    make_request,
+    patch_embed_acompletion,
+    test_agent,
+    test_developer,
+    test_developer_id,
+    test_session,
+    test_tool,
+    test_user,
+)
+
+
+@test("chat: check that patching libs works")
+async def _(
+    _=patch_embed_acompletion,
+):
+    assert (await litellm.acompletion(model="gpt-4o", messages=[])).id == "fake_id"
+    assert (await embed.embed())[0][0] == 1.0
+
+
+@test("chat: check that get_messages works")
+async def _(
+    developer=test_developer,
+    client=cozo_client,
+    developer_id=test_developer_id,
+    agent=test_agent,
+    session=test_session,
+    tool=test_tool,
+    user=test_user,
+    mocks=patch_embed_acompletion,
+):
+    (embed, _) = mocks
+
+    chat_context = prepare_chat_context(
+        developer_id=developer_id,
+        session_id=session.id,
+        client=client,
+    )
+
+    session_id = session.id
+
+    new_raw_messages = [{"role": "user", "content": "hello"}]
+
+    past_messages, doc_references = await get_messages(
+        developer=developer,
+        session_id=session_id,
+        new_raw_messages=new_raw_messages,
+        chat_context=chat_context,
+    )
+
+    assert isinstance(past_messages, list)
+    assert isinstance(doc_references, list)
+
+    # Check that embed was called at least once
+    embed.assert_called()
+
+
+@test("chat: check that chat route calls both mocks")
+async def _(
+    make_request=make_request,
+    developer_id=test_developer_id,
+    agent=test_agent,
+    mocks=patch_embed_acompletion,
+    client=cozo_client,
+):
+    session = create_session(
+        developer_id=developer_id,
+        data=CreateSessionRequest(
+            agent=agent.id,
+            situation="test session about",
+        ),
+        client=client,
+    )
+
+    (embed, acompletion) = mocks
+
+    response = make_request(
+        method="POST",
+        url=f"/sessions/{session.id}/chat",
+        json={"messages": [{"role": "user", "content": "hello"}]},
+    )
+
+    response.raise_for_status()
+
+    # Check that both mocks were called at least once
+    embed.assert_called()
+    acompletion.assert_called()
diff --git a/agents-api/tests/test_docs_queries.py b/agents-api/tests/test_docs_queries.py
index 5b9c28841..4743ea45d 100644
--- a/agents-api/tests/test_docs_queries.py
+++ b/agents-api/tests/test_docs_queries.py
@@ -10,6 +10,7 @@
 from agents_api.models.docs.list_docs import list_docs
 from agents_api.models.docs.search_docs_by_embedding import search_docs_by_embedding
 from tests.fixtures import (
+    EMBEDDING_SIZE,
     cozo_client,
     test_agent,
     test_developer_id,
@@ -17,8 +18,6 @@
     test_user,
 )

-EMBEDDING_SIZE: int = 1024
-

 @test("model: create docs")
 def _(
diff --git a/agents-api/tests/test_docs_routes.py b/agents-api/tests/test_docs_routes.py
index a2e699a47..05f095f49 100644
--- a/agents-api/tests/test_docs_routes.py
+++ b/agents-api/tests/test_docs_routes.py
@@ -1,6 +1,13 @@
 from ward import test

-from tests.fixtures import make_request, test_agent, test_doc, test_user, test_user_doc
+from tests.fixtures import (
+    make_request,
+    patch_embed_acompletion,
+    test_agent,
+    test_doc,
+    test_user,
+    test_user_doc,
+)


 @test("route: create user doc")
@@ -160,3 +167,22 @@ def _(make_request=make_request, user=test_user, doc=test_user_doc):

     # FIXME: This test is failing because the search is not returning the expected results
     # assert len(docs) >= 1
+
+
+@test("routes: embed route")
+async def _(
+    make_request=make_request,
+    mocks=patch_embed_acompletion,
+):
+    (embed, _) = mocks
+
+    response = make_request(
+        method="POST",
+        url="/embed",
+        json={"text": "blah blah"},
+    )
+
+    result = response.json()
+    assert "vectors" in result
+
+    embed.assert_called()
diff --git a/sdks/python/julep/api/__init__.py b/sdks/python/julep/api/__init__.py
index 61726cb46..c7371d4e1 100644
--- a/sdks/python/julep/api/__init__.py
+++ b/sdks/python/julep/api/__init__.py
@@ -75,11 +75,6 @@
     EntriesBaseEntryContentItemItem_Text,
     EntriesBaseEntrySource,
     EntriesChatMlImageContentPart,
-    EntriesChatMlMessage,
-    EntriesChatMlMessageContent,
-    EntriesChatMlMessageContentItem,
-    EntriesChatMlMessageContentItem_ImageUrl,
-    EntriesChatMlMessageContentItem_Text,
     EntriesChatMlRole,
     EntriesChatMlTextContentPart,
     EntriesEntry,
@@ -281,11 +276,6 @@
     "EntriesBaseEntryContentItemItem_Text",
     "EntriesBaseEntrySource",
     "EntriesChatMlImageContentPart",
-    "EntriesChatMlMessage",
-    "EntriesChatMlMessageContent",
-    "EntriesChatMlMessageContentItem",
-    "EntriesChatMlMessageContentItem_ImageUrl",
-    "EntriesChatMlMessageContentItem_Text",
     "EntriesChatMlRole",
     "EntriesChatMlTextContentPart",
     "EntriesEntry",
diff --git a/sdks/python/julep/api/types/__init__.py b/sdks/python/julep/api/types/__init__.py
index 5f1b1b76a..3df1542cd 100644
--- a/sdks/python/julep/api/types/__init__.py
+++ b/sdks/python/julep/api/types/__init__.py
@@ -84,13 +84,6 @@
 )
 from .entries_base_entry_source import EntriesBaseEntrySource
 from .entries_chat_ml_image_content_part import EntriesChatMlImageContentPart
-from .entries_chat_ml_message import EntriesChatMlMessage
-from .entries_chat_ml_message_content import EntriesChatMlMessageContent
-from .entries_chat_ml_message_content_item import (
-    EntriesChatMlMessageContentItem,
-    EntriesChatMlMessageContentItem_ImageUrl,
-    EntriesChatMlMessageContentItem_Text,
-)
 from .entries_chat_ml_role import EntriesChatMlRole
 from .entries_chat_ml_text_content_part import EntriesChatMlTextContentPart
 from .entries_entry import EntriesEntry
@@ -323,11 +316,6 @@
     "EntriesBaseEntryContentItemItem_Text",
     "EntriesBaseEntrySource",
     "EntriesChatMlImageContentPart",
-    "EntriesChatMlMessage",
-    "EntriesChatMlMessageContent",
-    "EntriesChatMlMessageContentItem",
-    "EntriesChatMlMessageContentItem_ImageUrl",
-    "EntriesChatMlMessageContentItem_Text",
     "EntriesChatMlRole",
     "EntriesChatMlTextContentPart",
     "EntriesEntry",
diff --git a/sdks/python/julep/api/types/chat_chat_output_chunk.py b/sdks/python/julep/api/types/chat_chat_output_chunk.py
index 6eb42c639..926787155 100644
--- a/sdks/python/julep/api/types/chat_chat_output_chunk.py
+++ b/sdks/python/julep/api/types/chat_chat_output_chunk.py
@@ -6,7 +6,7 @@
 from ..core.datetime_utils import serialize_datetime
 from ..core.pydantic_utilities import deep_union_pydantic_dicts, pydantic_v1
 from .chat_base_chat_output import ChatBaseChatOutput
-from .entries_chat_ml_message import EntriesChatMlMessage
+from .entries_input_chat_ml_message import EntriesInputChatMlMessage


 class ChatChatOutputChunk(ChatBaseChatOutput):
@@ -14,7 +14,7 @@ class ChatChatOutputChunk(ChatBaseChatOutput):
     Streaming chat completion output
     """

-    delta: EntriesChatMlMessage = pydantic_v1.Field()
+    delta: EntriesInputChatMlMessage = pydantic_v1.Field()
     """
     The message generated by the model
     """
diff --git a/sdks/python/julep/api/types/chat_multiple_chat_output.py b/sdks/python/julep/api/types/chat_multiple_chat_output.py
index ab41027eb..2fe0a50df 100644
--- a/sdks/python/julep/api/types/chat_multiple_chat_output.py
+++ b/sdks/python/julep/api/types/chat_multiple_chat_output.py
@@ -6,7 +6,7 @@
 from ..core.datetime_utils import serialize_datetime
 from ..core.pydantic_utilities import deep_union_pydantic_dicts, pydantic_v1
 from .chat_base_chat_output import ChatBaseChatOutput
-from .entries_chat_ml_message import EntriesChatMlMessage
+from .entries_input_chat_ml_message import EntriesInputChatMlMessage


 class ChatMultipleChatOutput(ChatBaseChatOutput):
@@ -14,7 +14,7 @@ class ChatMultipleChatOutput(ChatBaseChatOutput):
     The output returned by the model. Note that, depending on the model provider, they might return more than one message.
     """

-    messages: typing.List[EntriesChatMlMessage]
+    messages: typing.List[EntriesInputChatMlMessage]

     def json(self, **kwargs: typing.Any) -> str:
         kwargs_with_defaults: typing.Any = {
diff --git a/sdks/python/julep/api/types/chat_single_chat_output.py b/sdks/python/julep/api/types/chat_single_chat_output.py
index 236d7d566..51b5d99fe 100644
--- a/sdks/python/julep/api/types/chat_single_chat_output.py
+++ b/sdks/python/julep/api/types/chat_single_chat_output.py
@@ -6,7 +6,7 @@
 from ..core.datetime_utils import serialize_datetime
 from ..core.pydantic_utilities import deep_union_pydantic_dicts, pydantic_v1
 from .chat_base_chat_output import ChatBaseChatOutput
-from .entries_chat_ml_message import EntriesChatMlMessage
+from .entries_input_chat_ml_message import EntriesInputChatMlMessage


 class ChatSingleChatOutput(ChatBaseChatOutput):
@@ -14,7 +14,7 @@ class ChatSingleChatOutput(ChatBaseChatOutput):
     The output returned by the model. Note that, depending on the model provider, they might return more than one message.
     """

-    message: EntriesChatMlMessage
+    message: EntriesInputChatMlMessage

     def json(self, **kwargs: typing.Any) -> str:
         kwargs_with_defaults: typing.Any = {
diff --git a/sdks/python/julep/api/types/entries_chat_ml_message.py b/sdks/python/julep/api/types/entries_chat_ml_message.py
deleted file mode 100644
index f3c087f6b..000000000
--- a/sdks/python/julep/api/types/entries_chat_ml_message.py
+++ /dev/null
@@ -1,71 +0,0 @@
-# This file was auto-generated by Fern from our API Definition.
-
-import datetime as dt
-import typing
-
-from ..core.datetime_utils import serialize_datetime
-from ..core.pydantic_utilities import deep_union_pydantic_dicts, pydantic_v1
-from .common_uuid import CommonUuid
-from .entries_chat_ml_message_content import EntriesChatMlMessageContent
-from .entries_chat_ml_role import EntriesChatMlRole
-from .tools_chosen_tool_call import ToolsChosenToolCall
-
-
-class EntriesChatMlMessage(pydantic_v1.BaseModel):
-    role: EntriesChatMlRole = pydantic_v1.Field()
-    """
-    The role of the message
-    """
-
-    content: EntriesChatMlMessageContent = pydantic_v1.Field()
-    """
-    The content parts of the message
-    """
-
-    name: typing.Optional[str] = pydantic_v1.Field(default=None)
-    """
-    Name
-    """
-
-    tool_calls: typing.List[ToolsChosenToolCall] = pydantic_v1.Field()
-    """
-    Tool calls generated by the model.
-    """
-
-    created_at: dt.datetime = pydantic_v1.Field()
-    """
-    When this resource was created as UTC date-time
-    """
-
-    id: CommonUuid
-
-    def json(self, **kwargs: typing.Any) -> str:
-        kwargs_with_defaults: typing.Any = {
-            "by_alias": True,
-            "exclude_unset": True,
-            **kwargs,
-        }
-        return super().json(**kwargs_with_defaults)
-
-    def dict(self, **kwargs: typing.Any) -> typing.Dict[str, typing.Any]:
-        kwargs_with_defaults_exclude_unset: typing.Any = {
-            "by_alias": True,
-            "exclude_unset": True,
-            **kwargs,
-        }
-        kwargs_with_defaults_exclude_none: typing.Any = {
-            "by_alias": True,
-            "exclude_none": True,
-            **kwargs,
-        }
-
-        return deep_union_pydantic_dicts(
-            super().dict(**kwargs_with_defaults_exclude_unset),
-            super().dict(**kwargs_with_defaults_exclude_none),
-        )
-
-    class Config:
-        frozen = True
-        smart_union = True
-        extra = pydantic_v1.Extra.allow
-        json_encoders = {dt.datetime: serialize_datetime}
diff --git a/sdks/python/julep/api/types/entries_chat_ml_message_content.py b/sdks/python/julep/api/types/entries_chat_ml_message_content.py
deleted file mode 100644
index 3b80f2242..000000000
--- a/sdks/python/julep/api/types/entries_chat_ml_message_content.py
+++ /dev/null
@@ -1,9 +0,0 @@
-# This file was auto-generated by Fern from our API Definition.
-
-import typing
-
-from .entries_chat_ml_message_content_item import EntriesChatMlMessageContentItem
-
-EntriesChatMlMessageContent = typing.Union[
-    str, typing.List[str], typing.List[EntriesChatMlMessageContentItem]
-]
diff --git a/sdks/python/julep/api/types/entries_chat_ml_message_content_item.py b/sdks/python/julep/api/types/entries_chat_ml_message_content_item.py
deleted file mode 100644
index 1af6f3b06..000000000
--- a/sdks/python/julep/api/types/entries_chat_ml_message_content_item.py
+++ /dev/null
@@ -1,87 +0,0 @@
-# This file was auto-generated by Fern from our API Definition.
-
-from __future__ import annotations
-
-import datetime as dt
-import typing
-
-from ..core.datetime_utils import serialize_datetime
-from ..core.pydantic_utilities import deep_union_pydantic_dicts, pydantic_v1
-from .entries_image_url import EntriesImageUrl
-
-
-class EntriesChatMlMessageContentItem_Text(pydantic_v1.BaseModel):
-    text: str
-    type: typing.Literal["text"] = "text"
-
-    def json(self, **kwargs: typing.Any) -> str:
-        kwargs_with_defaults: typing.Any = {
-            "by_alias": True,
-            "exclude_unset": True,
-            **kwargs,
-        }
-        return super().json(**kwargs_with_defaults)
-
-    def dict(self, **kwargs: typing.Any) -> typing.Dict[str, typing.Any]:
-        kwargs_with_defaults_exclude_unset: typing.Any = {
-            "by_alias": True,
-            "exclude_unset": True,
-            **kwargs,
-        }
-        kwargs_with_defaults_exclude_none: typing.Any = {
-            "by_alias": True,
-            "exclude_none": True,
-            **kwargs,
-        }
-
-        return deep_union_pydantic_dicts(
-            super().dict(**kwargs_with_defaults_exclude_unset),
-            super().dict(**kwargs_with_defaults_exclude_none),
-        )
-
-    class Config:
-        frozen = True
-        smart_union = True
-        extra = pydantic_v1.Extra.allow
-        json_encoders = {dt.datetime: serialize_datetime}
-
-
-class EntriesChatMlMessageContentItem_ImageUrl(pydantic_v1.BaseModel):
-    image_url: EntriesImageUrl
-    type: typing.Literal["image_url"] = "image_url"
-
-    def json(self, **kwargs: typing.Any) -> str:
-        kwargs_with_defaults: typing.Any = {
-            "by_alias": True,
-            "exclude_unset": True,
-            **kwargs,
-        }
-        return super().json(**kwargs_with_defaults)
-
-    def dict(self, **kwargs: typing.Any) -> typing.Dict[str, typing.Any]:
-        kwargs_with_defaults_exclude_unset: typing.Any = {
-            "by_alias": True,
-            "exclude_unset": True,
-            **kwargs,
-        }
-        kwargs_with_defaults_exclude_none: typing.Any = {
-            "by_alias": True,
-            "exclude_none": True,
-            **kwargs,
-        }
-
-        return deep_union_pydantic_dicts(
-            super().dict(**kwargs_with_defaults_exclude_unset),
-            super().dict(**kwargs_with_defaults_exclude_none),
-        )
-
-    class Config:
-        frozen = True
-        smart_union = True
-        extra = pydantic_v1.Extra.allow
-        json_encoders = {dt.datetime: serialize_datetime}
-
-
-EntriesChatMlMessageContentItem = typing.Union[
-    EntriesChatMlMessageContentItem_Text, EntriesChatMlMessageContentItem_ImageUrl
-]
diff --git a/sdks/python/julep/api/types/entries_chat_ml_role.py b/sdks/python/julep/api/types/entries_chat_ml_role.py
index 9c61bb75a..0cc9a5d46 100644
--- a/sdks/python/julep/api/types/entries_chat_ml_role.py
+++ b/sdks/python/julep/api/types/entries_chat_ml_role.py
@@ -5,7 +5,7 @@
 EntriesChatMlRole = typing.Union[
     typing.Literal[
         "user",
-        "agent",
+        "assistant",
         "system",
         "function",
         "function_response",
diff --git a/sdks/python/poetry.lock b/sdks/python/poetry.lock
index bef7da8bf..38e2c1b19 100644
--- a/sdks/python/poetry.lock
+++ b/sdks/python/poetry.lock
@@ -1751,13 +1751,13 @@ files = [

 [[package]]
 name = "openai"
-version = "1.40.3"
+version = "1.40.6"
 description = "The official Python library for the openai API"
 optional = false
 python-versions = ">=3.7.1"
 files = [
-    {file = "openai-1.40.3-py3-none-any.whl", hash = "sha256:09396cb6e2e15c921a5d872bf92841a60a9425da10dcd962b45fe7c4f48f8395"},
-    {file = "openai-1.40.3.tar.gz", hash = "sha256:f2ffe907618240938c59d7ccc67dd01dc8c50be203c0077240db6758d2f02480"},
+    {file = "openai-1.40.6-py3-none-any.whl", hash = "sha256:b36372124a779381a420a34dd96f762baa748b6bdfaf83a6b9f2745f72ccc1c5"},
+    {file = "openai-1.40.6.tar.gz", hash = "sha256:2239232bcb7f4bd4ce8e02544b5769618582411cf399816d96686d1b6c1e5c8d"},
 ]

 [package.dependencies]
diff --git a/sdks/ts/src/api/index.ts b/sdks/ts/src/api/index.ts
index 0bd8c86e7..3aa08e1f6 100644
--- a/sdks/ts/src/api/index.ts
+++ b/sdks/ts/src/api/index.ts
@@ -64,7 +64,6 @@ export type { Docs_TextOnlyDocSearchRequest } from "./models/Docs_TextOnlyDocSea
 export type { Docs_VectorDocSearchRequest } from "./models/Docs_VectorDocSearchRequest";
 export type { Entries_BaseEntry } from "./models/Entries_BaseEntry";
 export type { Entries_ChatMLImageContentPart } from "./models/Entries_ChatMLImageContentPart";
-export type { Entries_ChatMLMessage } from "./models/Entries_ChatMLMessage";
 export type { Entries_ChatMLRole } from "./models/Entries_ChatMLRole";
 export type { Entries_ChatMLTextContentPart } from "./models/Entries_ChatMLTextContentPart";
 export type { Entries_Entry } from "./models/Entries_Entry";
@@ -183,7 +182,6 @@ export { $Docs_TextOnlyDocSearchRequest } from "./schemas/$Docs_TextOnlyDocSearc
 export { $Docs_VectorDocSearchRequest } from "./schemas/$Docs_VectorDocSearchRequest";
 export { $Entries_BaseEntry } from "./schemas/$Entries_BaseEntry";
 export { $Entries_ChatMLImageContentPart } from "./schemas/$Entries_ChatMLImageContentPart";
-export { $Entries_ChatMLMessage } from "./schemas/$Entries_ChatMLMessage";
 export { $Entries_ChatMLRole } from "./schemas/$Entries_ChatMLRole";
 export { $Entries_ChatMLTextContentPart } from "./schemas/$Entries_ChatMLTextContentPart";
 export { $Entries_Entry } from "./schemas/$Entries_Entry";
diff --git a/sdks/ts/src/api/models/Chat_BaseChatOutput.ts b/sdks/ts/src/api/models/Chat_BaseChatOutput.ts
index 6770dd5d5..91c56285f 100644
--- a/sdks/ts/src/api/models/Chat_BaseChatOutput.ts
+++ b/sdks/ts/src/api/models/Chat_BaseChatOutput.ts
@@ -13,5 +13,5 @@ export type Chat_BaseChatOutput = {
   /**
    * The log probabilities of tokens
    */
-  logprobs: Chat_LogProbResponse | null;
+  logprobs?: Chat_LogProbResponse;
 };
diff --git a/sdks/ts/src/api/models/Chat_BaseChatResponse.ts b/sdks/ts/src/api/models/Chat_BaseChatResponse.ts
index 781dd3838..078b8873f 100644
--- a/sdks/ts/src/api/models/Chat_BaseChatResponse.ts
+++ b/sdks/ts/src/api/models/Chat_BaseChatResponse.ts
@@ -9,7 +9,7 @@ export type Chat_BaseChatResponse = {
   /**
    * Usage statistics for the completion request
    */
-  usage: Chat_CompetionUsage | null;
+  usage?: Chat_CompetionUsage;
   /**
    * Background job IDs that may have been spawned from this interaction.
   */
diff --git a/sdks/ts/src/api/models/Chat_BaseTokenLogProb.ts b/sdks/ts/src/api/models/Chat_BaseTokenLogProb.ts
index 41b715757..dd9711b4d 100644
--- a/sdks/ts/src/api/models/Chat_BaseTokenLogProb.ts
+++ b/sdks/ts/src/api/models/Chat_BaseTokenLogProb.ts
@@ -8,5 +8,5 @@ export type Chat_BaseTokenLogProb = {
    * The log probability of the token
    */
   logprob: number;
-  bytes: Array<number> | null;
+  bytes?: Array<number>;
 };
diff --git a/sdks/ts/src/api/models/Chat_ChatOutputChunk.ts b/sdks/ts/src/api/models/Chat_ChatOutputChunk.ts
index 9d52a7fdc..a3bbfacc7 100644
--- a/sdks/ts/src/api/models/Chat_ChatOutputChunk.ts
+++ b/sdks/ts/src/api/models/Chat_ChatOutputChunk.ts
@@ -3,7 +3,7 @@
 /* tslint:disable */
 /* eslint-disable */
 import type { Chat_BaseChatOutput } from "./Chat_BaseChatOutput";
-import type { Entries_ChatMLMessage } from "./Entries_ChatMLMessage";
+import type { Entries_InputChatMLMessage } from "./Entries_InputChatMLMessage";
 /**
  * Streaming chat completion output
  */
@@ -11,5 +11,5 @@ export type Chat_ChatOutputChunk = Chat_BaseChatOutput & {
   /**
    * The message generated by the model
    */
-  delta: Entries_ChatMLMessage;
+  delta: Entries_InputChatMLMessage;
 };
diff --git a/sdks/ts/src/api/models/Chat_MultipleChatOutput.ts b/sdks/ts/src/api/models/Chat_MultipleChatOutput.ts
index 759edb0f5..b0eb182f6 100644
--- a/sdks/ts/src/api/models/Chat_MultipleChatOutput.ts
+++ b/sdks/ts/src/api/models/Chat_MultipleChatOutput.ts
@@ -3,10 +3,10 @@
 /* tslint:disable */
 /* eslint-disable */
 import type { Chat_BaseChatOutput } from "./Chat_BaseChatOutput";
-import type { Entries_ChatMLMessage } from "./Entries_ChatMLMessage";
+import type { Entries_InputChatMLMessage } from "./Entries_InputChatMLMessage";
 /**
  * The output returned by the model. Note that, depending on the model provider, they might return more than one message.
  */
 export type Chat_MultipleChatOutput = Chat_BaseChatOutput & {
-  messages: Array<Entries_ChatMLMessage>;
+  messages: Array<Entries_InputChatMLMessage>;
 };
diff --git a/sdks/ts/src/api/models/Chat_SingleChatOutput.ts b/sdks/ts/src/api/models/Chat_SingleChatOutput.ts
index e4571c234..57b76490c 100644
--- a/sdks/ts/src/api/models/Chat_SingleChatOutput.ts
+++ b/sdks/ts/src/api/models/Chat_SingleChatOutput.ts
@@ -3,10 +3,10 @@
 /* tslint:disable */
 /* eslint-disable */
 import type { Chat_BaseChatOutput } from "./Chat_BaseChatOutput";
-import type { Entries_ChatMLMessage } from "./Entries_ChatMLMessage";
+import type { Entries_InputChatMLMessage } from "./Entries_InputChatMLMessage";
 /**
  * The output returned by the model. Note that, depending on the model provider, they might return more than one message.
  */
 export type Chat_SingleChatOutput = Chat_BaseChatOutput & {
-  message: Entries_ChatMLMessage;
+  message: Entries_InputChatMLMessage;
 };
diff --git a/sdks/ts/src/api/models/Entries_ChatMLMessage.ts b/sdks/ts/src/api/models/Entries_ChatMLMessage.ts
deleted file mode 100644
index 019cfb7db..000000000
--- a/sdks/ts/src/api/models/Entries_ChatMLMessage.ts
+++ /dev/null
@@ -1,30 +0,0 @@
-/* generated using openapi-typescript-codegen -- do no edit */
-/* istanbul ignore file */
-/* tslint:disable */
-/* eslint-disable */
-import type { Common_uuid } from "./Common_uuid";
-import type { Entries_ChatMLRole } from "./Entries_ChatMLRole";
-import type { Tools_ChosenToolCall } from "./Tools_ChosenToolCall";
-export type Entries_ChatMLMessage = {
-  /**
-   * The role of the message
-   */
-  role: Entries_ChatMLRole;
-  /**
-   * The content parts of the message
-   */
-  content: string | Array<string>;
-  /**
-   * Name
-   */
-  name?: string;
-  /**
-   * Tool calls generated by the model.
-   */
-  readonly tool_calls: Array<Tools_ChosenToolCall>;
-  /**
-   * When this resource was created as UTC date-time
-   */
-  readonly created_at: string;
-  readonly id: Common_uuid;
-};
diff --git a/sdks/ts/src/api/models/Entries_ChatMLRole.ts b/sdks/ts/src/api/models/Entries_ChatMLRole.ts
index 0a4789cde..d0ad7e4da 100644
--- a/sdks/ts/src/api/models/Entries_ChatMLRole.ts
+++ b/sdks/ts/src/api/models/Entries_ChatMLRole.ts
@@ -7,7 +7,7 @@
  */
 export type Entries_ChatMLRole =
   | "user"
-  | "agent"
+  | "assistant"
  | "system"
   | "function"
   | "function_response"
diff --git a/sdks/ts/src/api/schemas/$Chat_BaseChatOutput.ts b/sdks/ts/src/api/schemas/$Chat_BaseChatOutput.ts
index 3053f6412..e5c25070d 100644
--- a/sdks/ts/src/api/schemas/$Chat_BaseChatOutput.ts
+++ b/sdks/ts/src/api/schemas/$Chat_BaseChatOutput.ts
@@ -27,8 +27,6 @@ export const $Chat_BaseChatOutput = {
           type: "Chat_LogProbResponse",
         },
       ],
-      isRequired: true,
-      isNullable: true,
     },
   },
 } as const;
diff --git a/sdks/ts/src/api/schemas/$Chat_BaseChatResponse.ts b/sdks/ts/src/api/schemas/$Chat_BaseChatResponse.ts
index 57a56bc70..1ca3e7008 100644
--- a/sdks/ts/src/api/schemas/$Chat_BaseChatResponse.ts
+++ b/sdks/ts/src/api/schemas/$Chat_BaseChatResponse.ts
@@ -12,8 +12,6 @@ export const $Chat_BaseChatResponse = {
           type: "Chat_CompetionUsage",
         },
       ],
-      isRequired: true,
-      isNullable: true,
     },
     jobs: {
       type: "array",
diff --git a/sdks/ts/src/api/schemas/$Chat_BaseTokenLogProb.ts b/sdks/ts/src/api/schemas/$Chat_BaseTokenLogProb.ts
index e9cd03715..e7af2a307 100644
--- a/sdks/ts/src/api/schemas/$Chat_BaseTokenLogProb.ts
+++ b/sdks/ts/src/api/schemas/$Chat_BaseTokenLogProb.ts
@@ -20,8 +20,6 @@ export const $Chat_BaseTokenLogProb = {
         type: "number",
         format: "uint16",
       },
-      isRequired: true,
-      isNullable: true,
     },
   },
 } as const;
diff --git a/sdks/ts/src/api/schemas/$Chat_ChatOutputChunk.ts b/sdks/ts/src/api/schemas/$Chat_ChatOutputChunk.ts
index 2ad1c1175..71efa1925 100644
--- a/sdks/ts/src/api/schemas/$Chat_ChatOutputChunk.ts
+++ b/sdks/ts/src/api/schemas/$Chat_ChatOutputChunk.ts
@@ -16,7 +16,7 @@ export const $Chat_ChatOutputChunk = {
       description: `The message generated by the model`,
       contains: [
         {
-          type: "Entries_ChatMLMessage",
+          type: "Entries_InputChatMLMessage",
         },
       ],
       isRequired: true,
diff --git a/sdks/ts/src/api/schemas/$Chat_MultipleChatOutput.ts b/sdks/ts/src/api/schemas/$Chat_MultipleChatOutput.ts
index 8dc1b3bb7..331290c5c 100644
--- a/sdks/ts/src/api/schemas/$Chat_MultipleChatOutput.ts
+++ b/sdks/ts/src/api/schemas/$Chat_MultipleChatOutput.ts
@@ -14,7 +14,7 @@ export const $Chat_MultipleChatOutput = {
       messages: {
         type: "array",
         contains: {
-          type: "Entries_ChatMLMessage",
+          type: "Entries_InputChatMLMessage",
        },
         isRequired: true,
       },
diff --git a/sdks/ts/src/api/schemas/$Chat_SingleChatOutput.ts b/sdks/ts/src/api/schemas/$Chat_SingleChatOutput.ts
index 1cd376f3f..75c9ddf74 100644
--- a/sdks/ts/src/api/schemas/$Chat_SingleChatOutput.ts
+++ b/sdks/ts/src/api/schemas/$Chat_SingleChatOutput.ts
@@ -12,7 +12,7 @@
     {
       properties: {
        message: {
-          type: "Entries_ChatMLMessage",
+          type: "Entries_InputChatMLMessage",
           isRequired: true,
         },
       },
diff --git a/sdks/ts/src/api/schemas/$Entries_ChatMLMessage.ts b/sdks/ts/src/api/schemas/$Entries_ChatMLMessage.ts
deleted file mode 100644
index a9a55cee2..000000000
--- a/sdks/ts/src/api/schemas/$Entries_ChatMLMessage.ts
+++ /dev/null
@@ -1,63 +0,0 @@
-/* generated using openapi-typescript-codegen -- do no edit */
-/* istanbul ignore file */
-/* tslint:disable */
-/* eslint-disable */
-export const $Entries_ChatMLMessage = {
-  properties: {
-    role: {
-      type: "all-of",
-      description: `The role of the message`,
-      contains: [
-        {
-          type: "Entries_ChatMLRole",
-        },
-      ],
-      isRequired: true,
-    },
-    content: {
-      type: "any-of",
-      description: `The content parts of the message`,
-      contains: [
-        {
-          type: "string",
-        },
-        {
-          type: "array",
-          contains: {
-            type: "string",
-          },
-        },
-      ],
-      isRequired: true,
-    },
-    name: {
-      type: "string",
-      description: `Name`,
-    },
-    tool_calls: {
-      type: "array",
-      contains: {
-        type: "Tools_ChosenToolCall",
-      },
-      isReadOnly: true,
-      isRequired: true,
-    },
-    created_at: {
-      type: "string",
-      description: `When this resource was created as UTC date-time`,
-      isReadOnly: true,
-      isRequired: true,
-      format: "date-time",
-    },
-    id: {
-      type: "all-of",
-      contains: [
-        {
-          type: "Common_uuid",
-        },
-      ],
-      isReadOnly: true,
-      isRequired: true,
-    },
-  },
-} as const;
diff --git a/typespec/chat/models.tsp b/typespec/chat/models.tsp
index 97f374413..da7c170ca 100644
--- a/typespec/chat/models.tsp
+++ b/typespec/chat/models.tsp
@@ -169,7 +169,7 @@ model BaseTokenLogProb {
   /** The log probability of the token */
   logprob: float32;

-  bytes: uint16[] | null;
+  bytes?: uint16[];
 }

 model TokenLogProb extends BaseTokenLogProb {
@@ -188,17 +188,17 @@ model BaseChatOutput {
   finish_reason: FinishReason;

   /** The log probabilities of tokens */
-  logprobs: LogProbResponse | null;
+  logprobs?: LogProbResponse;
 }

 /** The output returned by the model. Note that, depending on the model provider, they might return more than one message. */
 model SingleChatOutput extends BaseChatOutput {
-  message: ChatMLMessage;
+  message: InputChatMLMessage;
 }

 /** The output returned by the model. Note that, depending on the model provider, they might return more than one message. */
 model MultipleChatOutput extends BaseChatOutput {
-  messages: ChatMLMessage[];
+  messages: InputChatMLMessage[];
 }

 alias ChatOutput = SingleChatOutput | MultipleChatOutput;
@@ -206,12 +206,12 @@
 /** Streaming chat completion output */
 model ChatOutputChunk extends BaseChatOutput {
   /** The message generated by the model */
-  delta: ChatMLMessage;
+  delta: InputChatMLMessage;
 }

 model BaseChatResponse {
   /** Usage statistics for the completion request */
-  usage: CompetionUsage | null;
+  usage?: CompetionUsage;

   /** Background job IDs that may have been spawned from this interaction.
    */
   jobs: uuid[];
diff --git a/typespec/entries/models.tsp b/typespec/entries/models.tsp
index 0d8e604d9..f9050b4f4 100644
--- a/typespec/entries/models.tsp
+++ b/typespec/entries/models.tsp
@@ -22,7 +22,7 @@ enum ImageDetail {
 /** ChatML role (system|assistant|user|function_call|function|function_response|auto) */
 enum ChatMLRole {
   user,
-  agent,
+  assistant,
   system,
   function,
   function_response,