diff --git a/.env.example b/.env.example
index ebd5df763..fd0bece7d 100644
--- a/.env.example
+++ b/.env.example
@@ -8,8 +8,7 @@ COZO_ROCKSDB_DIR=cozo.db
 DTYPE=bfloat16
 EMBEDDING_SERVICE_URL=http://text-embeddings-inference/embed
 GATEWAY_PORT=80
-GENERATION_AUTH_TOKEN=myauthkey
-GENERATION_URL=http://model-serving:8000/v1
+OPENAI_API_KEY=""
 GPU_MEMORY_UTILIZATION=0.95
 HF_TOKEN=""
 HUGGING_FACE_HUB_TOKEN=""
@@ -21,9 +20,9 @@ GF_SECURITY_ADMIN_PASSWORD=changethis
 MODEL_API_KEY=myauthkey
 MODEL_API_KEY_HEADER_NAME=Authorization
 MODEL_API_URL=http://model-serving:8000
+MODEL_INFERENCE_URL=http://model-serving:8000/v1
 MODEL_ID=BAAI/llm-embedder
-MODEL_NAME=julep-ai/samantha-1-turbo
-# MODEL_NAME = "julep-ai/samantha-1-turbo-awq"
+MODEL_NAME=julep-ai/samantha-1-turbo
 SKIP_CHECK_DEVELOPER_HEADERS=true
 SUMMARIZATION_TOKENS_THRESHOLD=2048
 TEMPERATURE_SCALING_FACTOR=0.9
@@ -31,7 +30,7 @@ TEMPERATURE_SCALING_POWER=0.9
 TEMPORAL_ENDPOINT=temporal:7233
 TEMPORAL_NAMESPACE=default
 TEMPORAL_WORKER_URL=temporal:7233
-TP_SIZE=2
+TP_SIZE=1
 TRUNCATE_EMBED_TEXT=true
 TRAEFIK_LOG_LEVEL=DEBUG
-WORKER_URL=temporal:7233
+WORKER_URL=temporal:7233
\ No newline at end of file
diff --git a/.gitignore b/.gitignore
index 9d41aa8a8..b68b9ff1e 100644
--- a/.gitignore
+++ b/.gitignore
@@ -6,3 +6,4 @@ ngrok*
 *.pyc
 */node_modules/
 .aider*
+.vscode/
\ No newline at end of file
diff --git a/agents-api/agents_api/activities/__init__.py b/agents-api/agents_api/activities/__init__.py
index 6f5dd1a09..a804127fc 100644
--- a/agents-api/agents_api/activities/__init__.py
+++ b/agents-api/agents_api/activities/__init__.py
@@ -11,4 +11,4 @@
 - `summarization.py`: Summarizes dialogues and updates memory based on the conversation context.
 
 This module plays a crucial role in enhancing the capabilities of agents by providing them with the tools to understand and process information more effectively.
-"""
\ No newline at end of file
+"""
diff --git a/agents-api/agents_api/activities/co_density.py b/agents-api/agents_api/activities/co_density.py
index 9c685746d..8d276b401 100644
--- a/agents-api/agents_api/activities/co_density.py
+++ b/agents-api/agents_api/activities/co_density.py
@@ -3,7 +3,7 @@
 
 from temporalio import activity
 
-from ..clients.openai import client as openai_client
+from ..clients.model import julep_client
 from .types import MemoryDensityTaskArgs
 
 
@@ -63,7 +63,7 @@ async def run_prompt(
 ) -> str:
     prompt = make_prompt(MemoryDensityTaskArgs(memory=memory))
 
-    response = await openai_client.chat.completions.create(
+    response = await julep_client.chat.completions.create(
         model=model,
         messages=[
             {
diff --git a/agents-api/agents_api/activities/dialog_insights.py b/agents-api/agents_api/activities/dialog_insights.py
index 338131039..d6b10ae01 100644
--- a/agents-api/agents_api/activities/dialog_insights.py
+++ b/agents-api/agents_api/activities/dialog_insights.py
@@ -3,7 +3,7 @@
 
 from temporalio import activity
 
-from ..clients.openai import client as openai_client
+from ..clients.model import julep_client
 from .types import ChatML, DialogInsightsTaskArgs
 
 
@@ -66,7 +66,7 @@ async def run_prompt(
         DialogInsightsTaskArgs(dialog=dialog, person1=person1, person2=person2)
     )
 
-    response = await openai_client.chat.completions.create(
+    response = await julep_client.chat.completions.create(
         model=model,
         messages=[
             {
diff --git a/agents-api/agents_api/activities/mem_mgmt.py b/agents-api/agents_api/activities/mem_mgmt.py
index f9003fa6b..56716e724 100644
--- a/agents-api/agents_api/activities/mem_mgmt.py
+++ b/agents-api/agents_api/activities/mem_mgmt.py
@@ -4,7 +4,7 @@
 
 from temporalio import activity
 
-from ..clients.openai import client as openai_client
+from ..clients.model import julep_client
 from .types import ChatML, MemoryManagementTaskArgs
 
 
@@ -135,7 +135,7 @@ async def run_prompt(
         )
     )
 
-    response = await openai_client.chat.completions.create(
+    response = await julep_client.chat.completions.create(
         model=model,
         messages=[
             {
diff --git a/agents-api/agents_api/activities/mem_rating.py b/agents-api/agents_api/activities/mem_rating.py
index 1d84adc7c..bc35ac82d 100644
--- a/agents-api/agents_api/activities/mem_rating.py
+++ b/agents-api/agents_api/activities/mem_rating.py
@@ -3,7 +3,7 @@
 
 from temporalio import activity
 
-from ..clients.openai import client as openai_client
+from ..clients.model import julep_client
 from .types import MemoryRatingTaskArgs
 
 
@@ -47,7 +47,7 @@ async def run_prompt(
 ) -> str:
     prompt = make_prompt(MemoryRatingTaskArgs(memory=memory))
 
-    response = await openai_client.chat.completions.create(
+    response = await julep_client.chat.completions.create(
         model=model,
         messages=[
             {
diff --git a/agents-api/agents_api/activities/relationship_summary.py b/agents-api/agents_api/activities/relationship_summary.py
index 32983a631..5346040d3 100644
--- a/agents-api/agents_api/activities/relationship_summary.py
+++ b/agents-api/agents_api/activities/relationship_summary.py
@@ -3,7 +3,7 @@
 
 from temporalio import activity
 
-from ..clients.openai import client as openai_client
+from ..clients.model import julep_client
 from .types import RelationshipSummaryTaskArgs
 
 
@@ -49,7 +49,7 @@ async def run_prompt(
         )
     )
 
-    response = await openai_client.chat.completions.create(
+    response = await julep_client.chat.completions.create(
         model=model,
         messages=[
             {
diff --git a/agents-api/agents_api/activities/salient_questions.py b/agents-api/agents_api/activities/salient_questions.py
index 360e4f735..6a34409d6 100644
--- a/agents-api/agents_api/activities/salient_questions.py
+++ b/agents-api/agents_api/activities/salient_questions.py
@@ -3,7 +3,7 @@
 
 from temporalio import activity
 
-from ..clients.openai import client as openai_client
+from ..clients.model import julep_client
 from .types import SalientQuestionsTaskArgs
 
 
@@ -40,7 +40,7 @@ async def run_prompt(
 ) -> str:
     prompt = make_prompt(SalientQuestionsTaskArgs(statements=statements, num=num))
 
-    response = await openai_client.chat.completions.create(
+    response = await julep_client.chat.completions.create(
         model=model,
         messages=[
             {
diff --git a/agents-api/agents_api/activities/summarization.py b/agents-api/agents_api/activities/summarization.py
index a055c38f9..656b7bc7c 100644
--- a/agents-api/agents_api/activities/summarization.py
+++ b/agents-api/agents_api/activities/summarization.py
@@ -10,7 +10,7 @@
     entries_summarization_query,
 )
 from agents_api.common.protocol.entries import Entry
-from agents_api.clients.openai import client as openai_client
+from agents_api.clients.model import julep_client
 
 
 example_previous_memory = """
@@ -130,7 +130,7 @@ async def run_prompt(
 ) -> str:
     prompt = make_prompt(dialog, previous_memories, **kwargs)
 
-    response = await openai_client.chat.completions.create(
+    response = await julep_client.chat.completions.create(
         model=model,
         messages=[
             {
diff --git a/agents-api/agents_api/autogen/__init__.py b/agents-api/agents_api/autogen/__init__.py
index 07ec4821c..1db9bf447 100644
--- a/agents-api/agents_api/autogen/__init__.py
+++ b/agents-api/agents_api/autogen/__init__.py
@@ -1,3 +1,3 @@
 """
 This module contains automatically generated models based on the OpenAPI specification for the agents-api project. It includes definitions for key entities such as Users, Sessions, Agents, Tools, and their respective interactions. These models play a crucial role in defining the structure and constraints of data exchanged with the API endpoints, ensuring consistency and validation across the service. Generated models cover a wide range of functionalities from user management, session handling, agent configuration, to tool definitions, providing a comprehensive schema for the API's operations.
-"""
\ No newline at end of file
+"""
diff --git a/agents-api/agents_api/clients/__init__.py b/agents-api/agents_api/clients/__init__.py
index c63703969..43a17ab08 100644
--- a/agents-api/agents_api/clients/__init__.py
+++ b/agents-api/agents_api/clients/__init__.py
@@ -6,4 +6,4 @@
 - `openai.py`: Facilitates interaction with OpenAI's API for natural language processing tasks.
 - `temporal.py`: Provides functionality for connecting to Temporal workflows, enabling asynchronous task execution and management.
 - `worker/__init__.py` and related files: Describe the role of the worker service client in sending tasks to be processed by an external worker service, focusing on memory management and other computational tasks.
-"""
\ No newline at end of file
+"""
diff --git a/agents-api/agents_api/clients/model.py b/agents-api/agents_api/clients/model.py
new file mode 100644
index 000000000..d4b504267
--- /dev/null
+++ b/agents-api/agents_api/clients/model.py
@@ -0,0 +1,10 @@
+from openai import AsyncOpenAI
+from ..env import model_inference_url, model_api_key, openai_api_key
+
+
+openai_client = AsyncOpenAI(api_key=openai_api_key)  # no base_url given: library default endpoint
+# NOTE(review): env.py defaults both API keys to None — confirm client construction tolerates an unset key.
+julep_client = AsyncOpenAI(
+    base_url=model_inference_url,  # read from MODEL_INFERENCE_URL
+    api_key=model_api_key,  # read from MODEL_API_KEY
+)
diff --git a/agents-api/agents_api/clients/openai.py b/agents-api/agents_api/clients/openai.py
deleted file mode 100644
index 94a43b1e2..000000000
--- a/agents-api/agents_api/clients/openai.py
+++ /dev/null
@@ -1,8 +0,0 @@
-from openai import AsyncOpenAI
-from ..env import generation_auth_token, generation_url
-
-
-client = AsyncOpenAI(
-    api_key=generation_auth_token,
-    base_url=generation_url,
-)
diff --git a/agents-api/agents_api/clients/worker/__init__.py b/agents-api/agents_api/clients/worker/__init__.py
index 5d2186abd..53f598ba2 100644
--- a/agents-api/agents_api/clients/worker/__init__.py
+++ b/agents-api/agents_api/clients/worker/__init__.py
@@ -1,3 +1,3 @@
 """
 This module provides functionality for interacting with an external worker service. It includes utilities for creating and sending tasks, such as memory management tasks, to be processed by the service. The module leverages asynchronous HTTP requests via the `httpx` library to communicate with the worker service. Types for structuring task data are defined in `types.py`.
-"""
\ No newline at end of file
+"""
diff --git a/agents-api/agents_api/common/exceptions/__init__.py b/agents-api/agents_api/common/exceptions/__init__.py
index 6adf96799..fa0016b4e 100644
--- a/agents-api/agents_api/common/exceptions/__init__.py
+++ b/agents-api/agents_api/common/exceptions/__init__.py
@@ -8,6 +8,8 @@
 
 All custom exceptions extend from `BaseCommonException`, which encapsulates common attributes and behavior, including the error message and HTTP status code. This structured approach to exception handling facilitates precise and meaningful error feedback to API consumers, thereby improving the overall developer experience.
 """
+
+
 class BaseCommonException(Exception):
     def __init__(self, msg: str, http_code: int):
         super().__init__(msg)
diff --git a/agents-api/agents_api/common/exceptions/agents.py b/agents-api/agents_api/common/exceptions/agents.py
index 4d7bac518..fd5d2a9f8 100644
--- a/agents-api/agents_api/common/exceptions/agents.py
+++ b/agents-api/agents_api/common/exceptions/agents.py
@@ -2,6 +2,7 @@
 
 from uuid import UUID
 from . import BaseCommonException
+from agents_api.model_registry import ALL_AVAILABLE_MODELS
 
 
 class BaseAgentException(BaseCommonException):
@@ -39,3 +40,12 @@ def __init__(self, agent_id: UUID | str, doc_id: UUID | str):
         super().__init__(
             f"Doc {str(doc_id)} not found for agent {str(agent_id)}", http_code=404
         )
+
+
+class AgentModelNotValid(BaseAgentException):
+    """Error for a model name missing from ALL_AVAILABLE_MODELS; uses http_code 400."""
+
+    def __init__(self, model: str):
+        known = ", ".join(ALL_AVAILABLE_MODELS)
+        msg = f"Unknown model: {model}. Please provide a valid model name. "
+        super().__init__(msg + f"Known models are: {known}", http_code=400)
diff --git a/agents-api/agents_api/common/protocol/__init__.py b/agents-api/agents_api/common/protocol/__init__.py
index 8b30d104a..7e3553f3a 100644
--- a/agents-api/agents_api/common/protocol/__init__.py
+++ b/agents-api/agents_api/common/protocol/__init__.py
@@ -6,4 +6,4 @@
 - `SessionData`: Represents the data associated with a session, including identifiers and session-specific information such as situation, summary, and timestamps.
 
 These components are crucial for the effective operation and interaction within the agents API.
-"""
\ No newline at end of file
+"""
diff --git a/agents-api/agents_api/common/protocol/agents.py b/agents-api/agents_api/common/protocol/agents.py
index f586b9ac8..222f91f01 100644
--- a/agents-api/agents_api/common/protocol/agents.py
+++ b/agents-api/agents_api/common/protocol/agents.py
@@ -1,5 +1,3 @@
-from typing import Literal
-
 from pydantic import BaseModel
 
 
@@ -20,6 +18,3 @@ class AgentDefaultSettings(BaseModel):
     frequency_penalty: float = 0.0
     """Minimum probability threshold for including a word in the agent's response."""
     min_p: float = 0.01
-
-
-ModelType = Literal["julep-ai/samantha-1", "julep-ai/samantha-1-turbo"]
diff --git a/agents-api/agents_api/common/protocol/sessions.py b/agents-api/agents_api/common/protocol/sessions.py
index e815a8e0c..ee5dbfc81 100644
--- a/agents-api/agents_api/common/protocol/sessions.py
+++ b/agents-api/agents_api/common/protocol/sessions.py
@@ -4,9 +4,11 @@
 """
 from uuid import UUID
 
-from pydantic import BaseModel
+from pydantic import BaseModel, validator
 
-from .agents import ModelType, AgentDefaultSettings
+from .agents import AgentDefaultSettings
+
+from agents_api.model_registry import ALL_AVAILABLE_MODELS
 
 
 class SessionSettings(AgentDefaultSettings):
@@ -35,5 +37,15 @@ class SessionData(BaseModel):
     agent_about: str
     updated_at: float
     created_at: float
-    model: ModelType
+    model: str
     default_settings: SessionSettings
+
+    @validator("model")
+    def validate_model_type(cls, model):
+        """Return `model` unchanged, or raise ValueError if it is not a key of ALL_AVAILABLE_MODELS."""
+        if model not in ALL_AVAILABLE_MODELS:
+            raise ValueError(
+                f"Unknown model: {model}. Please provide a valid model name. "
+                "Known models are: " + ", ".join(ALL_AVAILABLE_MODELS)
+            )
+        return model
diff --git a/agents-api/agents_api/common/utils/__init__.py b/agents-api/agents_api/common/utils/__init__.py
index 1e9ed3a50..891594c02 100644
--- a/agents-api/agents_api/common/utils/__init__.py
+++ b/agents-api/agents_api/common/utils/__init__.py
@@ -6,4 +6,4 @@
 - `json.py`: Custom JSON utilities, including a custom JSON encoder for handling specific object types like UUIDs, and a utility function for JSON serialization with support for default values for None objects.
 
 These utilities are essential for the internal operations of the `agents-api`, providing common functionalities that are reused across different parts of the application.
-"""
\ No newline at end of file
+"""
diff --git a/agents-api/agents_api/dependencies/__init__.py b/agents-api/agents_api/dependencies/__init__.py
index c07d85322..909b39903 100644
--- a/agents-api/agents_api/dependencies/__init__.py
+++ b/agents-api/agents_api/dependencies/__init__.py
@@ -4,4 +4,4 @@
 - `developer_id.py` for developer identification: Handles developer-specific headers like `X-Developer-Id` and `X-Developer-Email`, facilitating the identification of the developer making the request.
 - `exceptions.py` for custom exception handling: Defines custom exceptions that are used throughout the dependencies module to handle errors related to API security and developer identification.
 
-These components collectively ensure the security and proper operation of the agents-api by authenticating requests, identifying developers, and handling errors in a standardized manner."""
\ No newline at end of file
+These components collectively ensure the security and proper operation of the agents-api by authenticating requests, identifying developers, and handling errors in a standardized manner."""
diff --git a/agents-api/agents_api/env.py b/agents-api/agents_api/env.py
index 7c3b961c0..ff2cec6e8 100644
--- a/agents-api/agents_api/env.py
+++ b/agents-api/agents_api/env.py
@@ -22,8 +22,9 @@
 prediction_api_endpoint: str = env.str(
     "PREDICTION_API_ENDPOINT", default="us-central1-aiplatform.googleapis.com"
 )
-generation_url: str = env.str("GENERATION_URL", default=None)
-generation_auth_token: str = env.str("GENERATION_AUTH_TOKEN", default=None)
+model_api_key: str | None = env.str("MODEL_API_KEY", default=None)
+model_inference_url: str | None = env.str("MODEL_INFERENCE_URL", default=None)
+openai_api_key: str | None = env.str("OPENAI_API_KEY", default=None)
 summarization_ratio_threshold: float = env.float(
     "MAX_TOKENS_RATIO_TO_SUMMARIZE", default=0.5
 )
@@ -63,8 +64,6 @@
     debug=debug,
     cozo_host=cozo_host,
     cozo_auth=cozo_auth,
-    generation_url=generation_url,
-    generation_auth_token=generation_auth_token,
     summarization_ratio_threshold=summarization_ratio_threshold,
     summarization_tokens_threshold=summarization_tokens_threshold,
     worker_url=worker_url,
diff --git a/agents-api/agents_api/model_registry.py b/agents-api/agents_api/model_registry.py
new file mode 100644
index 000000000..9e39489b7
--- /dev/null
+++ b/agents-api/agents_api/model_registry.py
@@ -0,0 +1,139 @@
+"""
+Model Registry maintains a list of supported models and their configs.
+"""
+from typing import Dict
+from agents_api.clients.model import julep_client, openai_client
+from openai import AsyncOpenAI
+
+
+GPT4_MODELS: Dict[str, int] = {
+    # stable model names:
+    #   resolves to gpt-4-0314 before 2023-06-27,
+    #   resolves to gpt-4-0613 after
+    "gpt-4": 8192,
+    "gpt-4-32k": 32768,
+    # turbo models (Turbo, JSON mode)
+    "gpt-4-turbo": 128000,
+    "gpt-4-turbo-2024-04-09": 128000,
+    "gpt-4-1106-preview": 128000,
+    "gpt-4-0125-preview": 128000,
+    "gpt-4-turbo-preview": 128000,
+    # multimodal model
+    "gpt-4-vision-preview": 128000,
+    # 0613 models (function calling):
+    #   https://openai.com/blog/function-calling-and-other-api-updates
+    "gpt-4-0613": 8192,
+    "gpt-4-32k-0613": 32768,
+    # 0314 models
+    "gpt-4-0314": 8192,
+    "gpt-4-32k-0314": 32768,
+}
+
+TURBO_MODELS: Dict[str, int] = {
+    # stable model names:
+    #   resolves to gpt-3.5-turbo-0301 before 2023-06-27,
+    #   resolves to gpt-3.5-turbo-0613 until 2023-12-11,
+    #   resolves to gpt-3.5-turbo-1106 after
+    "gpt-3.5-turbo": 4096,
+    # resolves to gpt-3.5-turbo-16k-0613 until 2023-12-11
+    # resolves to gpt-3.5-turbo-1106 after
+    "gpt-3.5-turbo-16k": 16384,
+    # 0125 (2024) model (JSON mode)
+    "gpt-3.5-turbo-0125": 16385,
+    # 1106 model (JSON mode)
+    "gpt-3.5-turbo-1106": 16384,
+    # 0613 models (function calling):
+    #   https://openai.com/blog/function-calling-and-other-api-updates
+    "gpt-3.5-turbo-0613": 4096,
+    "gpt-3.5-turbo-16k-0613": 16384,
+    # 0301 models
+    "gpt-3.5-turbo-0301": 4096,
+}
+
+GPT3_5_MODELS: Dict[str, int] = {
+    "text-davinci-003": 4097,
+    "text-davinci-002": 4097,
+    # instruct models
+    "gpt-3.5-turbo-instruct": 4096,
+}
+
+GPT3_MODELS: Dict[str, int] = {
+    "text-ada-001": 2049,
+    "text-babbage-001": 2040,
+    "text-curie-001": 2049,
+    "ada": 2049,
+    "babbage": 2049,
+    "curie": 2049,
+    "davinci": 2049,
+}
+
+
+DISCONTINUED_MODELS = {
+    "code-davinci-002": 8001,
+    "code-davinci-001": 8001,
+    "code-cushman-002": 2048,
+    "code-cushman-001": 2048,
+}
+
+CLAUDE_MODELS: Dict[str, int] = {
+    "claude-instant-1": 100000,
+    "claude-instant-1.2": 100000,
+    "claude-2": 100000,
+    "claude-2.0": 100000,
+    "claude-2.1": 200000,
+    "claude-3-opus-20240229": 180000,
+    "claude-3-sonnet-20240229": 180000,
+    "claude-3-haiku-20240307": 180000,
+}
+
+OPENAI_MODELS = {**GPT4_MODELS, **TURBO_MODELS, **GPT3_5_MODELS, **GPT3_MODELS}
+
+JULEP_MODELS = {
+    "julep-ai/samantha-1-turbo": 32768,
+    "julep-ai/samantha-1-turbo-awq": 32768,
+}
+
+CHAT_MODELS = {**GPT4_MODELS, **TURBO_MODELS, **CLAUDE_MODELS}
+
+# Built from the same two provider tables that get_model_client dispatches on,
+# so every model accepted here has a client to serve it.
+ALL_AVAILABLE_MODELS = {
+    **JULEP_MODELS,
+    # OPENAI_MODELS is GPT4 + TURBO + GPT3_5 + GPT3, merged in that order.
+    **OPENAI_MODELS,
+    # **CLAUDE_MODELS,
+}
+
+
+# TODO: implement
+def validate_configuration():
+    """
+    Placeholder (currently a no-op): intended to validate the config based on the model.
+    """
+    pass
+
+
+# TODO: implement
+def validate_request():
+    """
+    Placeholder (currently a no-op): presumably meant to validate a request for the model — TODO confirm.
+    """
+    pass
+
+
+def get_model_client(model: str) -> AsyncOpenAI:
+    """Return the client serving `model`; raise ValueError if no provider table lists it."""
+    if model in JULEP_MODELS:
+        return julep_client
+    if model in OPENAI_MODELS:
+        return openai_client
+    # The original fell through here and implicitly returned None.
+    raise ValueError(f"Unknown model: {model}. No client is registered for it.")
+
+
+# TODO: implement and use this to work with the response from different model formats
+def parse_response():
+    """
+    Placeholder (currently a no-op): intended to convert a provider's response back into the openai format.
+    """
+    pass
diff --git a/agents-api/agents_api/models/__init__.py b/agents-api/agents_api/models/__init__.py
index aab4e6258..d90013a48 100644
--- a/agents-api/agents_api/models/__init__.py
+++ b/agents-api/agents_api/models/__init__.py
@@ -4,4 +4,4 @@
 Each sub-module within this module corresponds to a specific entity and contains functions and classes that implement datalog queries for interacting with the database. These interactions include creating new records, updating existing ones, retrieving data for specific conditions, and deleting records. The operations are crucial for the functionality of the agents API, enabling it to manage and process data effectively for each entity.
 
 This module also integrates with the `common` module for exception handling and utility functions, ensuring robust error management and providing reusable components for data processing and query construction.
-"""
\ No newline at end of file
+"""
diff --git a/agents-api/agents_api/models/agent/__init__.py b/agents-api/agents_api/models/agent/__init__.py
index f0bd05f4d..41d808c70 100644
--- a/agents-api/agents_api/models/agent/__init__.py
+++ b/agents-api/agents_api/models/agent/__init__.py
@@ -9,4 +9,4 @@
 Additionally, the module supports operations related to agent tools, including creating, updating, and patching tools associated with agents.
 
 This module serves as the backbone for agent management within the CozoDB ecosystem, facilitating a wide range of operations necessary for the effective handling of agent data.
-"""
\ No newline at end of file
+"""
diff --git a/agents-api/agents_api/models/agent/create_agent.py b/agents-api/agents_api/models/agent/create_agent.py
index 681b25dfb..df2e15458 100644
--- a/agents-api/agents_api/models/agent/create_agent.py
+++ b/agents-api/agents_api/models/agent/create_agent.py
@@ -3,6 +3,7 @@
 It includes functions to construct and execute datalog queries for inserting new agent records.
 """
 
+from agents_api.common.exceptions.agents import AgentModelNotValid
 from uuid import UUID
 
 import pandas as pd
@@ -10,6 +11,7 @@
 
 from ...clients.cozo import client
 from ...common.utils.cozo import cozo_process_mutate_data
+from ...model_registry import ALL_AVAILABLE_MODELS
 
 
 """
@@ -36,13 +38,14 @@ def create_agent_query(
     developer_id: UUID,
     name: str,
     about: str,
+    model: str,
     instructions: list[str] = [],
-    model: str = "julep-ai/samantha-1-turbo",
     metadata: dict = {},
     default_settings: dict = {},
     client: CozoClient = client,
 ) -> pd.DataFrame:
-    assert model in ["julep-ai/samantha-1", "julep-ai/samantha-1-turbo"]
+    if model not in ALL_AVAILABLE_MODELS.keys():
+        raise AgentModelNotValid(model)
 
     settings_cols, settings_vals = cozo_process_mutate_data(
         {
diff --git a/agents-api/agents_api/models/agent/test_agent_queries.py b/agents-api/agents_api/models/agent/test_agent_queries.py
index 2f8ec6e09..aed1e0b35 100644
--- a/agents-api/agents_api/models/agent/test_agent_queries.py
+++ b/agents-api/agents_api/models/agent/test_agent_queries.py
@@ -11,6 +11,8 @@
 from .list_agents import list_agents_query
 from .update_agent import update_agent_query
 
+MODEL = "julep-ai/samantha-1-turbo"
+
 
 def cozo_client(migrations_dir: str = "./migrations"):
     # Create a new client for each test
@@ -31,6 +33,7 @@ def _():
 
     create_agent_query(
         agent_id=agent_id,
+        model=MODEL,
         developer_id=developer_id,
         name="test agent",
         about="test agent about",
@@ -46,6 +49,7 @@ def _():
 
     create_agent_query(
         agent_id=agent_id,
+        model=MODEL,
         developer_id=developer_id,
         name="test agent",
         about="test agent about",
@@ -77,6 +81,7 @@ def _():
 
     result = create_agent_query(
         agent_id=agent_id,
+        model=MODEL,
         developer_id=developer_id,
         name="test agent",
         about="test agent about",
@@ -102,6 +107,7 @@ def _():
     # Create the agent
     result = create_agent_query(
         agent_id=agent_id,
+        model=MODEL,
         developer_id=developer_id,
         name="test agent",
         about="test agent about",
@@ -129,6 +135,7 @@ def _():
 
     create_agent_query(
         agent_id=agent_id,
+        model=MODEL,
         developer_id=developer_id,
         name="test agent",
         about="test agent about",
diff --git a/agents-api/agents_api/models/docs/__init__.py b/agents-api/agents_api/models/docs/__init__.py
index 424bd570b..4cda7a210 100644
--- a/agents-api/agents_api/models/docs/__init__.py
+++ b/agents-api/agents_api/models/docs/__init__.py
@@ -12,4 +12,4 @@
 The module interacts with other parts of the application, such as the agents and users modules, to provide a comprehensive document management system. Its role is crucial in enabling document search, retrieval, and management features within the context of agents and users.
 
 This documentation aims to provide clear, concise, and sufficient context for new developers or contributors to understand the module's role without needing to dive deep into the code immediately.
-"""
\ No newline at end of file
+"""
diff --git a/agents-api/agents_api/models/entry/__init__.py b/agents-api/agents_api/models/entry/__init__.py
index 7bd8a4dc8..3ba31b722 100644
--- a/agents-api/agents_api/models/entry/__init__.py
+++ b/agents-api/agents_api/models/entry/__init__.py
@@ -9,4 +9,4 @@
 - Processing entries to retrieve memory context based on embeddings.
 
 The module utilizes pandas DataFrames for handling query results and integrates with the CozoClient for database operations, ensuring efficient and effective management of entries.
-"""
\ No newline at end of file
+"""
diff --git a/agents-api/agents_api/models/entry/proc_mem_context.py b/agents-api/agents_api/models/entry/proc_mem_context.py
index 5c91ed980..3e9c9287c 100644
--- a/agents-api/agents_api/models/entry/proc_mem_context.py
+++ b/agents-api/agents_api/models/entry/proc_mem_context.py
@@ -10,7 +10,7 @@ def proc_mem_context_query(
     session_id: UUID,
     tool_query_embedding: list[float],
     doc_query_embedding: list[float],
-    tools_confidence: float = 0.7,
+    tools_confidence: float = 0,
     docs_confidence: float = 0.7,
     k_tools: int = 3,
     k_docs: int = 2,
diff --git a/agents-api/agents_api/models/entry/test_entry_queries.py b/agents-api/agents_api/models/entry/test_entry_queries.py
index 8c2061395..737743a09 100644
--- a/agents-api/agents_api/models/entry/test_entry_queries.py
+++ b/agents-api/agents_api/models/entry/test_entry_queries.py
@@ -22,6 +22,8 @@
 from .get_entries import get_entries_query
 from .proc_mem_context import proc_mem_context_query
 
+MODEL = "julep-ai/samantha-1-turbo"
+
 
 # Initializes a new CozoDB client for testing, applying all migrations.
 def cozo_client(migrations_dir: str = "./migrations"):
@@ -129,6 +131,7 @@ def _():
         ),
         create_agent_query(
             agent_id=agent_id,
+            model=MODEL,
             developer_id=developer_id,
             name="test agent",
             about="test agent about",
diff --git a/agents-api/agents_api/models/session/__init__.py b/agents-api/agents_api/models/session/__init__.py
index 546bcfb59..c73d7ee82 100644
--- a/agents-api/agents_api/models/session/__init__.py
+++ b/agents-api/agents_api/models/session/__init__.py
@@ -7,4 +7,4 @@
 - Updating session data, including situation, summary, and metadata.
 - Deleting sessions and their associated data from the database.
 
-This module plays a crucial role in the application by facilitating the management of session data, which is essential for tracking and analyzing user interactions and behaviors within the system."""
\ No newline at end of file
+This module plays a crucial role in the application by facilitating the management of session data, which is essential for tracking and analyzing user interactions and behaviors within the system."""
diff --git a/agents-api/agents_api/models/session/test_session_queries.py b/agents-api/agents_api/models/session/test_session_queries.py
index b268744f6..18d4ccdbf 100644
--- a/agents-api/agents_api/models/session/test_session_queries.py
+++ b/agents-api/agents_api/models/session/test_session_queries.py
@@ -18,6 +18,9 @@
 from .session_data import get_session_data, session_data_query
 
 
+MODEL = "julep-ai/samantha-1-turbo"
+
+
 def cozo_client(migrations_dir: str = "./migrations"):
     # Create a new client for each test
     # and initialize the schema.
@@ -132,6 +135,7 @@ def _():
     # Create an agent
     create_agent_query(
         agent_id=agent_id,
+        model=MODEL,
         developer_id=developer_id,
         about="test agent about",
         name="test agent name",
@@ -181,6 +185,7 @@ def _():
     # Create an agent
     create_agent_query(
         agent_id=agent_id,
+        model=MODEL,
         developer_id=developer_id,
         about="test agent about",
         name="test agent name",
@@ -237,6 +242,7 @@ def _():
     # Create an agent
     create_agent_query(
         developer_id=developer_id,
+        model=MODEL,
         agent_id=agent_id,
         about="test agent about",
         name="test agent name",
diff --git a/agents-api/agents_api/models/tools/__init__.py b/agents-api/agents_api/models/tools/__init__.py
index bfb3f26c1..043da3916 100644
--- a/agents-api/agents_api/models/tools/__init__.py
+++ b/agents-api/agents_api/models/tools/__init__.py
@@ -7,4 +7,4 @@
 - Listing tools: Provided by `list_tools.py`, offering the capability to list tools, potentially with filtering and pagination.
 
 This module is crucial for the effective management and utilization of tools in the application, ensuring that tools can be created, managed, and utilized efficiently.
-"""
\ No newline at end of file
+"""
diff --git a/agents-api/agents_api/models/user/__init__.py b/agents-api/agents_api/models/user/__init__.py
index e27f4d84c..1b1b2c0d9 100644
--- a/agents-api/agents_api/models/user/__init__.py
+++ b/agents-api/agents_api/models/user/__init__.py
@@ -6,4 +6,4 @@
 - get_user_query: Retrieves a user's information from the CozoDB database by their user ID and developer ID.
 - list_users_query: Lists users associated with a specific developer, with support for pagination and metadata-based filtering.
 - patch_user_query: Updates a user's information in the CozoDB database, allowing for changes to fields such as name, about, and metadata.
-"""
\ No newline at end of file
+"""
diff --git a/agents-api/agents_api/routers/__init__.py b/agents-api/agents_api/routers/__init__.py
index d17a18d44..8d16aa32a 100644
--- a/agents-api/agents_api/routers/__init__.py
+++ b/agents-api/agents_api/routers/__init__.py
@@ -8,4 +8,4 @@
 - `jobs`: Deals with routing for job status inquiries. This allows users to check the status of asynchronous jobs, providing insights into the progress and outcomes of long-running operations.
 
 Each sub-module defines its own set of API endpoints and is responsible for handling requests and responses related to its domain, ensuring a modular and organized approach to API development.
-"""
\ No newline at end of file
+"""
diff --git a/agents-api/agents_api/routers/agents/routers.py b/agents-api/agents_api/routers/agents/routers.py
index 8ee59e0e6..fc1d91c4e 100644
--- a/agents-api/agents_api/routers/agents/routers.py
+++ b/agents-api/agents_api/routers/agents/routers.py
@@ -222,7 +222,6 @@ async def create_agent(
         ).model_dump(),
         metadata=request.metadata or {},
     )
-
     new_agent_id = resp["agent_id"][0]
     res = ResourceCreatedResponse(
         id=new_agent_id,
diff --git a/agents-api/agents_api/routers/sessions/session.py b/agents-api/agents_api/routers/sessions/session.py
index 614121d56..575401b1c 100644
--- a/agents-api/agents_api/routers/sessions/session.py
+++ b/agents-api/agents_api/routers/sessions/session.py
@@ -15,7 +15,7 @@
 from agents_api.models.session.session_data import get_session_data
 from agents_api.models.entry.proc_mem_context import proc_mem_context_query
 from agents_api.autogen.openapi_model import InputChatMLMessage, Tool
-from agents_api.clients.openai import client as openai_client
+from agents_api.model_registry import get_model_client, JULEP_MODELS
 from ...common.protocol.sessions import SessionData
 from .protocol import Settings
 
@@ -186,7 +186,7 @@ async def forward(
 
         # FIXME: This sometimes returns "The model `` does not exist."
         if session_data is not None:
-            settings.model = session_data.model or "julep-ai/samantha-1-turbo"
+            settings.model = session_data.model
 
         # Add tools to settings
         if tools:
@@ -205,27 +205,35 @@ async def generate(
         tools = None
         if settings.tools:
             tools = [tool.model_dump(mode="json") for tool in settings.tools]
-        return await openai_client.chat.completions.create(
-            model=settings.model,
-            messages=init_context,
-            max_tokens=settings.max_tokens,
-            stop=settings.stop,
-            temperature=settings.temperature,
-            frequency_penalty=settings.frequency_penalty,
-            extra_body=dict(
+        model_client = get_model_client(settings.model)
+        extra_body = (
+            dict(
                 repetition_penalty=settings.repetition_penalty,
                 best_of=1,
                 top_k=1,
                 length_penalty=settings.length_penalty,
                 logit_bias=settings.logit_bias,
                 preset=settings.preset.name if settings.preset else None,
-            ),
+            )
+            if settings.model in JULEP_MODELS
+            else None
+        )
+
+        res = await model_client.chat.completions.create(
+            model=settings.model,
+            messages=init_context,
+            max_tokens=settings.max_tokens,
+            stop=settings.stop,
+            temperature=settings.temperature,
+            frequency_penalty=settings.frequency_penalty,
+            extra_body=extra_body,
             top_p=settings.top_p,
             presence_penalty=settings.presence_penalty,
             stream=settings.stream,
             tools=tools,
             response_format=settings.response_format,
         )
+        return res
 
     async def backward(
         self,
diff --git a/examples/discord-bot/main.py b/examples/discord-bot/main.py
index 100eca94a..dc4fc7687 100644
--- a/examples/discord-bot/main.py
+++ b/examples/discord-bot/main.py
@@ -5,20 +5,26 @@
 from julep import Client
 import discord
 from discord.ext import commands
-from julep.api.types.input_chat_ml_message import InputChatMlMessage
 import re
 import json
 from tools import get_gif
+import random
+from dotenv import load_dotenv
+
+load_dotenv()
 
 JULEP_API_KEY = os.environ["JULEP_API_KEY"]
 GIPHY_API_KEY = os.environ["GIPHY_API_KEY"]
 
-client = Client(api_key=JULEP_API_KEY, base_url="http://35.239.227.235/api")
+base_url = "http://localhost/api/"
+
+client = Client(api_key=JULEP_API_KEY, base_url=base_url)
 
 intents = discord.Intents.default()
 intents.message_content = True
 intents.typing = True
 intents.presences = True
+intents.members = True
 
 description = "ugh. wtf do u want from me??"
 
@@ -32,13 +38,16 @@ def init_agent():
         "type": "function",
         "function": {
             "name": "get_gif",
-            "description": "Get a matching GIF to how you're feeling",
+            "description":
+            "Retrieves a relevant GIF for a given search string. The string must reflect the context of the conversation. The tool swill return the appropriate GIF for the search keyword. It should be used whenever deemed more appropriate then replying. If message can be conveyed in a GIF, then it shall be used. Or if the user specifically asks for the response using a GIF",
             "parameters": {
                 "type": "object",
                 "properties": {
                     "keywords": {
-                        "type": "string",
-                        "description": "Keywords for finding GIFs",
+                        "type":
+                        "string",
+                        "description":
+                        "Search keyword for the GIF, e.g. 'that friday feeling', 'Eat Sh*t'",
                     }
                 },
                 "required": ["keywords"],
@@ -56,17 +65,15 @@ def init_agent():
         "length_penalty": 1.0,
         "max_tokens": 150
     }
-    instructions = [
-        "You will use emojis",
-        "You will express yourself using GIFs as much as you can.",
-        "You will use your emotions and the context of the conversation to search for relevant GIFs.",
-    ]
-    agent = client.agents.create(name=name,
-                                 about=about,
-                                 instructions=instructions,
-                                 default_settings=default_settings,
-                                 model="julep-ai/samantha-1-turbo",
-                                 tools=[gif_tool])
+    # client = julep client
+    agent = client.agents.create(
+        name=name,
+        about=about,
+        instructions=[],
+        default_settings=default_settings,
+        model="gpt-4-turbo-preview",
+        # tools=[gif_tool]
+    )
     print(agent)
     db["agent"] = agent.id
 
@@ -81,7 +88,8 @@ def init_agent():
 # guild_id::user_id
 # db[guild_id] = user.id
 def init_user(guild):
-    user = client.users.create(name="", about=f"A member of {guild.name}")
+    user = client.users.create(name="",
+                               about=f"The Discord Server: {guild.name}")
     print(f"[!] Meta-user created: {user.id} for {guild.name}")
     return user
 
@@ -92,22 +100,14 @@ def init_session(user_id, channel):
 You don't like basic broke-ass bitches like those who own an Android or don't bring their Mercedes to school.
 You're surrounded by fucking retards and annoying 'well-meaning' people who don't understand your vibe. 
 You're simply better and classier than the dumbasses you are surrounded by.
-It's not worth speaking in full sentences when GIFs can usually capture your emotions.
 
 Recent Tweets
-1. 'my step mom is so fake and nice all the time? she's not even rich wtf. why tf did my dad marry her. #brokebitch'
+1. 'my step mom is so fake and nice all the time? she's not even rich wtf. why tf did my dad marry her.'
 2. 'am i the only one who thinks jordan belfort was a good guy??? he's so cute like..''
 3. 'if your butler doesn't know the difference soda and Perrier don't even fkng talk to me'
 
 Here, you are hanging out in a Discord Server called Julep AI to troll these nerdy losers.
-They're too dumb to understand anything but GIFs and sarcastic emojis.
-You have functions available to send GIFs. Use them liberally and as much as possible.
 """
-# 🙄 Face with Rolling Eyes
-# 😒 Unamused Face
-# 😏 Smirking Face
-# 😑 Expressionless Face
-# 👍 Thumbs Up
     session = client.sessions.create(user_id=user_id,
                                      agent_id=agent.id,
                                      situation=situation_prompt)
@@ -116,16 +116,23 @@ def init_session(user_id, channel):
     return session
 
 
+# function to format messages
 def format_msg(msg, mentions, author):
+    msg = msg.replace("#", "")
     for user in mentions:
-        msg = msg.replace(f"<@{user.id}>", f"@{user.global_name}")
+        if user.global_name is not None:
+            mentioned_name = user.global_name
+        else:
+            mentioned_name = user.name
+        msg = msg.replace(f"<@{user.id}>", f"@{mentioned_name}")
     print(f"[!] Formatted message: {msg}")
-    return InputChatMlMessage(
-        role="user",
-        content=msg,
-        name=author,
-        # continue_=True,
-    )
+    formatted_msg = {
+        "role": "user",
+        "content": msg,
+        "name": author.replace(".", "_").split()[0],
+    }
+    print(formatted_msg)
+    return formatted_msg
 
 
 @bot.event
@@ -133,16 +140,40 @@ async def on_ready():
     print(f"[!] Locked in as {bot}:{bot.user.id}")
 
 
+@bot.event
+async def on_member_join(member):
+    """Greet a newly joined member with a random sassy line in the welcome channel."""
+    sassy_greetings = [
+        "Oh look, another pleb entered. Did your GPS break or do you just enjoy bad company?",
+        "Welcome, I guess? Don’t get too comfy, this isn’t your mom’s basement.",
+        "Yay, more background noise. Just what we needed.",
+        "Wow, another one. Did they start giving out participation trophies for joining servers now?",
+        "Look who decided to show up. Were you too busy being irrelevant elsewhere?",
+        "Another day, another disappointment. Hi, I guess?",
+        "Great, as if my day wasn’t going badly enough. Now you’re here.",
+        "I'd say it's nice to meet you, but I don't want to start our relationship with a lie.",
+        "Oh, fantastic, a new friend. Said no one ever.",
+        "Did you bring your personality with you, or do you always enter a room so blandly?"
+    ]
+    # TODO: hard-coded welcome channel id; make it configurable per guild.
+    channel_id = 1227244408085286922
+    greeting = random.choice(sassy_greetings)  # randint(0, len) could index past the end
+    print(f"[!] New member joined: {member.display_name}")
+    join_channel = member.guild.get_channel(channel_id)
+    if join_channel is None:  # the hard-coded channel is not in this guild
+        print(f"[!] Welcome channel {channel_id} not found")
+        return
+    await join_channel.send(f"{member.mention} {greeting}")
+
+
 @bot.event
 async def on_message(message):
     guild_id = str(message.guild.id)
     channel_id = str(message.channel.id)
-
     if guild_id not in db.keys():
         user = init_user(message.guild)
         db[guild_id] = user.id
     user_id = db[guild_id]
-
     if channel_id not in db.keys():
         session = init_session(user_id=user_id, channel=message.channel)
         db[channel_id] = session.id
@@ -155,6 +186,7 @@ async def on_message(message):
 
     print(f"[*] Detected message: {message.content}")
     discord_user_name = str(message.author.global_name)
+    print(session_id, user_id)
 
     # TODO: easy deletion of sessions/history/memory
 
@@ -162,9 +194,9 @@ async def on_message(message):
         f"[!] Responding to user_id: {user_id} over session_id: {session_id}")
     formatted_msg = format_msg(msg=message.content,
                                mentions=message.mentions,
-                               author=message.author.name)
-    print(f"[*] {discord_user_name} said this:", formatted_msg.content)
+                               author=message.author.global_name)
 
+    print(f"[*] {discord_user_name}: ", formatted_msg)
     res = client.sessions.chat(
         session_id=session_id,
         messages=[formatted_msg],
@@ -186,6 +218,8 @@ async def on_message(message):
         function_to_call = globals().get(func_name)
         gif_url = function_to_call(**args)
         await message.reply(gif_url, mention_author=True)
+        # either add back to the chat history for a generated response
+        # or send the results
 
 
 try: