
f/generic model support #24

Merged · 17 commits · Apr 15, 2024
Changes from 11 commits
11 changes: 5 additions & 6 deletions .env.example

@@ -8,8 +8,7 @@ COZO_ROCKSDB_DIR=cozo.db
 DTYPE=bfloat16
 EMBEDDING_SERVICE_URL=http://text-embeddings-inference/embed
 GATEWAY_PORT=80
-GENERATION_AUTH_TOKEN=myauthkey
-GENERATION_URL=http://model-serving:8000/v1
+OPENAI_API_KEY=""
 GPU_MEMORY_UTILIZATION=0.95
 HF_TOKEN=""
 HUGGING_FACE_HUB_TOKEN=""
@@ -21,17 +20,17 @@ GF_SECURITY_ADMIN_PASSWORD=changethis
 MODEL_API_KEY=myauthkey
 MODEL_API_KEY_HEADER_NAME=Authorization
 MODEL_API_URL=http://model-serving:8000
+MODEL_INFERENCE_URL=http://model-serving:8000/v1
 MODEL_ID=BAAI/llm-embedder
-# MODEL_NAME = "julep-ai/samantha-1-turbo-awq"
-MODEL_NAME = "julep-ai/samantha-1-turbo"
+MODEL_NAME=julep-ai/samantha-1-turbo
 SKIP_CHECK_DEVELOPER_HEADERS=true
 SUMMARIZATION_TOKENS_THRESHOLD=2048
 TEMPERATURE_SCALING_FACTOR=0.9
 TEMPERATURE_SCALING_POWER=0.9
 TEMPORAL_ENDPOINT=temporal:7233
 TEMPORAL_NAMESPACE=default
 TEMPORAL_WORKER_URL=temporal:7233
-TP_SIZE=2
+TP_SIZE=1
 TRUNCATE_EMBED_TEXT=true
 TRAEFIK_LOG_LEVEL=DEBUG
-WORKER_URL=temporal:7233
\ No newline at end of file
+WORKER_URL=temporal:7233
4 changes: 4 additions & 0 deletions .gitignore

@@ -5,4 +5,8 @@ ngrok*
 *.env
 *.pyc
 */node_modules/
+.devcontainer
+node_modules/
+package-lock.json
 package.json
+.aider*

Review comment (Contributor Author): why are these gitignored? be careful
4 changes: 2 additions & 2 deletions agents-api/agents_api/activities/co_density.py

@@ -3,7 +3,7 @@

 from temporalio import activity

-from ..clients.openai import client as openai_client
+from ..clients.model import julep_client
 from .types import MemoryDensityTaskArgs


@@ -63,7 +63,7 @@ async def run_prompt(
 ) -> str:
     prompt = make_prompt(MemoryDensityTaskArgs(memory=memory))

-    response = await openai_client.chat.completions.create(
+    response = await julep_client.chat.completions.create(
         model=model,
         messages=[
             {
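The same two-line swap recurs in every activity module below. For reference, a minimal sketch of the resulting call shape, assuming the julep_client defined in agents-api/agents_api/clients/model.py later in this diff; the prompt text, default model value, and function signature here are illustrative placeholders, not taken from this diff:

    # Sketch of the shared run_prompt pattern after the client swap.
    # Prompt text and the default model value are illustrative.
    from ..clients.model import julep_client


    async def run_prompt(memory: str, model: str = "julep-ai/samantha-1-turbo") -> str:
        prompt = f"Estimate the information density of this memory:\n{memory}"

        # julep_client speaks the OpenAI chat-completions protocol, so the
        # call site is unchanged apart from the client object itself.
        response = await julep_client.chat.completions.create(
            model=model,
            messages=[{"role": "user", "content": prompt}],
        )

        return response.choices[0].message.content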
4 changes: 2 additions & 2 deletions agents-api/agents_api/activities/dialog_insights.py

@@ -3,7 +3,7 @@

 from temporalio import activity

-from ..clients.openai import client as openai_client
+from ..clients.model import julep_client
 from .types import ChatML, DialogInsightsTaskArgs


@@ -66,7 +66,7 @@ async def run_prompt(
         DialogInsightsTaskArgs(dialog=dialog, person1=person1, person2=person2)
     )

-    response = await openai_client.chat.completions.create(
+    response = await julep_client.chat.completions.create(
         model=model,
         messages=[
             {
4 changes: 2 additions & 2 deletions agents-api/agents_api/activities/mem_mgmt.py

@@ -4,7 +4,7 @@

 from temporalio import activity

-from ..clients.openai import client as openai_client
+from ..clients.model import julep_client
 from .types import ChatML, MemoryManagementTaskArgs


@@ -135,7 +135,7 @@ async def run_prompt(
         )
     )

-    response = await openai_client.chat.completions.create(
+    response = await julep_client.chat.completions.create(
         model=model,
         messages=[
             {
4 changes: 2 additions & 2 deletions agents-api/agents_api/activities/mem_rating.py

@@ -3,7 +3,7 @@

 from temporalio import activity

-from ..clients.openai import client as openai_client
+from ..clients.model import julep_client
 from .types import MemoryRatingTaskArgs


@@ -47,7 +47,7 @@ async def run_prompt(
 ) -> str:
     prompt = make_prompt(MemoryRatingTaskArgs(memory=memory))

-    response = await openai_client.chat.completions.create(
+    response = await julep_client.chat.completions.create(
         model=model,
         messages=[
             {
4 changes: 2 additions & 2 deletions agents-api/agents_api/activities/relationship_summary.py

@@ -3,7 +3,7 @@

 from temporalio import activity

-from ..clients.openai import client as openai_client
+from ..clients.model import julep_client
 from .types import RelationshipSummaryTaskArgs


@@ -49,7 +49,7 @@ async def run_prompt(
         )
     )

-    response = await openai_client.chat.completions.create(
+    response = await julep_client.chat.completions.create(
         model=model,
         messages=[
             {
4 changes: 2 additions & 2 deletions agents-api/agents_api/activities/salient_questions.py

@@ -3,7 +3,7 @@

 from temporalio import activity

-from ..clients.openai import client as openai_client
+from ..clients.model import julep_client
 from .types import SalientQuestionsTaskArgs


@@ -40,7 +40,7 @@ async def run_prompt(
 ) -> str:
     prompt = make_prompt(SalientQuestionsTaskArgs(statements=statements, num=num))

-    response = await openai_client.chat.completions.create(
+    response = await julep_client.chat.completions.create(
         model=model,
         messages=[
             {
4 changes: 2 additions & 2 deletions agents-api/agents_api/activities/summarization.py

@@ -10,7 +10,7 @@
     entries_summarization_query,
 )
 from agents_api.common.protocol.entries import Entry
-from agents_api.clients.openai import client as openai_client
+from agents_api.clients.model import julep_client


 example_previous_memory = """
@@ -130,7 +130,7 @@ async def run_prompt(
 ) -> str:
     prompt = make_prompt(dialog, previous_memories, **kwargs)

-    response = await openai_client.chat.completions.create(
+    response = await julep_client.chat.completions.create(
         model=model,
         messages=[
             {
12 changes: 12 additions & 0 deletions agents-api/agents_api/clients/model.py

@@ -0,0 +1,12 @@
+from openai import AsyncOpenAI
+from ..env import model_inference_url, model_api_key, openai_api_key
+
+
+openai_client = AsyncOpenAI(
+    api_key=openai_api_key
+)
+
+julep_client = AsyncOpenAI(
+    base_url=model_inference_url,
+    api_key=model_api_key,
+)
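Both clients expose the same AsyncOpenAI interface; only the endpoint and credentials differ. A hypothetical routing helper (not part of this diff) showing how a caller might pick between them — the OPENAI_HOSTED set of model names is an assumption for illustration:

    # Hypothetical dispatch helper; the OPENAI_HOSTED set is illustrative.
    from openai import AsyncOpenAI

    from agents_api.clients.model import julep_client, openai_client

    OPENAI_HOSTED = {"gpt-4", "gpt-3.5-turbo"}


    def client_for(model: str) -> AsyncOpenAI:
        # Hosted OpenAI models use the stock client and OPENAI_API_KEY;
        # everything else goes to the self-hosted OpenAI-compatible server
        # at MODEL_INFERENCE_URL, authenticated with MODEL_API_KEY.
        return openai_client if model in OPENAI_HOSTED else julep_client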
8 changes: 0 additions & 8 deletions agents-api/agents_api/clients/openai.py

This file was deleted.

9 changes: 8 additions & 1 deletion agents-api/agents_api/common/exceptions/agents.py

@@ -1,6 +1,6 @@
 from uuid import UUID
 from . import BaseCommonException
-
+from agents_api.model_registry import ALL_AVAILABLE_MODELS

 class BaseAgentException(BaseCommonException):
     pass
@@ -26,3 +26,10 @@ def __init__(self, agent_id: UUID | str, doc_id: UUID | str):
         super().__init__(
             f"Doc {str(doc_id)} not found for agent {str(agent_id)}", http_code=404
         )
+
+class AgentModelNotValid(BaseAgentException):
+    def __init__(self, model: str):
+        super().__init__(
+            f"Unknown model: {model}. Please provide a valid model name. "
+            "Known models are: " + ", ".join(ALL_AVAILABLE_MODELS.keys()), http_code=400
+        )
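A sketch of a call site for the new exception — the validation helper itself is hypothetical, but AgentModelNotValid and ALL_AVAILABLE_MODELS are the names introduced in this PR:

    # Hypothetical call site: reject unknown models before creating an agent.
    from agents_api.common.exceptions.agents import AgentModelNotValid
    from agents_api.model_registry import ALL_AVAILABLE_MODELS


    def validate_agent_model(model: str) -> str:
        if model not in ALL_AVAILABLE_MODELS:
            raise AgentModelNotValid(model)  # carries http_code=400
        return model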
3 changes: 0 additions & 3 deletions agents-api/agents_api/common/protocol/agents.py

@@ -11,6 +11,3 @@ class AgentDefaultSettings(BaseModel):
     presence_penalty: float = 0.0
     frequency_penalty: float = 0.0
     min_p: float = 0.01
-
-
-ModelType = Literal["julep-ai/samantha-1", "julep-ai/samantha-1-turbo"]
2 changes: 1 addition & 1 deletion agents-api/agents_api/common/protocol/entries.py

@@ -2,7 +2,6 @@
 import json
 from typing import Literal
 from uuid import UUID, uuid4
-
 from pydantic import BaseModel, Field, computed_field, validator
 from agents_api.autogen.openapi_model import Role

@@ -21,6 +20,7 @@ class Entry(BaseModel):
     created_at: float = Field(default_factory=lambda: datetime.utcnow().timestamp())
     timestamp: float = Field(default_factory=lambda: datetime.utcnow().timestamp())

+
    @computed_field
    @property
    def token_count(self) -> int:
17 changes: 14 additions & 3 deletions agents-api/agents_api/common/protocol/sessions.py

@@ -1,9 +1,10 @@
 from uuid import UUID

-from pydantic import BaseModel
+from pydantic import BaseModel, validator

-from .agents import ModelType, AgentDefaultSettings
+from .agents import AgentDefaultSettings
+
+from model_registry import ALL_AVAILABLE_MODELS

 class SessionSettings(AgentDefaultSettings):
     pass
@@ -21,5 +22,15 @@ class SessionData(BaseModel):
     agent_about: str
     updated_at: float
     created_at: float
-    model: ModelType
+    model: str
     default_settings: SessionSettings
+
+    @validator("model")
+    def validate_model_type(cls, model):
+        if model not in ALL_AVAILABLE_MODELS.keys():
+            raise ValueError(
+                f"Unknown model: {model}. Please provide a valid model name. "
+                "Known models are: " + ", ".join(ALL_AVAILABLE_MODELS.keys())
+            )
+
+        return model
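Since this is a pydantic v1 @validator, constructing a SessionData with an unregistered model name raises a ValueError. SessionData has required fields collapsed out of this hunk, so the sketch below exercises the validator directly rather than building a full instance; the model name is a deliberate dummy:

    # Sketch: the validator runs at construction time; calling it directly
    # shows the failure mode without filling in SessionData's other fields.
    from agents_api.common.protocol.sessions import SessionData

    try:
        SessionData.validate_model_type("not-a-registered-model")
    except ValueError as exc:
        print(exc)  # Unknown model: not-a-registered-model. ...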
7 changes: 3 additions & 4 deletions agents-api/agents_api/env.py

@@ -22,8 +22,9 @@
 prediction_api_endpoint: str = env.str(
     "PREDICTION_API_ENDPOINT", default="us-central1-aiplatform.googleapis.com"
 )
-generation_url: str = env.str("GENERATION_URL", default=None)
-generation_auth_token: str = env.str("GENERATION_AUTH_TOKEN", default=None)
+model_api_key: str = env.str("MODEL_API_KEY", default=None)
+model_inference_url: str = env.str("MODEL_INFERENCE_URL", default=None)
+openai_api_key: str = env.str("OPENAI_API_KEY", default=None)
 summarization_ratio_threshold: float = env.float(
     "MAX_TOKENS_RATIO_TO_SUMMARIZE", default=0.5
 )
@@ -63,8 +64,6 @@
     debug=debug,
     cozo_host=cozo_host,
     cozo_auth=cozo_auth,
-    generation_url=generation_url,
-    generation_auth_token=generation_auth_token,
     summarization_ratio_threshold=summarization_ratio_threshold,
     summarization_tokens_threshold=summarization_tokens_threshold,
     worker_url=worker_url,