diff --git a/agents-api/agents_api/activities/dialog_insights.py b/agents-api/agents_api/activities/dialog_insights.py
deleted file mode 100644
index 1d5adec39..000000000
--- a/agents-api/agents_api/activities/dialog_insights.py
+++ /dev/null
@@ -1,118 +0,0 @@
-from textwrap import dedent
-from typing import Callable
-
-from temporalio import activity
-
-from agents_api.clients import litellm
-
-from .types import ChatML, DialogInsightsTaskArgs
-
-
-def make_prompt(
-    args: DialogInsightsTaskArgs,
-    max_turns: int = 20,
-):
-    # Unpack
-    dialog = args.dialog
-    person1 = args.person1
-    person2 = args.person2
-
-    # Template
-    template = dedent(
-        """\
-        [[Conversation]]
-        {dialog_context}
-
-        ---
-
-        Write down if there are any details from the conversation above that {person1} might have found interesting from {person2}'s perspective, in a full sentence. Write down point by point only the most important points. Answer must be in third person.
-
-        Answer: "
-        """
-    ).strip()
-
-    # Filter dialog (keep only user and assistant sections)
-    dialog = [entry for entry in dialog if entry.role != "system"]
-
-    # Truncate to max_turns
-    dialog = dialog[-max_turns:]
-
-    # Prepare dialog context
-    dialog_context = "\n".join(
-        [
-            f'{e.name or ("User" if e.role == "user" else "Assistant")}: {e.content}'
-            for e in dialog
-        ]
-    )
-
-    prompt = template.format(
-        dialog_context=dialog_context,
-        person1=person1,
-        person2=person2,
-    )
-
-    return prompt
-
-
-async def run_prompt(
-    dialog: list[ChatML],
-    person1: str,
-    person2: str,
-    model: str = "gpt-4o",
-    max_tokens: int = 400,
-    temperature: float = 0.4,
-    parser: Callable[[str], str] = lambda x: x,
-) -> str:
-    prompt = make_prompt(
-        DialogInsightsTaskArgs(dialog=dialog, person1=person1, person2=person2)
-    )
-
-    response = await litellm.acompletion(
-        model=model,
-        messages=[
-            {
-                "content": prompt,
-                "role": "user",
-            }
-        ],
-        max_tokens=max_tokens,
-        temperature=temperature,
-        stop=["<", "<|"],
-        stream=False,
-    )
-
-    content = response.choices[0].message.content
-
-    return parser(content.strip() if content is not None else "")
-
-
-@activity.defn
-async def dialog_insights(dialog: list[ChatML], person1: str, person2: str) -> None:
-    # session_id = UUID(session_id)
-    # entries = [
-    #     Entry(**row)
-    #     for _, row in client.run(
-    #         get_toplevel_entries_query(session_id=session_id)
-    #     ).iterrows()
-    # ]
-
-    # assert len(entries) > 0, "no need to summarize on empty entries list"
-
-    await run_prompt(dialog, person1, person2)
-
-    # new_entry = Entry(
-    #     session_id=session_id,
-    #     source="summarizer",
-    #     role="system",
-    #     name="information",
-    #     content=response,
-    #     timestamp=entries[-1].timestamp + 0.01,
-    # )
-
-    # client.run(
-    #     entries_summarization_query(
-    #         session_id=session_id,
-    #         new_entry=new_entry,
-    #         old_entry_ids=[e.id for e in entries],
-    #     )
-    # )
diff --git a/agents-api/agents_api/activities/relationship_summary.py b/agents-api/agents_api/activities/relationship_summary.py
deleted file mode 100644
index 997eaf40a..000000000
--- a/agents-api/agents_api/activities/relationship_summary.py
+++ /dev/null
@@ -1,102 +0,0 @@
-from textwrap import dedent
-from typing import Callable
-
-from temporalio import activity
-
-from ..clients import litellm
-from .types import RelationshipSummaryTaskArgs
-
-
-def make_prompt(args: RelationshipSummaryTaskArgs):
-    # Unpack
-    statements = args.statements
-    person1 = args.person1
-    person2 = args.person2
-
-    # Template
-    template = dedent(
-        """\
-        Statements:
-        - {statements_joined}
-
-        Based on the statements above, summarize {person1} and {person2}'s relationship in a 2-3 sentences. What do they feel or know about each other?
-
-        Answer: "
-        """
-    ).strip()
-
-    prompt = template.format(
-        statements_joined="\n- ".join(statements),
-        person1=person1,
-        person2=person2,
-    )
-
-    return prompt
-
-
-async def run_prompt(
-    statements: list[str],
-    person1: str,
-    person2: str,
-    model: str = "gpt-4o",
-    max_tokens: int = 400,
-    temperature: float = 0.6,
-    parser: Callable[[str], str] = lambda x: x,
-) -> str:
-    prompt = make_prompt(
-        RelationshipSummaryTaskArgs(
-            statements=statements, person1=person1, person2=person2
-        )
-    )
-
-    response = await litellm.acompletion(
-        model=model,
-        messages=[
-            {
-                "content": prompt,
-                "role": "user",
-            }
-        ],
-        max_tokens=max_tokens,
-        temperature=temperature,
-        stop=["<", "<|"],
-        stream=False,
-    )
-
-    content = response.choices[0].message.content
-
-    return parser(content.strip() if content is not None else "")
-
-
-@activity.defn
-async def relationship_summary(
-    statements: list[str], person1: str, person2: str
-) -> None:
-    # session_id = UUID(session_id)
-    # entries = [
-    #     Entry(**row)
-    #     for _, row in client.run(
-    #         get_toplevel_entries_query(session_id=session_id)
-    #     ).iterrows()
-    # ]
-
-    # assert len(entries) > 0, "no need to summarize on empty entries list"
-
-    await run_prompt(statements=statements, person1=person1, person2=person2)
-
-    # new_entry = Entry(
-    #     session_id=session_id,
-    #     source="summarizer",
-    #     role="system",
-    #     name="information",
-    #     content=response,
-    #     timestamp=entries[-1].timestamp + 0.01,
-    # )
-
-    # client.run(
-    #     entries_summarization_query(
-    #         session_id=session_id,
-    #         new_entry=new_entry,
-    #         old_entry_ids=[e.id for e in entries],
-    #     )
-    # )
diff --git a/agents-api/agents_api/activities/salient_questions.py b/agents-api/agents_api/activities/salient_questions.py
deleted file mode 100644
index 0194e8c72..000000000
--- a/agents-api/agents_api/activities/salient_questions.py
+++ /dev/null
@@ -1,91 +0,0 @@
-from textwrap import dedent
-from typing import Callable
-
-from temporalio import activity
-
-from ..clients import litellm
-from .types import SalientQuestionsTaskArgs
-
-
-def make_prompt(args: SalientQuestionsTaskArgs):
-    # Unpack
-    statements = args.statements
-    num = args.num
-
-    # Template
-    template = dedent(
-        """\
-        Statements:
-        - {statements_joined}
-
-        Given only the information above, what are the {num} most salient high-level questions we can answer about the subjects grounded in the statements?
-
-        """
-    ).strip()
-
-    prompt = template.format(
-        statements_joined="\n- ".join(statements),
-        num=num,
-    )
-
-    return prompt
-
-
-async def run_prompt(
-    statements: list[str],
-    num: int = 3,
-    model: str = "gpt-4o",
-    max_tokens: int = 400,
-    temperature: float = 0.6,
-    parser: Callable[[str], str] = lambda x: x,
-) -> str:
-    prompt = make_prompt(SalientQuestionsTaskArgs(statements=statements, num=num))
-
-    response = await litellm.acompletion(
-        model=model,
-        messages=[
-            {
-                "content": prompt,
-                "role": "user",
-            }
-        ],
-        max_tokens=max_tokens,
-        temperature=temperature,
-        stop=["<", "<|"],
-        stream=False,
-    )
-
-    content = response.choices[0].message.content
-
-    return parser(content.strip() if content is not None else "")
-
-
-@activity.defn
-async def salient_questions(statements: list[str], num: int = 3) -> None:
-    # session_id = UUID(session_id)
-    # entries = [
-    #     Entry(**row)
-    #     for _, row in client.run(
-    #         get_toplevel_entries_query(session_id=session_id)
-    #     ).iterrows()
-    # ]
-
-    # assert len(entries) > 0, "no need to summarize on empty entries list"
-
-    await run_prompt(statements=statements, num=num)
-
-    # new_entry = Entry(
-    #     session_id=session_id,
-    #     source="summarizer",
-    #     role="system",
-    #     name="information",
-    #     content=response,
-    #     timestamp=entries[-1].timestamp + 0.01,
-    # )
-
-    # client.run(
-    #     entries_summarization_query(
-    #         session_id=session_id,
-    #         new_entry=new_entry,
-    #         old_entry_ids=[e.id for e in entries],
-    #     )
-    # )
diff --git a/agents-api/agents_api/activities/task_steps/__init__.py b/agents-api/agents_api/activities/task_steps/__init__.py
index a9818d515..494226a5b 100644
--- a/agents-api/agents_api/activities/task_steps/__init__.py
+++ b/agents-api/agents_api/activities/task_steps/__init__.py
@@ -14,7 +14,9 @@
     UpdateExecutionRequest,
     YieldStep,
 )
-from ...clients.litellm import acompletion
+from ...clients import (
+    litellm,  # We dont directly import `acompletion` so we can mock it
+)
 from ...clients.worker.types import ChatML
 from ...common.protocol.tasks import (
     StepContext,
@@ -57,7 +59,7 @@ async def prompt_step(context: StepContext) -> dict:
     settings: dict = context.definition.settings.model_dump()

     # Get settings and run llm
-    response = await acompletion(
+    response = await litellm.acompletion(
         messages=messages,
         **settings,
     )
diff --git a/agents-api/agents_api/activities/types.py b/agents-api/agents_api/activities/types.py
index 37fd8015d..f550b5c75 100644
--- a/agents-api/agents_api/activities/types.py
+++ b/agents-api/agents_api/activities/types.py
@@ -1,111 +1,27 @@
-from typing import Any, Callable, Literal, Optional, Protocol, TypedDict
+from typing import Literal
 from uuid import UUID

 from pydantic import BaseModel

+from ..autogen.openapi_model import InputChatMLMessage

-class PromptModule(Protocol):
-    stop: list[str]
-    temperature: float
-    parser: Callable[[str], str]
-    make_prompt: Callable[..., str]
-
-
-class ChatML(BaseModel):
-    role: Literal["system", "user", "assistant"]
-    content: str
-
-    name: Optional[str] = None
-    entry_id: Optional[UUID] = None
-
-    processed: bool = False
-    parent_id: Optional[UUID] = None
-    session_id: Optional[UUID] = None
-    timestamp: Optional[float] = None
-    token_count: Optional[int] = None
-
-
-class BaseTask(BaseModel): ...
-
-
-class BaseTaskArgs(BaseModel): ...
-
-
-class AddPrinciplesTaskArgs(BaseTaskArgs):
-    scores: dict[str, Any]
-    full: bool = False
-    name: Optional[str] = None
-    user_id: Optional[UUID] = None
-    character_id: Optional[UUID] = None
-
-
-class AddPrinciplesTask(BaseTask):
-    name: Literal["add_principles.v1"]
-    args: AddPrinciplesTaskArgs
-
-
-class MemoryManagementTaskArgs(BaseTaskArgs):
+class MemoryManagementTaskArgs(BaseModel):
     session_id: UUID
     model: str
-    dialog: list[ChatML]
+    dialog: list[InputChatMLMessage]
     previous_memories: list[str] = []


-class MemoryManagementTask(BaseTask):
+class MemoryManagementTask(BaseModel):
     name: Literal["memory_management.v1"]
     args: MemoryManagementTaskArgs


-class MemoryDensityTaskArgs(BaseTaskArgs):
-    memory: str
-
-
-class MemoryDensityTask(BaseTask):
-    name: Literal["memory_density.v1"]
-    args: MemoryDensityTaskArgs
-
-
-class MemoryRatingTaskArgs(BaseTaskArgs):
+class MemoryRatingTaskArgs(BaseModel):
     memory: str


-class MemoryRatingTask(BaseTask):
+class MemoryRatingTask(BaseModel):
     name: Literal["memory_rating.v1"]
     args: MemoryRatingTaskArgs
-
-
-class DialogInsightsTaskArgs(BaseTaskArgs):
-    dialog: list[ChatML]
-    person1: str
-    person2: str
-
-
-class DialogInsightsTask(BaseTask):
-    name: Literal["dialog_insights.v1"]
-    args: DialogInsightsTaskArgs
-
-
-class RelationshipSummaryTaskArgs(BaseTaskArgs):
-    statements: list[str]
-    person1: str
-    person2: str
-
-
-class RelationshipSummaryTask(BaseTask):
-    name: Literal["relationship_summary.v1"]
-    args: RelationshipSummaryTaskArgs
-
-
-class SalientQuestionsTaskArgs(BaseTaskArgs):
-    statements: list[str]
-    num: int = 3
-
-
-class SalientQuestionsTask(BaseTask):
-    name: Literal["salient_questions.v1"]
-    args: SalientQuestionsTaskArgs
-
-
-class CombinedTask(TypedDict):
-    name: str
-    args: dict[Any, Any]
diff --git a/agents-api/agents_api/routers/agents/create_agent.py b/agents-api/agents_api/routers/agents/create_agent.py
index 56e2eadf7..d1cac0d6b 100644
--- a/agents-api/agents_api/routers/agents/create_agent.py
+++ b/agents-api/agents_api/routers/agents/create_agent.py
@@ -19,7 +19,6 @@ async def create_agent(
     x_developer_id: Annotated[UUID4, Depends(get_developer_id)],
     data: CreateAgentRequest,
 ) -> ResourceCreatedResponse:
-    print("create_agent", x_developer_id, data)
     agent = models.agent.create_agent(
         developer_id=x_developer_id,
         data=data,
diff --git a/agents-api/agents_api/routers/sessions/chat.py b/agents-api/agents_api/routers/sessions/chat.py
index afe7e3e2d..e6103c15e 100644
--- a/agents-api/agents_api/routers/sessions/chat.py
+++ b/agents-api/agents_api/routers/sessions/chat.py
@@ -118,6 +118,11 @@
     # Get the tools
     tools = settings.get("tools") or chat_context.get_active_tools()

+    # Truncate the messages if necessary
+    if chat_context.session.context_overflow == "truncate":
+        # messages = messages[-settings["max_tokens"] :]
+        raise NotImplementedError("Truncation is not yet implemented")
+
     # Get the response from the model
     model_response = await litellm.acompletion(
         messages=messages,
@@ -129,9 +134,12 @@

     # Save the input and the response to the session history
     if input.save:
+        # TODO: Count the number of tokens before saving it to the session
+
         new_entries = [
             CreateEntryRequest(**msg, source="api_request") for msg in new_messages
         ]
+
         background_tasks.add_task(
             create_entries,
             developer_id=developer.id,
@@ -140,12 +148,19 @@
             mark_session_as_updated=True,
         )

+    # Adaptive context handling
+    jobs = []
+    if chat_context.session.context_overflow == "adaptive":
+        # TODO: Start the adaptive context workflow
+        # jobs = [await start_adaptive_context_workflow]
+        raise NotImplementedError("Adaptive context is not yet implemented")
+
     # Return the response
     chat_response_class = ChunkChatResponse if input.stream else MessageChatResponse
     chat_response: ChatResponse = chat_response_class(
         id=uuid4(),
         created_at=utcnow(),
-        jobs=[],
+        jobs=jobs,
         docs=doc_references,
         usage=model_response.usage.model_dump(),
         choices=[choice.model_dump() for choice in model_response.choices],
diff --git a/agents-api/agents_api/routers/users/delete_user.py b/agents-api/agents_api/routers/users/delete_user.py
index 3a63e42e9..fd1d02a94 100644
--- a/agents-api/agents_api/routers/users/delete_user.py
+++ b/agents-api/agents_api/routers/users/delete_user.py
@@ -14,5 +14,4 @@
 async def delete_user(
     user_id: UUID4, x_developer_id: Annotated[UUID4, Depends(get_developer_id)]
 ) -> ResourceDeletedResponse:
-    print(user_id)
     return delete_user_query(developer_id=x_developer_id, user_id=user_id)
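
Note on the `task_steps/__init__.py` change above: by importing the `litellm` client module instead of `acompletion` directly, `prompt_step` resolves `litellm.acompletion` at call time, so a test can swap the attribute on the shared module. The snippet below is a minimal sketch of that pattern only; the fake response shape and the standalone runner are assumptions for illustration and are not part of this patch.

# Sketch: patching the module attribute that prompt_step now goes through.
import asyncio
from unittest.mock import AsyncMock, patch

from agents_api.clients import litellm


async def main():
    fake = AsyncMock(return_value={"choices": []})  # assumed response shape

    # Callers that look up `litellm.acompletion` on the module (as prompt_step
    # now does) see the patched coroutine; code that had done
    # `from ...clients.litellm import acompletion` would keep its own,
    # unpatched reference.
    with patch.object(litellm, "acompletion", fake):
        result = await litellm.acompletion(messages=[], model="gpt-4o")

    assert result == {"choices": []}
    fake.assert_awaited_once()


asyncio.run(main())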