Commit

Merge branch 'main' into chat-stream-finish-reason

moldhouse authored Feb 21, 2025
2 parents 8149518 + 407e4a1 commit ae762c4
Showing 2 changed files with 22 additions and 5 deletions.
aleph_alpha_client/__init__.py — 1 change: 1 addition & 0 deletions
@@ -17,6 +17,7 @@
QuotaError,
)
from .completion import CompletionRequest, CompletionResponse
from .chat import ChatRequest, Message, ChatResponse
from .detokenization import DetokenizationRequest, DetokenizationResponse
from .document import Document
from .embedding import (
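With this re-export merged, the chat types resolve from the package root, which is what the updated docstring examples below rely on. A minimal sketch of the root-level import (assuming the package is installed; the request mirrors the docstring examples):

    from aleph_alpha_client import ChatRequest, Message

    # Building a chat request no longer requires importing from the
    # aleph_alpha_client.chat submodule.
    request = ChatRequest(
        messages=[Message(role="user", content="Hello, how are you?")],
        model="llama-3.1-8b-instruct",
    )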
aleph_alpha_client/aleph_alpha_client.py — 26 changes: 21 additions & 5 deletions
@@ -324,14 +324,19 @@ def chat(
Always the latest version of model is used.
Examples:
>>> import os
>>> from aleph_alpha_client import Client, ChatRequest, Message
>>>
>>> client = Client(token=os.environ["TOKEN"], host="https://inference-api.your.domain")
>>> model = "llama-3.1-8b-instruct"
>>> # create a chat request
>>> request = ChatRequest(
        messages=[Message(role="user", content="Hello, how are you?")],
        model=model,
    )
>>>
>>> # chat with the model
>>> result = client.chat(request, model=model_name)
>>> result = client.chat(request, model=model)
>>> print(result.message)
"""
response = self._post_request("chat/completions", request, model)
return ChatResponse.from_json(response)
@@ -916,14 +921,20 @@ async def chat(
Always the latest version of model is used.
Examples:
>>> import os
>>> from aleph_alpha_client import AsyncClient, ChatRequest, Message
>>>
>>> client = AsyncClient(token=os.environ["TOKEN"], host="https://inference-api.your.domain")
>>> model = "llama-3.1-8b-instruct"
>>> # create a chat request
>>> request = ChatRequest(
        messages=[Message(role="user", content="Hello, how are you?")],
        model=model,
    )
>>>
>>> # chat with the model
>>> result = await client.chat(request, model=model_name)
>>> result = await client.chat(request, model=model)
>>> print(result.message)
"""
response = await self._post_request(
"chat/completions",
@@ -991,18 +1002,23 @@ async def chat_with_streaming(
Always the latest version of model is used.
Examples:
>>> import os
>>> from aleph_alpha_client import AsyncClient, ChatRequest, Message
>>>
>>> client = AsyncClient(token=os.environ["TOKEN"], host="https://inference-api.your.domain")
>>> model = "llama-3.1-8b-instruct"
>>> # create a chat request
>>> request = ChatRequest(
        messages=[Message(role="user", content="Hello, how are you?")],
        model=model,
    )
>>>
>>> # chat with the model
>>> result = await client.chat_with_streaming(request, model=model_name)
>>> result = client.chat_with_streaming(request, model=model)
>>>
>>> # consume the chat stream
>>> async for stream_item in result:
>>>     do_something_with(stream_item)
>>>     print(stream_item)
"""
async for stream_item_json in self._post_request_with_streaming(
"chat/completions",
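The streaming example above assumes an already-running event loop. A rough end-to-end sketch of driving it from a plain script, assuming AsyncClient can be used as an async context manager and that a valid API token is available in the TOKEN environment variable:

    import asyncio
    import os

    from aleph_alpha_client import AsyncClient, ChatRequest, Message

    async def main() -> None:
        # Keep the async client open for the duration of the streamed chat.
        async with AsyncClient(
            token=os.environ["TOKEN"], host="https://inference-api.your.domain"
        ) as client:
            request = ChatRequest(
                messages=[Message(role="user", content="Hello, how are you?")],
                model="llama-3.1-8b-instruct",
            )
            # chat_with_streaming returns an async generator; items arrive
            # as the model produces them.
            async for stream_item in client.chat_with_streaming(
                request, model="llama-3.1-8b-instruct"
            ):
                print(stream_item)

    asyncio.run(main())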
