Commit

Merge branch 'main' into chat-stream-finish-reason

moldhouse authored Feb 21, 2025
2 parents 8149518 + 407e4a1 commit ae762c4
Showing 2 changed files with 22 additions and 5 deletions.
aleph_alpha_client/__init__.py — 1 change: 1 addition & 0 deletions
@@ -17,6 +17,7 @@
QuotaError,
)
from .completion import CompletionRequest, CompletionResponse
from .chat import ChatRequest, Message, ChatResponse
from .detokenization import DetokenizationRequest, DetokenizationResponse
from .document import Document
from .embedding import (
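With this re-export merged, the chat types resolve from the package root, which is what the updated docstring examples below rely on. A minimal sketch of the root-level import (assuming the package is installed; the request mirrors the docstring examples):

    from aleph_alpha_client import ChatRequest, Message

    # Building a chat request no longer requires importing from the
    # aleph_alpha_client.chat submodule.
    request = ChatRequest(
        messages=[Message(role="user", content="Hello, how are you?")],
        model="llama-3.1-8b-instruct",
    )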
aleph_alpha_client/aleph_alpha_client.py — 26 changes: 21 additions & 5 deletions
@@ -324,14 +324,19 @@ def chat(
Always the latest version of model is used.
Examples:
>>> import os
>>> from aleph_alpha_client import Client, ChatRequest, Message
>>>
>>> client = Client(token=os.environ["TOKEN"], host="https://inference-api.your.domain")
>>> model = "llama-3.1-8b-instruct"
>>> # create a chat request
>>> request = ChatRequest(
        messages=[Message(role="user", content="Hello, how are you?")],
        model=model,
    )
>>>
>>> # chat with the model
>>> result = client.chat(request, model=model_name)
>>> result = client.chat(request, model=model)
>>> print(result.message)
"""
response = self._post_request("chat/completions", request, model)
return ChatResponse.from_json(response)
@@ -916,14 +921,20 @@ async def chat(
Always the latest version of model is used.
Examples:
>>> import os
>>> from aleph_alpha_client import AsyncClient, ChatRequest, Message
>>>
>>> client = AsyncClient(token=os.environ["TOKEN"], host="https://inference-api.your.domain")
>>> model = "llama-3.1-8b-instruct"
>>> # create a chat request
>>> request = ChatRequest(
        messages=[Message(role="user", content="Hello, how are you?")],
        model=model,
    )
>>>
>>> # chat with the model
>>> result = await client.chat(request, model=model_name)
>>> result = await client.chat(request, model=model)
>>> print(result.message)
"""
response = await self._post_request(
"chat/completions",
@@ -991,18 +1002,23 @@ async def chat_with_streaming(
Always the latest version of model is used.
Examples:
>>> import os
>>> from aleph_alpha_client import AsyncClient, ChatRequest, Message
>>>
>>> client = AsyncClient(token=os.environ["TOKEN"], host="https://inference-api.your.domain")
>>> model = "llama-3.1-8b-instruct"
>>> # create a chat request
>>> request = ChatRequest(
        messages=[Message(role="user", content="Hello, how are you?")],
        model=model,
    )
>>>
>>> # chat with the model
>>> result = await client.chat_with_streaming(request, model=model_name)
>>> result = client.chat_with_streaming(request, model=model)
>>>
>>> # consume the chat stream
>>> async for stream_item in result:
>>>     do_something_with(stream_item)
>>>     print(stream_item)
"""
async for stream_item_json in self._post_request_with_streaming(
"chat/completions",
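The streaming example above assumes an already-running event loop. A rough end-to-end sketch of driving it from a plain script, assuming AsyncClient can be used as an async context manager and that a valid API token is available in the TOKEN environment variable:

    import asyncio
    import os

    from aleph_alpha_client import AsyncClient, ChatRequest, Message

    async def main() -> None:
        # Keep the async client open for the duration of the streamed chat.
        async with AsyncClient(
            token=os.environ["TOKEN"], host="https://inference-api.your.domain"
        ) as client:
            request = ChatRequest(
                messages=[Message(role="user", content="Hello, how are you?")],
                model="llama-3.1-8b-instruct",
            )
            # chat_with_streaming returns an async generator; items arrive
            # as the model produces them.
            async for stream_item in client.chat_with_streaming(
                request, model="llama-3.1-8b-instruct"
            ):
                print(stream_item)

    asyncio.run(main())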
