Add prompt caching support for Claude. #226

Merged · 2 commits · Jan 22, 2025
8 changes: 8 additions & 0 deletions README.md
@@ -122,6 +122,14 @@ A list of models to use:
- [OpenAI models on Azure](https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models)
- [Gemini AI models](https://ai.google.dev/gemini-api/docs/models/gemini)

## Prompt caching

ChatGPT and Claude both offer prefix-based prompt caching, which can reduce cost and latency for longer prompts. Gemini offers context caching, which is similar.

- [ChatGPT's prompt caching](https://openai.com/index/api-prompt-caching/) is automatic for prompts longer than 1024 tokens, caching the longest common prefix.
- [Claude's prompt caching](https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching) is not automatic. Its prefix is built from the tools, system, and then messages sections, in that order, up to and including the block designated with `{"cache_control": {"type": "ephemeral"}}`. See `LangChain.ChatModels.ChatAnthropicTest` and the sketch below for an example.
- [Gemini's context caching](https://ai.google.dev/gemini-api/docs/caching?lang=python) requires a separate call, which is not supported by LangChain.
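
For example, marking the long, stable part of a system message for caching might look like this (a minimal sketch; the document text is a placeholder):

```elixir
alias LangChain.Message
alias LangChain.Message.ContentPart

# Anthropic caches the prefix up to and including the part flagged with
# `cache_control`, so the long, stable content goes in that part.
system =
  Message.new_system!([
    ContentPart.text!("You are an AI assistant analyzing literary works.\n"),
    ContentPart.text!("<a very long reference document>", cache_control: true)
  ])
```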

## Usage

The central module in this library is `LangChain.Chains.LLMChain`. Most other pieces are either inputs to this, or structures used by it. For understanding how to use the library, start there.
31 changes: 22 additions & 9 deletions lib/chat_models/chat_anthropic.ex
@@ -220,13 +220,19 @@ defmodule LangChain.ChatModels.ChatAnthropic do
def for_api(%ChatAnthropic{} = anthropic, messages, tools) do
# separate the system message from the rest. Handled separately.
{system, messages} =
Utils.split_system_message(messages, "Anthropic only supports a single System message")
Utils.split_system_message(
messages,
"Anthropic only supports a single System message, however, you may use multiple ContentParts for the System message to indicate where prompt caching should be used."
)

system_text =
case system do
nil ->
nil

%Message{role: :system, content: [_ | _]} = message ->
for_api(message)

%Message{role: :system, content: content} ->
content
end
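
In rough terms, this gives `system_text` one of two shapes (values here are illustrative; see the test further down for exact output):

```elixir
# A plain-string system message passes through unchanged:
"You are my helpful hero."

# A ContentPart list becomes Anthropic text blocks, optionally carrying
# cache_control metadata:
[
  %{"type" => "text", "text" => "stable instructions"},
  %{
    "type" => "text",
    "text" => "a long document",
    "cache_control" => %{"type" => "ephemeral"}
  }
]
```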
@@ -806,7 +812,7 @@
end

@doc """
Convert a LangChain structure to the expected map of data for the OpenAI API.
Convert a LangChain structure to the expected map of data for the Anthropic API.
"""
@spec for_api(Message.t() | ContentPart.t() | Function.t()) ::
%{String.t() => any()} | no_return()
@@ -859,8 +865,19 @@
}
end

def for_api(%Message{role: :system, content: content}) when is_list(content) do
Enum.map(content, &for_api(&1))
end

def for_api(%ContentPart{type: :text} = part) do
%{"type" => "text", "text" => part.content}
case Keyword.fetch(part.options || [], :cache_control) do
:error ->
%{"type" => "text", "text" => part.content}

{:ok, setting} ->
setting = if setting == true, do: %{"type" => "ephemeral"}, else: setting
%{"type" => "text", "text" => part.content, "cache_control" => setting}
end
end

def for_api(%ContentPart{type: :image} = part) do
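
A quick sketch of the two accepted forms of the `:cache_control` option: `true` is shorthand for the ephemeral cache type, while an explicit map is passed through verbatim.

```elixir
alias LangChain.Message.ContentPart

# Shorthand: `true` serializes as "cache_control" => %{"type" => "ephemeral"}
ContentPart.text!("long stable text", cache_control: true)

# Explicit map: used as-is in the serialized block
ContentPart.text!("long stable text", cache_control: %{"type" => "ephemeral"})
```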
@@ -1013,12 +1030,8 @@
end

defp get_token_usage(%{"usage" => usage} = _response_body) do
# extract out the reported response token usage
#
# https://platform.openai.com/docs/api-reference/chat/object#chat/object-usage
# if prompt caching has been used the response will also contain
# "cache_creation_input_tokens" and "cache_read_input_tokens"
TokenUsage.new!(%{
input: Map.get(usage, "input_tokens"),
output: Map.get(usage, "output_tokens")
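
For reference, when caching is used, Anthropic's `usage` block carries the cache counters alongside the usual fields (illustrative values; only `input_tokens` and `output_tokens` are captured in `TokenUsage` here):

```elixir
%{
  "input_tokens" => 21,
  "cache_creation_input_tokens" => 188_086,
  "cache_read_input_tokens" => 0,
  "output_tokens" => 393
}
```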
4 changes: 2 additions & 2 deletions lib/message.ex
@@ -191,15 +191,15 @@ defmodule LangChain.Message do
changeset

{:ok, content} when is_list(content) ->
if role in [:user, :assistant] do
if role in [:user, :assistant, :system] do
# if a list, verify all elements are a ContentPart or PromptTemplate
if Enum.all?(content, &(match?(%ContentPart{}, &1) or match?(%PromptTemplate{}, &1))) do
changeset
else
add_error(changeset, :content, "must be text or a list of ContentParts")
end
else
# only a user message can have ContentParts
# only a user message can have ContentParts (except for ChatAnthropic system messages)
Logger.error(
"Invalid message content #{inspect(get_field(changeset, :content))} for role #{role}"
)
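
With this validation change, a system message built from ContentParts now passes (a minimal sketch):

```elixir
alias LangChain.Message
alias LangChain.Message.ContentPart

part = ContentPart.text!("You are helpful.")

# Now valid for :system, in addition to :user and :assistant.
{:ok, %Message{role: :system}} = Message.new_system([part])
```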
4 changes: 2 additions & 2 deletions lib/message/content_part.ex
@@ -89,8 +89,8 @@ defmodule LangChain.Message.ContentPart do
Create a new ContentPart that contains text. Raises an exception if not valid.
"""
@spec text!(String.t(), Keyword.t()) :: t() | no_return()
def text!(content) do
new!(%{type: :text, content: content})
def text!(content, opts \\ []) do
new!(%{type: :text, content: content, options: opts})
end

@doc """
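
The new optional argument keeps the single-argument behavior intact; for example:

```elixir
alias LangChain.Message.ContentPart

# Unchanged behavior:
ContentPart.text!("hello")

# With provider options (read by ChatAnthropic during serialization):
ContentPart.text!("<a long document>", cache_control: true)
```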
35 changes: 35 additions & 0 deletions test/chat_models/chat_anthropic_test.exs
@@ -104,6 +104,41 @@ defmodule LangChain.ChatModels.ChatAnthropicTest do
assert "You are my helpful hero." == data[:system]
end

test "supports prompt caching in the system message" do
{:ok, anthropic} = ChatAnthropic.new()

# this example is from https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching.
data =
ChatAnthropic.for_api(
anthropic,
[
Message.new_system!([
ContentPart.text!(
"You are an AI assistant tasked with analyzing literary works. Your goal is to provide insightful commentary on themes, characters, and writing style.\n"
),
ContentPart.text!("<the entire contents of Pride and Prejudice>",
cache_control: true
)
])
],
[]
)

assert data.system ==
[
%{
"text" =>
"You are an AI assistant tasked with analyzing literary works. Your goal is to provide insightful commentary on themes, characters, and writing style.\n",
"type" => "text"
},
%{
"cache_control" => %{"type" => "ephemeral"},
"text" => "<the entire contents of Pride and Prejudice>",
"type" => "text"
}
]
end

test "generates a map for an API call with max_tokens set" do
{:ok, anthropic} =
ChatAnthropic.new(%{
6 changes: 1 addition & 5 deletions test/message_test.exs
@@ -94,10 +94,6 @@ defmodule LangChain.MessageTest do
{:ok, message} = Message.new_user("Hi")
assert message.content == "Hi"

# content parts not allowed for other role types
{:error, changeset} = Message.new_system([part])
assert {"is invalid for role system", _} = changeset.errors[:content]

{:error, changeset} =
Message.new(%{
role: :tool,
@@ -162,7 +158,7 @@ defmodule LangChain.MessageTest do
assert msg.role == :user

assert msg.content == [
%ContentPart{type: :text, content: "Describe what is in this image:"},
%ContentPart{type: :text, content: "Describe what is in this image:", options: []},
%ContentPart{type: :image, content: "ZmFrZV9pbWFnZV9kYXRh", options: []}
]
end