add bedrock claude 3 support #367

Open · wants to merge 4 commits into base: main
9 changes: 5 additions & 4 deletions README.md
@@ -42,17 +42,18 @@ We also just added experimental support for taking a video/screen recording of a

## 🛠 Getting Started

The app has a React/Vite frontend and a FastAPI backend.
### Notes on using AWS Bedrock Claude 3/3.5 Sonnet
- To use Bedrock Claude 3/3.5, install the AWS CLI (https://aws.amazon.com/cn/cli/) on the machine running the app, configure the access key and secret key of an AWS IAM account, and enable access to the Bedrock Claude 3 models for that account. A quick connectivity check is sketched below.
- When using Bedrock Claude 3/3.5, you do not need to set OPENAI_API_KEY or ANTHROPIC_API_KEY in .env.
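To confirm the credentials work before starting the backend, you can run a minimal check like the one below (a sketch: it assumes `boto3` is installed, and that the profile and region match the `profile`/`region` environment variables read by `backend/llm.py`):

```python
import boto3

# Assumes the default profile set up via `aws configure`; adjust to match
# the `profile`/`region` environment variables used by backend/llm.py.
session = boto3.Session(profile_name="default", region_name="us-east-1")
bedrock = session.client("bedrock")

# Lists the Claude model IDs visible to this account. Model access for
# Claude 3/3.5 must also be enabled in the Bedrock console for invocation to work.
for model in bedrock.list_foundation_models()["modelSummaries"]:
    if "claude" in model["modelId"]:
        print(model["modelId"])
```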

Keys needed:
The app has a React/Vite frontend and a FastAPI backend. You will need an OpenAI API key with access to the GPT-4 Vision API, or an Anthropic key if you want to use Claude Sonnet or the experimental video support.

* [OpenAI API key with access to GPT-4](https://github.com/abi/screenshot-to-code/blob/main/Troubleshooting.md)
* Anthropic key (optional) - only if you want to use Claude Sonnet, or for experimental video support.

Run the backend (I use Poetry for package management - `pip install poetry` if you don't have it):

```bash
cd backend
# optional
echo "OPENAI_API_KEY=sk-your-key" > .env
poetry install
poetry shell
167 changes: 119 additions & 48 deletions backend/llm.py
@@ -5,27 +5,51 @@
from openai.types.chat import ChatCompletionMessageParam, ChatCompletionChunk
from config import IS_DEBUG_ENABLED
from debug.DebugFileWriter import DebugFileWriter

# import boto3
from utils import pprint_prompt

import json
import os
import sys
import boto3
import base64
import requests
import time
import pprint
from datetime import datetime
from botocore.config import Config

# Set up the Bedrock runtime client (region and profile are read from env vars)
REGION = os.environ.get('region', 'us-east-1')
PROFILE = os.environ.get('profile', 'default')
session = boto3.Session(profile_name=PROFILE, region_name=REGION)
bedrock_runtime = session.client(
    service_name="bedrock-runtime",
    region_name=REGION
)
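# Optional sanity check (a sketch, not part of the request flow): confirm the
# profile can reach Bedrock at startup. Assumes the IAM user is allowed
# bedrock:ListFoundationModels.
# bedrock = session.client("bedrock", region_name=REGION)
# print([m["modelId"] for m in bedrock.list_foundation_models()["modelSummaries"] if "claude" in m["modelId"]])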

# Actual model versions that are passed to the LLMs and stored in our logs
class Llm(Enum):
GPT_4_VISION = "gpt-4-vision-preview"
GPT_4_TURBO_2024_04_09 = "gpt-4-turbo-2024-04-09"
GPT_4O_2024_05_13 = "gpt-4o-2024-05-13"
CLAUDE_3_SONNET = "claude-3-sonnet-20240229"
CLAUDE_3_5_SONNET = "claude-3-5-sonnet-20240620"
CLAUDE_3_OPUS = "claude-3-opus-20240229"
CLAUDE_3_HAIKU = "claude-3-haiku-20240307"
CLAUDE_3_5_SONNET_2024_06_20 = "claude-3-5-sonnet-20240620"


# Map each Llm member to its Bedrock model ID
BEDROCK_LLM_MODELID_LIST = {
    Llm.CLAUDE_3_SONNET: 'anthropic.claude-3-sonnet-20240229-v1:0',
    Llm.CLAUDE_3_5_SONNET: 'anthropic.claude-3-5-sonnet-20240620-v1:0',
}

# Will throw errors if you send a garbage string
def convert_frontend_str_to_llm(frontend_str: str) -> Llm:
if frontend_str == "gpt_4_vision":
return Llm.GPT_4_VISION
elif frontend_str == "claude_3_sonnet":
return Llm.CLAUDE_3_SONNET
elif frontend_str == "claude_3_5_sonnet":
return Llm.CLAUDE_3_5_SONNET
else:
return Llm(frontend_str)

@@ -38,7 +62,7 @@ async def stream_openai_response(
model: Llm,
) -> str:
client = AsyncOpenAI(api_key=api_key, base_url=base_url)

print(f"--stream_openai_response--{model}")
# Base parameters
params = {
"model": model.value,
@@ -75,16 +99,15 @@ async def stream_openai_response(
return full_response


# TODO: Have a separate function that translates OpenAI messages to Claude messages
async def stream_claude_response(
messages: List[ChatCompletionMessageParam],
api_key: str,
callback: Callable[[str], Awaitable[None]],
model: Llm,
) -> str:

client = AsyncAnthropic(api_key=api_key)

print(f"--stream_openai_response--{model}")
# client = AsyncAnthropic(api_key=api_key)
modelId = BEDROCK_LLM_MODELID_LIST[model]
# Base parameters
max_tokens = 4096
temperature = 0.0
@@ -112,39 +135,56 @@ async def stream_claude_response(
content["source"] = {
"type": "base64",
"media_type": media_type,
"data": base64_data,
"data": base64_data
}

payload = {
    "modelId": modelId,
    "contentType": "application/json",
    "accept": "application/json",
    "body": {
        "anthropic_version": "bedrock-2023-05-31",
        "max_tokens": max_tokens,
        "messages": claude_messages,
        "temperature": temperature,
        "system": system_prompt,
    }
}

# Convert the payload to bytes
body_bytes = json.dumps(payload['body']).encode('utf-8')

# Invoke the model
response = bedrock_runtime.invoke_model_with_response_stream(
body=body_bytes, modelId=payload['modelId'], accept=payload['accept'], contentType=payload['contentType']
)
stream = response.get('body')
chunk_obj = {}
# Stream Claude response
async with client.messages.stream(
model=model.value,
max_tokens=max_tokens,
temperature=temperature,
system=system_prompt,
messages=claude_messages, # type: ignore
) as stream:
async for text in stream.text_stream:
await callback(text)

response_text = ''
if stream:
    for event in stream:
        chunk = event.get('chunk')
        if chunk:
            chunk_obj = json.loads(chunk.get('bytes').decode())
            # Guard on the event type: only content_block_delta events carry
            # a 'delta' with text; indexing it on other events raises KeyError.
            if chunk_obj.get('type') == 'content_block_delta' and chunk_obj['delta']['type'] == 'text_delta':
                response_text += chunk_obj['delta']['text']
                await callback(chunk_obj['delta']['text'])
# Return final message
response = await stream.get_final_message()

# Close the Anthropic client
await client.close()

return response.content[0].text

return response_text
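# Sketch of how a caller might consume this function (assumes the OpenAI-style
# message list used elsewhere in this repo; api_key is unused on Bedrock):
#
#   async def on_chunk(text: str) -> None:
#       print(text, end="", flush=True)
#
#   html = await stream_claude_response(
#       messages, api_key="", callback=on_chunk, model=Llm.CLAUDE_3_5_SONNET
#   )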

async def stream_claude_response_native(
system_prompt: str,
messages: list[Any],
api_key: str,
callback: Callable[[str], Awaitable[None]],
include_thinking: bool = False,
model: Llm = Llm.CLAUDE_3_OPUS,
model: Llm = Llm.CLAUDE_3_5_SONNET,
) -> str:

client = AsyncAnthropic(api_key=api_key)
# client = AsyncAnthropic(api_key=api_key)
modelId = BEDROCK_LLM_MODELID_LIST[model]

# Base model parameters
max_tokens = 4096
@@ -171,22 +211,52 @@ async def stream_claude_response_native(
else messages
)

pprint_prompt(messages_to_send)

async with client.messages.stream(
model=model.value,
max_tokens=max_tokens,
temperature=temperature,
system=system_prompt,
messages=messages_to_send, # type: ignore
) as stream:
async for text in stream.text_stream:
print(text, end="", flush=True)
full_stream += text
await callback(text)

response = await stream.get_final_message()
response_text = response.content[0].text
# pprint_prompt(messages_to_send)

payload = {
    "modelId": modelId,
    "contentType": "application/json",
    "accept": "application/json",
    "body": {
        "anthropic_version": "bedrock-2023-05-31",
        "max_tokens": max_tokens,
        "messages": messages_to_send,
        "temperature": temperature,
        "system": system_prompt,
    }
}
# Convert the payload to bytes
body_bytes = json.dumps(payload['body']).encode('utf-8')

# Invoke the model
response = bedrock_runtime.invoke_model_with_response_stream(
body=body_bytes, modelId=payload['modelId'], accept=payload['accept'], contentType=payload['contentType']
)
stream = response.get('body')
chunk_obj = {}
# Stream Claude response
response_text = ''
input_tokens = 0
output_tokens = 0
if stream:
    for event in stream:
        chunk = event.get('chunk')
        if chunk:
            chunk_obj = json.loads(chunk.get('bytes').decode())
            # print(chunk_obj)
            if chunk_obj['type'] == 'message_start':
                # The opening event reports the prompt's input token count
                input_tokens = chunk_obj['message']['usage']['input_tokens']

            if chunk_obj['type'] == 'message_delta':
                print(f"\nStop reason: {chunk_obj['delta']['stop_reason']}")
                print(f"Stop sequence: {chunk_obj['delta']['stop_sequence']}")
                print(f"Output tokens: {chunk_obj['usage']['output_tokens']}")
                output_tokens = chunk_obj['usage']['output_tokens']

            if chunk_obj['type'] == 'content_block_delta':
                if chunk_obj['delta']['type'] == 'text_delta':
                    print(chunk_obj['delta']['text'], end="")
                    response_text += chunk_obj['delta']['text']
                    # Keep full_stream in sync so the debug writer below
                    # still captures the complete output
                    full_stream += chunk_obj['delta']['text']
                    await callback(chunk_obj['delta']['text'])

print(response_text)

# Write each pass's code to .html file and thinking to .txt file
if IS_DEBUG_ENABLED:
@@ -201,24 +271,25 @@ async def stream_claude_response_native(

# Set up messages array for next pass
messages += [
{"role": "assistant", "content": str(prefix) + response.content[0].text},
{"role": "assistant", "content": str(prefix) + response_text},
{
"role": "user",
"content": "You've done a good job with a first draft. Improve this further based on the original instructions so that the app is fully functional and looks like the original video of the app we're trying to replicate.",
},
]

print(
f"Token usage: Input Tokens: {response.usage.input_tokens}, Output Tokens: {response.usage.output_tokens}"
f"Token usage: Input Tokens: {input_tokens}, Output Tokens: {output_tokens}"
)
print(messages[-2:])

# Close the Anthropic client
await client.close()
# await client.close()

if IS_DEBUG_ENABLED:
debug_file_writer.write_to_file("full_stream.txt", full_stream)

if not response:
    raise Exception("No HTML response found in AI response")
else:
    return response.content[0].text
return response_text