add bedrock claude 3 support #367

Open · wants to merge 4 commits into base: main
9 changes: 5 additions & 4 deletions README.md
@@ -42,17 +42,18 @@ We also just added experimental support for taking a video/screen recording of a

## 🛠 Getting Started

The app has a React/Vite frontend and a FastAPI backend.
### Notes on using AWS Bedrock Claude 3/3.5 Sonnet
- To use Bedrock Claude 3/3.5, install the AWS CLI (https://aws.amazon.com/cn/cli/) on the machine running the app, configure the access key and secret key of an AWS IAM account, and enable access to the Bedrock Claude 3 models for that account. A quick connectivity check is sketched below.
- When using Bedrock Claude 3/3.5, you do not need to set OPENAI_API_KEY or ANTHROPIC_API_KEY in .env.
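To confirm the credentials work before starting the backend, you can run a minimal check like the one below (a sketch: it assumes `boto3` is installed, and that the profile and region match the `profile`/`region` environment variables read by `backend/llm.py`):

```python
import boto3

# Assumes the default profile set up via `aws configure`; adjust to match
# the `profile`/`region` environment variables used by backend/llm.py.
session = boto3.Session(profile_name="default", region_name="us-east-1")
bedrock = session.client("bedrock")

# Lists the Claude model IDs visible to this account. Model access for
# Claude 3/3.5 must also be enabled in the Bedrock console for invocation to work.
for model in bedrock.list_foundation_models()["modelSummaries"]:
    if "claude" in model["modelId"]:
        print(model["modelId"])
```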

Keys needed:
The app has a React/Vite frontend and a FastAPI backend. You will need an OpenAI API key with access to the GPT-4 Vision API, or an Anthropic key if you want to use Claude Sonnet or the experimental video support.

* [OpenAI API key with access to GPT-4](https://github.com/abi/screenshot-to-code/blob/main/Troubleshooting.md)
* Anthropic key (optional) - only if you want to use Claude Sonnet, or for experimental video support.

Run the backend (I use Poetry for package management - `pip install poetry` if you don't have it):

```bash
cd backend
# optional
echo "OPENAI_API_KEY=sk-your-key" > .env
poetry install
poetry shell
167 changes: 119 additions & 48 deletions backend/llm.py
@@ -5,27 +5,51 @@
from openai.types.chat import ChatCompletionMessageParam, ChatCompletionChunk
from config import IS_DEBUG_ENABLED
from debug.DebugFileWriter import DebugFileWriter

# import boto3
from utils import pprint_prompt

import json
import os
import sys
import boto3
import base64
import requests
import time
import pprint
from datetime import datetime
from botocore.config import Config

# Set up the Bedrock runtime client (region and profile are read from env vars)
REGION = os.environ.get('region', 'us-east-1')
PROFILE = os.environ.get('profile', 'default')
session = boto3.Session(profile_name=PROFILE, region_name=REGION)
bedrock_runtime = session.client(
    service_name="bedrock-runtime",
    region_name=REGION
)
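# Optional sanity check (a sketch, not part of the request flow): confirm the
# profile can reach Bedrock at startup. Assumes the IAM user is allowed
# bedrock:ListFoundationModels.
# bedrock = session.client("bedrock", region_name=REGION)
# print([m["modelId"] for m in bedrock.list_foundation_models()["modelSummaries"] if "claude" in m["modelId"]])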

# Actual model versions that are passed to the LLMs and stored in our logs
class Llm(Enum):
GPT_4_VISION = "gpt-4-vision-preview"
GPT_4_TURBO_2024_04_09 = "gpt-4-turbo-2024-04-09"
GPT_4O_2024_05_13 = "gpt-4o-2024-05-13"
CLAUDE_3_SONNET = "claude-3-sonnet-20240229"
CLAUDE_3_5_SONNET = "claude-3-5-sonnet-20240620"
CLAUDE_3_OPUS = "claude-3-opus-20240229"
CLAUDE_3_HAIKU = "claude-3-haiku-20240307"
CLAUDE_3_5_SONNET_2024_06_20 = "claude-3-5-sonnet-20240620"


# Map each Llm member to its Bedrock model ID
BEDROCK_LLM_MODELID_LIST = {
    Llm.CLAUDE_3_SONNET: 'anthropic.claude-3-sonnet-20240229-v1:0',
    Llm.CLAUDE_3_5_SONNET: 'anthropic.claude-3-5-sonnet-20240620-v1:0',
}

# Will throw errors if you send a garbage string
def convert_frontend_str_to_llm(frontend_str: str) -> Llm:
if frontend_str == "gpt_4_vision":
return Llm.GPT_4_VISION
elif frontend_str == "claude_3_sonnet":
return Llm.CLAUDE_3_SONNET
elif frontend_str == "claude_3_5_sonnet":
return Llm.CLAUDE_3_5_SONNET
else:
return Llm(frontend_str)

@@ -38,7 +62,7 @@ async def stream_openai_response(
model: Llm,
) -> str:
client = AsyncOpenAI(api_key=api_key, base_url=base_url)

print(f"--stream_openai_response--{model}")
# Base parameters
params = {
"model": model.value,
@@ -75,16 +99,15 @@ async def stream_openai_response(
return full_response


# TODO: Have a separate function that translates OpenAI messages to Claude messages
async def stream_claude_response(
messages: List[ChatCompletionMessageParam],
api_key: str,
callback: Callable[[str], Awaitable[None]],
model: Llm,
) -> str:

client = AsyncAnthropic(api_key=api_key)

print(f"--stream_openai_response--{model}")
# client = AsyncAnthropic(api_key=api_key)
modelId = BEDROCK_LLM_MODELID_LIST[model]
# Base parameters
max_tokens = 4096
temperature = 0.0
@@ -112,39 +135,56 @@ async def stream_claude_response(
content["source"] = {
"type": "base64",
"media_type": media_type,
"data": base64_data,
"data": base64_data
}

payload = {
    "modelId": modelId,
    "contentType": "application/json",
    "accept": "application/json",
    "body": {
        "anthropic_version": "bedrock-2023-05-31",
        "max_tokens": max_tokens,
        "messages": claude_messages,
        "temperature": temperature,
        "system": system_prompt,
    }
}

# Convert the payload to bytes
body_bytes = json.dumps(payload['body']).encode('utf-8')

# Invoke the model
response = bedrock_runtime.invoke_model_with_response_stream(
body=body_bytes, modelId=payload['modelId'], accept=payload['accept'], contentType=payload['contentType']
)
stream = response.get('body')
chunk_obj = {}
# Stream Claude response
async with client.messages.stream(
model=model.value,
max_tokens=max_tokens,
temperature=temperature,
system=system_prompt,
messages=claude_messages, # type: ignore
) as stream:
async for text in stream.text_stream:
await callback(text)

response_text = ''
if stream:
    for event in stream:
        chunk = event.get('chunk')
        if chunk:
            chunk_obj = json.loads(chunk.get('bytes').decode())
            # Guard on the event type: only content_block_delta events carry
            # a 'delta' with text; indexing it on other events raises KeyError.
            if chunk_obj.get('type') == 'content_block_delta' and chunk_obj['delta']['type'] == 'text_delta':
                response_text += chunk_obj['delta']['text']
                await callback(chunk_obj['delta']['text'])
# Return final message
response = await stream.get_final_message()

# Close the Anthropic client
await client.close()

return response.content[0].text

return response_text
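# Sketch of how a caller might consume this function (assumes the OpenAI-style
# message list used elsewhere in this repo; api_key is unused on Bedrock):
#
#   async def on_chunk(text: str) -> None:
#       print(text, end="", flush=True)
#
#   html = await stream_claude_response(
#       messages, api_key="", callback=on_chunk, model=Llm.CLAUDE_3_5_SONNET
#   )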

async def stream_claude_response_native(
system_prompt: str,
messages: list[Any],
api_key: str,
callback: Callable[[str], Awaitable[None]],
include_thinking: bool = False,
model: Llm = Llm.CLAUDE_3_OPUS,
model: Llm = Llm.CLAUDE_3_5_SONNET,
) -> str:

client = AsyncAnthropic(api_key=api_key)
# client = AsyncAnthropic(api_key=api_key)
modelId = BEDROCK_LLM_MODELID_LIST[model]

# Base model parameters
max_tokens = 4096
@@ -171,22 +211,52 @@ async def stream_claude_response_native(
else messages
)

pprint_prompt(messages_to_send)

async with client.messages.stream(
model=model.value,
max_tokens=max_tokens,
temperature=temperature,
system=system_prompt,
messages=messages_to_send, # type: ignore
) as stream:
async for text in stream.text_stream:
print(text, end="", flush=True)
full_stream += text
await callback(text)

response = await stream.get_final_message()
response_text = response.content[0].text
# pprint_prompt(messages_to_send)

payload = {
    "modelId": modelId,
    "contentType": "application/json",
    "accept": "application/json",
    "body": {
        "anthropic_version": "bedrock-2023-05-31",
        "max_tokens": max_tokens,
        "messages": messages_to_send,
        "temperature": temperature,
        "system": system_prompt,
    }
}
# Convert the payload to bytes
body_bytes = json.dumps(payload['body']).encode('utf-8')

# Invoke the model
response = bedrock_runtime.invoke_model_with_response_stream(
body=body_bytes, modelId=payload['modelId'], accept=payload['accept'], contentType=payload['contentType']
)
stream = response.get('body')
chunk_obj = {}
# Stream Claude response
response_text = ''
input_tokens = 0
output_tokens = 0
if stream:
    for event in stream:
        chunk = event.get('chunk')
        if chunk:
            chunk_obj = json.loads(chunk.get('bytes').decode())
            # print(chunk_obj)
            if chunk_obj['type'] == 'message_start':
                # The opening event reports the prompt's input token count
                input_tokens = chunk_obj['message']['usage']['input_tokens']

            if chunk_obj['type'] == 'message_delta':
                print(f"\nStop reason: {chunk_obj['delta']['stop_reason']}")
                print(f"Stop sequence: {chunk_obj['delta']['stop_sequence']}")
                print(f"Output tokens: {chunk_obj['usage']['output_tokens']}")
                output_tokens = chunk_obj['usage']['output_tokens']

            if chunk_obj['type'] == 'content_block_delta':
                if chunk_obj['delta']['type'] == 'text_delta':
                    print(chunk_obj['delta']['text'], end="")
                    response_text += chunk_obj['delta']['text']
                    # Keep full_stream in sync so the debug writer below
                    # still captures the complete output
                    full_stream += chunk_obj['delta']['text']
                    await callback(chunk_obj['delta']['text'])

print(response_text)

# Write each pass's code to .html file and thinking to .txt file
if IS_DEBUG_ENABLED:
@@ -201,24 +271,25 @@ async def stream_claude_response_native(

# Set up messages array for next pass
messages += [
{"role": "assistant", "content": str(prefix) + response.content[0].text},
{"role": "assistant", "content": str(prefix) + response_text},
{
"role": "user",
"content": "You've done a good job with a first draft. Improve this further based on the original instructions so that the app is fully functional and looks like the original video of the app we're trying to replicate.",
},
]

print(
f"Token usage: Input Tokens: {response.usage.input_tokens}, Output Tokens: {response.usage.output_tokens}"
f"Token usage: Input Tokens: {input_tokens}, Output Tokens: {output_tokens}"
)
print(messages[-2:])

# Close the Anthropic client
await client.close()
# await client.close()

if IS_DEBUG_ENABLED:
debug_file_writer.write_to_file("full_stream.txt", full_stream)

if not response:
    raise Exception("No HTML response found in AI response")
else:
    return response.content[0].text
return response_text