chore: use llama3 instead of claude for bedrock integ-tests (#1160)

Claude has a much lower rate limit (5 req/min vs 250 req/min) in our Bedrock setup for some bizarre reason. An alternative option is to use client-side rate limiting, but that involves wrapping every `b.TestAws` call: https://gist.github.com/sxlijin/0c8b67049803c3682b3dfcb6b4155ac4
BoundaryML · Nov 12, 2024 · 603d9f7 · 603d9f7
1 parent a578cc2
commit 603d9f7
Show file tree

Hide file tree

Showing 7 changed files with 31 additions and 30 deletions.
diff --git a/integ-tests/baml_src/clients.baml b/integ-tests/baml_src/clients.baml
@@ -106,9 +106,9 @@ client<llm> AwsBedrock {
     inference_configuration {
       max_tokens 100
     }
-    model "anthropic.claude-3-5-sonnet-20240620-v1:0"
+    // model "anthropic.claude-3-5-sonnet-20240620-v1:0"
     // model_id "anthropic.claude-3-haiku-20240307-v1:0"
-    // model_id "meta.llama3-8b-instruct-v1:0"
+    model_id "meta.llama3-8b-instruct-v1:0"
     // model_id "mistral.mistral-7b-instruct-v0:2"
     api_key ""
   }
@@ -121,9 +121,9 @@ client<llm> AwsBedrockInvalidRegion {
     inference_configuration {
       max_tokens 100
     }
-    model "anthropic.claude-3-5-sonnet-20240620-v1:0"
+    // model "anthropic.claude-3-5-sonnet-20240620-v1:0"
     // model_id "anthropic.claude-3-haiku-20240307-v1:0"
-    // model_id "meta.llama3-8b-instruct-v1:0"
+    model_id "meta.llama3-8b-instruct-v1:0"
     // model_id "mistral.mistral-7b-instruct-v0:2"
     api_key ""
   }

diff --git a/integ-tests/python/baml_client/inlinedbaml.py b/integ-tests/python/baml_client/inlinedbaml.py
@@ -16,7 +16,7 @@
 
 file_map = {
 
-    "clients.baml": "retry_policy Bar {\n  max_retries 3\n  strategy {\n    type exponential_backoff\n  }\n}\n\nretry_policy Foo {\n  max_retries 3\n  strategy {\n    type constant_delay\n    delay_ms 100\n  }\n}\n\nclient<llm> GPT4 {\n  provider openai\n  options {\n    model gpt-4o\n    api_key env.OPENAI_API_KEY\n  }\n}  \n\n\nclient<llm> GPT4o {\n  provider openai\n  options {\n    model gpt-4o\n    api_key env.OPENAI_API_KEY\n  }\n} \n\n\nclient<llm> GPT4Turbo {\n  retry_policy Bar\n  provider openai\n  options {\n    model gpt-4-turbo\n    api_key env.OPENAI_API_KEY\n  }\n} \n\nclient<llm> GPT35 {\n  provider openai\n  options {\n    model \"gpt-3.5-turbo\"\n    api_key env.OPENAI_API_KEY\n  }\n}\n\nclient<llm> GPT35LegacyProvider {\n  provider openai\n  options {\n    model \"gpt-3.5-turbo\"\n    api_key env.OPENAI_API_KEY\n  }\n}\n\n\nclient<llm> Ollama {\n  provider ollama\n  options {\n    model llama2\n  }\n}\n\nclient<llm> GPT35Azure {\n  provider azure-openai\n  options {\n    resource_name \"west-us-azure-baml\"\n    deployment_id \"gpt-35-turbo-default\"\n    // base_url \"https://west-us-azure-baml.openai.azure.com/openai/deployments/gpt-35-turbo-default\"\n    api_version \"2024-02-01\"\n    api_key env.AZURE_OPENAI_API_KEY\n  }\n}\n\nclient<llm> Gemini {\n  provider google-ai\n  options {\n    model gemini-1.5-pro-001\n    api_key env.GOOGLE_API_KEY\n    safetySettings {\n      category HARM_CATEGORY_HATE_SPEECH\n      threshold BLOCK_LOW_AND_ABOVE\n\n    }\n  }\n}\n\nclient<llm> Vertex {\n  provider vertex-ai  \n  options {\n    model gemini-1.5-pro\n    project_id sam-project-vertex-1\n    location us-central1\n    credentials env.INTEG_TESTS_GOOGLE_APPLICATION_CREDENTIALS\n\n  }\n}\n\n\nclient<llm> AwsBedrock {\n  provider aws-bedrock\n  options {\n    inference_configuration {\n      max_tokens 100\n    }\n    model \"anthropic.claude-3-5-sonnet-20240620-v1:0\"\n    // model_id \"anthropic.claude-3-haiku-20240307-v1:0\"\n    // model_id 
\"meta.llama3-8b-instruct-v1:0\"\n    // model_id \"mistral.mistral-7b-instruct-v0:2\"\n    api_key \"\"\n  }\n}\n\nclient<llm> AwsBedrockInvalidRegion {\n  provider aws-bedrock\n  options {\n    region \"us-invalid-7\"\n    inference_configuration {\n      max_tokens 100\n    }\n    model \"anthropic.claude-3-5-sonnet-20240620-v1:0\"\n    // model_id \"anthropic.claude-3-haiku-20240307-v1:0\"\n    // model_id \"meta.llama3-8b-instruct-v1:0\"\n    // model_id \"mistral.mistral-7b-instruct-v0:2\"\n    api_key \"\"\n  }\n}\n\nclient<llm> Sonnet {\n  provider anthropic\n  options {\n    model claude-3-5-sonnet-20241022\n    api_key env.ANTHROPIC_API_KEY\n  }\n}\n\nclient<llm> Claude {\n  provider anthropic\n  options {\n    model claude-3-haiku-20240307\n    api_key env.ANTHROPIC_API_KEY\n    max_tokens 1000\n  }\n}\n\nclient<llm> ClaudeWithCaching {\n  provider anthropic\n  options {\n    model claude-3-haiku-20240307\n    api_key env.ANTHROPIC_API_KEY\n    max_tokens 1000\n    allowed_role_metadata [\"cache_control\"]\n    headers {\n      \"anthropic-beta\" \"prompt-caching-2024-07-31\"\n    }\n  }\n}\n\nclient<llm> Resilient_SimpleSyntax {\n  retry_policy Foo\n  provider baml-fallback\n  options {\n    strategy [\n      GPT4Turbo\n      GPT35\n      Lottery_SimpleSyntax\n    ]\n  }\n} \n \nclient<llm> Lottery_SimpleSyntax {\n  provider baml-round-robin\n  options {\n    start 0\n    strategy [\n      GPT35\n      Claude\n    ]\n  }\n}\n\nclient<llm> TogetherAi {\n  provider \"openai-generic\"\n  options {\n    base_url \"https://api.together.ai/v1\"\n    api_key env.TOGETHER_API_KEY\n    model \"meta-llama/Llama-3-70b-chat-hf\"\n  }\n}\n\n",
+    "clients.baml": "retry_policy Bar {\n  max_retries 3\n  strategy {\n    type exponential_backoff\n  }\n}\n\nretry_policy Foo {\n  max_retries 3\n  strategy {\n    type constant_delay\n    delay_ms 100\n  }\n}\n\nclient<llm> GPT4 {\n  provider openai\n  options {\n    model gpt-4o\n    api_key env.OPENAI_API_KEY\n  }\n}  \n\n\nclient<llm> GPT4o {\n  provider openai\n  options {\n    model gpt-4o\n    api_key env.OPENAI_API_KEY\n  }\n} \n\n\nclient<llm> GPT4Turbo {\n  retry_policy Bar\n  provider openai\n  options {\n    model gpt-4-turbo\n    api_key env.OPENAI_API_KEY\n  }\n} \n\nclient<llm> GPT35 {\n  provider openai\n  options {\n    model \"gpt-3.5-turbo\"\n    api_key env.OPENAI_API_KEY\n  }\n}\n\nclient<llm> GPT35LegacyProvider {\n  provider openai\n  options {\n    model \"gpt-3.5-turbo\"\n    api_key env.OPENAI_API_KEY\n  }\n}\n\n\nclient<llm> Ollama {\n  provider ollama\n  options {\n    model llama2\n  }\n}\n\nclient<llm> GPT35Azure {\n  provider azure-openai\n  options {\n    resource_name \"west-us-azure-baml\"\n    deployment_id \"gpt-35-turbo-default\"\n    // base_url \"https://west-us-azure-baml.openai.azure.com/openai/deployments/gpt-35-turbo-default\"\n    api_version \"2024-02-01\"\n    api_key env.AZURE_OPENAI_API_KEY\n  }\n}\n\nclient<llm> Gemini {\n  provider google-ai\n  options {\n    model gemini-1.5-pro-001\n    api_key env.GOOGLE_API_KEY\n    safetySettings {\n      category HARM_CATEGORY_HATE_SPEECH\n      threshold BLOCK_LOW_AND_ABOVE\n\n    }\n  }\n}\n\nclient<llm> Vertex {\n  provider vertex-ai  \n  options {\n    model gemini-1.5-pro\n    project_id sam-project-vertex-1\n    location us-central1\n    credentials env.INTEG_TESTS_GOOGLE_APPLICATION_CREDENTIALS\n\n  }\n}\n\n\nclient<llm> AwsBedrock {\n  provider aws-bedrock\n  options {\n    inference_configuration {\n      max_tokens 100\n    }\n    // model \"anthropic.claude-3-5-sonnet-20240620-v1:0\"\n    // model_id \"anthropic.claude-3-haiku-20240307-v1:0\"\n    model_id 
\"meta.llama3-8b-instruct-v1:0\"\n    // model_id \"mistral.mistral-7b-instruct-v0:2\"\n    api_key \"\"\n  }\n}\n\nclient<llm> AwsBedrockInvalidRegion {\n  provider aws-bedrock\n  options {\n    region \"us-invalid-7\"\n    inference_configuration {\n      max_tokens 100\n    }\n    // model \"anthropic.claude-3-5-sonnet-20240620-v1:0\"\n    // model_id \"anthropic.claude-3-haiku-20240307-v1:0\"\n    model_id \"meta.llama3-8b-instruct-v1:0\"\n    // model_id \"mistral.mistral-7b-instruct-v0:2\"\n    api_key \"\"\n  }\n}\n\nclient<llm> Sonnet {\n  provider anthropic\n  options {\n    model claude-3-5-sonnet-20241022\n    api_key env.ANTHROPIC_API_KEY\n  }\n}\n\nclient<llm> Claude {\n  provider anthropic\n  options {\n    model claude-3-haiku-20240307\n    api_key env.ANTHROPIC_API_KEY\n    max_tokens 1000\n  }\n}\n\nclient<llm> ClaudeWithCaching {\n  provider anthropic\n  options {\n    model claude-3-haiku-20240307\n    api_key env.ANTHROPIC_API_KEY\n    max_tokens 1000\n    allowed_role_metadata [\"cache_control\"]\n    headers {\n      \"anthropic-beta\" \"prompt-caching-2024-07-31\"\n    }\n  }\n}\n\nclient<llm> Resilient_SimpleSyntax {\n  retry_policy Foo\n  provider baml-fallback\n  options {\n    strategy [\n      GPT4Turbo\n      GPT35\n      Lottery_SimpleSyntax\n    ]\n  }\n} \n \nclient<llm> Lottery_SimpleSyntax {\n  provider baml-round-robin\n  options {\n    start 0\n    strategy [\n      GPT35\n      Claude\n    ]\n  }\n}\n\nclient<llm> TogetherAi {\n  provider \"openai-generic\"\n  options {\n    base_url \"https://api.together.ai/v1\"\n    api_key env.TOGETHER_API_KEY\n    model \"meta-llama/Llama-3-70b-chat-hf\"\n  }\n}\n\n",
     "custom-task.baml": "class BookOrder {\n  orderId string @description(#\"\n    The ID of the book order\n  \"#)\n  title string @description(#\"\n    The title of the ordered book\n  \"#)\n  quantity int @description(#\"\n    The quantity of books ordered\n  \"#)\n  price float @description(#\"\n    The price of the book\n  \"#)\n}\n\nclass FlightConfirmation {\n  confirmationNumber string @description(#\"\n    The flight confirmation number\n  \"#)\n  flightNumber string @description(#\"\n    The flight number\n  \"#)\n  departureTime string @description(#\"\n    The scheduled departure time of the flight\n  \"#)\n  arrivalTime string @description(#\"\n    The scheduled arrival time of the flight\n  \"#)\n  seatNumber string @description(#\"\n    The seat number assigned on the flight\n  \"#)\n}\n\nclass GroceryReceipt {\n  receiptId string @description(#\"\n    The ID of the grocery receipt\n  \"#)\n  storeName string @description(#\"\n    The name of the grocery store\n  \"#)\n  items (string | int | float)[] @description(#\"\n    A list of items purchased. Each item consists of a name, quantity, and price.\n  \"#)\n  totalAmount float @description(#\"\n    The total amount spent on groceries\n  \"#)\n}\n\nclass CustomTaskResult {\n  bookOrder BookOrder | null\n  flightConfirmation FlightConfirmation | null\n  groceryReceipt GroceryReceipt | null\n}\n\nfunction CustomTask(input: string) -> BookOrder | FlightConfirmation | GroceryReceipt {\n  client \"openai/gpt-4o-mini\"\n  prompt #\"\n    Given the input string, extract either an order for a book, a flight confirmation, or a grocery receipt.\n\n    {{ ctx.output_format }}\n\n    Input:\n    \n    {{ input}}\n  \"#\n}\n\ntest CustomTask {\n  functions [CustomTask]\n  args {\n    input #\"\nDear [Your Name],\n\nThank you for booking with [Airline Name]! 
We are pleased to confirm your upcoming flight.\n\nFlight Confirmation Details:\n\nBooking Reference: ABC123\nPassenger Name: [Your Name]\nFlight Number: XY789\nDeparture Date: September 15, 2024\nDeparture Time: 10:30 AM\nArrival Time: 1:45 PM\nDeparture Airport: John F. Kennedy International Airport (JFK), New York, NY\nArrival Airport: Los Angeles International Airport (LAX), Los Angeles, CA\nSeat Number: 12A\nClass: Economy\nBaggage Allowance:\n\nChecked Baggage: 1 piece, up to 23 kg\nCarry-On Baggage: 1 piece, up to 7 kg\nImportant Information:\n\nPlease arrive at the airport at least 2 hours before your scheduled departure.\nCheck-in online via our website or mobile app to save time at the airport.\nEnsure that your identification documents are up to date and match the name on your booking.\nContact Us:\n\nIf you have any questions or need to make changes to your booking, please contact our customer service team at 1-800-123-4567 or email us at support@[airline].com.\n\nWe wish you a pleasant journey and thank you for choosing [Airline Name].\n\nBest regards,\n\n[Airline Name] Customer Service\n    \"#\n  }\n}",
     "fiddle-examples/chain-of-thought.baml": "class Email {\n    subject string\n    body string\n    from_address string\n}\n\nenum OrderStatus {\n    ORDERED\n    SHIPPED\n    DELIVERED\n    CANCELLED\n}\n\nclass OrderInfo {\n    order_status OrderStatus\n    tracking_number string?\n    estimated_arrival_date string?\n}\n\nfunction GetOrderInfo(email: Email) -> OrderInfo {\n  client GPT4\n  prompt #\"\n    Given the email below:\n\n    ```\n    from: {{email.from_address}}\n    Email Subject: {{email.subject}}\n    Email Body: {{email.body}}\n    ```\n\n    Extract this info from the email in JSON format:\n    {{ ctx.output_format }}\n\n    Before you output the JSON, please explain your\n    reasoning step-by-step. Here is an example on how to do this:\n    'If we think step by step we can see that ...\n     therefore the output JSON is:\n    {\n      ... the json schema ...\n    }'\n  \"#\n}",
     "fiddle-examples/chat-roles.baml": "// This will be available as an enum in your Python and Typescript code.\nenum Category2 {\n    Refund\n    CancelOrder\n    TechnicalSupport\n    AccountIssue\n    Question\n}\n\nfunction ClassifyMessage2(input: string) -> Category {\n  client GPT4\n\n  prompt #\"\n    {{ _.role(\"system\") }}\n    // You can use _.role(\"system\") to indicate that this text should be a system message\n\n    Classify the following INPUT into ONE\n    of the following categories:\n\n    {{ ctx.output_format }}\n\n    {{ _.role(\"user\") }}\n    // And _.role(\"user\") to indicate that this text should be a user message\n\n    INPUT: {{ input }}\n\n    Response:\n  \"#\n}",

diff --git a/integ-tests/python/tests/test_functions.py b/integ-tests/python/tests/test_functions.py
@@ -748,7 +748,7 @@ async def test_dynamic_class_output():
         baml_options={"tb": tb},
     )
     print(output.model_dump_json())
-    assert output.hair_color == "black" # type: ignore (dynamic property)
+    assert output.hair_color == "black"  # type: ignore (dynamic property)
 
 
 @pytest.mark.asyncio
@@ -834,7 +834,7 @@ async def test_stream_dynamic_class_output():
     print("final ", final)
     print("final ", final.model_dump())
     print("final ", final.model_dump_json())
-    assert final.hair_color == "black" # type: ignore (dynamic property)
+    assert final.hair_color == "black"  # type: ignore (dynamic property)
 
 
 @pytest.mark.asyncio
@@ -868,12 +868,12 @@ async def test_dynamic_inputs_list2():
         ],
         {"tb": tb},
     )
-    assert res[0].new_key == "hi1" # type: ignore (dynamic property)
+    assert res[0].new_key == "hi1"  # type: ignore (dynamic property)
     assert res[0].testKey == "myTest"
-    assert res[0].blah["nestedKey1"] == "nestedVal" # type: ignore (dynamic property)
-    assert res[1].new_key == "hi" # type: ignore (dynamic property)
+    assert res[0].blah["nestedKey1"] == "nestedVal"  # type: ignore (dynamic property)
+    assert res[1].new_key == "hi"  # type: ignore (dynamic property)
     assert res[1].testKey == "myTest"
-    assert res[1].blah["nestedKey1"] == "nestedVal" # type: ignore (dynamic property)
+    assert res[1].blah["nestedKey1"] == "nestedVal"  # type: ignore (dynamic property)
 
 
 @pytest.mark.asyncio
@@ -954,12 +954,12 @@ async def test_dynamic_inputs_list():
         ],
         {"tb": tb},
     )
-    assert res[0].new_key == "hi" # type: ignore (dynamic property)
+    assert res[0].new_key == "hi"  # type: ignore (dynamic property)
     assert res[0].testKey == "myTest"
-    assert res[0].blah["nestedKey1"] == "nestedVal" # type: ignore (dynamic property)
-    assert res[1].new_key == "hi" # type: ignore (dynamic property)
+    assert res[0].blah["nestedKey1"] == "nestedVal"  # type: ignore (dynamic property)
+    assert res[1].new_key == "hi"  # type: ignore (dynamic property)
     assert res[1].testKey == "myTest"
-    assert res[1].blah["nestedKey1"] == "nestedVal" # type: ignore (dynamic property)
+    assert res[1].blah["nestedKey1"] == "nestedVal"  # type: ignore (dynamic property)
 
 
 @pytest.mark.asyncio
@@ -981,9 +981,9 @@ async def test_dynamic_output_map():
     print("final ", res)
     print("final ", res.model_dump())
     print("final ", res.model_dump_json())
-    assert res.hair_color == "black" # type: ignore (dynamic property)
-    assert res.attributes["eye_color"] == "blue" # type: ignore (dynamic property)
-    assert res.attributes["facial_hair"] == "beard" # type: ignore (dynamic property)
+    assert res.hair_color == "black"  # type: ignore (dynamic property)
+    assert res.attributes["eye_color"] == "blue"  # type: ignore (dynamic property)
+    assert res.attributes["facial_hair"] == "beard"  # type: ignore (dynamic property)
 
 
 @pytest.mark.asyncio
@@ -1015,10 +1015,10 @@ async def test_dynamic_output_union():
     print("final ", res)
     print("final ", res.model_dump())
     print("final ", res.model_dump_json())
-    assert res.hair_color == "black" # type: ignore (dynamic property)
-    assert res.attributes["eye_color"] == "blue" # type: ignore (dynamic property)
-    assert res.attributes["facial_hair"] == "beard" # type: ignore (dynamic property)
-    assert res.height["feet"] == 6 # type: ignore (dynamic property)
+    assert res.hair_color == "black"  # type: ignore (dynamic property)
+    assert res.attributes["eye_color"] == "blue"  # type: ignore (dynamic property)
+    assert res.attributes["facial_hair"] == "beard"  # type: ignore (dynamic property)
+    assert res.height["feet"] == 6  # type: ignore (dynamic property)
 
     res = await b.MyFunc(
         input="My name is Harrison. My hair is black and I'm 1.8 meters tall. I have blue eyes and a beard. I am 30 years old.",
@@ -1028,10 +1028,10 @@ async def test_dynamic_output_union():
     print("final ", res)
     print("final ", res.model_dump())
     print("final ", res.model_dump_json())
-    assert res.hair_color == "black" # type: ignore (dynamic property)
-    assert res.attributes["eye_color"] == "blue" # type: ignore (dynamic property)
-    assert res.attributes["facial_hair"] == "beard" # type: ignore (dynamic property)
-    assert res.height["meters"] == 1.8 # type: ignore (dynamic property)
+    assert res.hair_color == "black"  # type: ignore (dynamic property)
+    assert res.attributes["eye_color"] == "blue"  # type: ignore (dynamic property)
+    assert res.attributes["facial_hair"] == "beard"  # type: ignore (dynamic property)
+    assert res.height["meters"] == 1.8  # type: ignore (dynamic property)
 
 
 @pytest.mark.asyncio