Skip to content

Commit

Permalink
chore: use llama3 instead of claude for bedrock integ-tests (#1160)
Browse files Browse the repository at this point in the history
Claude has a much lower rate limit (5 req/min vs 250 req/min) in our Bedrock
setup, for some bizarre reason.

An alternative option is to use client-side rate limiting, but that
would involve wrapping every `b.TestAws` call:
https://gist.github.com/sxlijin/0c8b67049803c3682b3dfcb6b4155ac4
  • Loading branch information
sxlijin authored Nov 12, 2024
1 parent a578cc2 commit 603d9f7
Show file tree
Hide file tree
Showing 7 changed files with 31 additions and 30 deletions.
8 changes: 4 additions & 4 deletions integ-tests/baml_src/clients.baml
Original file line number Diff line number Diff line change
Expand Up @@ -106,9 +106,9 @@ client<llm> AwsBedrock {
inference_configuration {
max_tokens 100
}
model "anthropic.claude-3-5-sonnet-20240620-v1:0"
// model "anthropic.claude-3-5-sonnet-20240620-v1:0"
// model_id "anthropic.claude-3-haiku-20240307-v1:0"
// model_id "meta.llama3-8b-instruct-v1:0"
model_id "meta.llama3-8b-instruct-v1:0"
// model_id "mistral.mistral-7b-instruct-v0:2"
api_key ""
}
Expand All @@ -121,9 +121,9 @@ client<llm> AwsBedrockInvalidRegion {
inference_configuration {
max_tokens 100
}
model "anthropic.claude-3-5-sonnet-20240620-v1:0"
// model "anthropic.claude-3-5-sonnet-20240620-v1:0"
// model_id "anthropic.claude-3-haiku-20240307-v1:0"
// model_id "meta.llama3-8b-instruct-v1:0"
model_id "meta.llama3-8b-instruct-v1:0"
// model_id "mistral.mistral-7b-instruct-v0:2"
api_key ""
}
Expand Down
2 changes: 1 addition & 1 deletion integ-tests/python/baml_client/inlinedbaml.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@

file_map = {

"clients.baml": "retry_policy Bar {\n max_retries 3\n strategy {\n type exponential_backoff\n }\n}\n\nretry_policy Foo {\n max_retries 3\n strategy {\n type constant_delay\n delay_ms 100\n }\n}\n\nclient<llm> GPT4 {\n provider openai\n options {\n model gpt-4o\n api_key env.OPENAI_API_KEY\n }\n} \n\n\nclient<llm> GPT4o {\n provider openai\n options {\n model gpt-4o\n api_key env.OPENAI_API_KEY\n }\n} \n\n\nclient<llm> GPT4Turbo {\n retry_policy Bar\n provider openai\n options {\n model gpt-4-turbo\n api_key env.OPENAI_API_KEY\n }\n} \n\nclient<llm> GPT35 {\n provider openai\n options {\n model \"gpt-3.5-turbo\"\n api_key env.OPENAI_API_KEY\n }\n}\n\nclient<llm> GPT35LegacyProvider {\n provider openai\n options {\n model \"gpt-3.5-turbo\"\n api_key env.OPENAI_API_KEY\n }\n}\n\n\nclient<llm> Ollama {\n provider ollama\n options {\n model llama2\n }\n}\n\nclient<llm> GPT35Azure {\n provider azure-openai\n options {\n resource_name \"west-us-azure-baml\"\n deployment_id \"gpt-35-turbo-default\"\n // base_url \"https://west-us-azure-baml.openai.azure.com/openai/deployments/gpt-35-turbo-default\"\n api_version \"2024-02-01\"\n api_key env.AZURE_OPENAI_API_KEY\n }\n}\n\nclient<llm> Gemini {\n provider google-ai\n options {\n model gemini-1.5-pro-001\n api_key env.GOOGLE_API_KEY\n safetySettings {\n category HARM_CATEGORY_HATE_SPEECH\n threshold BLOCK_LOW_AND_ABOVE\n\n }\n }\n}\n\nclient<llm> Vertex {\n provider vertex-ai \n options {\n model gemini-1.5-pro\n project_id sam-project-vertex-1\n location us-central1\n credentials env.INTEG_TESTS_GOOGLE_APPLICATION_CREDENTIALS\n\n }\n}\n\n\nclient<llm> AwsBedrock {\n provider aws-bedrock\n options {\n inference_configuration {\n max_tokens 100\n }\n model \"anthropic.claude-3-5-sonnet-20240620-v1:0\"\n // model_id \"anthropic.claude-3-haiku-20240307-v1:0\"\n // model_id \"meta.llama3-8b-instruct-v1:0\"\n // model_id \"mistral.mistral-7b-instruct-v0:2\"\n api_key \"\"\n }\n}\n\nclient<llm> AwsBedrockInvalidRegion {\n provider 
aws-bedrock\n options {\n region \"us-invalid-7\"\n inference_configuration {\n max_tokens 100\n }\n model \"anthropic.claude-3-5-sonnet-20240620-v1:0\"\n // model_id \"anthropic.claude-3-haiku-20240307-v1:0\"\n // model_id \"meta.llama3-8b-instruct-v1:0\"\n // model_id \"mistral.mistral-7b-instruct-v0:2\"\n api_key \"\"\n }\n}\n\nclient<llm> Sonnet {\n provider anthropic\n options {\n model claude-3-5-sonnet-20241022\n api_key env.ANTHROPIC_API_KEY\n }\n}\n\nclient<llm> Claude {\n provider anthropic\n options {\n model claude-3-haiku-20240307\n api_key env.ANTHROPIC_API_KEY\n max_tokens 1000\n }\n}\n\nclient<llm> ClaudeWithCaching {\n provider anthropic\n options {\n model claude-3-haiku-20240307\n api_key env.ANTHROPIC_API_KEY\n max_tokens 1000\n allowed_role_metadata [\"cache_control\"]\n headers {\n \"anthropic-beta\" \"prompt-caching-2024-07-31\"\n }\n }\n}\n\nclient<llm> Resilient_SimpleSyntax {\n retry_policy Foo\n provider baml-fallback\n options {\n strategy [\n GPT4Turbo\n GPT35\n Lottery_SimpleSyntax\n ]\n }\n} \n \nclient<llm> Lottery_SimpleSyntax {\n provider baml-round-robin\n options {\n start 0\n strategy [\n GPT35\n Claude\n ]\n }\n}\n\nclient<llm> TogetherAi {\n provider \"openai-generic\"\n options {\n base_url \"https://api.together.ai/v1\"\n api_key env.TOGETHER_API_KEY\n model \"meta-llama/Llama-3-70b-chat-hf\"\n }\n}\n\n",
"clients.baml": "retry_policy Bar {\n max_retries 3\n strategy {\n type exponential_backoff\n }\n}\n\nretry_policy Foo {\n max_retries 3\n strategy {\n type constant_delay\n delay_ms 100\n }\n}\n\nclient<llm> GPT4 {\n provider openai\n options {\n model gpt-4o\n api_key env.OPENAI_API_KEY\n }\n} \n\n\nclient<llm> GPT4o {\n provider openai\n options {\n model gpt-4o\n api_key env.OPENAI_API_KEY\n }\n} \n\n\nclient<llm> GPT4Turbo {\n retry_policy Bar\n provider openai\n options {\n model gpt-4-turbo\n api_key env.OPENAI_API_KEY\n }\n} \n\nclient<llm> GPT35 {\n provider openai\n options {\n model \"gpt-3.5-turbo\"\n api_key env.OPENAI_API_KEY\n }\n}\n\nclient<llm> GPT35LegacyProvider {\n provider openai\n options {\n model \"gpt-3.5-turbo\"\n api_key env.OPENAI_API_KEY\n }\n}\n\n\nclient<llm> Ollama {\n provider ollama\n options {\n model llama2\n }\n}\n\nclient<llm> GPT35Azure {\n provider azure-openai\n options {\n resource_name \"west-us-azure-baml\"\n deployment_id \"gpt-35-turbo-default\"\n // base_url \"https://west-us-azure-baml.openai.azure.com/openai/deployments/gpt-35-turbo-default\"\n api_version \"2024-02-01\"\n api_key env.AZURE_OPENAI_API_KEY\n }\n}\n\nclient<llm> Gemini {\n provider google-ai\n options {\n model gemini-1.5-pro-001\n api_key env.GOOGLE_API_KEY\n safetySettings {\n category HARM_CATEGORY_HATE_SPEECH\n threshold BLOCK_LOW_AND_ABOVE\n\n }\n }\n}\n\nclient<llm> Vertex {\n provider vertex-ai \n options {\n model gemini-1.5-pro\n project_id sam-project-vertex-1\n location us-central1\n credentials env.INTEG_TESTS_GOOGLE_APPLICATION_CREDENTIALS\n\n }\n}\n\n\nclient<llm> AwsBedrock {\n provider aws-bedrock\n options {\n inference_configuration {\n max_tokens 100\n }\n // model \"anthropic.claude-3-5-sonnet-20240620-v1:0\"\n // model_id \"anthropic.claude-3-haiku-20240307-v1:0\"\n model_id \"meta.llama3-8b-instruct-v1:0\"\n // model_id \"mistral.mistral-7b-instruct-v0:2\"\n api_key \"\"\n }\n}\n\nclient<llm> AwsBedrockInvalidRegion {\n provider 
aws-bedrock\n options {\n region \"us-invalid-7\"\n inference_configuration {\n max_tokens 100\n }\n // model \"anthropic.claude-3-5-sonnet-20240620-v1:0\"\n // model_id \"anthropic.claude-3-haiku-20240307-v1:0\"\n model_id \"meta.llama3-8b-instruct-v1:0\"\n // model_id \"mistral.mistral-7b-instruct-v0:2\"\n api_key \"\"\n }\n}\n\nclient<llm> Sonnet {\n provider anthropic\n options {\n model claude-3-5-sonnet-20241022\n api_key env.ANTHROPIC_API_KEY\n }\n}\n\nclient<llm> Claude {\n provider anthropic\n options {\n model claude-3-haiku-20240307\n api_key env.ANTHROPIC_API_KEY\n max_tokens 1000\n }\n}\n\nclient<llm> ClaudeWithCaching {\n provider anthropic\n options {\n model claude-3-haiku-20240307\n api_key env.ANTHROPIC_API_KEY\n max_tokens 1000\n allowed_role_metadata [\"cache_control\"]\n headers {\n \"anthropic-beta\" \"prompt-caching-2024-07-31\"\n }\n }\n}\n\nclient<llm> Resilient_SimpleSyntax {\n retry_policy Foo\n provider baml-fallback\n options {\n strategy [\n GPT4Turbo\n GPT35\n Lottery_SimpleSyntax\n ]\n }\n} \n \nclient<llm> Lottery_SimpleSyntax {\n provider baml-round-robin\n options {\n start 0\n strategy [\n GPT35\n Claude\n ]\n }\n}\n\nclient<llm> TogetherAi {\n provider \"openai-generic\"\n options {\n base_url \"https://api.together.ai/v1\"\n api_key env.TOGETHER_API_KEY\n model \"meta-llama/Llama-3-70b-chat-hf\"\n }\n}\n\n",
"custom-task.baml": "class BookOrder {\n orderId string @description(#\"\n The ID of the book order\n \"#)\n title string @description(#\"\n The title of the ordered book\n \"#)\n quantity int @description(#\"\n The quantity of books ordered\n \"#)\n price float @description(#\"\n The price of the book\n \"#)\n}\n\nclass FlightConfirmation {\n confirmationNumber string @description(#\"\n The flight confirmation number\n \"#)\n flightNumber string @description(#\"\n The flight number\n \"#)\n departureTime string @description(#\"\n The scheduled departure time of the flight\n \"#)\n arrivalTime string @description(#\"\n The scheduled arrival time of the flight\n \"#)\n seatNumber string @description(#\"\n The seat number assigned on the flight\n \"#)\n}\n\nclass GroceryReceipt {\n receiptId string @description(#\"\n The ID of the grocery receipt\n \"#)\n storeName string @description(#\"\n The name of the grocery store\n \"#)\n items (string | int | float)[] @description(#\"\n A list of items purchased. Each item consists of a name, quantity, and price.\n \"#)\n totalAmount float @description(#\"\n The total amount spent on groceries\n \"#)\n}\n\nclass CustomTaskResult {\n bookOrder BookOrder | null\n flightConfirmation FlightConfirmation | null\n groceryReceipt GroceryReceipt | null\n}\n\nfunction CustomTask(input: string) -> BookOrder | FlightConfirmation | GroceryReceipt {\n client \"openai/gpt-4o-mini\"\n prompt #\"\n Given the input string, extract either an order for a book, a flight confirmation, or a grocery receipt.\n\n {{ ctx.output_format }}\n\n Input:\n \n {{ input}}\n \"#\n}\n\ntest CustomTask {\n functions [CustomTask]\n args {\n input #\"\nDear [Your Name],\n\nThank you for booking with [Airline Name]! 
We are pleased to confirm your upcoming flight.\n\nFlight Confirmation Details:\n\nBooking Reference: ABC123\nPassenger Name: [Your Name]\nFlight Number: XY789\nDeparture Date: September 15, 2024\nDeparture Time: 10:30 AM\nArrival Time: 1:45 PM\nDeparture Airport: John F. Kennedy International Airport (JFK), New York, NY\nArrival Airport: Los Angeles International Airport (LAX), Los Angeles, CA\nSeat Number: 12A\nClass: Economy\nBaggage Allowance:\n\nChecked Baggage: 1 piece, up to 23 kg\nCarry-On Baggage: 1 piece, up to 7 kg\nImportant Information:\n\nPlease arrive at the airport at least 2 hours before your scheduled departure.\nCheck-in online via our website or mobile app to save time at the airport.\nEnsure that your identification documents are up to date and match the name on your booking.\nContact Us:\n\nIf you have any questions or need to make changes to your booking, please contact our customer service team at 1-800-123-4567 or email us at support@[airline].com.\n\nWe wish you a pleasant journey and thank you for choosing [Airline Name].\n\nBest regards,\n\n[Airline Name] Customer Service\n \"#\n }\n}",
"fiddle-examples/chain-of-thought.baml": "class Email {\n subject string\n body string\n from_address string\n}\n\nenum OrderStatus {\n ORDERED\n SHIPPED\n DELIVERED\n CANCELLED\n}\n\nclass OrderInfo {\n order_status OrderStatus\n tracking_number string?\n estimated_arrival_date string?\n}\n\nfunction GetOrderInfo(email: Email) -> OrderInfo {\n client GPT4\n prompt #\"\n Given the email below:\n\n ```\n from: {{email.from_address}}\n Email Subject: {{email.subject}}\n Email Body: {{email.body}}\n ```\n\n Extract this info from the email in JSON format:\n {{ ctx.output_format }}\n\n Before you output the JSON, please explain your\n reasoning step-by-step. Here is an example on how to do this:\n 'If we think step by step we can see that ...\n therefore the output JSON is:\n {\n ... the json schema ...\n }'\n \"#\n}",
"fiddle-examples/chat-roles.baml": "// This will be available as an enum in your Python and Typescript code.\nenum Category2 {\n Refund\n CancelOrder\n TechnicalSupport\n AccountIssue\n Question\n}\n\nfunction ClassifyMessage2(input: string) -> Category {\n client GPT4\n\n prompt #\"\n {{ _.role(\"system\") }}\n // You can use _.role(\"system\") to indicate that this text should be a system message\n\n Classify the following INPUT into ONE\n of the following categories:\n\n {{ ctx.output_format }}\n\n {{ _.role(\"user\") }}\n // And _.role(\"user\") to indicate that this text should be a user message\n\n INPUT: {{ input }}\n\n Response:\n \"#\n}",
Expand Down
42 changes: 21 additions & 21 deletions integ-tests/python/tests/test_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -748,7 +748,7 @@ async def test_dynamic_class_output():
baml_options={"tb": tb},
)
print(output.model_dump_json())
assert output.hair_color == "black" # type: ignore (dynamic property)
assert output.hair_color == "black" # type: ignore (dynamic property)


@pytest.mark.asyncio
Expand Down Expand Up @@ -834,7 +834,7 @@ async def test_stream_dynamic_class_output():
print("final ", final)
print("final ", final.model_dump())
print("final ", final.model_dump_json())
assert final.hair_color == "black" # type: ignore (dynamic property)
assert final.hair_color == "black" # type: ignore (dynamic property)


@pytest.mark.asyncio
Expand Down Expand Up @@ -868,12 +868,12 @@ async def test_dynamic_inputs_list2():
],
{"tb": tb},
)
assert res[0].new_key == "hi1" # type: ignore (dynamic property)
assert res[0].new_key == "hi1" # type: ignore (dynamic property)
assert res[0].testKey == "myTest"
assert res[0].blah["nestedKey1"] == "nestedVal" # type: ignore (dynamic property)
assert res[1].new_key == "hi" # type: ignore (dynamic property)
assert res[0].blah["nestedKey1"] == "nestedVal" # type: ignore (dynamic property)
assert res[1].new_key == "hi" # type: ignore (dynamic property)
assert res[1].testKey == "myTest"
assert res[1].blah["nestedKey1"] == "nestedVal" # type: ignore (dynamic property)
assert res[1].blah["nestedKey1"] == "nestedVal" # type: ignore (dynamic property)


@pytest.mark.asyncio
Expand Down Expand Up @@ -954,12 +954,12 @@ async def test_dynamic_inputs_list():
],
{"tb": tb},
)
assert res[0].new_key == "hi" # type: ignore (dynamic property)
assert res[0].new_key == "hi" # type: ignore (dynamic property)
assert res[0].testKey == "myTest"
assert res[0].blah["nestedKey1"] == "nestedVal" # type: ignore (dynamic property)
assert res[1].new_key == "hi" # type: ignore (dynamic property)
assert res[0].blah["nestedKey1"] == "nestedVal" # type: ignore (dynamic property)
assert res[1].new_key == "hi" # type: ignore (dynamic property)
assert res[1].testKey == "myTest"
assert res[1].blah["nestedKey1"] == "nestedVal" # type: ignore (dynamic property)
assert res[1].blah["nestedKey1"] == "nestedVal" # type: ignore (dynamic property)


@pytest.mark.asyncio
Expand All @@ -981,9 +981,9 @@ async def test_dynamic_output_map():
print("final ", res)
print("final ", res.model_dump())
print("final ", res.model_dump_json())
assert res.hair_color == "black" # type: ignore (dynamic property)
assert res.attributes["eye_color"] == "blue" # type: ignore (dynamic property)
assert res.attributes["facial_hair"] == "beard" # type: ignore (dynamic property)
assert res.hair_color == "black" # type: ignore (dynamic property)
assert res.attributes["eye_color"] == "blue" # type: ignore (dynamic property)
assert res.attributes["facial_hair"] == "beard" # type: ignore (dynamic property)


@pytest.mark.asyncio
Expand Down Expand Up @@ -1015,10 +1015,10 @@ async def test_dynamic_output_union():
print("final ", res)
print("final ", res.model_dump())
print("final ", res.model_dump_json())
assert res.hair_color == "black" # type: ignore (dynamic property)
assert res.attributes["eye_color"] == "blue" # type: ignore (dynamic property)
assert res.attributes["facial_hair"] == "beard" # type: ignore (dynamic property)
assert res.height["feet"] == 6 # type: ignore (dynamic property)
assert res.hair_color == "black" # type: ignore (dynamic property)
assert res.attributes["eye_color"] == "blue" # type: ignore (dynamic property)
assert res.attributes["facial_hair"] == "beard" # type: ignore (dynamic property)
assert res.height["feet"] == 6 # type: ignore (dynamic property)

res = await b.MyFunc(
input="My name is Harrison. My hair is black and I'm 1.8 meters tall. I have blue eyes and a beard. I am 30 years old.",
Expand All @@ -1028,10 +1028,10 @@ async def test_dynamic_output_union():
print("final ", res)
print("final ", res.model_dump())
print("final ", res.model_dump_json())
assert res.hair_color == "black" # type: ignore (dynamic property)
assert res.attributes["eye_color"] == "blue" # type: ignore (dynamic property)
assert res.attributes["facial_hair"] == "beard" # type: ignore (dynamic property)
assert res.height["meters"] == 1.8 # type: ignore (dynamic property)
assert res.hair_color == "black" # type: ignore (dynamic property)
assert res.attributes["eye_color"] == "blue" # type: ignore (dynamic property)
assert res.attributes["facial_hair"] == "beard" # type: ignore (dynamic property)
assert res.height["meters"] == 1.8 # type: ignore (dynamic property)


@pytest.mark.asyncio
Expand Down
Loading

0 comments on commit 603d9f7

Please sign in to comment.