diff --git a/.DS_Store b/.DS_Store deleted file mode 100644 index 31c8f3d..0000000 Binary files a/.DS_Store and /dev/null differ diff --git a/.github/workflows/cicd-demo.yml b/.github/workflows/cicd-demo.yml index 0cff299..e8317a9 100644 --- a/.github/workflows/cicd-demo.yml +++ b/.github/workflows/cicd-demo.yml @@ -44,7 +44,7 @@ jobs: BEDROCK_AGENT_ALIAS_ID: ${{ vars.BEDROCK_AGENT_ALIAS_ID }} BEDROCK_AGENT_ID: ${{ vars.BEDROCK_AGENT_ID }} run: | - sed -e "s/BEDROCK_AGENT_ALIAS_ID/$BEDROCK_AGENT_ALIAS_ID/g" -e "s/BEDROCK_AGENT_ID/$BEDROCK_AGENT_ID/g" sample-test-plans/bedrock-agent-target/template.yml > agenteval.yml + sed -e "s/BEDROCK_AGENT_ALIAS_ID/$BEDROCK_AGENT_ALIAS_ID/g" -e "s/BEDROCK_AGENT_ID/$BEDROCK_AGENT_ID/g" samples/test_plan_templates/bedrock_agent_target/template.yml > agenteval.yml agenteval run - name: Test Summary diff --git a/demo/.DS_Store b/demo/.DS_Store deleted file mode 100644 index fc99d1f..0000000 Binary files a/demo/.DS_Store and /dev/null differ diff --git a/demo/requirements.txt b/demo/requirements.txt deleted file mode 100644 index 1d45d31..0000000 --- a/demo/requirements.txt +++ /dev/null @@ -1,81 +0,0 @@ -agent-evaluation==0.2.0 -altair==5.3.0 -annotated-types==0.6.0 -attrs==23.2.0 -beautifulsoup4==4.12.3 -blinker==1.8.2 -boto3==1.34.104 -botocore==1.34.104 -cachetools==5.3.3 -certifi==2024.7.4 -charset-normalizer==3.3.2 -click==8.1.7 -contourpy==1.2.1 -cycler==0.12.1 -entrypoints==0.4 -Faker==25.2.0 -favicon==0.7.0 -fonttools==4.51.0 -gitdb==4.0.11 -GitPython==3.1.43 -htbuilder==0.6.2 -idna==3.7 -Jinja2==3.1.4 -jmespath==1.0.1 -jsonpath-ng==1.6.1 -jsonschema==4.22.0 -jsonschema-specifications==2023.12.1 -kiwisolver==1.4.5 -lxml==5.2.2 -Markdown==3.6 -markdown-it-py==3.0.0 -markdownlit==0.0.7 -MarkupSafe==2.1.5 -matplotlib==3.8.4 -mdurl==0.1.2 -more-itertools==10.2.0 -numpy==1.26.4 -packaging==24.0 -pandas==2.2.2 -pillow==10.3.0 -ply==3.11 -prometheus_client==0.20.0 -protobuf==4.25.3 -pyarrow==16.1.0 -pydantic==2.7.1 
-pydantic_core==2.18.2 -pydeck==0.9.1 -Pygments==2.18.0 -pymdown-extensions==10.8.1 -pyparsing==3.1.2 -python-dateutil==2.9.0.post0 -pytz==2024.1 -PyYAML==6.0.1 -referencing==0.35.1 -requests==2.32.0 -rich==13.7.1 -rpds-py==0.18.1 -s3transfer==0.10.1 -six==1.16.0 -smmap==5.0.1 -soupsieve==2.5 -SQLAlchemy==2.0.30 -st-annotated-text==4.0.1 -streamlit==1.37.0 -streamlit-camera-input-live==0.2.0 -streamlit-card==1.0.2 -streamlit-embedcode==0.1.2 -streamlit-extras==0.4.2 -streamlit-faker==0.0.3 -streamlit-image-coordinates==0.1.6 -streamlit-keyup==0.2.4 -streamlit-toggle-switch==1.0.2 -streamlit-vertical-slider==2.5.5 -tenacity==8.3.0 -toml==0.10.2 -toolz==0.12.1 -tornado==6.4.1 -typing_extensions==4.11.0 -tzdata==2024.1 -urllib3==2.2.2 -validators==0.28.1 diff --git a/requirements-dev.txt b/requirements-dev.txt index 5932078..7eed496 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -9,4 +9,4 @@ mkdocs-material mkdocstrings[python] mkdocs-click bandit -pip-audit \ No newline at end of file +pip-audit diff --git a/requirements.txt b/requirements.txt index c368cd1..64b513d 100644 --- a/requirements.txt +++ b/requirements.txt @@ -5,6 +5,3 @@ pydantic>=2.1.0,<3.0 rich>=13.7.0,<14.0 jinja2>=3.1.3,<4.0 jsonpath-ng>=1.6.1,<2.0 -pathlib -aws-cdk-lib==2.155.0 -constructs>=10.0.0,<11.0.0 \ No newline at end of file diff --git a/stepfunctions/.gitignore b/samples/aws_step_functions_deployment/.gitignore similarity index 92% rename from stepfunctions/.gitignore rename to samples/aws_step_functions_deployment/.gitignore index e9071d0..dfedc1a 100644 --- a/stepfunctions/.gitignore +++ b/samples/aws_step_functions_deployment/.gitignore @@ -9,4 +9,4 @@ __pycache__ # CDK asset staging directory .cdk.staging cdk.out -.DS_Store +.DS_Store \ No newline at end of file diff --git a/stepfunctions/README.md b/samples/aws_step_functions_deployment/README.md similarity index 52% rename from stepfunctions/README.md rename to samples/aws_step_functions_deployment/README.md index 
af346c3..a3d505e 100644 --- a/stepfunctions/README.md +++ b/samples/aws_step_functions_deployment/README.md @@ -1,4 +1,4 @@ -# Bedrock Agent Evaluation Framework +# Bedrock Agent Evaluation Step Functions Deployment This project implements an automated evaluation framework for Amazon Bedrock Agents using AWS CDK, Step Functions, and Lambda. @@ -6,7 +6,8 @@ This project implements an automated evaluation framework for Amazon Bedrock Age The framework automates the process of updating Bedrock Agents with new prompts, creating aliases, running evaluation scenarios, and cleaning up resources. It uses AWS Step Functions to orchestrate the workflow and AWS Lambda functions to perform individual tasks. -The example provided is for an energy chatbot usecase +The example provided is for an energy chatbot use case. We have provided three versions of **agent instruction** as `prompts` in the [example](example_prompt_jsons/prompts_scenarios.json). For each version, the framework will automatically create a new Agent alias, test different scenarios, and update the agent. + ## Components @@ -21,13 +22,14 @@ The example provided is for an energy chatbot usecase - `delete_alias`: Removes the temporary alias after evaluation. 3. **Step Functions State Machine**: Orchestrates the evaluation workflow, including agent updates, status checks, and scenario execution. + + ![workflow](graph_view.png) 4. **S3 Bucket**: Stores evaluation prompts and results. 5. **EventBridge Rule**: Triggers the Step Functions workflow when new evaluation prompts are uploaded to S3. ## Workflow - 1. New evaluation prompts are uploaded to the S3 bucket. 2. The EventBridge rule triggers the Step Functions state machine. 3. The state machine updates the Bedrock Agent with new instructions. @@ -41,80 +43,33 @@ The example provided is for an energy chatbot usecase 1. Ensure you have the AWS CDK installed and configured. 2. 
Install project dependencies: ``` - npm install + cd samples/aws_step_functions_deployment + python3 -m venv .venv + source .venv/bin/activate + pip install -r requirements.txt + ``` +3. Run CDK synth: ``` -3. Deploy the stack: + cdk synth + ``` +4. Deploy the stack: ``` cdk deploy ``` ## Usage - To run an evaluation: +1. Create a Bedrock Agent (You don't need to configure it yet just simply create it). +2. Prepare an evaluation JSON file with prompts and customer profiles as the [example](example_prompt_jsons/prompts_scenarios.json) (Replace the agent id and name with the one you have created in the file). +3. Upload the file to the S3 bucket `stepfunctionsstack-evaluationbucket` in the `evaluation_prompts/` prefix. +4. The evaluation process will start automatically. +5. Results will be available in the S3 bucket under the `results/` prefix. + -1. Prepare an evaluation JSON file with prompts and customer profiles. -2. Upload the file to the S3 bucket in the `evaluation_prompts/` prefix. -3. The evaluation process will start automatically. -4. Results will be available in the S3 bucket under the `results/` prefix. +![demo](demo.gif) ## Notes - Ensure proper IAM permissions are set up for accessing Bedrock, S3, and other AWS services. - The `agenteval` library is assumed to be provided as a custom Lambda layer. - -# CDK instructions - -The `cdk.json` file tells the CDK Toolkit how to execute your app. - -This project is set up like a standard Python project. The initialization -process also creates a virtualenv within this project, stored under the `.venv` -directory. To create the virtualenv it assumes that there is a `python3` -(or `python` for Windows) executable in your path with access to the `venv` -package. If for any reason the automatic creation of the virtualenv fails, -you can create the virtualenv manually. 
- -To manually create a virtualenv on MacOS and Linux: - -``` -$ python3 -m venv .venv -``` - -After the init process completes and the virtualenv is created, you can use the following -step to activate your virtualenv. - -``` -$ source .venv/bin/activate -``` - -If you are a Windows platform, you would activate the virtualenv like this: - -``` -% .venv\Scripts\activate.bat -``` - -Once the virtualenv is activated, you can install the required dependencies. - -``` -$ pip install -r requirements.txt -``` - -At this point you can now synthesize the CloudFormation template for this code. - -``` -$ cdk synth -``` - -To add additional dependencies, for example other CDK libraries, just add -them to your `setup.py` file and rerun the `pip install -r requirements.txt` -command. - -## Useful commands - - * `cdk ls` list all stacks in the app - * `cdk synth` emits the synthesized CloudFormation template - * `cdk deploy` deploy this stack to your default AWS account/region - * `cdk diff` compare deployed stack with current state - * `cdk docs` open CDK documentation - -Enjoy! 
diff --git a/stepfunctions/app.py b/samples/aws_step_functions_deployment/app.py similarity index 100% rename from stepfunctions/app.py rename to samples/aws_step_functions_deployment/app.py diff --git a/stepfunctions/cdk.json b/samples/aws_step_functions_deployment/cdk.json similarity index 100% rename from stepfunctions/cdk.json rename to samples/aws_step_functions_deployment/cdk.json diff --git a/samples/aws_step_functions_deployment/demo.gif b/samples/aws_step_functions_deployment/demo.gif new file mode 100644 index 0000000..87083d5 Binary files /dev/null and b/samples/aws_step_functions_deployment/demo.gif differ diff --git a/stepfunctions/example_prompt_jsons/prompts_scenarios.json b/samples/aws_step_functions_deployment/example_prompt_jsons/prompts_scenarios.json similarity index 99% rename from stepfunctions/example_prompt_jsons/prompts_scenarios.json rename to samples/aws_step_functions_deployment/example_prompt_jsons/prompts_scenarios.json index 936a47a..fabbc68 100644 --- a/stepfunctions/example_prompt_jsons/prompts_scenarios.json +++ b/samples/aws_step_functions_deployment/example_prompt_jsons/prompts_scenarios.json @@ -1,5 +1,5 @@ -{ "agent_id" : "ABCDEFGHIJ", - "agent_name": "agent_name", +{ "agent_id" : "WQKSOXFRHJ", + "agent_name": "agent-quick-start-2ofav", "prompts": [ { "id":"1", diff --git a/samples/aws_step_functions_deployment/graph_view.png b/samples/aws_step_functions_deployment/graph_view.png new file mode 100644 index 0000000..8c7c556 Binary files /dev/null and b/samples/aws_step_functions_deployment/graph_view.png differ diff --git a/samples/aws_step_functions_deployment/layers/agent-evaluation/requirements.txt b/samples/aws_step_functions_deployment/layers/agent-evaluation/requirements.txt new file mode 100644 index 0000000..f3d84cf --- /dev/null +++ b/samples/aws_step_functions_deployment/layers/agent-evaluation/requirements.txt @@ -0,0 +1 @@ +agent-evaluation==0.2.0 \ No newline at end of file diff --git 
a/stepfunctions/layers/aws-lambda-powertools/requirements.txt b/samples/aws_step_functions_deployment/layers/aws-lambda-powertools/requirements.txt similarity index 100% rename from stepfunctions/layers/aws-lambda-powertools/requirements.txt rename to samples/aws_step_functions_deployment/layers/aws-lambda-powertools/requirements.txt diff --git a/stepfunctions/requirements-dev.txt b/samples/aws_step_functions_deployment/requirements-dev.txt similarity index 100% rename from stepfunctions/requirements-dev.txt rename to samples/aws_step_functions_deployment/requirements-dev.txt diff --git a/stepfunctions/requirements.txt b/samples/aws_step_functions_deployment/requirements.txt similarity index 62% rename from stepfunctions/requirements.txt rename to samples/aws_step_functions_deployment/requirements.txt index c0a15e4..0b10bcc 100644 --- a/stepfunctions/requirements.txt +++ b/samples/aws_step_functions_deployment/requirements.txt @@ -1,3 +1,3 @@ pathlib -aws-cdk-lib==2.155.0 constructs>=10.0.0,<11.0.0 +aws-cdk-lib==2.155.0 \ No newline at end of file diff --git a/stepfunctions/source.bat b/samples/aws_step_functions_deployment/source.bat similarity index 100% rename from stepfunctions/source.bat rename to samples/aws_step_functions_deployment/source.bat diff --git a/demo/utils/__init__.py b/samples/aws_step_functions_deployment/stepfunctions/__init__.py similarity index 100% rename from demo/utils/__init__.py rename to samples/aws_step_functions_deployment/stepfunctions/__init__.py diff --git a/stepfunctions/stepfunctions/functions/check_agent_status_1/index.py b/samples/aws_step_functions_deployment/stepfunctions/functions/check_agent_status_1/index.py similarity index 55% rename from stepfunctions/stepfunctions/functions/check_agent_status_1/index.py rename to samples/aws_step_functions_deployment/stepfunctions/functions/check_agent_status_1/index.py index 3a5ee67..0fdcce9 100644 --- a/stepfunctions/stepfunctions/functions/check_agent_status_1/index.py +++ 
b/samples/aws_step_functions_deployment/stepfunctions/functions/check_agent_status_1/index.py @@ -1,6 +1,4 @@ import boto3 -import json -import os s3_client = boto3.client('s3') bedrock_agent = boto3.client('bedrock-agent') @@ -11,23 +9,20 @@ def handler(event, context): agent_id = event["agent_id"] - - logger.info("Getting agent status") + logger.info(f"Getting agent status for agent: {agent_id}") try: response = bedrock_agent.get_agent( - agentId=agent_id + agentId=agent_id ) agent_status = response["agent"]["agentStatus"] logger.info(f"Agent status: {agent_status}") + return { + 'statusCode': 200, + 'agent_id': agent_id, + 'agent_status': agent_status + } except Exception as e: - logger.error(f"Error getting agent status: {e}") - - agent_status = response["agent"]["agentStatus"] - - - return { - 'statusCode': 200, - 'agent_id': agent_id, - 'agent_status': agent_status - } - \ No newline at end of file + return { + 'statusCode': 500, + 'error': f"Erorr getting agent: {e}" + } diff --git a/stepfunctions/stepfunctions/functions/check_agent_status_2/index.py b/samples/aws_step_functions_deployment/stepfunctions/functions/check_agent_status_2/index.py similarity index 57% rename from stepfunctions/stepfunctions/functions/check_agent_status_2/index.py rename to samples/aws_step_functions_deployment/stepfunctions/functions/check_agent_status_2/index.py index eb3fd07..2c1e66d 100644 --- a/stepfunctions/stepfunctions/functions/check_agent_status_2/index.py +++ b/samples/aws_step_functions_deployment/stepfunctions/functions/check_agent_status_2/index.py @@ -11,22 +11,20 @@ def handler(event, context): agent_id = event["update_output"]["agentid"] - - logger.info("Getting agent status") + logger.info(f"Getting agent status for agent: {agent_id}") try: response = bedrock_agent.get_agent( - agentId=agent_id + agentId=agent_id ) agent_status = response["agent"]["agentStatus"] logger.info(f"Agent status: {agent_status}") - + return { + 'statusCode': 200, + 'agent_id': agent_id, 
+ 'agent_status': agent_status + } except Exception as e: - logger.error(f"Erorr getting agent: {e}") - - agent_status = response["agent"]["agentStatus"] - - return { - 'statusCode': 200, - 'agent_id': agent_id, - 'agent_status': agent_status - } + return { + 'statusCode': 500, + 'error': f"Erorr getting agent: {e}" + } diff --git a/stepfunctions/stepfunctions/functions/create_alias/index.py b/samples/aws_step_functions_deployment/stepfunctions/functions/create_alias/index.py similarity index 89% rename from stepfunctions/stepfunctions/functions/create_alias/index.py rename to samples/aws_step_functions_deployment/stepfunctions/functions/create_alias/index.py index a75b644..63d9909 100644 --- a/stepfunctions/stepfunctions/functions/create_alias/index.py +++ b/samples/aws_step_functions_deployment/stepfunctions/functions/create_alias/index.py @@ -22,6 +22,10 @@ def handler(event, context): except Exception as e: logger.error(f"Error creating alias: {e}") + return { + 'statusCode': 500, + 'body': json.dumps('Error creating alias') + } agent_id = alias_resp["agentAlias"]["agentId"] diff --git a/stepfunctions/stepfunctions/functions/delete_alias/index.py b/samples/aws_step_functions_deployment/stepfunctions/functions/delete_alias/index.py similarity index 75% rename from stepfunctions/stepfunctions/functions/delete_alias/index.py rename to samples/aws_step_functions_deployment/stepfunctions/functions/delete_alias/index.py index 0e5c87b..c660e6a 100644 --- a/stepfunctions/stepfunctions/functions/delete_alias/index.py +++ b/samples/aws_step_functions_deployment/stepfunctions/functions/delete_alias/index.py @@ -1,7 +1,4 @@ -import json import boto3 -import uuid -import os from aws_lambda_powertools import Logger logger = Logger() @@ -16,13 +13,17 @@ def handler(event, context): logger.info("Deleting Agent Alias") try: response = bedrock_agent.delete_agent_alias( - agentAliasId=agent_alias_id, - agentId=agent_id + agentAliasId=agent_alias_id, + agentId=agent_id ) 
logger.info(f"Delete alias response: {response}") except Exception as e: logger.error(f"Error deleting agent alias : {e}") + return { + 'statusCode': 500, + 'error': f"Erorr deleting agent alias: {e}" + } return { 'statusCode': 200, diff --git a/stepfunctions/stepfunctions/functions/generate_map/index.py b/samples/aws_step_functions_deployment/stepfunctions/functions/generate_map/index.py similarity index 90% rename from stepfunctions/stepfunctions/functions/generate_map/index.py rename to samples/aws_step_functions_deployment/stepfunctions/functions/generate_map/index.py index 873b7e7..2ced93e 100644 --- a/stepfunctions/stepfunctions/functions/generate_map/index.py +++ b/samples/aws_step_functions_deployment/stepfunctions/functions/generate_map/index.py @@ -20,6 +20,10 @@ def handler(event, context): logger.info(text) except Exception as e: logger.error(f"Error getting object: {e}") + return { + 'statusCode': 500, + 'body': 'Error fetching scenarios' + } prompts = text['prompts'] diff --git a/stepfunctions/stepfunctions/functions/run_test/index.py b/samples/aws_step_functions_deployment/stepfunctions/functions/run_test/index.py similarity index 93% rename from stepfunctions/stepfunctions/functions/run_test/index.py rename to samples/aws_step_functions_deployment/stepfunctions/functions/run_test/index.py index b6b0a6d..5aed0a7 100644 --- a/stepfunctions/stepfunctions/functions/run_test/index.py +++ b/samples/aws_step_functions_deployment/stepfunctions/functions/run_test/index.py @@ -1,8 +1,6 @@ import yaml -import json import datetime import os -import shutil import threading import time import boto3 @@ -11,13 +9,15 @@ from agenteval.runner import Runner from agenteval.plan import Plan +from aws_lambda_powertools import Logger + +logger = Logger() s3_client = boto3.client('s3') def handler(event, context): scenario = event['scenario'] - prompt = event['prompt'] agent_id = event['agent_id'] agent_alias_id = event['agent_alias_id'] agent_alias_name = 
event['agent_alias_name'] @@ -38,7 +38,6 @@ def handler(event, context): yaml_data = { 'evaluator': { 'model': 'claude-3', - 'region': 'us-east-1' }, 'target': { 'type': 'bedrock-agent', @@ -96,20 +95,17 @@ def handler(event, context): verbose=False, num_threads=None, work_dir = test_result_dir - ) + ) try: runner_thread = threading.Thread(target=runner.run) runner_thread.start() - - start_time = datetime.datetime.now() num_completed = 0 while num_completed < runner.num_tests: time.sleep(1) num_completed = len(list(filter(lambda x:x != None, runner.results.values()))) - percentage = num_completed / runner.num_tests runner_thread.join() now = datetime.datetime.now() @@ -124,13 +120,15 @@ def handler(event, context): with open(os.path.join(test_result_dir, "agenteval_summary.md")) as f: result = f.read() - s3_key = f"results/{agent_alias_name}/{uid}/results.md" + s3_key = f"results/agent={agent_id}/alias={agent_alias_name}/test_id={uid}/results.md" s3_client.put_object(Bucket=bucket_name, Key=s3_key, Body=result) except Exception as e: + logger.error(f"Error running the test: {e}") status = "error" + return{ 'created_at': created_at, 'finished_at':finished_at, diff --git a/stepfunctions/stepfunctions/functions/update_bedrock_agent/index.py b/samples/aws_step_functions_deployment/stepfunctions/functions/update_bedrock_agent/index.py similarity index 85% rename from stepfunctions/stepfunctions/functions/update_bedrock_agent/index.py rename to samples/aws_step_functions_deployment/stepfunctions/functions/update_bedrock_agent/index.py index 3ba810c..c3fdbc4 100644 --- a/stepfunctions/stepfunctions/functions/update_bedrock_agent/index.py +++ b/samples/aws_step_functions_deployment/stepfunctions/functions/update_bedrock_agent/index.py @@ -1,6 +1,4 @@ -import json import boto3 -import uuid import os from aws_lambda_powertools import Logger @@ -33,6 +31,10 @@ def handler(event, context): logger.info(f"Update agent response: {update_resp}") except Exception as e: 
logger.error(f"Error updating agent: {e}") + return { + 'statusCode': 500, + 'error': "Error updating agent" + } logger.info("Preparing Agent") try: @@ -40,7 +42,10 @@ def handler(event, context): logger.info(f"Prepaing Agent response: {prep_resp}") except Exception as e: logger.error(f"Error preparing agent : {e}") - + return { + 'statusCode': 500, + 'error': "Error preparing agent" + } return { 'statusCode': 200, diff --git a/stepfunctions/stepfunctions/layer/__init__.py b/samples/aws_step_functions_deployment/stepfunctions/layer/__init__.py similarity index 100% rename from stepfunctions/stepfunctions/layer/__init__.py rename to samples/aws_step_functions_deployment/stepfunctions/layer/__init__.py diff --git a/stepfunctions/stepfunctions/layer/layer.py b/samples/aws_step_functions_deployment/stepfunctions/layer/layer.py similarity index 100% rename from stepfunctions/stepfunctions/layer/layer.py rename to samples/aws_step_functions_deployment/stepfunctions/layer/layer.py diff --git a/stepfunctions/stepfunctions/stepfunctions_stack.py b/samples/aws_step_functions_deployment/stepfunctions/stepfunctions_stack.py similarity index 94% rename from stepfunctions/stepfunctions/stepfunctions_stack.py rename to samples/aws_step_functions_deployment/stepfunctions/stepfunctions_stack.py index 934b3e0..323145a 100644 --- a/stepfunctions/stepfunctions/stepfunctions_stack.py +++ b/samples/aws_step_functions_deployment/stepfunctions/stepfunctions_stack.py @@ -15,9 +15,17 @@ ) from constructs import Construct from .layer import Layer -architecture = _lambda.Architecture.X86_64 +import platform + runtime = _lambda.Runtime.PYTHON_3_12 + +platform_mapping = { + "x86_64": _lambda.Architecture.X86_64, + "arm64": _lambda.Architecture.ARM_64 +} +architecture = platform_mapping[platform.uname().machine] + class StepfunctionsStack(Stack): def __init__(self, scope: Construct, construct_id: str, **kwargs) -> None: @@ -236,11 +244,11 @@ def __init__(self, scope: Construct, construct_id: 
str, **kwargs) -> None: "Agent Alias Map", max_concurrency=1, items_path = sfn.JsonPath.string_at("$.body"), - parameters={ - "agent_id": sfn.JsonPath.string_at("$.agent_id"), - "agent_name": sfn.JsonPath.string_at("$.agent_name"), - "prompt": sfn.JsonPath.string_at("$$.Map.Item.Value.prompt"), - "scenarios": sfn.JsonPath.string_at("$$.Map.Item.Value.scenarios") + item_selector={ + "agent_id": sfn.JsonPath.string_at("$.agent_id"), + "agent_name": sfn.JsonPath.string_at("$.agent_name"), + "prompt": sfn.JsonPath.string_at("$$.Map.Item.Value.prompt"), + "scenarios": sfn.JsonPath.string_at("$$.Map.Item.Value.scenarios") } #you can only update an agent one at a time # @@ -255,7 +263,7 @@ def __init__(self, scope: Construct, construct_id: str, **kwargs) -> None: "RunTestFunction", runtime=runtime, architecture=architecture, - timeout=cdk.Duration.minutes(5), + timeout=cdk.Duration.minutes(10), handler="index.handler", code=_lambda.Code.from_asset( os.path.join( @@ -264,7 +272,7 @@ def __init__(self, scope: Construct, construct_id: str, **kwargs) -> None: "run_test", ) ), - layers=[agenteval_layer.layer_version], + layers=[powertools_layer.layer_version, agenteval_layer.layer_version], environment={ "EVALUATION_BUCKET": evaluation_bucket.bucket_name, }, @@ -285,7 +293,7 @@ def __init__(self, scope: Construct, construct_id: str, **kwargs) -> None: self, "Evaluation Map", items_path = sfn.JsonPath.string_at("$.scenarios"), - parameters={ + item_selector={ "prompt": sfn.JsonPath.string_at("$.prompt"), "agent_id": sfn.JsonPath.string_at("$.agent_id"), "agent_alias_id": sfn.JsonPath.string_at("$.agent_alias_id"), @@ -329,10 +337,9 @@ def __init__(self, scope: Construct, construct_id: str, **kwargs) -> None: map_definition_2= run_test_step.next(pass_step) - test_map.iterator(map_definition_2) + test_map.item_processor(map_definition_2) - # eval_function_timeout_minutes = 10 map_definition = get_status_step_1.next( first_choice.when(condition1, 
wait_step.next(get_status_step_1)).otherwise(update_agent_step .next( @@ -350,7 +357,7 @@ def __init__(self, scope: Construct, construct_id: str, **kwargs) -> None: - agent_alias_map.iterator(map_definition) + agent_alias_map.item_processor(map_definition) diff --git a/demo/.gitignore b/samples/streamlit_app/.gitignore similarity index 91% rename from demo/.gitignore rename to samples/streamlit_app/.gitignore index 1d215e7..c158c46 100644 --- a/demo/.gitignore +++ b/samples/streamlit_app/.gitignore @@ -2,4 +2,4 @@ static/ # demo working directory for local db and file -workdir/ +workdir/ \ No newline at end of file diff --git a/demo/.streamlit/config.toml b/samples/streamlit_app/.streamlit/config.toml similarity index 100% rename from demo/.streamlit/config.toml rename to samples/streamlit_app/.streamlit/config.toml diff --git a/demo/.streamlit/secrets.toml b/samples/streamlit_app/.streamlit/secrets.toml similarity index 100% rename from demo/.streamlit/secrets.toml rename to samples/streamlit_app/.streamlit/secrets.toml diff --git a/demo/Home.py b/samples/streamlit_app/Home.py similarity index 92% rename from demo/Home.py rename to samples/streamlit_app/Home.py index 22349d4..4126b1d 100644 --- a/demo/Home.py +++ b/samples/streamlit_app/Home.py @@ -8,7 +8,7 @@ st.set_page_config( layout="centered", page_title="Agent Evaluation", - page_icon="⭐", + page_icon="logo.png", ) # set demo working directories @@ -46,7 +46,7 @@ def icon(emoji: str): ) -icon("🥳") +st.image("logo.png", width=100) st.title("Agent Evaluation!") st.markdown( @@ -61,6 +61,6 @@ def icon(emoji: str): st.markdown( """ - Read more in the dedicated :balloon: [Agent Evaluation](https://github.com/awslabs/agent-evaluation/tree/main/docs). + Read more in the dedicated :books: [Agent Evaluation](https://awslabs.github.io/agent-evaluation/). 
""" ) diff --git a/demo/README.md b/samples/streamlit_app/README.md similarity index 100% rename from demo/README.md rename to samples/streamlit_app/README.md diff --git a/demo/agenteval_demo_ui.gif b/samples/streamlit_app/demo.gif similarity index 56% rename from demo/agenteval_demo_ui.gif rename to samples/streamlit_app/demo.gif index aba0898..33b6d13 100644 Binary files a/demo/agenteval_demo_ui.gif and b/samples/streamlit_app/demo.gif differ diff --git a/samples/streamlit_app/logo.png b/samples/streamlit_app/logo.png new file mode 100644 index 0000000..7866b95 Binary files /dev/null and b/samples/streamlit_app/logo.png differ diff --git "a/demo/pages/0_\342\234\217\357\270\217_Configure_test_plan.py" "b/samples/streamlit_app/pages/0_\342\234\217\357\270\217_Configure_test_plan.py" similarity index 100% rename from "demo/pages/0_\342\234\217\357\270\217_Configure_test_plan.py" rename to "samples/streamlit_app/pages/0_\342\234\217\357\270\217_Configure_test_plan.py" diff --git "a/demo/pages/1_\342\217\263_Run_test_plan.py" "b/samples/streamlit_app/pages/1_\342\217\263_Run_test_plan.py" similarity index 96% rename from "demo/pages/1_\342\217\263_Run_test_plan.py" rename to "samples/streamlit_app/pages/1_\342\217\263_Run_test_plan.py" index 718cf1a..139624e 100644 --- "a/demo/pages/1_\342\217\263_Run_test_plan.py" +++ "b/samples/streamlit_app/pages/1_\342\217\263_Run_test_plan.py" @@ -67,6 +67,8 @@ ) with open(os.path.join(result_dir, created_at, "agenteval_summary.md")) as f: result = f.read() + # Fix: patch the emoji rendering + result = result.replace('green_circle', 'large_green_circle') st.markdown(result, unsafe_allow_html=True) except Exception as e: st.error(f"Test failed: {e}") diff --git "a/demo/pages/2_\360\237\221\200_Manage_result.py" "b/samples/streamlit_app/pages/2_\360\237\221\200_Manage_result.py" similarity index 100% rename from "demo/pages/2_\360\237\221\200_Manage_result.py" rename to 
"samples/streamlit_app/pages/2_\360\237\221\200_Manage_result.py" diff --git a/samples/streamlit_app/requirements.txt b/samples/streamlit_app/requirements.txt new file mode 100644 index 0000000..9fd0f40 --- /dev/null +++ b/samples/streamlit_app/requirements.txt @@ -0,0 +1,83 @@ +agent-evaluation==0.2.0 +altair==5.4.1 +annotated-types==0.7.0 +attrs==24.2.0 +beautifulsoup4==4.12.3 +blinker==1.8.2 +boto3==1.35.35 +botocore==1.35.35 +cachetools==5.5.0 +certifi==2024.8.30 +charset-normalizer==3.3.2 +click==8.1.7 +contourpy==1.3.0 +cycler==0.12.1 +entrypoints==0.4 +Faker==30.3.0 +favicon==0.7.0 +fonttools==4.54.1 +gitdb==4.0.11 +GitPython==3.1.43 +htbuilder==0.6.2 +idna==3.10 +Jinja2==3.1.4 +jmespath==1.0.1 +jsonpath-ng==1.6.1 +jsonschema==4.23.0 +jsonschema-specifications==2024.10.1 +kiwisolver==1.4.7 +lxml==5.3.0 +Markdown==3.7 +markdown-it-py==3.0.0 +markdownlit==0.0.7 +MarkupSafe==3.0.0 +matplotlib==3.9.2 +mdurl==0.1.2 +more-itertools==10.5.0 +narwhals==1.9.1 +numpy==2.1.2 +packaging==24.1 +pandas==2.2.3 +pillow==10.4.0 +plotly==5.24.1 +ply==3.11 +prometheus_client==0.21.0 +protobuf==5.28.2 +pyarrow==17.0.0 +pydantic==2.9.2 +pydantic_core==2.23.4 +pydeck==0.9.1 +Pygments==2.18.0 +pymdown-extensions==10.11.2 +pyparsing==3.1.4 +python-dateutil==2.9.0.post0 +pytz==2024.2 +PyYAML==6.0.2 +referencing==0.35.1 +requests==2.32.3 +rich==13.9.2 +rpds-py==0.20.0 +s3transfer==0.10.2 +six==1.16.0 +smmap==5.0.1 +soupsieve==2.6 +SQLAlchemy==2.0.35 +st-annotated-text==4.0.1 +st-theme==1.2.3 +streamlit==1.39.0 +streamlit-camera-input-live==0.2.0 +streamlit-card==1.0.2 +streamlit-embedcode==0.1.2 +streamlit-extras==0.4.7 +streamlit-faker==0.0.3 +streamlit-image-coordinates==0.1.9 +streamlit-keyup==0.2.4 +streamlit-toggle-switch==1.0.2 +streamlit-vertical-slider==2.5.5 +tenacity==9.0.0 +toml==0.10.2 +tornado==6.4.1 +typing_extensions==4.12.2 +tzdata==2024.2 +urllib3==2.2.3 +validators==0.34.0 diff --git a/stepfunctions/stepfunctions/__init__.py b/samples/streamlit_app/utils/__init__.py 
similarity index 100% rename from stepfunctions/stepfunctions/__init__.py rename to samples/streamlit_app/utils/__init__.py diff --git a/demo/utils/db_handling.py b/samples/streamlit_app/utils/db_handling.py similarity index 100% rename from demo/utils/db_handling.py rename to samples/streamlit_app/utils/db_handling.py diff --git a/demo/utils/state_handling.py b/samples/streamlit_app/utils/state_handling.py similarity index 100% rename from demo/utils/state_handling.py rename to samples/streamlit_app/utils/state_handling.py diff --git a/sample-test-plans/bedrock-agent-target/template.yml b/samples/test_plan_templates/bedrock_agent_target/template.yml similarity index 100% rename from sample-test-plans/bedrock-agent-target/template.yml rename to samples/test_plan_templates/bedrock_agent_target/template.yml diff --git a/stepfunctions/.DS_Store b/stepfunctions/.DS_Store deleted file mode 100644 index 2e44791..0000000 Binary files a/stepfunctions/.DS_Store and /dev/null differ diff --git a/stepfunctions/layers/agent-evaluation/requirements.txt b/stepfunctions/layers/agent-evaluation/requirements.txt deleted file mode 100644 index 8885a87..0000000 --- a/stepfunctions/layers/agent-evaluation/requirements.txt +++ /dev/null @@ -1 +0,0 @@ -agent-evaluation \ No newline at end of file diff --git a/stepfunctions/layers/jinja2/requirements.txt b/stepfunctions/layers/jinja2/requirements.txt deleted file mode 100644 index 1c579e7..0000000 --- a/stepfunctions/layers/jinja2/requirements.txt +++ /dev/null @@ -1 +0,0 @@ -jinja2 \ No newline at end of file diff --git a/stepfunctions/layers/pydantic/requirements.txt b/stepfunctions/layers/pydantic/requirements.txt deleted file mode 100644 index 59cc1e9..0000000 --- a/stepfunctions/layers/pydantic/requirements.txt +++ /dev/null @@ -1 +0,0 @@ -pydantic \ No newline at end of file diff --git a/stepfunctions/layers/pyyaml/requirements.txt b/stepfunctions/layers/pyyaml/requirements.txt deleted file mode 100644 index 4818cc5..0000000 --- 
a/stepfunctions/layers/pyyaml/requirements.txt +++ /dev/null @@ -1 +0,0 @@ -pyyaml \ No newline at end of file diff --git a/stepfunctions/tests/__init__.py b/stepfunctions/tests/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/stepfunctions/tests/unit/__init__.py b/stepfunctions/tests/unit/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/stepfunctions/tests/unit/test_stepfunctions_stack.py b/stepfunctions/tests/unit/test_stepfunctions_stack.py deleted file mode 100644 index ce36343..0000000 --- a/stepfunctions/tests/unit/test_stepfunctions_stack.py +++ /dev/null @@ -1,6 +0,0 @@ -import aws_cdk as core -import aws_cdk.assertions as assertions - -from stepfunctions.stepfunctions_stack import StepfunctionsStack - -