awslabs · sharonxiaohanli · Oct 8, 2024 · Oct 8, 2024 · Oct 8, 2024 · Oct 8, 2024
diff --git a/.DS_Store b/.DS_Store
diff --git a/.github/workflows/cicd-demo.yml b/.github/workflows/cicd-demo.yml
@@ -44,7 +44,7 @@ jobs:
         BEDROCK_AGENT_ALIAS_ID: ${{ vars.BEDROCK_AGENT_ALIAS_ID }}
         BEDROCK_AGENT_ID: ${{ vars.BEDROCK_AGENT_ID }}
       run: |
-        sed -e "s/BEDROCK_AGENT_ALIAS_ID/$BEDROCK_AGENT_ALIAS_ID/g" -e "s/BEDROCK_AGENT_ID/$BEDROCK_AGENT_ID/g" sample-test-plans/bedrock-agent-target/template.yml > agenteval.yml
+        sed -e "s/BEDROCK_AGENT_ALIAS_ID/$BEDROCK_AGENT_ALIAS_ID/g" -e "s/BEDROCK_AGENT_ID/$BEDROCK_AGENT_ID/g" samples/test_plan_templates/bedrock_agent_target/template.yml > agenteval.yml
         agenteval run
 
     - name: Test Summary

diff --git a/demo/.DS_Store b/demo/.DS_Store
diff --git a/demo/requirements.txt b/demo/requirements.txt
diff --git a/requirements-dev.txt b/requirements-dev.txt
@@ -9,4 +9,4 @@ mkdocs-material
 mkdocstrings[python]
 mkdocs-click
 bandit
-pip-audit
+pip-audit
diff --git a/requirements.txt b/requirements.txt
@@ -5,6 +5,3 @@ pydantic>=2.1.0,<3.0
 rich>=13.7.0,<14.0
 jinja2>=3.1.3,<4.0
 jsonpath-ng>=1.6.1,<2.0
-pathlib
-aws-cdk-lib==2.155.0
-constructs>=10.0.0,<11.0.0
diff --git a/stepfunctions/.gitignore → .../aws_step_functions_deployment/.gitignore b/stepfunctions/.gitignore → .../aws_step_functions_deployment/.gitignore
@@ -9,4 +9,4 @@ __pycache__
 # CDK asset staging directory
 .cdk.staging
 cdk.out
-.DS_Store
+.DS_Store
diff --git a/stepfunctions/README.md → ...s/aws_step_functions_deployment/README.md b/stepfunctions/README.md → ...s/aws_step_functions_deployment/README.md
@@ -1,12 +1,13 @@
-# Bedrock Agent Evaluation Framework
+# Bedrock Agent Evaluation Step Functions Deployment
 
 This project implements an automated evaluation framework for Amazon Bedrock Agents using AWS CDK, Step Functions, and Lambda.
 
 ## Overview
 
 The framework automates the process of updating Bedrock Agents with new prompts, creating aliases, running evaluation scenarios, and cleaning up resources. It uses AWS Step Functions to orchestrate the workflow and AWS Lambda functions to perform individual tasks.
 
-The example provided is for an energy chatbot usecase
+The example provided is for an energy chatbot use case. We have provided three versions of the **agent instruction** as `prompts` in the [example](example_prompt_jsons/prompts_scenarios.json). For each version, the framework automatically updates the agent, creates a new agent alias, and tests the different scenarios.
+
 
 ## Components
 
@@ -21,13 +22,14 @@ The example provided is for an energy chatbot usecase
    - `delete_alias`: Removes the temporary alias after evaluation.
 
 3. **Step Functions State Machine**: Orchestrates the evaluation workflow, including agent updates, status checks, and scenario execution.
+
+   ![workflow](graph_view.png)
 
 4. **S3 Bucket**: Stores evaluation prompts and results.
 
 5. **EventBridge Rule**: Triggers the Step Functions workflow when new evaluation prompts are uploaded to S3.
 
 ## Workflow
-
 1. New evaluation prompts are uploaded to the S3 bucket.
 2. The EventBridge rule triggers the Step Functions state machine.
 3. The state machine updates the Bedrock Agent with new instructions.
@@ -41,80 +43,33 @@ The example provided is for an energy chatbot usecase
 1. Ensure you have the AWS CDK installed and configured.
 2. Install project dependencies:
    ```
-   npm install
+   cd samples/aws_step_functions_deployment
+   python3 -m venv .venv   
+   source .venv/bin/activate
+   pip install -r requirements.txt
+   ```
+3. Run CDK synth:
    ```
-3. Deploy the stack:
+   cdk synth
+   ```
+4. Deploy the stack:
    ```
    cdk deploy
    ```
 
 ## Usage
-
 To run an evaluation:
+1. Create a Bedrock Agent (you don't need to configure it yet; just create it).
+2. Prepare an evaluation JSON file with prompts and customer profiles, following the [example](example_prompt_jsons/prompts_scenarios.json) (replace the agent ID and name in the file with those of the agent you created).
+3. Upload the file to the S3 bucket `stepfunctionsstack-evaluationbucket` in the `evaluation_prompts/` prefix.
+4. The evaluation process will start automatically.
+5. Results will be available in the S3 bucket under the `results/` prefix.
+
 
-1. Prepare an evaluation JSON file with prompts and customer profiles.
-2. Upload the file to the S3 bucket in the `evaluation_prompts/` prefix.
-3. The evaluation process will start automatically.
-4. Results will be available in the S3 bucket under the `results/` prefix.
+![demo](demo.gif)
 
 ## Notes
 
 - Ensure proper IAM permissions are set up for accessing Bedrock, S3, and other AWS services.
 - The `agenteval` library is assumed to be provided as a custom Lambda layer.
 
-
-# CDK instructions
-
-The `cdk.json` file tells the CDK Toolkit how to execute your app.
-
-This project is set up like a standard Python project.  The initialization
-process also creates a virtualenv within this project, stored under the `.venv`
-directory.  To create the virtualenv it assumes that there is a `python3`
-(or `python` for Windows) executable in your path with access to the `venv`
-package. If for any reason the automatic creation of the virtualenv fails,
-you can create the virtualenv manually.
-
-To manually create a virtualenv on MacOS and Linux:
-
-```
-$ python3 -m venv .venv
-```
-
-After the init process completes and the virtualenv is created, you can use the following
-step to activate your virtualenv.
-
-```
-$ source .venv/bin/activate
-```
-
-If you are a Windows platform, you would activate the virtualenv like this:
-
-```
-% .venv\Scripts\activate.bat
-```
-
-Once the virtualenv is activated, you can install the required dependencies.
-
-```
-$ pip install -r requirements.txt
-```
-
-At this point you can now synthesize the CloudFormation template for this code.
-
-```
-$ cdk synth
-```
-
-To add additional dependencies, for example other CDK libraries, just add
-them to your `setup.py` file and rerun the `pip install -r requirements.txt`
-command.
-
-## Useful commands
-
- * `cdk ls`          list all stacks in the app
- * `cdk synth`       emits the synthesized CloudFormation template
- * `cdk deploy`      deploy this stack to your default AWS account/region
- * `cdk diff`        compare deployed stack with current state
- * `cdk docs`        open CDK documentation
-
-Enjoy!
diff --git a/stepfunctions/app.py → samples/aws_step_functions_deployment/app.py b/stepfunctions/app.py → samples/aws_step_functions_deployment/app.py
diff --git a/stepfunctions/cdk.json → ...es/aws_step_functions_deployment/cdk.json b/stepfunctions/cdk.json → ...es/aws_step_functions_deployment/cdk.json
diff --git a/samples/aws_step_functions_deployment/demo.gif b/samples/aws_step_functions_deployment/demo.gif
diff --git a/...ample_prompt_jsons/prompts_scenarios.json → ...ample_prompt_jsons/prompts_scenarios.json b/...ample_prompt_jsons/prompts_scenarios.json → ...ample_prompt_jsons/prompts_scenarios.json
@@ -1,5 +1,5 @@
-{   "agent_id" : "ABCDEFGHIJ", 
-    "agent_name": "agent_name",   
+{   "agent_id" : "WQKSOXFRHJ", 
+    "agent_name": "agent-quick-start-2ofav",   
     "prompts": [
         {
             "id":"1",

diff --git a/samples/aws_step_functions_deployment/graph_view.png b/samples/aws_step_functions_deployment/graph_view.png
diff --git a/samples/aws_step_functions_deployment/layers/agent-evaluation/requirements.txt b/samples/aws_step_functions_deployment/layers/agent-evaluation/requirements.txt
@@ -0,0 +1 @@
+agent-evaluation==0.2.0
diff --git a/...rs/aws-lambda-powertools/requirements.txt → ...rs/aws-lambda-powertools/requirements.txt b/...rs/aws-lambda-powertools/requirements.txt → ...rs/aws-lambda-powertools/requirements.txt
diff --git a/stepfunctions/requirements-dev.txt → ...functions_deployment/requirements-dev.txt b/stepfunctions/requirements-dev.txt → ...functions_deployment/requirements-dev.txt
diff --git a/stepfunctions/requirements.txt → ...tep_functions_deployment/requirements.txt b/stepfunctions/requirements.txt → ...tep_functions_deployment/requirements.txt
@@ -1,3 +1,3 @@
 pathlib
-aws-cdk-lib==2.155.0
 constructs>=10.0.0,<11.0.0
+aws-cdk-lib==2.155.0
diff --git a/stepfunctions/source.bat → .../aws_step_functions_deployment/source.bat b/stepfunctions/source.bat → .../aws_step_functions_deployment/source.bat
diff --git a/demo/utils/__init__.py → ...ions_deployment/stepfunctions/__init__.py b/demo/utils/__init__.py → ...ions_deployment/stepfunctions/__init__.py
diff --git a/...s/functions/check_agent_status_1/index.py → ...s/functions/check_agent_status_1/index.py b/...s/functions/check_agent_status_1/index.py → ...s/functions/check_agent_status_1/index.py
@@ -1,6 +1,4 @@
 import boto3
-import json
-import os
 
 s3_client = boto3.client('s3')
 bedrock_agent = boto3.client('bedrock-agent')
@@ -11,23 +9,20 @@
 def handler(event, context):
 
     agent_id = event["agent_id"]
-
-    logger.info("Getting agent status")
+    logger.info(f"Getting agent status for agent: {agent_id}")
     try:
         response = bedrock_agent.get_agent(
-        agentId=agent_id
+            agentId=agent_id
         )
         agent_status = response["agent"]["agentStatus"]
         logger.info(f"Agent status: {agent_status}")
+        return {
+            'statusCode': 200,
+            'agent_id': agent_id,
+            'agent_status': agent_status
+        }
     except Exception as e:
-        logger.error(f"Error getting agent status: {e}")
-
-    agent_status = response["agent"]["agentStatus"]
-
-
-    return {
-        'statusCode': 200,
-        'agent_id': agent_id,
-        'agent_status': agent_status
-    }
-
+        return {
+            'statusCode': 500,
+            'error': f"Erorr getting agent: {e}"
+        } 
diff --git a/...s/functions/check_agent_status_2/index.py → ...s/functions/check_agent_status_2/index.py b/...s/functions/check_agent_status_2/index.py → ...s/functions/check_agent_status_2/index.py
@@ -11,22 +11,20 @@
 def handler(event, context):
 
     agent_id = event["update_output"]["agentid"]
-
-    logger.info("Getting agent status")
+    logger.info(f"Getting agent status for agent: {agent_id}")
     try:
         response = bedrock_agent.get_agent(
-        agentId=agent_id
+            agentId=agent_id
         )
         agent_status = response["agent"]["agentStatus"]
         logger.info(f"Agent status: {agent_status}")
-
+        return {
+            'statusCode': 200,
+            'agent_id': agent_id,
+            'agent_status': agent_status
+        }
     except Exception as e:
-        logger.error(f"Erorr getting agent: {e}")
-
-    agent_status = response["agent"]["agentStatus"]
-
-    return {
-        'statusCode': 200,
-        'agent_id': agent_id,
-        'agent_status': agent_status
-    }
+        return {
+            'statusCode': 500,
+            'error': f"Erorr getting agent: {e}"
+        } 
diff --git a/...functions/functions/create_alias/index.py → ...functions/functions/create_alias/index.py b/...functions/functions/create_alias/index.py → ...functions/functions/create_alias/index.py
@@ -22,6 +22,10 @@ def handler(event, context):
 
     except Exception as e:
         logger.error(f"Error creating alias: {e}")
+        return {
+            'statusCode': 500,
+            'body': json.dumps('Error creating alias')
+        }
 
 
     agent_id = alias_resp["agentAlias"]["agentId"]

diff --git a/...functions/functions/delete_alias/index.py → ...functions/functions/delete_alias/index.py b/...functions/functions/delete_alias/index.py → ...functions/functions/delete_alias/index.py
@@ -1,7 +1,4 @@
-import json
 import boto3
-import uuid
-import os
 from aws_lambda_powertools import Logger
 
 logger = Logger()
@@ -16,13 +13,17 @@ def handler(event, context):
     logger.info("Deleting Agent Alias")
     try:
         response = bedrock_agent.delete_agent_alias(
-        agentAliasId=agent_alias_id,
-        agentId=agent_id
+            agentAliasId=agent_alias_id,
+            agentId=agent_id
         )
         logger.info(f"Delete alias response: {response}")
 
     except Exception as e:
         logger.error(f"Error deleting agent alias : {e}")
+        return {
+            'statusCode': 500,
+            'error': f"Erorr deleting agent alias: {e}"
+        }
 
     return {
         'statusCode': 200,

diff --git a/...functions/functions/generate_map/index.py → ...functions/functions/generate_map/index.py b/...functions/functions/generate_map/index.py → ...functions/functions/generate_map/index.py
@@ -20,6 +20,10 @@ def handler(event, context):
         logger.info(text)
     except Exception as e:
         logger.error(f"Error getting object: {e}")
+        return {
+            'statusCode': 500, 
+            'body': 'Error fetching scenarios'
+        }
 
 
     prompts = text['prompts']