Skip to content

Commit

Permalink
Merge pull request #42 from salesforcecli/mdonnalley/generate-tests
Browse files Browse the repository at this point in the history
Generate ai tests
  • Loading branch information
WillieRuemmele authored Dec 13, 2024
2 parents 6d24491 + aa86d11 commit a133e27
Show file tree
Hide file tree
Showing 10 changed files with 376 additions and 37 deletions.
16 changes: 16 additions & 0 deletions command-snapshot.json
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,14 @@
"flags": ["api-version", "flags-dir", "job-spec", "json", "name", "target-org"],
"plugin": "@salesforce/plugin-agent"
},
{
"alias": [],
"command": "agent:generate:definition",
"flagAliases": [],
"flagChars": [],
"flags": ["flags-dir"],
"plugin": "@salesforce/plugin-agent"
},
{
"alias": [],
"command": "agent:generate:spec",
Expand All @@ -27,6 +35,14 @@
],
"plugin": "@salesforce/plugin-agent"
},
{
"alias": [],
"command": "agent:generate:testset",
"flagAliases": [],
"flagChars": [],
"flags": ["flags-dir"],
"plugin": "@salesforce/plugin-agent"
},
{
"alias": [],
"command": "agent:preview",
Expand Down
13 changes: 13 additions & 0 deletions messages/agent.generate.definition.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# summary

Interactively generate a new AiEvaluationDefinition.

# description

This command prompts you for the information needed to create a new AiEvaluationDefinition. The definition is saved to the `force-app/main/default/aiEvaluationDefinitions` directory in the project.

Your project must already contain at least one bot in `force-app/main/default/bots` and at least one AiEvaluationTestSet in `force-app/main/default/aiEvaluationTestSets` to use this command.

# examples

- <%= config.bin %> <%= command.id %>
11 changes: 11 additions & 0 deletions messages/agent.generate.testset.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# summary

Interactively generate an AiEvaluationTestSet.

# description

Answer the prompts to generate an AiEvaluationTestSet that will be written to a file. You can then run "sf agent generate definition" to generate the AiEvaluationDefinition that can be used to evaluate the test set.

# examples

- <%= config.bin %> <%= command.id %>
7 changes: 4 additions & 3 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
"author": "Salesforce",
"bugs": "https://github.com/forcedotcom/cli/issues",
"dependencies": {
"@inquirer/confirm": "^5.1.0",
"@inquirer/figures": "^1.0.7",
"@inquirer/input": "^4.0.1",
"@inquirer/select": "^4.0.1",
Expand All @@ -15,8 +16,8 @@
"@salesforce/kit": "^3.2.1",
"@salesforce/sf-plugins-core": "^12.1.0",
"ansis": "^3.3.2",
"ink-text-input": "^6.0.0",
"ink": "^5.0.1",
"ink-text-input": "^6.0.0",
"react": "^18.3.1"
},
"devDependencies": {
Expand All @@ -26,10 +27,10 @@
"@salesforce/dev-scripts": "^10.2.10",
"@salesforce/plugin-command-reference": "^3.1.29",
"@types/react": "^18.3.3",
"eslint-config-xo-react": "^0.27.0",
"eslint-config-xo": "^0.45.0",
"eslint-plugin-react-hooks": "^4.6.2",
"eslint-config-xo-react": "^0.27.0",
"eslint-plugin-react": "^7.34.3",
"eslint-plugin-react-hooks": "^4.6.2",
"eslint-plugin-sf-plugin": "^1.20.9",
"oclif": "^4.15.12",
"ts-node": "^10.9.2",
Expand Down
88 changes: 88 additions & 0 deletions src/commands/agent/generate/definition.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
/*
* Copyright (c) 2024, salesforce.com, inc.
* All rights reserved.
* Licensed under the BSD 3-Clause license.
* For full license text, see LICENSE.txt file in the repo root or https://opensource.org/licenses/BSD-3-Clause
*/
import { dirname, join } from 'node:path';
import { mkdir, readdir, writeFile } from 'node:fs/promises';
import { SfCommand } from '@salesforce/sf-plugins-core';
import { Messages, SfError } from '@salesforce/core';
import select from '@inquirer/select';
import input from '@inquirer/input';
import { theme } from '../../../inquirer-theme.js';

Messages.importMessagesDirectoryFromMetaUrl(import.meta.url);
const messages = Messages.loadMessages('@salesforce/plugin-agent', 'agent.generate.definition');

/**
 * List the entries of `dir`, treating a directory that does not exist as empty.
 *
 * This lets callers report a friendly "nothing found" error instead of surfacing
 * a raw ENOENT from the file system. Any other error is rethrown unchanged.
 */
async function readDirIfExists(dir: string): Promise<string[]> {
  try {
    return await readdir(dir);
  } catch (error) {
    if (error instanceof Error && 'code' in error && error.code === 'ENOENT') {
      return [];
    }
    throw error;
  }
}

/** Escape the characters that are markup-significant inside XML element content. */
function escapeXml(value: string): string {
  return value.replace(/&/g, '&amp;').replace(/</g, '&lt;').replace(/>/g, '&gt;');
}

/**
 * Interactive command that writes an AiEvaluationDefinition XML file.
 *
 * The user picks an existing test set and bot from the project, then supplies a
 * name, an optional description and a subject type. The result is written to
 * `force-app/main/default/aiEvaluationDefinitions/<name>.xml`.
 */
export default class AgentGenerateDefinition extends SfCommand<void> {
  public static readonly summary = messages.getMessage('summary');
  public static readonly description = messages.getMessage('description');
  public static readonly examples = messages.getMessages('examples');
  public static readonly enableJsonFlag = false;
  public static readonly state = 'beta';

  /**
   * Prompt for the definition's inputs and write the XML file.
   *
   * @throws SfError `NoTestSetsFoundError` when the test set directory is missing or empty.
   * @throws SfError `NoBotsFoundError` when the bots directory is missing or empty.
   */
  public async run(): Promise<void> {
    const testSetDir = join('force-app', 'main', 'default', 'aiEvaluationTestSets');
    // Strip only the trailing extension; a plain string replace would remove the
    // first ".xml" found anywhere in the file name.
    const testSets = (await readDirIfExists(testSetDir)).map((testSet) => testSet.replace(/\.xml$/, ''));
    if (testSets.length === 0) {
      throw new SfError(`No test sets found in ${testSetDir}`, 'NoTestSetsFoundError', [
        'Run the "sf agent generate testset" command to create a test set',
      ]);
    }

    const botsDir = join('force-app', 'main', 'default', 'bots');
    const bots = await readDirIfExists(botsDir);
    if (bots.length === 0) {
      throw new SfError(`No bots found in ${botsDir}`, 'NoBotsFoundError');
    }

    const testSet = await select<string>({
      message: 'Select the AiEvaluationTestSet to use',
      choices: testSets,
      theme,
    });

    const bot = await select<string>({
      message: 'Select the Bot to run the tests against',
      choices: bots,
      theme,
    });

    const name = (
      await input({
        message: 'Enter a name for the AiEvaluationDefinition',
        // Reject whitespace-only names: the name becomes the output file name.
        validate: (i: string): string | boolean => (i.trim().length > 0 ? true : 'Name cannot be empty'),
        theme,
      })
    ).trim();

    const description = await input({
      message: 'Enter a description for the AiEvaluationDefinition',
      theme,
    });

    const subjectType = await select<string>({
      message: 'Select the type for the AiEvaluationDefinition',
      choices: ['AGENT'],
      theme,
    });

    this.log(`Generating AiEvaluationDefinition for ${bot} using ${testSet} AiEvaluationTestSet`);

    // Build the document line by line so the optional description can simply be
    // omitted, and escape every interpolated value so user input such as "&" or
    // "<" cannot produce malformed XML.
    const xml = [
      '<?xml version="1.0" encoding="UTF-8"?>',
      '<AiEvaluationDefinition xmlns="http://soap.sforce.com/2006/04/metadata">',
      ...(description ? [`    <description>${escapeXml(description)}</description>`] : []),
      `    <name>${escapeXml(name)}</name>`,
      `    <subjectType>${escapeXml(subjectType)}</subjectType>`,
      `    <subjectName>${escapeXml(bot)}</subjectName>`,
      `    <testSetName>${escapeXml(testSet)}</testSetName>`,
      '</AiEvaluationDefinition>',
    ].join('\n');

    const definitionPath = join('force-app', 'main', 'default', 'aiEvaluationDefinitions', `${name}.xml`);
    await mkdir(dirname(definitionPath), { recursive: true });
    this.log(`Writing AiEvaluationDefinition to ${definitionPath}`);
    await writeFile(definitionPath, xml);
  }
}
9 changes: 3 additions & 6 deletions src/commands/agent/generate/spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ import select from '@inquirer/select';
import inquirerInput from '@inquirer/input';
import figures from '@inquirer/figures';
import { Agent, AgentCreateConfig, SfAgent } from '@salesforce/agents';
import { theme } from '../../../inquirer-theme.js';

Messages.importMessagesDirectoryFromMetaUrl(import.meta.url);
const messages = Messages.loadMessages('@salesforce/plugin-agent', 'agent.generate.spec');
Expand Down Expand Up @@ -206,18 +207,14 @@ export default class AgentCreateSpec extends SfCommand<AgentCreateSpecResult> {
return select({
choices: flagDef.options.map((o) => ({ name: o, value: o })),
message,
theme: {
prefix: { idle: ansis.blueBright('?') },
},
theme,
});
}

return inquirerInput({
message,
validate: flagDef.validate,
theme: {
prefix: { idle: ansis.blueBright('?') },
},
theme,
});
}
}
121 changes: 121 additions & 0 deletions src/commands/agent/generate/testset.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
/*
* Copyright (c) 2024, salesforce.com, inc.
* All rights reserved.
* Licensed under the BSD 3-Clause license.
* For full license text, see LICENSE.txt file in the repo root or https://opensource.org/licenses/BSD-3-Clause
*/
import { dirname, join } from 'node:path';
import { mkdir, writeFile } from 'node:fs/promises';
import { SfCommand } from '@salesforce/sf-plugins-core';
import { Messages } from '@salesforce/core';
import input from '@inquirer/input';
import select from '@inquirer/select';
import confirm from '@inquirer/confirm';
import { theme } from '../../../inquirer-theme.js';

Messages.importMessagesDirectoryFromMetaUrl(import.meta.url);
const messages = Messages.loadMessages('@salesforce/plugin-agent', 'agent.generate.testset');

/** Expectation kinds offered by the prompt; the chosen value is written as the expectation's `<name>`. */
type ExpectationType = 'topic_sequence_match' | 'action_sequence_match' | 'bot_response_rating';

/** Answers collected for a single test case of an AiEvaluationTestSet. */
export type TestSetInputs = {
  /** The utterance to test; written to the test case's `<utterance>` element. */
  utterance: string;
  /** Which kind of expectation `expectedValue` describes. */
  expectationType: ExpectationType;
  /**
   * The expected value as typed by the user. For `action_sequence_match` this is a
   * comma-separated list that is converted to a quoted, bracketed list when the XML is built.
   */
  expectedValue: string;
};

/**
 * Interactively collect one test case: the utterance, the kind of expectation
 * and the expected value.
 *
 * Validation rejects blank (whitespace-only) answers. For `action_sequence_match`
 * the expected value must be a comma-separated list of more than one action, and
 * no entry in the list may be blank (so inputs such as "a," or "," are rejected).
 *
 * @returns the answers for a single test case.
 */
async function promptForTestCase(): Promise<TestSetInputs> {
  const utterance = await input({
    message: 'What utterance would you like to test?',
    validate: (d: string): boolean | string => d.trim().length > 0 || 'utterance cannot be empty',
    theme,
  });

  const expectationType = await select<ExpectationType>({
    message: 'What type of expectation would you like to test for the utterance?',
    choices: ['topic_sequence_match', 'action_sequence_match', 'bot_response_rating'],
    theme,
  });

  const expectedValue = await input({
    message: 'What is the expected value for the expectation?',
    validate: (d: string): boolean | string => {
      if (d.trim().length === 0) {
        return 'expected value cannot be empty';
      }

      if (expectationType === 'action_sequence_match') {
        const actions = d.split(',').map((action) => action.trim());
        // Splitting alone is not enough: "a," splits into two entries, one of them empty.
        const isList = actions.length > 1 && actions.every((action) => action.length > 0);
        return isList || 'expected value must be a comma-separated list of actions';
      }

      return true;
    },
    theme,
  });

  return {
    utterance,
    expectationType,
    expectedValue,
  };
}

/**
 * Build the AiEvaluationTestSet XML document for the given test cases.
 *
 * Test cases are numbered from 1 in array order. For `action_sequence_match`
 * the comma-separated expected value is converted to a bracketed list of quoted
 * action names (e.g. `a, b` becomes `["a","b"]`); surrounding whitespace is
 * trimmed and empty entries are dropped. All user-supplied text is XML-escaped
 * so characters such as `&` and `<` cannot produce a malformed document.
 *
 * @param testCases the test cases to serialize; may be empty.
 * @returns the XML document as a string, without a trailing newline.
 */
export function constructTestSetXML(testCases: TestSetInputs[]): string {
  const tab = '  ';
  const indent = (depth: number): string => tab.repeat(depth);

  // Escape the characters that are markup-significant inside XML element content.
  const escapeXml = (value: string): string =>
    value.replace(/&/g, '&amp;').replace(/</g, '&lt;').replace(/>/g, '&gt;');

  const lines: string[] = [
    '<?xml version="1.0" encoding="UTF-8"?>',
    '<AiEvaluationTestSet>',
    `${indent(1)}<subjectType>AGENT</subjectType>`,
  ];

  testCases.forEach((testCase, i) => {
    const expectedValue =
      testCase.expectationType === 'action_sequence_match'
        ? // JSON.stringify yields ["a","b"] and also handles quotes inside a name.
          JSON.stringify(
            testCase.expectedValue
              .split(',')
              .map((action) => action.trim())
              .filter((action) => action.length > 0)
          )
        : testCase.expectedValue;

    lines.push(
      `${indent(1)}<testCase>`,
      `${indent(2)}<number>${i + 1}</number>`,
      `${indent(2)}<inputs>`,
      `${indent(3)}<utterance>${escapeXml(testCase.utterance)}</utterance>`,
      `${indent(2)}</inputs>`,
      `${indent(2)}<expectations>`,
      `${indent(3)}<expectation>`,
      `${indent(4)}<name>${testCase.expectationType}</name>`,
      `${indent(4)}<expectedValue>${escapeXml(expectedValue)}</expectedValue>`,
      `${indent(3)}</expectation>`,
      `${indent(2)}</expectations>`,
      `${indent(1)}</testCase>`
    );
  });

  lines.push('</AiEvaluationTestSet>');
  return lines.join('\n');
}

/**
 * Interactive command that writes an AiEvaluationTestSet XML file.
 *
 * The user names the test set and then adds one or more test cases. The result
 * is written to `force-app/main/default/aiEvaluationTestSets/<name>.xml`, the
 * same directory the "agent generate definition" command reads test sets from.
 */
export default class AgentGenerateTestset extends SfCommand<void> {
  public static readonly summary = messages.getMessage('summary');
  public static readonly description = messages.getMessage('description');
  public static readonly examples = messages.getMessages('examples');
  public static readonly enableJsonFlag = false;
  public static readonly state = 'beta';

  /** Prompt for the test set name and its test cases, then write the XML file. */
  public async run(): Promise<void> {
    const testSetName = (
      await input({
        message: 'What is the name of the test set?',
        // The name becomes the file name, so a blank answer would produce ".xml".
        validate: (d: string): boolean | string => d.trim().length > 0 || 'test set name cannot be empty',
        theme,
      })
    ).trim();

    const testCases: TestSetInputs[] = [];
    // Always collect at least one test case, then keep going for as long as the user confirms.
    do {
      this.log();
      this.styledHeader(`Adding test case #${testCases.length + 1}`);
      // eslint-disable-next-line no-await-in-loop
      testCases.push(await promptForTestCase());
    } while (
      // eslint-disable-next-line no-await-in-loop
      await confirm({
        message: 'Would you like to add another test case?',
        default: true,
      })
    );

    // The directory name must match the casing the definition command reads
    // ("aiEvaluationTestSets"); otherwise generated test sets are not found on
    // case-sensitive file systems.
    const testSetPath = join('force-app', 'main', 'default', 'aiEvaluationTestSets', `${testSetName}.xml`);
    await mkdir(dirname(testSetPath), { recursive: true });
    this.log();
    this.log(`Writing new AiEvaluationTestSet to ${testSetPath}`);
    await writeFile(testSetPath, constructTestSetXML(testCases));
  }
}
11 changes: 11 additions & 0 deletions src/inquirer-theme.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
/*
* Copyright (c) 2024, salesforce.com, inc.
* All rights reserved.
* Licensed under the BSD 3-Clause license.
* For full license text, see LICENSE.txt file in the repo root or https://opensource.org/licenses/BSD-3-Clause
*/
import ansis from 'ansis';

// Bright-blue "?" used as the prompt prefix while a prompt is idle.
const idlePrefix = ansis.blueBright('?');

/** Inquirer theme shared by the interactive agent commands. */
export const theme = {
  prefix: { idle: idlePrefix },
};
Loading

0 comments on commit a133e27

Please sign in to comment.