-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #42 from salesforcecli/mdonnalley/generate-tests
Generate ai tests
- Loading branch information
Showing
10 changed files
with
376 additions
and
37 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
# summary | ||
|
||
Interactively generate a new AiEvaluationDefinition. | ||
|
||
# description | ||
|
||
This command will prompt you for the necessary information to create a new AiEvaluationDefinition. The definition will be saved to the `aiEvaluationDefinitions` directory in the project. | ||
|
||
You must have the `Bots` and `AiEvaluationTestSets` metadata types present in your project to use this command. | ||
|
||
# examples | ||
|
||
- <%= config.bin %> <%= command.id %> |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
# summary | ||
|
||
Interactively generate an AiEvaluationTestSet. | ||
|
||
# description | ||
|
||
Answer the prompts to generate an AiEvaluationTestSet that will be written to a file. You can then run "sf agent generate definition" to generate the AiEvaluationDefinition that can be used to evaluate the test set. | ||
|
||
# examples | ||
|
||
- <%= config.bin %> <%= command.id %> |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,88 @@ | ||
/* | ||
* Copyright (c) 2024, salesforce.com, inc. | ||
* All rights reserved. | ||
* Licensed under the BSD 3-Clause license. | ||
* For full license text, see LICENSE.txt file in the repo root or https://opensource.org/licenses/BSD-3-Clause | ||
*/ | ||
import { dirname, join } from 'node:path'; | ||
import { mkdir, readdir, writeFile } from 'node:fs/promises'; | ||
import { SfCommand } from '@salesforce/sf-plugins-core'; | ||
import { Messages, SfError } from '@salesforce/core'; | ||
import select from '@inquirer/select'; | ||
import input from '@inquirer/input'; | ||
import { theme } from '../../../inquirer-theme.js'; | ||
|
||
// Register this plugin's messages directory and load the bundle used by this command.
Messages.importMessagesDirectoryFromMetaUrl(import.meta.url);
const messages = Messages.loadMessages('@salesforce/plugin-agent', 'agent.generate.definition');

/**
 * Escape the characters that may not appear literally inside XML element text.
 *
 * @param value raw text entered by the user or read from disk
 * @returns the text with `&`, `<` and `>` replaced by their entity references
 */
function escapeXml(value: string): string {
  // `&` must be replaced first so the entities produced below are not escaped again.
  return value.replace(/&/g, '&amp;').replace(/</g, '&lt;').replace(/>/g, '&gt;');
}

/**
 * List the entries of a directory, treating a directory that does not exist as empty.
 *
 * @param dir directory to read
 * @returns the entry names, or an empty array when the directory is missing
 * @throws any `readdir` error other than ENOENT
 */
async function listDirectoryEntries(dir: string): Promise<string[]> {
  try {
    return await readdir(dir);
  } catch (error: unknown) {
    // A missing directory simply means "nothing generated yet"; the caller reports that case itself.
    if (error instanceof Error && 'code' in error && error.code === 'ENOENT') {
      return [];
    }
    throw error;
  }
}

/**
 * Interactive command that writes an AiEvaluationDefinition XML file.
 *
 * The user picks a test set (from `force-app/main/default/aiEvaluationTestSets`) and a bot
 * (from `force-app/main/default/bots`), then enters a name, an optional description and a
 * subject type. The result is written to
 * `force-app/main/default/aiEvaluationDefinitions/<name>.xml`.
 */
export default class AgentGenerateDefinition extends SfCommand<void> {
  public static readonly summary = messages.getMessage('summary');
  public static readonly description = messages.getMessage('description');
  public static readonly examples = messages.getMessages('examples');
  public static readonly enableJsonFlag = false;
  public static readonly state = 'beta';

  /**
   * Run the prompts and write the definition file.
   *
   * @throws SfError `NoTestSetsFoundError` when no test sets exist in the project
   * @throws SfError `NoBotsFoundError` when no bots exist in the project
   */
  public async run(): Promise<void> {
    const testSetDir = join('force-app', 'main', 'default', 'aiEvaluationTestSets');
    // Strip only a trailing ".xml" so names that merely contain ".xml" are left intact.
    const testSets = (await listDirectoryEntries(testSetDir)).map((testSet) => testSet.replace(/\.xml$/, ''));
    if (testSets.length === 0) {
      throw new SfError(`No test sets found in ${testSetDir}`, 'NoTestSetsFoundError', [
        'Run the "sf agent generate testset" command to create a test set',
      ]);
    }

    const botsDir = join('force-app', 'main', 'default', 'bots');
    const bots = await listDirectoryEntries(botsDir);
    if (bots.length === 0) {
      throw new SfError(`No bots found in ${botsDir}`, 'NoBotsFoundError');
    }

    const testSet = await select<string>({
      message: 'Select the AiEvaluationTestSet to use',
      choices: testSets,
      theme,
    });

    const bot = await select<string>({
      message: 'Select the Bot to run the tests against',
      choices: bots,
      theme,
    });

    const name = await input({
      message: 'Enter a name for the AiEvaluationDefinition',
      // The name becomes the file name, so whitespace-only input is rejected as well.
      validate: (i: string): string | boolean => (i.trim().length > 0 ? true : 'Name cannot be empty'),
      theme,
    });

    const description = await input({
      message: 'Enter a description for the AiEvaluationDefinition',
      theme,
    });

    const subjectType = await select<string>({
      message: 'Select the type for the AiEvaluationDefinition',
      choices: ['AGENT'],
      theme,
    });

    this.log(`Generating AiEvaluationDefinition for ${bot} using ${testSet} AiEvaluationTestSet`);

    // Every interpolated value is escaped so characters such as "&" or "<" cannot corrupt the document.
    const xml = `<?xml version="1.0" encoding="UTF-8"?>
<AiEvaluationDefinition xmlns="http://soap.sforce.com/2006/04/metadata">
 ${description ? `<description>${escapeXml(description)}</description>` : ''}
 <name>${escapeXml(name)}</name>
 <subjectType>${escapeXml(subjectType)}</subjectType>
 <subjectName>${escapeXml(bot)}</subjectName>
 <testSetName>${escapeXml(testSet)}</testSetName>
</AiEvaluationDefinition>`;

    // remove all empty lines (the description line is blank when no description was entered)
    const cleanedXml = xml.replace(/^\s*[\r\n]/gm, '');

    const definitionPath = join('force-app', 'main', 'default', 'aiEvaluationDefinitions', `${name}.xml`);
    await mkdir(dirname(definitionPath), { recursive: true });
    this.log(`Writing AiEvaluationDefinition to ${definitionPath}`);
    await writeFile(definitionPath, cleanedXml);
  }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,121 @@ | ||
/* | ||
* Copyright (c) 2024, salesforce.com, inc. | ||
* All rights reserved. | ||
* Licensed under the BSD 3-Clause license. | ||
* For full license text, see LICENSE.txt file in the repo root or https://opensource.org/licenses/BSD-3-Clause | ||
*/ | ||
import { dirname, join } from 'node:path'; | ||
import { mkdir, writeFile } from 'node:fs/promises'; | ||
import { SfCommand } from '@salesforce/sf-plugins-core'; | ||
import { Messages } from '@salesforce/core'; | ||
import input from '@inquirer/input'; | ||
import select from '@inquirer/select'; | ||
import confirm from '@inquirer/confirm'; | ||
import { theme } from '../../../inquirer-theme.js'; | ||
|
||
// Register this plugin's messages directory and load the bundle used by this command.
Messages.importMessagesDirectoryFromMetaUrl(import.meta.url);
const messages = Messages.loadMessages('@salesforce/plugin-agent', 'agent.generate.testset');

/** The expectation names a test case can be checked against. */
type ExpectationType = 'topic_sequence_match' | 'action_sequence_match' | 'bot_response_rating';

/** The answers collected for a single test case. */
export type TestSetInputs = {
  /** The utterance to test. */
  utterance: string;
  /** Which expectation is evaluated for the utterance. */
  expectationType: ExpectationType;
  /** Raw expected value as typed by the user; a comma-separated list for `action_sequence_match`. */
  expectedValue: string;
};

/**
 * Prompt the user for one test case: an utterance, an expectation type and the expected value.
 *
 * @returns the collected answers
 */
async function promptForTestCase(): Promise<TestSetInputs> {
  const utterance = await input({
    message: 'What utterance would you like to test?',
    // Whitespace-only input carries no utterance, so it is rejected like an empty string.
    validate: (d: string): boolean | string => d.trim().length > 0 || 'utterance cannot be empty',
    theme,
  });

  const expectationType = await select<ExpectationType>({
    message: 'What type of expectation would you like to test for the utterance?',
    choices: ['topic_sequence_match', 'action_sequence_match', 'bot_response_rating'],
    theme,
  });

  const expectedValue = await input({
    message: 'What is the expected value for the expectation?',
    validate: (d: string): boolean | string => {
      if (d.trim().length === 0) {
        return 'expected value cannot be empty';
      }

      if (expectationType === 'action_sequence_match') {
        // More than one entry is required, and every entry must name an action:
        // counting commas alone would accept inputs such as "a," or ",".
        const actions = d.split(',').map((action) => action.trim());
        const isValidList = actions.length > 1 && actions.every((action) => action.length > 0);
        return isValidList || 'expected value must be a comma-separated list of actions';
      }

      return true;
    },
    theme,
  });

  return {
    utterance,
    expectationType,
    expectedValue,
  };
}
|
||
/**
 * Escape the characters that may not appear literally inside XML element text.
 *
 * @param value raw text to place inside an element
 * @returns the text with `&`, `<` and `>` replaced by their entity references
 */
function escapeXmlText(value: string): string {
  // `&` must be replaced first so the entities produced below are not escaped again.
  return value.replace(/&/g, '&amp;').replace(/</g, '&lt;').replace(/>/g, '&gt;');
}

/**
 * Build the AiEvaluationTestSet XML document for the given test cases.
 *
 * Test cases are numbered from 1 in the order given. For `action_sequence_match` the
 * comma-separated expected value is emitted as a JSON-style array of strings
 * (e.g. `["a","b"]`); for the other expectation types it is emitted as entered.
 * The utterance and the expected value are XML-escaped.
 *
 * @param testCases the test cases to serialize
 * @returns the XML document as a string
 */
export function constructTestSetXML(testCases: TestSetInputs[]): string {
  const tab = ' ';
  let xml = `<?xml version="1.0" encoding="UTF-8"?>\n<AiEvaluationTestSet>\n${tab}<subjectType>AGENT</subjectType>\n`;
  testCases.forEach((testCase, i) => {
    // Trim each action and drop empty entries so "a, b" and "a,,b" do not produce
    // padded or empty action names; JSON.stringify also escapes quotes and backslashes.
    const expectedValue =
      testCase.expectationType === 'action_sequence_match'
        ? JSON.stringify(
            testCase.expectedValue
              .split(',')
              .map((v) => v.trim())
              .filter((v) => v.length > 0)
          )
        : testCase.expectedValue;
    xml += ` <testCase>
 <number>${i + 1}</number>
 <inputs>
 <utterance>${escapeXmlText(testCase.utterance)}</utterance>
 </inputs>
 <expectations>
 <expectation>
 <name>${testCase.expectationType}</name>
 <expectedValue>${escapeXmlText(expectedValue)}</expectedValue>
 </expectation>
 </expectations>
 </testCase>\n`;
  });
  xml += '</AiEvaluationTestSet>';
  return xml;
}
|
||
/**
 * Interactive command that writes an AiEvaluationTestSet XML file.
 *
 * The user names the test set and then enters one or more test cases. The result is written to
 * `force-app/main/default/aiEvaluationTestSets/<name>.xml`.
 */
export default class AgentGenerateTestset extends SfCommand<void> {
  public static readonly summary = messages.getMessage('summary');
  public static readonly description = messages.getMessage('description');
  public static readonly examples = messages.getMessages('examples');
  public static readonly enableJsonFlag = false;
  public static readonly state = 'beta';

  /** Prompt for the test set name and its test cases, then write the XML file. */
  public async run(): Promise<void> {
    const testSetName = await input({
      message: 'What is the name of the test set?',
      // The name becomes the file name, so an empty answer would produce a file called ".xml".
      validate: (d: string): boolean | string => d.trim().length > 0 || 'test set name cannot be empty',
    });
    const testCases: TestSetInputs[] = [];
    // At least one test case is always collected; the user is asked after each one whether to continue.
    do {
      this.log();
      this.styledHeader(`Adding test case #${testCases.length + 1}`);
      // eslint-disable-next-line no-await-in-loop
      testCases.push(await promptForTestCase());
    } while ( // eslint-disable-next-line no-await-in-loop
      await confirm({
        message: 'Would you like to add another test case?',
        default: true,
      })
    );

    // Must match the directory read by "sf agent generate definition" (`aiEvaluationTestSets`);
    // a different casing is a different directory on case-sensitive file systems.
    const testSetPath = join('force-app', 'main', 'default', 'aiEvaluationTestSets', `${testSetName}.xml`);
    await mkdir(dirname(testSetPath), { recursive: true });
    this.log();
    this.log(`Writing new AiEvaluationTestSet to ${testSetPath}`);
    await writeFile(testSetPath, constructTestSetXML(testCases));
  }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
/* | ||
* Copyright (c) 2024, salesforce.com, inc. | ||
* All rights reserved. | ||
* Licensed under the BSD 3-Clause license. | ||
* For full license text, see LICENSE.txt file in the repo root or https://opensource.org/licenses/BSD-3-Clause | ||
*/ | ||
import ansis from 'ansis'; | ||
|
||
// Prompt prefix displayed while a prompt is idle: a bright-blue question mark.
const idlePrefix = ansis.blueBright('?');

/** Shared theme object passed to the inquirer prompts. */
export const theme = {
  prefix: { idle: idlePrefix },
};
Oops, something went wrong.