diff --git a/command-snapshot.json b/command-snapshot.json index bf66e8c..8b8d093 100644 --- a/command-snapshot.json +++ b/command-snapshot.json @@ -7,6 +7,14 @@ "flags": ["api-version", "flags-dir", "job-spec", "json", "name", "target-org"], "plugin": "@salesforce/plugin-agent" }, + { + "alias": [], + "command": "agent:generate:definition", + "flagAliases": [], + "flagChars": [], + "flags": ["flags-dir"], + "plugin": "@salesforce/plugin-agent" + }, { "alias": [], "command": "agent:generate:spec", @@ -27,6 +35,14 @@ ], "plugin": "@salesforce/plugin-agent" }, + { + "alias": [], + "command": "agent:generate:testset", + "flagAliases": [], + "flagChars": [], + "flags": ["flags-dir"], + "plugin": "@salesforce/plugin-agent" + }, { "alias": [], "command": "agent:preview", diff --git a/messages/agent.generate.definition.md b/messages/agent.generate.definition.md new file mode 100644 index 0000000..e11696e --- /dev/null +++ b/messages/agent.generate.definition.md @@ -0,0 +1,13 @@ +# summary + +Interactively generate a new AiEvaluationDefinition. + +# description + +This command will prompt you for the necessary information to create a new AiEvaluationDefinition. The definition will be saved to the `aiEvaluationDefinitions` directory in the project. + +You must have the `Bots` and `AiEvaluationTestSets` metadata types present in your project to use this command. + +# examples + +- <%= config.bin %> <%= command.id %> diff --git a/messages/agent.generate.testset.md b/messages/agent.generate.testset.md new file mode 100644 index 0000000..6030bf1 --- /dev/null +++ b/messages/agent.generate.testset.md @@ -0,0 +1,11 @@ +# summary + +Interactively generate an AiEvaluationTestSet. + +# description + +Answer the prompts to generate an AiEvaluationTestSet that will be written to a file. You can then run "sf agent generate definition" to generate the AiEvaluationDefinition that can be used to evaluate the test set. 
+ +# examples + +- <%= config.bin %> <%= command.id %> diff --git a/package.json b/package.json index 758fcdf..10c9011 100644 --- a/package.json +++ b/package.json @@ -5,6 +5,7 @@ "author": "Salesforce", "bugs": "https://github.com/forcedotcom/cli/issues", "dependencies": { + "@inquirer/confirm": "^5.1.0", "@inquirer/figures": "^1.0.7", "@inquirer/input": "^4.0.1", "@inquirer/select": "^4.0.1", @@ -15,8 +16,8 @@ "@salesforce/kit": "^3.2.1", "@salesforce/sf-plugins-core": "^12.1.0", "ansis": "^3.3.2", - "ink-text-input": "^6.0.0", "ink": "^5.0.1", + "ink-text-input": "^6.0.0", "react": "^18.3.1" }, "devDependencies": { @@ -26,10 +27,10 @@ "@salesforce/dev-scripts": "^10.2.10", "@salesforce/plugin-command-reference": "^3.1.29", "@types/react": "^18.3.3", - "eslint-config-xo-react": "^0.27.0", "eslint-config-xo": "^0.45.0", - "eslint-plugin-react-hooks": "^4.6.2", + "eslint-config-xo-react": "^0.27.0", "eslint-plugin-react": "^7.34.3", + "eslint-plugin-react-hooks": "^4.6.2", "eslint-plugin-sf-plugin": "^1.20.9", "oclif": "^4.15.12", "ts-node": "^10.9.2", diff --git a/src/commands/agent/generate/definition.ts b/src/commands/agent/generate/definition.ts new file mode 100644 index 0000000..e9d8ae1 --- /dev/null +++ b/src/commands/agent/generate/definition.ts @@ -0,0 +1,88 @@ +/* + * Copyright (c) 2024, salesforce.com, inc. + * All rights reserved. + * Licensed under the BSD 3-Clause license. 
+ * For full license text, see LICENSE.txt file in the repo root or https://opensource.org/licenses/BSD-3-Clause
+ */
+import { basename, dirname, extname, join } from 'node:path';
+import { mkdir, readdir, writeFile } from 'node:fs/promises';
+import { SfCommand } from '@salesforce/sf-plugins-core';
+import { Messages, SfError } from '@salesforce/core';
+import select from '@inquirer/select';
+import input from '@inquirer/input';
+import { theme } from '../../../inquirer-theme.js';
+
+Messages.importMessagesDirectoryFromMetaUrl(import.meta.url);
+const messages = Messages.loadMessages('@salesforce/plugin-agent', 'agent.generate.definition');
+
+/**
+ * Lists the entries of a directory, treating a missing directory as empty.
+ *
+ * `readdir` rejects with ENOENT when the directory does not exist; mapping that to `[]` lets the
+ * caller raise its own "nothing found" error instead of surfacing a raw file-system error.
+ *
+ * @param dir directory to read
+ * @returns the entry names, or an empty array when `dir` does not exist
+ * @throws any `readdir` failure other than ENOENT
+ */
+async function listDirectory(dir: string): Promise<string[]> {
+  try {
+    return await readdir(dir);
+  } catch (error: unknown) {
+    if (error instanceof Error && 'code' in error && error.code === 'ENOENT') {
+      return [];
+    }
+    throw error;
+  }
+}
+
+/**
+ * Interactive command that prompts for a test set, a bot, a name, a description and a subject
+ * type, then writes the resulting definition to
+ * `force-app/main/default/aiEvaluationDefinitions/<name>.xml`.
+ */
+export default class AgentGenerateDefinition extends SfCommand<void> {
+  public static readonly summary = messages.getMessage('summary');
+  public static readonly description = messages.getMessage('description');
+  public static readonly examples = messages.getMessages('examples');
+  public static readonly enableJsonFlag = false;
+  public static readonly state = 'beta';
+
+  /**
+   * Runs the prompts and writes the definition file.
+   *
+   * @throws SfError `NoTestSetsFoundError` when the test set directory is missing or has no `.xml` files
+   * @throws SfError `NoBotsFoundError` when the bots directory is missing or empty
+   */
+  public async run(): Promise<void> {
+    const testSetDir = join('force-app', 'main', 'default', 'aiEvaluationTestSets');
+    // Only `.xml` entries are test sets; strip the extension from the end of the name rather than
+    // the first ".xml" found anywhere in it.
+    const testSets = (await listDirectory(testSetDir))
+      .filter((file) => extname(file) === '.xml')
+      .map((file) => basename(file, '.xml'));
+    if (testSets.length === 0) {
+      throw new SfError(`No test sets found in ${testSetDir}`, 'NoTestSetsFoundError', [
+        'Run the "sf agent generate testset" command to create a test set',
+      ]);
+    }
+
+    const botsDir = join('force-app', 'main', 'default', 'bots');
+    const bots = await listDirectory(botsDir);
+    if (bots.length === 0) {
+      throw new SfError(`No bots found in ${botsDir}`, 'NoBotsFoundError');
+    }
+
+    const testSet = await select({
+      message: 'Select the AiEvaluationTestSet to use',
+      choices: testSets,
+      theme,
+    });
+
+    const bot = await select({
+      message: 'Select the Bot to run the tests against',
+      choices: bots,
+      theme,
+    });
+
+    const name = await input({
+      message: 'Enter a name for the AiEvaluationDefinition',
+      validate: (i: string): string | boolean => (i.length > 0 ? true : 'Name cannot be empty'),
+      theme,
+    });
+
+    // The description is optional, so this prompt has no validator.
+    const description = await input({
+      message: 'Enter a description for the AiEvaluationDefinition',
+      theme,
+    });
+
+    const subjectType = await select({
+      message: 'Select the type for the AiEvaluationDefinition',
+      choices: ['AGENT'],
+      theme,
+    });
+
+    this.log(`Generating AiEvaluationDefinition for ${bot} using ${testSet} AiEvaluationTestSet`);
+
+    // NOTE(review): the element markup in this template looks stripped in this copy of the patch
+    // (only the interpolations remain) - restore it from the original file before applying.
+    const xml = `
+
+  ${description ? `${description}` : ''}
+  ${name}
+  ${subjectType}
+  ${bot}
+  ${testSet}
+`;
+
+    // remove all empty lines (e.g. the one left behind when no description was entered)
+    const cleanedXml = xml.replace(/^\s*[\r\n]/gm, '');
+
+    const definitionPath = join('force-app', 'main', 'default', 'aiEvaluationDefinitions', `${name}.xml`);
+    await mkdir(dirname(definitionPath), { recursive: true });
+    this.log(`Writing AiEvaluationDefinition to ${definitionPath}`);
+    await writeFile(definitionPath, cleanedXml);
+  }
+}
diff --git a/src/commands/agent/generate/spec.ts b/src/commands/agent/generate/spec.ts index 2467bda..df54d53 100644 --- a/src/commands/agent/generate/spec.ts +++ b/src/commands/agent/generate/spec.ts @@ -14,6 +14,7 @@ import select from '@inquirer/select'; import inquirerInput from '@inquirer/input'; import figures from '@inquirer/figures'; import { Agent, AgentCreateConfig, SfAgent } from '@salesforce/agents'; +import { theme } from '../../../inquirer-theme.js'; Messages.importMessagesDirectoryFromMetaUrl(import.meta.url); const messages = Messages.loadMessages('@salesforce/plugin-agent', 'agent.generate.spec'); @@ -206,18 +207,14 @@ export default class AgentCreateSpec extends SfCommand { return select({ choices: flagDef.options.map((o) => ({ name: o, value: o })), message, - theme: { - prefix: { idle: ansis.blueBright('?') }, - }, + theme, }); } return inquirerInput({ message, validate: flagDef.validate, - theme: { - prefix: { idle: ansis.blueBright('?') }, - }, + theme, }); } } diff --git a/src/commands/agent/generate/testset.ts b/src/commands/agent/generate/testset.ts new
file mode 100644
index 0000000..905f6dc
--- /dev/null
+++ b/src/commands/agent/generate/testset.ts
@@ -0,0 +1,162 @@
+/*
+ * Copyright (c) 2024, salesforce.com, inc.
+ * All rights reserved.
+ * Licensed under the BSD 3-Clause license.
+ * For full license text, see LICENSE.txt file in the repo root or https://opensource.org/licenses/BSD-3-Clause
+ */
+import { dirname, join } from 'node:path';
+import { mkdir, writeFile } from 'node:fs/promises';
+import { SfCommand } from '@salesforce/sf-plugins-core';
+import { Messages } from '@salesforce/core';
+import input from '@inquirer/input';
+import select from '@inquirer/select';
+import confirm from '@inquirer/confirm';
+import { theme } from '../../../inquirer-theme.js';
+
+Messages.importMessagesDirectoryFromMetaUrl(import.meta.url);
+const messages = Messages.loadMessages('@salesforce/plugin-agent', 'agent.generate.testset');
+
+/** Kinds of expectation that can be selected for a test case. */
+type ExpectationType = 'topic_sequence_match' | 'action_sequence_match' | 'bot_response_rating';
+
+/** Answers collected for one test case. */
+export type TestSetInputs = {
+  utterance: string;
+  expectationType: ExpectationType;
+  /** Raw prompt answer; a comma-separated list of actions when `expectationType` is `action_sequence_match`. */
+  expectedValue: string;
+};
+
+/**
+ * Escapes the characters that are markup-significant in XML text content.
+ *
+ * Quotes are left as-is: they are legal in element text, and the action list is emitted as a JSON array.
+ */
+function escapeXml(value: string): string {
+  return value.replace(/&/g, '&amp;').replace(/</g, '&lt;').replace(/>/g, '&gt;');
+}
+
+/**
+ * Prompts for a single test case: the utterance, the expectation type, and the expected value.
+ *
+ * @returns the three answers exactly as entered
+ */
+async function promptForTestCase(): Promise<TestSetInputs> {
+  const utterance = await input({
+    message: 'What utterance would you like to test?',
+    validate: (d: string): boolean | string => d.length > 0 || 'utterance cannot be empty',
+    theme,
+  });
+
+  const expectationType = await select<ExpectationType>({
+    message: 'What type of expectation would you like to test for the utterance?',
+    choices: ['topic_sequence_match', 'action_sequence_match', 'bot_response_rating'],
+    theme,
+  });
+
+  const expectedValue = await input({
+    message: 'What is the expected value for the expectation?',
+    validate: (d: string): boolean | string => {
+      if (!d.length) {
+        return 'expected value cannot be empty';
+      }
+
+      if (expectationType === 'action_sequence_match') {
+        return d.split(',').length > 1 || 'expected value must be a comma-separated list of actions';
+      }
+
+      return true;
+    },
+    theme,
+  });
+
+  return {
+    utterance,
+    expectationType,
+    expectedValue,
+  };
+}
+
+/**
+ * Serializes the collected test cases into the test set document.
+ *
+ * Test cases are numbered from 1 in array order. For `action_sequence_match` the comma-separated
+ * answer is emitted as a JSON array of trimmed, non-empty action names; other expectation types
+ * emit the answer unchanged. The utterance and expected value are XML-escaped.
+ *
+ * NOTE(review): the element markup in the templates below looks stripped in this copy of the
+ * patch (only the interpolations remain) - restore it from the original file before applying.
+ *
+ * @param testCases answers gathered by the prompts, in the order they should be numbered
+ * @returns the document text
+ */
+export function constructTestSetXML(testCases: TestSetInputs[]): string {
+  const tab = ' ';
+  let xml = `\n\n${tab}AGENT\n`;
+  testCases.forEach((testCase, i) => {
+    const expectedValue =
+      testCase.expectationType === 'action_sequence_match'
+        ? JSON.stringify(
+            testCase.expectedValue
+              .split(',')
+              .map((action) => action.trim())
+              .filter((action) => action.length > 0)
+          )
+        : testCase.expectedValue;
+    xml += `
+    ${i + 1}
+
+      ${escapeXml(testCase.utterance)}
+
+
+
+        ${testCase.expectationType}
+        ${escapeXml(expectedValue)}
+
+
+  \n`;
+  });
+  xml += '';
+  return xml;
+}
+
+/**
+ * Interactive command that collects a test set name and one or more test cases, then writes the
+ * generated document to `force-app/main/default/aiEvaluationTestSets/<name>.xml`.
+ */
+export default class AgentGenerateTestset extends SfCommand<void> {
+  public static readonly summary = messages.getMessage('summary');
+  public static readonly description = messages.getMessage('description');
+  public static readonly examples = messages.getMessages('examples');
+  public static readonly enableJsonFlag = false;
+  public static readonly state = 'beta';
+
+  public async run(): Promise<void> {
+    const testSetName = await input({
+      message: 'What is the name of the test set?',
+      // An empty name would be written as a file called ".xml".
+      validate: (d: string): boolean | string => d.trim().length > 0 || 'test set name cannot be empty',
+      theme,
+    });
+    const testCases: TestSetInputs[] = [];
+    do {
+      this.log();
+      this.styledHeader(`Adding test case #${testCases.length + 1}`);
+      // eslint-disable-next-line no-await-in-loop
+      testCases.push(await promptForTestCase());
+    } while (
+      // eslint-disable-next-line no-await-in-loop
+      await confirm({
+        message: 'Would you like to add another test case?',
+        default: true,
+      })
+    );
+
+    // Directory casing must match what `agent generate definition` reads (`aiEvaluationTestSets`).
+    const testSetPath = join('force-app', 'main', 'default', 'aiEvaluationTestSets', `${testSetName}.xml`);
+    await mkdir(dirname(testSetPath), { recursive: true });
+    this.log();
+    this.log(`Writing new AiEvaluationTestSet to ${testSetPath}`);
+    await writeFile(testSetPath, constructTestSetXML(testCases));
+  }
+}
diff --git a/src/inquirer-theme.ts b/src/inquirer-theme.ts new file mode 100644 index 0000000..ad29000 --- /dev/null +++ b/src/inquirer-theme.ts @@ -0,0 +1,11 @@ +/* + * Copyright (c) 2024, salesforce.com, inc. + * All rights reserved.
+ * Licensed under the BSD 3-Clause license. + * For full license text, see LICENSE.txt file in the repo root or https://opensource.org/licenses/BSD-3-Clause + */ +import ansis from 'ansis'; + +export const theme = { /* prompt theme imported by the agent generate commands in this patch */ + prefix: { idle: ansis.blueBright('?') }, /* bright-blue "?" as the `idle` prefix */ +}; diff --git a/test/commands/agent/generate/testset.test.ts b/test/commands/agent/generate/testset.test.ts new file mode 100644 index 0000000..7296a91 --- /dev/null +++ b/test/commands/agent/generate/testset.test.ts @@ -0,0 +1,73 @@ +/* + * Copyright (c) 2023, salesforce.com, inc. + * All rights reserved. + * Licensed under the BSD 3-Clause license. + * For full license text, see LICENSE.txt file in the repo root or https://opensource.org/licenses/BSD-3-Clause + */ +import { expect } from 'chai'; +import { type TestSetInputs, constructTestSetXML } from '../../../../src/commands/agent/generate/testset.js'; + +describe('constructTestSetXML', () => { /* one test case per expectation type, compared against the full expected document */ + it('should return a valid test set XML', () => { + const testCases = [ + { + utterance: 'hello', + expectationType: 'topic_sequence_match', + expectedValue: 'greeting', + }, + { + utterance: 'goodbye', + expectationType: 'action_sequence_match', + expectedValue: 'farewell,seeya', + }, + { + utterance: 'how are you', + expectationType: 'bot_response_rating', + expectedValue: '.5', + }, + ] satisfies TestSetInputs[]; /* type-checks the literals against TestSetInputs */ + + const xml = constructTestSetXML(testCases); + + /* NOTE(review): the expected document below appears to have lost its element markup and indentation in this copy of the patch - confirm against the original test file */ expect(xml).to.equal(` + + AGENT + + 1 + + hello + + + + topic_sequence_match + greeting + + + + + 2 + + goodbye + + + + action_sequence_match + ["farewell","seeya"] + + + + + 3 + + how are you + + + + bot_response_rating + .5 + + + +`); + }); +}); diff --git a/yarn.lock b/yarn.lock index c7a3099..167cb53 100644 --- a/yarn.lock +++ b/yarn.lock @@ -1061,6 +1061,14 @@ "@inquirer/core" "^9.1.0" "@inquirer/type" "^1.5.3" +"@inquirer/confirm@^5.1.0": + version "5.1.0" + resolved "https://registry.yarnpkg.com/@inquirer/confirm/-/confirm-5.1.0.tgz#061cd0790c8debe092353589a501211b0d6c53ef" + integrity
sha512-osaBbIMEqVFjTX5exoqPXs6PilWQdjaLhGtMDXMXg/yxkHXNq43GlxGyTA35lK2HpzUgDN+Cjh/2AmqCN0QJpw== + dependencies: + "@inquirer/core" "^10.1.1" + "@inquirer/type" "^3.0.1" + "@inquirer/core@^10.0.1": version "10.0.1" resolved "https://registry.yarnpkg.com/@inquirer/core/-/core-10.0.1.tgz#22068da87d8f6317452172dfd521e811ccbcb90e" @@ -1076,6 +1084,21 @@ wrap-ansi "^6.2.0" yoctocolors-cjs "^2.1.2" +"@inquirer/core@^10.1.1": + version "10.1.1" + resolved "https://registry.yarnpkg.com/@inquirer/core/-/core-10.1.1.tgz#801e82649fb64bcb2b5e4667397ff8c25bccebab" + integrity sha512-rmZVXy9iZvO3ZStEe/ayuuwIJ23LSF13aPMlLMTQARX6lGUBDHGV8UB5i9MRrfy0+mZwt5/9bdy8llszSD3NQA== + dependencies: + "@inquirer/figures" "^1.0.8" + "@inquirer/type" "^3.0.1" + ansi-escapes "^4.3.2" + cli-width "^4.1.0" + mute-stream "^2.0.0" + signal-exit "^4.1.0" + strip-ansi "^6.0.1" + wrap-ansi "^6.2.0" + yoctocolors-cjs "^2.1.2" + "@inquirer/core@^9.0.8", "@inquirer/core@^9.1.0": version "9.1.0" resolved "https://registry.yarnpkg.com/@inquirer/core/-/core-9.1.0.tgz#158b82dc44564a1abd0ce14723d50c3efa0634a2" @@ -1100,6 +1123,11 @@ resolved "https://registry.yarnpkg.com/@inquirer/figures/-/figures-1.0.7.tgz#d050ccc0eabfacc0248c4ff647a9dfba1b01594b" integrity sha512-m+Trk77mp54Zma6xLkLuY+mvanPxlE4A7yNKs2HBiyZ4UkVs28Mv5c/pgWrHeInx+USHeX/WEPzjrWrcJiQgjw== +"@inquirer/figures@^1.0.8": + version "1.0.8" + resolved "https://registry.yarnpkg.com/@inquirer/figures/-/figures-1.0.8.tgz#d9e414a1376a331a0e71b151fea27c48845788b0" + integrity sha512-tKd+jsmhq21AP1LhexC0pPwsCxEhGgAkg28byjJAd+xhmIs8LUX8JbUc3vBf3PhLxWiB5EvyBE5X7JSPAqMAqg== + "@inquirer/input@^2.2.4": version "2.2.7" resolved "https://registry.yarnpkg.com/@inquirer/input/-/input-2.2.7.tgz#87a922243a6c833ee5f1d4a6102c68b3cee9f19d" @@ -1159,6 +1187,11 @@ resolved "https://registry.yarnpkg.com/@inquirer/type/-/type-3.0.0.tgz#1762ebe667ec1d838012b20bf0cf90b841ba68bc" integrity 
sha512-YYykfbw/lefC7yKj7nanzQXILM7r3suIvyFlCcMskc99axmsSewXWkAfXKwMbgxL76iAFVmRwmYdwNZNc8gjog== +"@inquirer/type@^3.0.1": + version "3.0.1" + resolved "https://registry.yarnpkg.com/@inquirer/type/-/type-3.0.1.tgz#619ce9f65c3e114d8e39c41822bed3440d20b478" + integrity sha512-+ksJMIy92sOAiAccGpcKZUc3bYO07cADnscIxHBknEm3uNts3movSmBofc1908BNy5edKscxYeAdaX1NXkHS6A== + "@isaacs/cliui@^8.0.2": version "8.0.2" resolved "https://registry.yarnpkg.com/@isaacs/cliui/-/cliui-8.0.2.tgz#b37667b7bc181c168782259bab42474fbf52b550" @@ -7741,16 +7774,7 @@ stack-utils@^2.0.6: dependencies: escape-string-regexp "^2.0.0" -"string-width-cjs@npm:string-width@^4.2.0": - version "4.2.3" - resolved "https://registry.yarnpkg.com/string-width/-/string-width-4.2.3.tgz#269c7117d27b05ad2e536830a8ec895ef9c6d010" - integrity sha512-wKyQRQpjJ0sIp62ErSZdGsjMJWsap5oRNihHhu6G7JVO/9jIB6UyevL+tXuOqrng8j/cxKTWyWUwvSTriiZz/g== - dependencies: - emoji-regex "^8.0.0" - is-fullwidth-code-point "^3.0.0" - strip-ansi "^6.0.1" - -string-width@^4.0.0, string-width@^4.1.0, string-width@^4.2.0, string-width@^4.2.3: +"string-width-cjs@npm:string-width@^4.2.0", string-width@^4.0.0, string-width@^4.1.0, string-width@^4.2.0, string-width@^4.2.3: version "4.2.3" resolved "https://registry.yarnpkg.com/string-width/-/string-width-4.2.3.tgz#269c7117d27b05ad2e536830a8ec895ef9c6d010" integrity sha512-wKyQRQpjJ0sIp62ErSZdGsjMJWsap5oRNihHhu6G7JVO/9jIB6UyevL+tXuOqrng8j/cxKTWyWUwvSTriiZz/g== @@ -7872,14 +7896,7 @@ string_decoder@~1.1.1: dependencies: safe-buffer "~5.1.0" -"strip-ansi-cjs@npm:strip-ansi@^6.0.1": - version "6.0.1" - resolved "https://registry.yarnpkg.com/strip-ansi/-/strip-ansi-6.0.1.tgz#9e26c63d30f53443e9489495b2105d37b67a85d9" - integrity sha512-Y38VPSHcqkFrCpFnQ9vuSXmquuv5oXOKpGeT6aGrr3o3Gc9AlVa6JBfUSOCnbxGGZF+/0ooI7KrPuUSztUdU5A== - dependencies: - ansi-regex "^5.0.1" - -strip-ansi@6.0.1, strip-ansi@^6.0.0, strip-ansi@^6.0.1: +"strip-ansi-cjs@npm:strip-ansi@^6.0.1", strip-ansi@6.0.1, strip-ansi@^6.0.0, 
strip-ansi@^6.0.1: version "6.0.1" resolved "https://registry.yarnpkg.com/strip-ansi/-/strip-ansi-6.0.1.tgz#9e26c63d30f53443e9489495b2105d37b67a85d9" integrity sha512-Y38VPSHcqkFrCpFnQ9vuSXmquuv5oXOKpGeT6aGrr3o3Gc9AlVa6JBfUSOCnbxGGZF+/0ooI7KrPuUSztUdU5A== @@ -8546,7 +8563,7 @@ workerpool@^6.5.1: resolved "https://registry.yarnpkg.com/workerpool/-/workerpool-6.5.1.tgz#060f73b39d0caf97c6db64da004cd01b4c099544" integrity sha512-Fs4dNYcsdpYSAfVxhnl1L5zTksjvOJxtC5hzMNl+1t9B8hTJTdKDyZ5ju7ztgPy+ft9tBFXoOlDNiOT9WUXZlA== -"wrap-ansi-cjs@npm:wrap-ansi@^7.0.0": +"wrap-ansi-cjs@npm:wrap-ansi@^7.0.0", wrap-ansi@^7.0.0: version "7.0.0" resolved "https://registry.yarnpkg.com/wrap-ansi/-/wrap-ansi-7.0.0.tgz#67e145cff510a6a6984bdf1152911d69d2eb9e43" integrity sha512-YVGIj2kamLSTxw6NsZjoBxfSwsn0ycdesmc4p+Q21c5zPuZ1pl+NfxVdxPtdHvmNVOQ6XSYG4AUtyt/Fi7D16Q== @@ -8564,15 +8581,6 @@ wrap-ansi@^6.2.0: string-width "^4.1.0" strip-ansi "^6.0.0" -wrap-ansi@^7.0.0: - version "7.0.0" - resolved "https://registry.yarnpkg.com/wrap-ansi/-/wrap-ansi-7.0.0.tgz#67e145cff510a6a6984bdf1152911d69d2eb9e43" - integrity sha512-YVGIj2kamLSTxw6NsZjoBxfSwsn0ycdesmc4p+Q21c5zPuZ1pl+NfxVdxPtdHvmNVOQ6XSYG4AUtyt/Fi7D16Q== - dependencies: - ansi-styles "^4.0.0" - string-width "^4.1.0" - strip-ansi "^6.0.0" - wrap-ansi@^8.1.0: version "8.1.0" resolved "https://registry.yarnpkg.com/wrap-ansi/-/wrap-ansi-8.1.0.tgz#56dc22368ee570face1b49819975d9b9a5ead214"