Merge pull request #42 from salesforcecli/mdonnalley/generate-tests

Generate ai tests
salesforcecli · Dec 13, 2024 · a133e27 · a133e27
2 parents 6d24491 + aa86d11
commit a133e27
Show file tree

Hide file tree

Showing 10 changed files with 376 additions and 37 deletions.
diff --git a/command-snapshot.json b/command-snapshot.json
@@ -7,6 +7,14 @@
     "flags": ["api-version", "flags-dir", "job-spec", "json", "name", "target-org"],
     "plugin": "@salesforce/plugin-agent"
   },
+  {
+    "alias": [],
+    "command": "agent:generate:definition",
+    "flagAliases": [],
+    "flagChars": [],
+    "flags": ["flags-dir"],
+    "plugin": "@salesforce/plugin-agent"
+  },
   {
     "alias": [],
     "command": "agent:generate:spec",
@@ -27,6 +35,14 @@
     ],
     "plugin": "@salesforce/plugin-agent"
   },
+  {
+    "alias": [],
+    "command": "agent:generate:testset",
+    "flagAliases": [],
+    "flagChars": [],
+    "flags": ["flags-dir"],
+    "plugin": "@salesforce/plugin-agent"
+  },
   {
     "alias": [],
     "command": "agent:preview",

diff --git a/messages/agent.generate.definition.md b/messages/agent.generate.definition.md
@@ -0,0 +1,13 @@
+# summary
+
+Interactively generate a new AiEvaluationDefinition.
+
+# description
+
+This command will prompt you for the necessary information to create a new AiEvaluationDefinition. The definition will be saved to the `aiEvaluationDefinitions` directory in the project.
+
+Your project must already contain at least one Bot and one AiEvaluationTestSet, in the `bots` and `aiEvaluationTestSets` directories under `force-app/main/default`, to use this command.
+
+# examples
+
+- <%= config.bin %> <%= command.id %>
diff --git a/messages/agent.generate.testset.md b/messages/agent.generate.testset.md
@@ -0,0 +1,11 @@
+# summary
+
+Interactively generate an AiEvaluationTestSet.
+
+# description
+
+Answer the prompts to generate an AiEvaluationTestSet that will be written to a file. You can then run "sf agent generate definition" to generate the AiEvaluationDefinition that can be used to evaluate the test set.
+
+# examples
+
+- <%= config.bin %> <%= command.id %>
diff --git a/package.json b/package.json
@@ -5,6 +5,7 @@
   "author": "Salesforce",
   "bugs": "https://github.com/forcedotcom/cli/issues",
   "dependencies": {
+    "@inquirer/confirm": "^5.1.0",
     "@inquirer/figures": "^1.0.7",
     "@inquirer/input": "^4.0.1",
     "@inquirer/select": "^4.0.1",
@@ -15,8 +16,8 @@
     "@salesforce/kit": "^3.2.1",
     "@salesforce/sf-plugins-core": "^12.1.0",
     "ansis": "^3.3.2",
-    "ink-text-input": "^6.0.0",
     "ink": "^5.0.1",
+    "ink-text-input": "^6.0.0",
     "react": "^18.3.1"
   },
   "devDependencies": {
@@ -26,10 +27,10 @@
     "@salesforce/dev-scripts": "^10.2.10",
     "@salesforce/plugin-command-reference": "^3.1.29",
     "@types/react": "^18.3.3",
-    "eslint-config-xo-react": "^0.27.0",
     "eslint-config-xo": "^0.45.0",
-    "eslint-plugin-react-hooks": "^4.6.2",
+    "eslint-config-xo-react": "^0.27.0",
     "eslint-plugin-react": "^7.34.3",
+    "eslint-plugin-react-hooks": "^4.6.2",
     "eslint-plugin-sf-plugin": "^1.20.9",
     "oclif": "^4.15.12",
     "ts-node": "^10.9.2",

diff --git a/src/commands/agent/generate/definition.ts b/src/commands/agent/generate/definition.ts
@@ -0,0 +1,88 @@
+/*
+ * Copyright (c) 2024, salesforce.com, inc.
+ * All rights reserved.
+ * Licensed under the BSD 3-Clause license.
+ * For full license text, see LICENSE.txt file in the repo root or https://opensource.org/licenses/BSD-3-Clause
+ */
+import { dirname, join } from 'node:path';
+import { mkdir, readdir, writeFile } from 'node:fs/promises';
+import { SfCommand } from '@salesforce/sf-plugins-core';
+import { Messages, SfError } from '@salesforce/core';
+import select from '@inquirer/select';
+import input from '@inquirer/input';
+import { theme } from '../../../inquirer-theme.js';
+
+Messages.importMessagesDirectoryFromMetaUrl(import.meta.url);
+const messages = Messages.loadMessages('@salesforce/plugin-agent', 'agent.generate.definition');
+
+/**
+ * Escape the characters that may not appear literally in XML element text.
+ *
+ * @param value raw text typed by the user or read from a file name
+ * @returns the text with `&`, `<` and `>` replaced by their XML entities
+ */
+function escapeXmlText(value: string): string {
+  // `&` must be replaced first so the entities produced below are not escaped again.
+  return value.replace(/&/g, '&amp;').replace(/</g, '&lt;').replace(/>/g, '&gt;');
+}
+
+/**
+ * List the entries of a directory, treating a directory that does not exist as empty.
+ *
+ * Without this, a missing directory surfaces as a raw ENOENT error and the
+ * caller's actionable "nothing found" error is never reached.
+ *
+ * @param dir directory to read
+ * @returns the entry names, or an empty array when the directory is missing
+ * @throws any error from `readdir` other than ENOENT
+ */
+async function listEntries(dir: string): Promise<string[]> {
+  try {
+    return await readdir(dir);
+  } catch (error: unknown) {
+    if ((error as NodeJS.ErrnoException).code === 'ENOENT') {
+      return [];
+    }
+    throw error;
+  }
+}
+
+/**
+ * Interactive command that writes an AiEvaluationDefinition XML file.
+ *
+ * It reads the available test sets and bots from the project, prompts for the
+ * remaining values, and writes `<name>.xml` into the `aiEvaluationDefinitions`
+ * directory under `force-app/main/default`.
+ */
+export default class AgentGenerateDefinition extends SfCommand<void> {
+  public static readonly summary = messages.getMessage('summary');
+  public static readonly description = messages.getMessage('description');
+  public static readonly examples = messages.getMessages('examples');
+  public static readonly enableJsonFlag = false;
+  public static readonly state = 'beta';
+
+  /**
+   * Prompt for the definition values and write the resulting XML file.
+   *
+   * @throws SfError `NoTestSetsFoundError` when no test set files are found
+   * @throws SfError `NoBotsFoundError` when the bots directory is missing or empty
+   */
+  public async run(): Promise<void> {
+    const xmlSuffix = '.xml';
+    const testSetDir = join('force-app', 'main', 'default', 'aiEvaluationTestSets');
+    // Only offer real test set files (skips stray entries such as .DS_Store) and
+    // strip the extension from the end of the name rather than its first occurrence.
+    const testSets = (await listEntries(testSetDir))
+      .filter((entry) => entry.endsWith(xmlSuffix))
+      .map((entry) => entry.slice(0, -xmlSuffix.length));
+    if (testSets.length === 0) {
+      throw new SfError(`No test sets found in ${testSetDir}`, 'NoTestSetsFoundError', [
+        'Run the "sf agent generate testset" command to create a test set',
+      ]);
+    }
+
+    const botsDir = join('force-app', 'main', 'default', 'bots');
+    const bots = await listEntries(botsDir);
+    if (bots.length === 0) {
+      throw new SfError(`No bots found in ${botsDir}`, 'NoBotsFoundError');
+    }
+
+    const testSet = await select<string>({
+      message: 'Select the AiEvaluationTestSet to use',
+      choices: testSets,
+      theme,
+    });
+
+    const bot = await select<string>({
+      message: 'Select the Bot to run the tests against',
+      choices: bots,
+      theme,
+    });
+
+    const name = await input({
+      message: 'Enter a name for the AiEvaluationDefinition',
+      validate: (i: string): string | boolean => (i.length > 0 ? true : 'Name cannot be empty'),
+      theme,
+    });
+
+    const description = await input({
+      message: 'Enter a description for the AiEvaluationDefinition',
+      theme,
+    });
+
+    const subjectType = await select<string>({
+      message: 'Select the type for the AiEvaluationDefinition',
+      choices: ['AGENT'],
+      theme,
+    });
+
+    this.log(`Generating AiEvaluationDefinition for ${bot} using ${testSet} AiEvaluationTestSet`);
+
+    // Build the document line by line so the optional <description> element can be
+    // omitted without leaving an empty line behind. All values are escaped because
+    // they come from free-form prompts or file names.
+    const xml = [
+      '<?xml version="1.0" encoding="UTF-8"?>',
+      '<AiEvaluationDefinition xmlns="http://soap.sforce.com/2006/04/metadata">',
+      ...(description ? [`    <description>${escapeXmlText(description)}</description>`] : []),
+      `    <name>${escapeXmlText(name)}</name>`,
+      `    <subjectType>${escapeXmlText(subjectType)}</subjectType>`,
+      `    <subjectName>${escapeXmlText(bot)}</subjectName>`,
+      `    <testSetName>${escapeXmlText(testSet)}</testSetName>`,
+      '</AiEvaluationDefinition>',
+    ].join('\n');
+
+    const definitionPath = join('force-app', 'main', 'default', 'aiEvaluationDefinitions', `${name}.xml`);
+    await mkdir(dirname(definitionPath), { recursive: true });
+    this.log(`Writing AiEvaluationDefinition to ${definitionPath}`);
+    await writeFile(definitionPath, xml);
+  }
+}
diff --git a/src/commands/agent/generate/spec.ts b/src/commands/agent/generate/spec.ts
@@ -14,6 +14,7 @@ import select from '@inquirer/select';
 import inquirerInput from '@inquirer/input';
 import figures from '@inquirer/figures';
 import { Agent, AgentCreateConfig, SfAgent } from '@salesforce/agents';
+import { theme } from '../../../inquirer-theme.js';
 
 Messages.importMessagesDirectoryFromMetaUrl(import.meta.url);
 const messages = Messages.loadMessages('@salesforce/plugin-agent', 'agent.generate.spec');
@@ -206,18 +207,14 @@ export default class AgentCreateSpec extends SfCommand<AgentCreateSpecResult> {
       return select({
         choices: flagDef.options.map((o) => ({ name: o, value: o })),
         message,
-        theme: {
-          prefix: { idle: ansis.blueBright('?') },
-        },
+        theme,
       });
     }
 
     return inquirerInput({
       message,
       validate: flagDef.validate,
-      theme: {
-        prefix: { idle: ansis.blueBright('?') },
-      },
+      theme,
     });
   }
 }
diff --git a/src/commands/agent/generate/testset.ts b/src/commands/agent/generate/testset.ts
@@ -0,0 +1,121 @@
+/*
+ * Copyright (c) 2024, salesforce.com, inc.
+ * All rights reserved.
+ * Licensed under the BSD 3-Clause license.
+ * For full license text, see LICENSE.txt file in the repo root or https://opensource.org/licenses/BSD-3-Clause
+ */
+import { dirname, join } from 'node:path';
+import { mkdir, writeFile } from 'node:fs/promises';
+import { SfCommand } from '@salesforce/sf-plugins-core';
+import { Messages } from '@salesforce/core';
+import input from '@inquirer/input';
+import select from '@inquirer/select';
+import confirm from '@inquirer/confirm';
+import { theme } from '../../../inquirer-theme.js';
+
+Messages.importMessagesDirectoryFromMetaUrl(import.meta.url);
+const messages = Messages.loadMessages('@salesforce/plugin-agent', 'agent.generate.testset');
+
+/** Names of the expectation checks that can be attached to a test case. */
+type ExpectationType = 'topic_sequence_match' | 'action_sequence_match' | 'bot_response_rating';
+
+/** Answers collected from the prompts for a single test case. */
+export type TestSetInputs = {
+  /** The utterance to test. */
+  utterance: string;
+  /** Which expectation is evaluated for the utterance. */
+  expectationType: ExpectationType;
+  /** Expected value as typed at the prompt; a comma-separated list for `action_sequence_match`. */
+  expectedValue: string;
+};
+
+/**
+ * Prompt for the three values that make up one test case.
+ *
+ * The expected value is validated against the chosen expectation type: for
+ * `action_sequence_match` it must be a comma-separated list with more than one
+ * entry and no blank entries.
+ *
+ * @returns the utterance, expectation type and expected value that were entered
+ */
+async function promptForTestCase(): Promise<TestSetInputs> {
+  const utterance = await input({
+    message: 'What utterance would you like to test?',
+    validate: (d: string): boolean | string => d.length > 0 || 'utterance cannot be empty',
+    theme,
+  });
+
+  const expectationType = await select<ExpectationType>({
+    message: 'What type of expectation would you like to test for the utterance?',
+    choices: ['topic_sequence_match', 'action_sequence_match', 'bot_response_rating'],
+    theme,
+  });
+
+  const expectedValue = await input({
+    message: 'What is the expected value for the expectation?',
+    validate: (d: string): boolean | string => {
+      if (!d.length) {
+        return 'expected value cannot be empty';
+      }
+
+      if (expectationType === 'action_sequence_match') {
+        const actions = d.split(',');
+        // Reject blank entries ("a," or ",b"): they would otherwise be written
+        // to the test set as empty action names.
+        const isList = actions.length > 1 && actions.every((action) => action.trim().length > 0);
+        return isList || 'expected value must be a comma-separated list of actions';
+      }
+
+      return true;
+    },
+    theme,
+  });
+
+  return {
+    utterance,
+    expectationType,
+    expectedValue,
+  };
+}
+
+/**
+ * Render a list of test cases as an AiEvaluationTestSet XML document.
+ *
+ * Test cases are numbered from 1 in the order given. For
+ * `action_sequence_match` the comma-separated expected value is written as a
+ * JSON array of trimmed action names; other expectation types are written
+ * as entered. `&`, `<` and `>` in user-supplied text are escaped so the output
+ * stays well-formed XML.
+ *
+ * @param testCases the test cases to include, in output order
+ * @returns the XML document, without a trailing newline
+ */
+export function constructTestSetXML(testCases: TestSetInputs[]): string {
+  // `&` is replaced first so the entities produced afterwards are not escaped again.
+  const escapeXmlText = (value: string): string =>
+    value.replace(/&/g, '&amp;').replace(/</g, '&lt;').replace(/>/g, '&gt;');
+
+  const renderedCases = testCases.map((testCase, i) => {
+    // JSON.stringify yields ["a","b"] for plain names and also escapes any
+    // quotes or backslashes inside an action name.
+    const expectedValue =
+      testCase.expectationType === 'action_sequence_match'
+        ? JSON.stringify(testCase.expectedValue.split(',').map((action) => action.trim()))
+        : testCase.expectedValue;
+
+    return [
+      '  <testCase>',
+      `    <number>${i + 1}</number>`,
+      '    <inputs>',
+      `      <utterance>${escapeXmlText(testCase.utterance)}</utterance>`,
+      '    </inputs>',
+      '    <expectations>',
+      '      <expectation>',
+      `        <name>${testCase.expectationType}</name>`,
+      `        <expectedValue>${escapeXmlText(expectedValue)}</expectedValue>`,
+      '      </expectation>',
+      '    </expectations>',
+      '  </testCase>',
+    ].join('\n');
+  });
+
+  return [
+    '<?xml version="1.0" encoding="UTF-8"?>',
+    '<AiEvaluationTestSet>',
+    '  <subjectType>AGENT</subjectType>',
+    ...renderedCases,
+    '</AiEvaluationTestSet>',
+  ].join('\n');
+}
+
+/**
+ * Interactive command that writes an AiEvaluationTestSet XML file.
+ *
+ * It prompts for a test set name and one or more test cases, then writes
+ * `<name>.xml` into the `aiEvaluationTestSets` directory under
+ * `force-app/main/default`.
+ */
+export default class AgentGenerateTestset extends SfCommand<void> {
+  public static readonly summary = messages.getMessage('summary');
+  public static readonly description = messages.getMessage('description');
+  public static readonly examples = messages.getMessages('examples');
+  public static readonly enableJsonFlag = false;
+  public static readonly state = 'beta';
+
+  /**
+   * Collect the test set name and test cases, then write the XML file.
+   */
+  public async run(): Promise<void> {
+    const testSetName = await input({
+      message: 'What is the name of the test set?',
+      // An empty name would produce a file called just ".xml".
+      validate: (d: string): boolean | string => d.length > 0 || 'test set name cannot be empty',
+      theme,
+    });
+    const testCases: TestSetInputs[] = [];
+    // Always collect at least one test case, then keep going while the user confirms.
+    do {
+      this.log();
+      this.styledHeader(`Adding test case #${testCases.length + 1}`);
+      // eslint-disable-next-line no-await-in-loop
+      testCases.push(await promptForTestCase());
+    } while (
+      // eslint-disable-next-line no-await-in-loop
+      await confirm({
+        message: 'Would you like to add another test case?',
+        default: true,
+      })
+    );
+
+    // The directory name must match the one "agent generate definition" reads
+    // (`aiEvaluationTestSets`), including case, or the generated test set is
+    // not found on case-sensitive filesystems.
+    const testSetPath = join('force-app', 'main', 'default', 'aiEvaluationTestSets', `${testSetName}.xml`);
+    await mkdir(dirname(testSetPath), { recursive: true });
+    this.log();
+    this.log(`Writing new AiEvaluationTestSet to ${testSetPath}`);
+    await writeFile(testSetPath, constructTestSetXML(testCases));
+  }
+}
diff --git a/src/inquirer-theme.ts b/src/inquirer-theme.ts
@@ -0,0 +1,11 @@
+/*
+ * Copyright (c) 2024, salesforce.com, inc.
+ * All rights reserved.
+ * Licensed under the BSD 3-Clause license.
+ * For full license text, see LICENSE.txt file in the repo root or https://opensource.org/licenses/BSD-3-Clause
+ */
+import ansis from 'ansis';
+
+/**
+ * Shared theme object passed to the inquirer prompts in this plugin.
+ * It sets the prefix shown while a prompt is idle to a bright-blue `?`.
+ */
+export const theme = {
+  prefix: { idle: ansis.blueBright('?') },
+};