From 05da890bdf564dff5baf232b36ad62244ef2b38b Mon Sep 17 00:00:00 2001 From: Rinat Zaripov Date: Sun, 21 Jan 2024 19:18:29 +1100 Subject: [PATCH] feat: evaluate refactor outcomes using LLM to make decision of whether file edit should be accepted or discarded (#14) --- .changeset/witty-singers-sort.md | 56 ++ .gitignore | 6 +- .refactor-bot/benchmarks/test-benchmark.yaml | 22 + .../refactors/test-likely-failure/goal.md | 4 +- README.md | 25 +- package.json | 1 + .../refactor-bot/src/benchmark/benchmark.ts | 102 +++ .../src/benchmark/benchmarkConfig.ts | 61 ++ packages/refactor-bot/src/benchmark/cli.ts | 80 ++ .../refactor-bot/src/benchmark/cliHandler.ts | 12 + .../src/benchmark/example-config.yaml | 22 + .../src/benchmark/loadBenchmarkConfig.ts | 23 + .../src/benchmark/loadRefactorResult.ts | 36 + .../src/benchmark/refactorResultSchema.ts | 36 + .../src/benchmark/reportBenchmarkSummary.ts | 68 ++ .../refactor-bot/src/benchmark/runVariant.ts | 204 +++++ .../src/benchmark/summarizeCosts.ts | 40 + .../src/benchmark/summarizeOutcomeScore.ts | 56 ++ .../src/benchmark/summarizePerformance.ts | 21 + .../src/benchmark/summarizeRefactorResult.ts | 49 ++ packages/refactor-bot/src/bootstrap.ts | 10 +- packages/refactor-bot/src/cache/cache.ts | 50 +- packages/refactor-bot/src/cache/log.ts | 9 +- .../src/cache/makeCachedFunction.ts | 2 + .../src/cache/makeCachedObservable.test.ts | 8 +- .../src/cache/makeCachedObservable.ts | 21 +- .../refactor-bot/src/cache/persistence.ts | 14 +- packages/refactor-bot/src/cache/state.ts | 65 +- packages/refactor-bot/src/chat-gpt/api.ts | 10 +- .../src/child-process/spawnResult.ts | 5 + .../src/evaluate/evaluateFileChanges.ts | 215 ++++++ .../src/evaluate/evaluateFileScore.ts | 85 +++ .../src/evaluate/evaluateRefactorResult.ts | 267 +++++++ .../src/evaluate/evaluateUnchangedFile.ts | 181 +++++ .../src/evaluate/extractRequirements.ts | 115 +++ .../refactor-bot/src/evaluate/utils/avg.ts | 5 + .../refactor-bot/src/evaluate/utils/sum.ts | 3 + .../src/git/gitShowFileCommitSummary.ts | 41 + .../refactor-bot/src/logger/asyncFormatter.ts | 2 +- .../refactor-bot/src/logger/formatters.ts | 29 + .../src/package-manager/runCheckCommand.ts | 25 +- packages/refactor-bot/src/playground.ts | 69 +- .../src/prompt-formatters/formatBulletList.ts | 17 + .../formatFencedCodeBlock.ts | 26 + .../prompt-formatters/formatFileContents.ts | 42 ++ .../src/prompt-formatters/formatFileDiff.ts | 8 + .../src/prompt-formatters/formatOptional.ts | 12 + .../src/prompt-formatters/formatZodError.ts | 19 + packages/refactor-bot/src/prompt/cli.ts | 6 + packages/refactor-bot/src/prompt/run.ts | 3 +- .../src/refactor/actions/acceptedEdit.ts | 4 +- .../src/refactor/actions/discardedEdit.ts | 4 +- packages/refactor-bot/src/refactor/check.ts | 7 +- .../src/refactor/checkoutSandbox.ts | 5 - packages/refactor-bot/src/refactor/cli.ts | 21 +- .../src/refactor/collectLlmUsage.ts | 145 ++++ .../src/refactor/determineModelParameters.ts | 32 +- .../refactor-bot/src/refactor/filesToEdit.ts | 110 +++ .../src/refactor/findRefactorStateLocation.ts | 44 ++ .../src/refactor/planAndRefactor.ts | 26 +- packages/refactor-bot/src/refactor/prompt.ts | 133 +++- .../refactor-bot/src/refactor/refactor.ts | 88 +-- .../src/refactor/refactorBatch.ts | 98 +-- .../refactor-bot/src/refactor/refactorFile.ts | 710 ++++++++++-------- .../refactor-bot/src/refactor/refactorGoal.ts | 1 + .../src/refactor/resetToLastAcceptedCommit.ts | 9 +- .../src/refactor/resultsCollector.ts | 195 +++-- .../src/refactor/retrieveParameters.ts | 155 +--- .../refactor-bot/src/refactor/runRefactor.ts | 168 +++-- packages/refactor-bot/src/refactor/types.ts | 240 +++--- .../parseFencedCodeBlocks.test.ts | 142 ++++ .../response-parsers/parseFencedCodeBlocks.ts | 60 ++ .../parseJsonResponse.test.ts | 58 ++ .../src/response-parsers/parseJsonResponse.ts | 52 ++ packages/refactor-bot/src/utils/hasOne.ts | 6 +- .../src/utils/isFileNotFoundError.ts | 5 + .../refactor-bot/src/utils/parseJsonSchema.ts | 22 + packages/refactor-bot/tsconfig.json | 2 +- 78 files changed, 3872 insertions(+), 958 deletions(-) create mode 100644 .changeset/witty-singers-sort.md create mode 100644 .refactor-bot/benchmarks/test-benchmark.yaml create mode 100644 packages/refactor-bot/src/benchmark/benchmark.ts create mode 100644 packages/refactor-bot/src/benchmark/benchmarkConfig.ts create mode 100644 packages/refactor-bot/src/benchmark/cli.ts create mode 100644 packages/refactor-bot/src/benchmark/cliHandler.ts create mode 100644 packages/refactor-bot/src/benchmark/example-config.yaml create mode 100644 packages/refactor-bot/src/benchmark/loadBenchmarkConfig.ts create mode 100644 packages/refactor-bot/src/benchmark/loadRefactorResult.ts create mode 100644 packages/refactor-bot/src/benchmark/refactorResultSchema.ts create mode 100644 packages/refactor-bot/src/benchmark/reportBenchmarkSummary.ts create mode 100644 packages/refactor-bot/src/benchmark/runVariant.ts create mode 100644 packages/refactor-bot/src/benchmark/summarizeCosts.ts create mode 100644 packages/refactor-bot/src/benchmark/summarizeOutcomeScore.ts create mode 100644 packages/refactor-bot/src/benchmark/summarizePerformance.ts create mode 100644 packages/refactor-bot/src/benchmark/summarizeRefactorResult.ts create mode 100644 packages/refactor-bot/src/evaluate/evaluateFileChanges.ts create mode 100644 packages/refactor-bot/src/evaluate/evaluateFileScore.ts create mode 100644 packages/refactor-bot/src/evaluate/evaluateRefactorResult.ts create mode 100644 packages/refactor-bot/src/evaluate/evaluateUnchangedFile.ts create mode 100644 packages/refactor-bot/src/evaluate/extractRequirements.ts create mode 100644 packages/refactor-bot/src/evaluate/utils/avg.ts create mode 100644 packages/refactor-bot/src/evaluate/utils/sum.ts create mode 100644 packages/refactor-bot/src/git/gitShowFileCommitSummary.ts create mode 100644 packages/refactor-bot/src/prompt-formatters/formatBulletList.ts create mode 100644 packages/refactor-bot/src/prompt-formatters/formatFencedCodeBlock.ts create mode 100644 packages/refactor-bot/src/prompt-formatters/formatFileContents.ts create mode 100644 packages/refactor-bot/src/prompt-formatters/formatFileDiff.ts create mode 100644 packages/refactor-bot/src/prompt-formatters/formatOptional.ts create mode 100644 packages/refactor-bot/src/prompt-formatters/formatZodError.ts create mode 100644 packages/refactor-bot/src/refactor/collectLlmUsage.ts create mode 100644 packages/refactor-bot/src/refactor/filesToEdit.ts create mode 100644 packages/refactor-bot/src/refactor/findRefactorStateLocation.ts create mode 100644 packages/refactor-bot/src/response-parsers/parseFencedCodeBlocks.test.ts create mode 100644 packages/refactor-bot/src/response-parsers/parseFencedCodeBlocks.ts create mode 100644 packages/refactor-bot/src/response-parsers/parseJsonResponse.test.ts create mode 100644 packages/refactor-bot/src/response-parsers/parseJsonResponse.ts create mode 100644 packages/refactor-bot/src/utils/isFileNotFoundError.ts create mode 100644 packages/refactor-bot/src/utils/parseJsonSchema.ts diff --git a/.changeset/witty-singers-sort.md b/.changeset/witty-singers-sort.md new file mode 100644 index 0000000..f58b37a --- /dev/null +++ b/.changeset/witty-singers-sort.md @@ -0,0 +1,56 @@ +--- +'refactor-bot': patch +--- + +feat: evaluate refactor outcomes using LLM to make decision of whether file edit +should be accepted or discarded + +This is a big change which adds extra steps to the refactor process. Every time +an LLM produces a file edit - we will pass that edit through evaluation +algorithm to asses whether it should be accepted or discarded. Previously, this +logic was only affected by the existence or absence of eslint errors. This will +make the final result higher quality and more reliable. + +The new behavior can be disabled by setting `evaluate: false` in the `goal.md` +file. + +In addition to that, this change also adds a new CLI command for internal use +which allows us to compare results of multiple refactor runs. This is useful for +benchmarking purposes. + +To run the benchmark, use the following command: + +```sh +pnpm benchmark --config .refactor-bot/benchmarks/test-benchmark.yaml +``` + +Where the config: + +```yaml +refactorConfig: + name: test-refactoring + ref: 8f1a3da55caeee3df75853042e57978c45513f18 + budgetCents: 100 + model: gpt-4-1106-preview + objective: + Replace all usages of `readFile` from `fs/promises` module with + `readFileSync` from `fs` module in + `packages/refactor-bot/src/refactor/planTasks.ts`, + `packages/refactor-bot/src/refactor/loadRefactors.ts` and + `packages/refactor-bot/src/refactor/discoverDependencies.ts`. + +numberOfRuns: 2 + +variants: + - name: 'A' + ids: # ids of refactor runs to save mooney on + - VRixXEwC + - k0FmgQjU + - IpSOtP7d + - xqydSrSU + - name: 'B' +``` + +This will run multiple refactor runs and compare the results. At this moment no +statistical analysis is performed as I'm not convinced we can reach statistical +significance with the number of runs that also doesn't make you poor. diff --git a/.gitignore b/.gitignore index c270442..aed0928 100644 --- a/.gitignore +++ b/.gitignore @@ -2,7 +2,9 @@ node_modules .tsc-out src/test.ts -.refactor-bot/refactors/*/state .DS_Store -.refactor-bot/prompts/playground.md dist +.refactor-bot/benchmarks/state +.refactor-bot/refactors/*/state +.refactor-bot/prompts/playground.md +.refactor-bot/playground-cache \ No newline at end of file diff --git a/.refactor-bot/benchmarks/test-benchmark.yaml b/.refactor-bot/benchmarks/test-benchmark.yaml new file mode 100644 index 0000000..1faa309 --- /dev/null +++ b/.refactor-bot/benchmarks/test-benchmark.yaml @@ -0,0 +1,22 @@ +refactorConfig: + name: test-refactoring + ref: 8f1a3da55caeee3df75853042e57978c45513f18 + budgetCents: 100 + model: gpt-4-1106-preview + objective: + Replace all usages of `readFile` from `fs/promises` module with + `readFileSync` from `fs` module in + `packages/refactor-bot/src/refactor/planTasks.ts`, + `packages/refactor-bot/src/refactor/loadRefactors.ts` and + `packages/refactor-bot/src/refactor/discoverDependencies.ts`. + +numberOfRuns: 2 + +variants: + - name: 'A' + ids: + - VRixXEwC + - k0FmgQjU + - IpSOtP7d + - xqydSrSU + - name: 'B' diff --git a/.refactor-bot/refactors/test-likely-failure/goal.md b/.refactor-bot/refactors/test-likely-failure/goal.md index a8a3041..3c4d5fd 100644 --- a/.refactor-bot/refactors/test-likely-failure/goal.md +++ b/.refactor-bot/refactors/test-likely-failure/goal.md @@ -2,8 +2,8 @@ # This is to test a likely failure during refactor ref: 8f1a3da55caeee3df75853042e57978c45513f18 budgetCents: 100 -model: gpt-3.5-turbo +model: gpt-3.5-turbo-1106 ``` Replace all usages of `readFile` from `fs/promises` module with `readFileSync` -from `fs` module in `packages/refactor-bot/src/pipeline/dependencies.ts`. +from `fs` module in `packages/refactor-bot/src/cache/dependencies.ts`. diff --git a/README.md b/README.md index 2fc8643..64e9666 100644 --- a/README.md +++ b/README.md @@ -80,20 +80,15 @@ Options: Performs a refactoring using Plan and Execute technique Options: - --help Show help [boolean] - --version Show version number [boolean] - --name Name of the refactoring to run [string] - --id Unique id of the refactoring that was previously run but d - idn't finish to start from last successful point [string] - --save-to-cache Whether to enable saving results to the cache, by default - it's enabled [boolean] [default: true] - --enable-cache-for Enable cache for specific steps only, can be useful if we - want to disable cache for all other steps and replay them - [array] - --costs Whether to print the total costs of OpenAI requests, by de - fault it's disabled [boolean] [default: false] - --performance Whether to print performance metrics, by default it's disa - bled [boolean] [default: false] + --help Show help [boolean] + --version Show version number [boolean] + --name Name of the refactoring to run [string] + --id Unique id of the refactoring that was previously run but didn't + finish - use this to start from last successful step [string] + --costs Whether to print the total costs of OpenAI requests, by default + it's disabled [boolean] [default: false] + --performance Whether to print performance metrics, by default it's disabled + [boolean] [default: false] ``` At first it will create a file for you with description of the refactor. Open @@ -183,7 +178,7 @@ further, but there is no way to measure how much we improved it. So the next step is to add a way to measure the quality of the refactor for benchmarking purposes. -- [ ] ability to evaluate result of the refactor to benchmark the quality of +- [x] ability to evaluate result of the refactor to benchmark the quality of the refactor, so we can asses how different changes affect the quality and performance of the refactor-bot - [ ] add Chain Of Thoughts to try to recover from failed attempts to fix diff --git a/package.json b/package.json index 0698a61..f54b5df 100644 --- a/package.json +++ b/package.json @@ -19,6 +19,7 @@ }, "type": "module", "scripts": { + "benchmark": "pnpm tsx packages/refactor-bot/src/benchmark/cli.ts", "refactor-bot-bundled": "node ./packages/refactor-bot/dist/bin/refactor-bot.mjs" }, "dependencies": {}, diff --git a/packages/refactor-bot/src/benchmark/benchmark.ts b/packages/refactor-bot/src/benchmark/benchmark.ts new file mode 100644 index 0000000..2c826fe --- /dev/null +++ b/packages/refactor-bot/src/benchmark/benchmark.ts @@ -0,0 +1,102 @@ +import orderBy from 'lodash-es/orderBy'; +import { join } from 'path'; +import { from, lastValueFrom, mergeMap, toArray } from 'rxjs'; + +import { createCachedPipeline } from '../cache/state'; +import type { CacheStateRef } from '../cache/types'; +import { logger } from '../logger/logger'; +import { randomText } from '../utils/randomText'; +import { loadBenchmarkConfig } from './loadBenchmarkConfig'; +import { reportBenchmarkSummary } from './reportBenchmarkSummary'; +import { runVariant } from './runVariant'; +import { summarizeRefactorResult } from './summarizeRefactorResult'; + +export async function benchmark(opts: { + config: string; + id?: string; + // for debugging: + saveToCache?: boolean; + enableCacheFor?: string[]; + disableCacheFor?: string[]; +}) { + const config = await loadBenchmarkConfig(opts.config); + + const id = opts.id ?? randomText(8); + + const runVariantsAndCompare = async ( + input: typeof config, + ctx?: CacheStateRef + ) => { + const variantAtATime = Math.max( + 1, + input.maxConcurrentRefactors / input.variants.length + ); + const maxConcurrentRefactorsPerVariant = Math.max( + 1, + input.maxConcurrentRefactors / variantAtATime + ); + + logger.debug('Running refactors for multiple variants', { + maxConcurrentRefactors: input.maxConcurrentRefactors, + variantAtATime, + maxConcurrentRefactorsPerVariant, + }); + + const results = await lastValueFrom( + from(input.variants).pipe( + mergeMap(async (variant) => { + const { resultFilePaths } = await runVariant( + { + id, + variant, + numberOfRuns: input.numberOfRuns, + evaluationConfig: input.evaluationConfig, + maxConcurrentRefactors: + maxConcurrentRefactorsPerVariant, + refactorConfig: { + ...input.refactorConfig, + ...config.refactorConfig, + }, + }, + ctx + ); + + return { + variant: variant.name, + resultFilePaths, + }; + }, variantAtATime), + toArray() + ) + ); + + logger.debug('Finished running refactors for multiple variants', { + results, + }); + + const summaries = await Promise.all( + results.map(async (result) => ({ + variant: result.variant, + summary: await summarizeRefactorResult({ + resultFilePaths: result.resultFilePaths, + }), + })) + ); + + const orderedSummaries = orderBy(summaries, ['variant']); + + await reportBenchmarkSummary({ + summaries: orderedSummaries, + }); + }; + + const { execute } = createCachedPipeline({ + location: join(`.refactor-bot/benchmarks/state`, id), + saveToCache: opts.saveToCache ?? true, + enableCacheFor: opts.enableCacheFor, + disableCacheFor: opts.disableCacheFor, + pipeline: runVariantsAndCompare, + }); + + return await execute(config); +} diff --git a/packages/refactor-bot/src/benchmark/benchmarkConfig.ts b/packages/refactor-bot/src/benchmark/benchmarkConfig.ts new file mode 100644 index 0000000..47d9b0a --- /dev/null +++ b/packages/refactor-bot/src/benchmark/benchmarkConfig.ts @@ -0,0 +1,61 @@ +import { z } from 'zod'; + +import { modelsSchema } from '../chat-gpt/api'; +import { ensureHasTwoElements } from '../utils/hasOne'; + +export const passthroughRefactorConfigSchema = z + .object({ + name: z.string(), + model: modelsSchema, + objective: z.string(), + }) + .passthrough(); + +const partialRefactorConfigSchema = z + .object({ + model: modelsSchema.optional(), + objective: z.string().optional(), + }) + .passthrough(); + +export const appVariantSchema = z.object({ + name: z.string().regex(/^[a-z0-9-]+$/i), + ref: z.string().optional(), + repository: z.string().optional(), + ids: z.array(z.string()).optional(), + command: z + .array(z.string()) + .nonempty() + .default(['pnpm', 'refactor-bot', 'refactor']), + refactorConfig: partialRefactorConfigSchema.optional(), +}); + +export const evaluationConfigSchema = z.object({ + model: modelsSchema, + choices: z.number().default(3), +}); + +export const benchConfigSchema = z + .object({ + variants: z + .array(appVariantSchema) + .transform((variants) => ensureHasTwoElements(variants)), + refactorConfig: passthroughRefactorConfigSchema, + evaluationConfig: evaluationConfigSchema.default({ + model: 'gpt-4-1106-preview', + choices: 3, + }), + numberOfRuns: z.number().default(1), + maxConcurrentRefactors: z.number().default(4), + }) + .transform((input, ctx) => { + const variants = new Set(input.variants.map((variant) => variant.name)); + if (variants.size !== input.variants.length) { + ctx.addIssue({ + code: 'custom', + message: 'Variants must have unique names', + }); + return z.NEVER; + } + return input; + }); diff --git a/packages/refactor-bot/src/benchmark/cli.ts b/packages/refactor-bot/src/benchmark/cli.ts new file mode 100644 index 0000000..b10bae3 --- /dev/null +++ b/packages/refactor-bot/src/benchmark/cli.ts @@ -0,0 +1,80 @@ +import type { ArgumentsCamelCase, Argv, CommandModule } from 'yargs'; +import yargs from 'yargs'; + +import { line } from '../text/line'; + +const builder = (yargs: Argv) => + yargs + .option('id', { + type: 'string', + describe: line` + Unique id of the benchmark run to identify cache directory + `, + }) + .option('config', { + type: 'string', + describe: line` + Path to the config yaml file containing benchmark configuration + `, + demandOption: true, + }) + .option('save-to-cache', { + type: 'boolean', + describe: line` + Whether to enable saving results to the cache, by default + it's enabled. + `, + default: true, + hidden: true, + }) + .option('enable-cache-for', { + type: 'string', + array: true, + describe: line` + Disable cache for specific steps - you can specify the name + of the step or a name followed by a hash of the cache entry. + This for debugging purposes only. + `, + hidden: true, + }) + .option('disable-cache-for', { + type: 'string', + array: true, + describe: line` + Disable cache for specific steps - you can specify the name + of the step or a name followed by a hash of the cache entry. + This for debugging purposes only. + `, + hidden: true, + }); + +type Args = { + config: string; +}; + +const benchmarkCommand = { + command: 'benchmark', + describe: line` + Performs refactoring using different versions of the refactor bot then + evaluates the results and compares them. + `, + builder, + handler: async (opts: ArgumentsCamelCase) => { + await import('dotenv').then((m) => + m.config({ + override: true, + }) + ); + const { cliHandler } = await import('./cliHandler'); + await cliHandler(opts); + }, +} satisfies CommandModule, Args>; + +const opts = await builder(yargs(process.argv.slice(2))) + .usage(benchmarkCommand.describe) + .parseAsync(); + +await benchmarkCommand.handler(opts).catch((err) => { + console.error(err); + process.exitCode = 1; +}); diff --git a/packages/refactor-bot/src/benchmark/cliHandler.ts b/packages/refactor-bot/src/benchmark/cliHandler.ts new file mode 100644 index 0000000..c5a0a92 --- /dev/null +++ b/packages/refactor-bot/src/benchmark/cliHandler.ts @@ -0,0 +1,12 @@ +import { benchmark } from './benchmark'; + +export async function cliHandler(opts: { + config: string; + id?: string; + // for debugging: + saveToCache?: boolean; + enableCacheFor?: string[]; + disableCacheFor?: string[]; +}) { + await benchmark(opts); +} diff --git a/packages/refactor-bot/src/benchmark/example-config.yaml b/packages/refactor-bot/src/benchmark/example-config.yaml new file mode 100644 index 0000000..afb87c0 --- /dev/null +++ b/packages/refactor-bot/src/benchmark/example-config.yaml @@ -0,0 +1,22 @@ +refactorConfig: + name: 'refactor-this-or-that' + objective: 'Refactor this or that' + model: 'gpt-4-1106-preview' + +evaluationConfig: + model: 'gpt-4-1106-preview' + choices: 3 + +numberOfRuns: 3 + +variants: + - name: 'A' + ref: 'HEAD' + + - name: 'B' + ref: 'HEAD' + command: + - pnpm + - refactor-bot + - refactor + - --name refactor-this-or-that diff --git a/packages/refactor-bot/src/benchmark/loadBenchmarkConfig.ts b/packages/refactor-bot/src/benchmark/loadBenchmarkConfig.ts new file mode 100644 index 0000000..43483a7 --- /dev/null +++ b/packages/refactor-bot/src/benchmark/loadBenchmarkConfig.ts @@ -0,0 +1,23 @@ +import { readFile } from 'fs/promises'; +import { load } from 'js-yaml'; + +import { ConfigurationError } from '../errors/configurationError'; +import { isFileNotFoundError } from '../utils/isFileNotFoundError'; +import { benchConfigSchema } from './benchmarkConfig'; + +export async function loadBenchmarkConfig(filePath: string) { + try { + const text = await readFile(filePath, 'utf-8'); + const data = load(text); + const parsed = benchConfigSchema.parse(data); + return parsed; + } catch (err) { + if (isFileNotFoundError(err)) { + throw new ConfigurationError(`Config file not found`, { + cause: err, + }); + } + + throw err; + } +} diff --git a/packages/refactor-bot/src/benchmark/loadRefactorResult.ts b/packages/refactor-bot/src/benchmark/loadRefactorResult.ts new file mode 100644 index 0000000..d23f1f5 --- /dev/null +++ b/packages/refactor-bot/src/benchmark/loadRefactorResult.ts @@ -0,0 +1,36 @@ +import { readFile } from 'fs/promises'; +import { load } from 'js-yaml'; +import { z } from 'zod'; + +import { ConfigurationError } from '../errors/configurationError'; +import { line } from '../text/line'; +import { isFileNotFoundError } from '../utils/isFileNotFoundError'; +import { refactorResultSchema } from './refactorResultSchema'; + +export async function loadRefactorResult(opts: { resultFilePath: string }) { + try { + return refactorResultSchema.parse( + load(await readFile(opts.resultFilePath, 'utf-8')) + ); + } catch (err) { + if (isFileNotFoundError(err)) { + return undefined; + } + + if (err instanceof z.ZodError) { + throw new ConfigurationError( + line` + Failed to parse result of the refactor at + "${opts.resultFilePath}" - when the schema changes + re-evaluating the refactor results + will not be possible + `, + { + cause: err, + } + ); + } + + throw err; + } +} diff --git a/packages/refactor-bot/src/benchmark/refactorResultSchema.ts b/packages/refactor-bot/src/benchmark/refactorResultSchema.ts new file mode 100644 index 0000000..9178a1d --- /dev/null +++ b/packages/refactor-bot/src/benchmark/refactorResultSchema.ts @@ -0,0 +1,36 @@ +import { z } from 'zod'; + +import { checkoutSandboxResultSchema } from '../refactor/checkoutSandbox'; +import { planFilesResultSchema } from '../refactor/planFiles'; +import { + llmUsageEntrySchema, + refactorFilesResultSchema, +} from '../refactor/types'; + +/** + * @note this is a copy of the refactor result schema that is used in the + * ../refactor/resultsCollector.ts file - we maintain a second copy here so + * that we can introduce breaking changes to the refactor result schema and + * "migrate" the old format to the new format using this schema here, if we + * have to. + */ +export const refactorResultSchema = z + .object({ + objective: z.string(), + status: z.enum(['success', 'failure']), + error: z.record(z.unknown()).optional(), + }) + .merge(checkoutSandboxResultSchema) + .merge(refactorFilesResultSchema) + .augment({ + planFilesResults: z.array(planFilesResultSchema).optional(), + usage: z.array(llmUsageEntrySchema), + performance: z.object({ + totalDurationMs: z.number(), + durationMsByStep: z.record( + z.object({ + durationMs: z.number(), + }) + ), + }), + }); diff --git a/packages/refactor-bot/src/benchmark/reportBenchmarkSummary.ts b/packages/refactor-bot/src/benchmark/reportBenchmarkSummary.ts new file mode 100644 index 0000000..de9b90c --- /dev/null +++ b/packages/refactor-bot/src/benchmark/reportBenchmarkSummary.ts @@ -0,0 +1,68 @@ +import assert from 'assert'; +import { formatWithOptions } from 'util'; + +import { markdown, printMarkdown } from '../markdown/markdown'; +import { format } from '../text/format'; +import { lowerCamelCaseToKebabCase } from '../utils/lowerCamelCaseToKebabCase'; +import type { summarizeRefactorResult } from './summarizeRefactorResult'; + +function formatColumn(_: string, column: unknown) { + if (typeof column === 'number') { + return column.toFixed(2); + } + + return formatWithOptions({ colors: true }, '%O', column); +} + +export async function reportBenchmarkSummary(opts: { + summaries: Array<{ + variant: string; + summary: Awaited>; + }>; +}) { + const keys = opts.summaries.map(({ summary }) => Object.keys(summary))[0]; + assert(keys !== undefined); + assert(keys.length > 0); + + const header = [ + 'variant', + ...keys.map((key) => lowerCamelCaseToKebabCase(key)), + ].join(' | '); + const header2 = ['---', '---', ...keys.map(() => '---')].join(' | '); + const table = opts.summaries + .map(({ variant, summary }) => + [ + variant, + ...keys.map((key) => + formatColumn(key, (summary as Record)[key]) + ), + ].join(' | ') + ) + .join('\n'); + + await printMarkdown( + format( + markdown` + # Benchmark results + + %results% + + All columns averaged over \`numberOfRuns\`. + + \`score\` - outcome based metric from 0 to 1 where 1 represents + an outcome where every explicit requirement is satisfied and 0 + represents failed refactor + + \`acceptedRatio\` - ratio of number of files that were accepted + as a result of the refactor to the total number of files that + were changed + + \`wastedTokensRatio\` - number of tokens that were exchanged + with LLM, which produced an outcome that was later discarded + `, + { + results: [header, header2, table].join('\n'), + } + ) + ); +} diff --git a/packages/refactor-bot/src/benchmark/runVariant.ts b/packages/refactor-bot/src/benchmark/runVariant.ts new file mode 100644 index 0000000..2ac1439 --- /dev/null +++ b/packages/refactor-bot/src/benchmark/runVariant.ts @@ -0,0 +1,204 @@ +import assert from 'assert'; +import dedent from 'dedent'; +import { mkdir, writeFile } from 'fs/promises'; +import { dump } from 'js-yaml'; +import { join } from 'path'; +import { lastValueFrom, mergeMap, range, toArray } from 'rxjs'; +import { z } from 'zod'; + +import { makeCachedFunction } from '../cache/makeCachedFunction'; +import { spawnResult } from '../child-process/spawnResult'; +import { findRepositoryRoot } from '../file-system/findRepositoryRoot'; +import { logger } from '../logger/logger'; +import { checkoutSandbox } from '../refactor/checkoutSandbox'; +import { randomText } from '../utils/randomText'; +import { + appVariantSchema, + evaluationConfigSchema, + passthroughRefactorConfigSchema, +} from './benchmarkConfig'; + +export const runVariantSchema = z.object({ + id: z.string(), + variant: appVariantSchema, + refactorConfig: passthroughRefactorConfigSchema, + evaluationConfig: evaluationConfigSchema, + numberOfRuns: z.number(), + maxConcurrentRefactors: z.number(), +}); + +export const runVariant = makeCachedFunction({ + name: 'run-variant', + inputSchema: runVariantSchema, + resultSchema: z.object({ + resultFilePaths: z.array(z.string()), + }), + transform: async (input, ctx) => { + const repoRoot = await findRepositoryRoot(); + + const newRuns = input.numberOfRuns - (input.variant.ids?.length ?? 0); + + if (newRuns === 0) { + logger.debug('No new runs to start', { + numberOfRunsRequired: input.numberOfRuns, + numberOfPreEvaluatedRuns: input.variant.ids?.length ?? 0, + maxConcurrentRefactors: input.maxConcurrentRefactors, + }); + + const resultFilePaths = (input.variant.ids ?? []).map( + (refactorId) => + join( + repoRoot, + '.refactor-bot', + 'refactors', + input.refactorConfig.name, + 'state', + refactorId, + 'result.yaml' + ) + ); + + return { + resultFilePaths, + }; + } + + const cliSandbox = await checkoutSandbox( + { + name: ['bench', input.variant.name].join('-'), + id: input.id, + ref: input.variant.ref, + repository: input.variant.repository, + allowDirtyWorkingTree: true, + }, + ctx + ); + + const { objective, ...rest } = input.refactorConfig; + + await mkdir( + join( + cliSandbox.sandboxDirectoryPath, + '.refactor-bot', + 'refactors', + input.refactorConfig.name + ), + { recursive: true } + ); + + logger.debug('Created sandbox for the refactor CLI at', { + location: cliSandbox.sandboxDirectoryPath, + }); + + const goalMd = join( + cliSandbox.sandboxDirectoryPath, + '.refactor-bot', + 'refactors', + input.refactorConfig.name, + `goal.md` + ); + + await writeFile( + goalMd, + dedent` + \`\`\`yaml + ${dump(rest)} + \`\`\` + + ${objective} + ` + ); + + logger.debug('Written refactor config with an objective at', { + location: goalMd, + }); + + logger.debug('Starting refactoring process', { + numberOfRuns: input.numberOfRuns, + numberOfPreEvaluatedRuns: input.variant.ids?.length ?? 0, + maxConcurrentRefactors: input.maxConcurrentRefactors, + }); + + const results = await lastValueFrom( + range(0, newRuns).pipe( + mergeMap(async () => { + const refactorId = randomText(8); + + const result = await spawnResult( + input.variant.command[0], + input.variant.command + .slice(1) + .concat([ + '--name', + input.refactorConfig.name, + '--id', + refactorId, + ]), + { + cwd: cliSandbox.sandboxDirectoryPath, + output: [], + exitCodes: 'any', + stdio: 'inherit', + env: { + ...process.env, + LOG_RELATIVE_TO_CWD: process.cwd(), + CACHE_ROOT: repoRoot, + }, + } + ); + + logger.debug('Process finished', { + command: input.variant.command.join(' '), + pid: result.pid, + ...(result.error && { + error: result.error, + }), + ...(typeof result.status === 'number' && { + status: result.status, + }), + ...(typeof result.signal === 'string' && { + signal: result.signal, + }), + }); + + assert(!result.signal, 'Failed to run refactor command'); + + return { + resultFilePath: join( + repoRoot, + '.refactor-bot', + 'refactors', + input.refactorConfig.name, + 'state', + refactorId, + 'result.yaml' + ), + }; + }, input.maxConcurrentRefactors), + toArray() + ) + ); + + const resultFilePaths = (input.variant.ids ?? []) + .map((refactorId) => + join( + repoRoot, + '.refactor-bot', + 'refactors', + input.refactorConfig.name, + 'state', + refactorId, + 'result.yaml' + ) + ) + .concat(results.map((result) => result.resultFilePath)); + + logger.debug('Result file paths below', { + resultFilePaths, + }); + + return { + resultFilePaths, + }; + }, +}); diff --git a/packages/refactor-bot/src/benchmark/summarizeCosts.ts b/packages/refactor-bot/src/benchmark/summarizeCosts.ts new file mode 100644 index 0000000..45a3aee --- /dev/null +++ b/packages/refactor-bot/src/benchmark/summarizeCosts.ts @@ -0,0 +1,40 @@ +import type { z } from 'zod'; + +import { avg } from '../evaluate/utils/avg'; +import { sum } from '../evaluate/utils/sum'; +import { summarizeLlmUsageTokens } from '../refactor/collectLlmUsage'; +import type { refactorResultSchema } from './refactorResultSchema'; + +export function summarizeCosts(opts: { + results: Array>; +}) { + const summaries = opts.results.map((result) => { + const { totalTokens } = summarizeLlmUsageTokens(result); + + const wastedTokens = sum( + result.discarded.map((result) => + sum(result.usage.map((usage) => usage.usage.totalTokens)) + ) + ); + + return { + totalTokens, + wastedTokensRatio: wastedTokens / totalTokens, + }; + }); + + return { + /** + * Average number of tokens used in the refactor + */ + totalTokens: avg(summaries.map((summary) => summary.totalTokens)), + + /** + * Average ratio of tokens used to produce results which were discarded + * during the refactor + */ + wastedTokensRatio: avg( + summaries.map((summary) => summary.wastedTokensRatio) + ), + }; +} diff --git a/packages/refactor-bot/src/benchmark/summarizeOutcomeScore.ts b/packages/refactor-bot/src/benchmark/summarizeOutcomeScore.ts new file mode 100644 index 0000000..700afb2 --- /dev/null +++ b/packages/refactor-bot/src/benchmark/summarizeOutcomeScore.ts @@ -0,0 +1,56 @@ +import type { z } from 'zod'; + +import { avg } from '../evaluate/utils/avg'; +import { summarizeRefactorFilesResult } from '../refactor/types'; +import type { refactorResultSchema } from './refactorResultSchema'; + +export function summarizeOutcomeScore(opts: { + results: Array>; +}) { + const summaries = opts.results.map((result) => { + const { accepted, discarded } = summarizeRefactorFilesResult(result); + + const acceptedFileNames = Object.keys(accepted.resultsByFilePaths); + const discardedFileNames = Object.keys(discarded.resultsByFilePaths); + + if (acceptedFileNames.length === 0 && discardedFileNames.length === 0) { + return { + score: 0, + acceptedRatio: 0, + }; + } + + const score = avg([ + ...Object.values(accepted.resultsByFilePaths).map((results) => + avg(results.map((result) => result.evaluation?.score ?? 0)) + ), + ...Object.values(discarded.resultsByFilePaths).map((results) => + avg(results.map((result) => result.evaluation?.score ?? 0)) + ), + ]); + + return { + score, + acceptedRatio: + acceptedFileNames.length / + (acceptedFileNames.length + discardedFileNames.length), + }; + }); + + return { + /** + * Average evaluation score for all refactors + * + * This score reflects whether the refactoring objective is fully + * complete or not. Value of 1 means that all requirements were met, + * value of 0 means that none of the requirements were met. + */ + score: avg(summaries.map((summary) => summary.score)), + + /** + * Average ratio of accepted file changes to the total number of + * files we attempted to refactor + */ + acceptedRatio: avg(summaries.map((summary) => summary.acceptedRatio)), + }; +} diff --git a/packages/refactor-bot/src/benchmark/summarizePerformance.ts b/packages/refactor-bot/src/benchmark/summarizePerformance.ts new file mode 100644 index 0000000..e9960ca --- /dev/null +++ b/packages/refactor-bot/src/benchmark/summarizePerformance.ts @@ -0,0 +1,21 @@ +import type { z } from 'zod'; + +import { avg } from '../evaluate/utils/avg'; +import type { refactorResultSchema } from './refactorResultSchema'; + +export function summarizePerformance(opts: { + results: Array>; +}) { + const summaries = opts.results.map((result) => { + return { + totalDurationMs: result.performance.totalDurationMs, + }; + }); + + return { + /** + * Average duration of the refactor + */ + durationMs: avg(summaries.map((summary) => summary.totalDurationMs)), + }; +} diff --git a/packages/refactor-bot/src/benchmark/summarizeRefactorResult.ts b/packages/refactor-bot/src/benchmark/summarizeRefactorResult.ts new file mode 100644 index 0000000..52becb6 --- /dev/null +++ b/packages/refactor-bot/src/benchmark/summarizeRefactorResult.ts @@ -0,0 +1,49 @@ +import { ConfigurationError } from '../errors/configurationError'; +import { line } from '../text/line'; +import { loadRefactorResult } from './loadRefactorResult'; +import { summarizeCosts } from './summarizeCosts'; +import { summarizeOutcomeScore } from './summarizeOutcomeScore'; +import { summarizePerformance } from './summarizePerformance'; + +export async function summarizeRefactorResult(opts: { + resultFilePaths: string[]; +}) { + const results = await Promise.all( + opts.resultFilePaths.map((resultFilePath) => + loadRefactorResult({ + resultFilePath, + }) + ) + ); + + const loadedResults = results.flatMap((result) => (result ? [result] : [])); + + if (loadedResults.length === 0) { + throw new ConfigurationError( + line` + No parsable refactor results found, this could happen when + refactoring has failed for all attempts of the given variant - + double check that the configuration is correct + ` + ); + } + + const outcomeScore = summarizeOutcomeScore({ + results: loadedResults, + }); + + const costs = summarizeCosts({ + results: loadedResults, + }); + + const performance = summarizePerformance({ + results: loadedResults, + }); + + return { + numberOfRuns: loadedResults.length, + ...outcomeScore, + ...costs, + ...performance, + }; +} diff --git a/packages/refactor-bot/src/bootstrap.ts b/packages/refactor-bot/src/bootstrap.ts index 36a889e..1556cf6 100644 --- a/packages/refactor-bot/src/bootstrap.ts +++ b/packages/refactor-bot/src/bootstrap.ts @@ -1,8 +1,16 @@ export async function bootstrap(load: () => Promise) { + if (process.argv.includes('--inspect')) { + await import('inspector').then((inspector) => { + console.log('Attach to PID', process.pid); + inspector.open(undefined, undefined, true); + // eslint-disable-next-line no-debugger + debugger; + }); + } await import('dotenv').then((dotenv) => dotenv.config({ override: true, }) ); - await load(); + return await load(); } diff --git a/packages/refactor-bot/src/cache/cache.ts b/packages/refactor-bot/src/cache/cache.ts index e6d8596..e98b5ef 100644 --- a/packages/refactor-bot/src/cache/cache.ts +++ b/packages/refactor-bot/src/cache/cache.ts @@ -56,7 +56,19 @@ export async function lookupEventsInCache< if ( opts.state.enableCacheFor && - !opts.state.enableCacheFor.includes(opts.name) + !opts.state.enableCacheFor.includes(opts.name) && + !opts.state.enableCacheFor.includes(basename(opts.key)) + ) { + return { + foundLocation: undefined, + foundEvents: undefined, + }; + } + + if ( + opts.state.disableCacheFor && + (opts.state.disableCacheFor.includes(opts.name) || + opts.state.disableCacheFor.includes(basename(opts.key))) ) { return { foundLocation: undefined, @@ -73,14 +85,21 @@ export async function lookupEventsInCache< } try { - const cwd = opts.location; + const cwd = opts.state.internal_unboundCacheLookup + ? opts.state.location ?? opts.location + : opts.location; /** * @note to simplify mocking there is only fg dependency, but * what we really needed here is fs.stats, but let's use the fg * so we don't have to mock fs.stats and sync it with the fg. */ const entries = await opts.state.deps.fg( - [`${basename(opts.key)}.yaml`], + opts.state.internal_unboundCacheLookup + ? [ + `${basename(opts.key)}.yaml`, + `**/${basename(opts.key)}.yaml`, + ] + : [`${basename(opts.key)}.yaml`], { cwd, ignore: [], @@ -156,6 +175,9 @@ export async function saveEventsToCache< } export async function cleanCache( + opts: { + cleanRoot?: boolean; + }, ctx: { location?: string }, deps = defaultDeps ) { @@ -183,7 +205,9 @@ export async function cleanCache( return; } - logger.trace('Cleaning', relative(process.cwd(), location)); + logger.trace('Cleaning', { + location, + }); /** * Filter out any directory or file that appears in the log @@ -218,17 +242,17 @@ export async function cleanCache( ), ]; - const filesAndDirs = await fg([`*.yaml`, `*`, ...include], { - cwd: location, - ignore, - onlyFiles: false, - }); + const filesAndDirs = await fg( + [...(opts.cleanRoot ? [`*.yaml`, `*`] : []), ...include], + { + cwd: location, + ignore, + onlyFiles: false, + } + ); for (const entry of filesAndDirs) { - logger.trace( - `Deleting`, - relative(process.cwd(), join(location, entry)) - ); + logger.trace(`Deleting`, join(location, entry)); if (entry.endsWith('.yaml')) { await unlink(join(location, entry)); diff --git a/packages/refactor-bot/src/cache/log.ts b/packages/refactor-bot/src/cache/log.ts index 16a57cd..5ea8e0e 100644 --- a/packages/refactor-bot/src/cache/log.ts +++ b/packages/refactor-bot/src/cache/log.ts @@ -1,5 +1,3 @@ -import { relative } from 'path'; - import { CycleDetectedError } from '../errors/cycleDetectedError'; import { type CacheState, getPipelineState } from './state'; import type { CacheStateRef } from './types'; @@ -38,9 +36,8 @@ export function addToExecutionLog(opts: { state: CacheState; key: string }) { export function logExecutionLog(ctx: CacheStateRef) { const state = getPipelineState(ctx); if (state) { - state.deps.logger.debug( - `Execution log:`, - state.log.map((entry) => relative(process.cwd(), entry)) - ); + state.deps.logger.debug(`Execution log:`, { + executionLog: state.log.map((entry) => entry), + }); } } diff --git a/packages/refactor-bot/src/cache/makeCachedFunction.ts b/packages/refactor-bot/src/cache/makeCachedFunction.ts index f1d052c..756849f 100644 --- a/packages/refactor-bot/src/cache/makeCachedFunction.ts +++ b/packages/refactor-bot/src/cache/makeCachedFunction.ts @@ -51,6 +51,8 @@ export function makeCachedFunction< }, ctx: CacheStateRef & { dispatch: (typeof defaultDeps)['dispatch']; + actions: (typeof defaultDeps)['actions']; + skipSavingToCache: (err: unknown) => void; } ) => Promise>; }, diff --git a/packages/refactor-bot/src/cache/makeCachedObservable.test.ts b/packages/refactor-bot/src/cache/makeCachedObservable.test.ts index b361048..713def9 100644 --- a/packages/refactor-bot/src/cache/makeCachedObservable.test.ts +++ b/packages/refactor-bot/src/cache/makeCachedObservable.test.ts @@ -102,7 +102,7 @@ const setup = () => { return makeCachedObservableToTest(...params); }) satisfies typeof makeCachedObservableToTest, cleanCache: ((...params) => { - params[1] = deps; + params[2] = deps; return cleanCacheToTest(...params); }) satisfies typeof cleanCache, }; @@ -922,7 +922,7 @@ it('should delete old cached files when clean is called', async () => { 'add-660e.yaml': expect.arrayContaining([addResult({ value: 1 })]), }); - await cleanCache(ctx); + await cleanCache({ cleanRoot: true }, ctx); expect(Object.fromEntries(files.entries())).toEqual({ 'add-10bb.yaml': expect.arrayContaining([addResult({ value: 3 })]), @@ -955,7 +955,7 @@ it('should clean only on executed levels', async () => { 'multiply-26e7' ); - await cleanCache(ctx, deps); + await cleanCache({ cleanRoot: true }, ctx, deps); /** * @note note that files with xxxx hash deleted but not files with xxyy hash @@ -974,7 +974,7 @@ it('should clean only on executed levels', async () => { 'sub-pipe-51ea/multiply-b956' ); - await cleanCache(ctx, deps); + await cleanCache({ cleanRoot: true }, ctx, deps); /** * @note note that files with xxyy hash deleted now diff --git a/packages/refactor-bot/src/cache/makeCachedObservable.ts b/packages/refactor-bot/src/cache/makeCachedObservable.ts index 9d57d5f..28c3f92 100644 --- a/packages/refactor-bot/src/cache/makeCachedObservable.ts +++ b/packages/refactor-bot/src/cache/makeCachedObservable.ts @@ -93,6 +93,8 @@ export function makeCachedObservable< }, ctx: CacheStateRef & { dispatch: (action: AnyAction) => void; + actions: (typeof defaultDeps)['actions']; + skipSavingToCache: (err: unknown) => void; } ) => Observable | AnyAction>; }, @@ -235,7 +237,7 @@ export function makeCachedObservable< `, { ...(location && { - location: relative(process.cwd(), location), + location, }), } ); @@ -252,12 +254,21 @@ export function makeCachedObservable< key, }); + let savingToCacheWasSkippedByTheFunctionCall = false; + + const skipSavingToCache = () => { + deps.logger.trace(`Saving events has been skipped`, { + location: location || '.', + }); + savingToCacheWasSkippedByTheFunctionCall = true; + }; + const resultingEvents = concat( of( executionStarted({ name, key, - input, + input: validatedInput, }) ), opts.factory(validatedInput, { @@ -266,6 +277,8 @@ export function makeCachedObservable< location: join(location, basename(key)), }), dispatch: recordAndDispatch, + actions: deps.actions, + skipSavingToCache, }) as Observable, of( executionSuccess({ @@ -291,6 +304,10 @@ export function makeCachedObservable< ); const saveResultingEvents = defer(async () => { + if (savingToCacheWasSkippedByTheFunctionCall) { + return; + } + await saveEventsToCache({ events, eventsSchema, diff --git a/packages/refactor-bot/src/cache/persistence.ts b/packages/refactor-bot/src/cache/persistence.ts index 52c5d2e..084a7b8 100644 --- a/packages/refactor-bot/src/cache/persistence.ts +++ b/packages/refactor-bot/src/cache/persistence.ts @@ -1,4 +1,4 @@ -import { dirname, relative } from 'path'; +import { dirname } from 'path'; import type { z, ZodType } from 'zod'; import { type TypeOf } from 'zod'; @@ -54,9 +54,9 @@ export const saveEvents = async < ) => { const { logger } = deps; - logger.trace( - `Saving events to "${relative(process.cwd(), opts.location)}"` - ); + logger.trace(`Saving events`, { + location: opts.location, + }); await save( { @@ -79,9 +79,9 @@ export const loadEvents = async < }, deps = defaultDeps ) => { - deps.logger.trace( - `Loading events from "${relative(process.cwd(), opts.location)}"` - ); + deps.logger.trace(`Loading events`, { + location: opts.location, + }); const events = await load( { diff --git a/packages/refactor-bot/src/cache/state.ts b/packages/refactor-bot/src/cache/state.ts index c7ba8ed..107a258 100644 --- a/packages/refactor-bot/src/cache/state.ts +++ b/packages/refactor-bot/src/cache/state.ts @@ -1,5 +1,4 @@ import { AsyncLocalStorage } from 'async_hooks'; -import { relative } from 'path'; import type { AnyAction } from '../event-bus'; import { line } from '../text/line'; @@ -40,12 +39,27 @@ export type CacheState = { readonly saveToCache: boolean; /** - * Whether to load the cache for the given function or not, this allows us - * to disable the cache for certain functions only for debugging and testing - * purposes. + * Enable cache for specific steps - you can specify the name of the step + * or a name followed by a hash of the cache entry. */ readonly enableCacheFor?: string[]; + /** + * Disable cache for specific steps - you can specify the name of the step + * or a name followed by a hash of the cache entry. + */ + readonly disableCacheFor?: string[]; + + /** + * For debugging purposes, if set to `true` the pipeline will ignore the + * hierarchy of the pipeline and will attempt to look for any cached + * entries in the cache store, regardless of the parent function. + * + * This is useful when we want to refactor function names or introduce new + * hierarchy, but also do not want to loose the cache state. + */ + readonly internal_unboundCacheLookup?: boolean; + /** * If set to `true` the pipeline will stop executing and throw * an error. @@ -65,6 +79,7 @@ export const initializeCacheState = ( saveToCache?: boolean; saveInput?: boolean; enableCacheFor?: string[]; + disableCacheFor?: string[]; }, depsRaw: Partial = defaultDeps ) => { @@ -83,7 +98,7 @@ export const initializeCacheState = ( let state = withSymbol?.[pipelineState]; if (!state) { - const location = relative(process.cwd(), ctx?.location || '.'); + const location = ctx?.location || '.'; if (location) { logger.debug('Initializing new pipeline state', { @@ -104,6 +119,8 @@ export const initializeCacheState = ( log: [], saveToCache: ctx?.saveToCache ?? true, enableCacheFor: ctx?.enableCacheFor, + disableCacheFor: ctx?.disableCacheFor, + internal_unboundCacheLookup: false, deps, }; } @@ -142,6 +159,7 @@ export function createCachedPipeline(opts: { * Location of the cache */ location: string; + /** * Whether to save the results of the pipeline to the cache or not, this * allows disabling modifications to the cache to ensure currently cached @@ -151,27 +169,48 @@ export function createCachedPipeline(opts: { * get the hit. */ saveToCache: boolean; + /** - * Whether to load the cache for the given function or not, this allows us - * to disable the cache for certain functions only for debugging and testing - * purposes. + * Enable cache for specific steps - you can specify the name of the step + * or a name followed by a hash of the cache entry. */ enableCacheFor?: string[]; - pipeline: (input: Input) => Promise; + /** + * Disable cache for specific steps - you can specify the name of the step + * or a name followed by a hash of the cache entry. + */ + disableCacheFor?: string[]; + + /** + * Whether to clean the cache at the end of the execution or not, this is + * disabled by default + */ + cleanCache?: boolean; + + /** + * Whether to clean the root directory as well, or only the subdirectories + * that appeared in the log + */ + cleanRoot?: boolean; + + pipeline: (input: Input, ctx: CacheStateRef) => Promise; }) { const ctx = { location: opts.location, saveToCache: opts.saveToCache, enableCacheFor: opts.enableCacheFor, + disableCacheFor: opts.disableCacheFor, }; initializeCacheState(ctx); - const executePipeline = async (input: Input) => { + const executePipeline = async (input: Input, ctx: CacheStateRef) => { try { - const result = await opts.pipeline(input); - await cleanCache(ctx); + const result = await opts.pipeline(input, ctx); + if (opts.cleanCache) { + await cleanCache({ cleanRoot: opts.cleanRoot ?? false }, ctx); + } return result; } finally { logExecutionLog(ctx); @@ -180,7 +219,7 @@ export function createCachedPipeline(opts: { const execute = async (input: Input) => { return asyncLocalStorage.run(ctx, () => { - return executePipeline(input); + return executePipeline(input, ctx); }); }; diff --git a/packages/refactor-bot/src/chat-gpt/api.ts b/packages/refactor-bot/src/chat-gpt/api.ts index d96701d..016b9a8 100644 --- a/packages/refactor-bot/src/chat-gpt/api.ts +++ b/packages/refactor-bot/src/chat-gpt/api.ts @@ -251,8 +251,8 @@ export function estimatePrice( } export function calculatePrice( - opts: Response & { - model: Models; + opts: Pick & { + model: string; } ) { const model = opts.model; @@ -312,13 +312,13 @@ export async function chatCompletions(opts: Opts): Promise { ...(opts.functionCall && { function_call: opts.functionCall, }), - ...(opts.maxTokens && { + ...(typeof opts.maxTokens === 'number' && { max_tokens: opts.maxTokens, }), - ...(opts.temperature && { + ...(typeof opts.temperature === 'number' && { temperature: opts.temperature, }), - ...(opts.choices && { + ...(typeof opts.choices === 'number' && { n: opts.choices, }), } satisfies BodyShape), diff --git a/packages/refactor-bot/src/child-process/spawnResult.ts b/packages/refactor-bot/src/child-process/spawnResult.ts index 7d4f5f7..196e137 100644 --- a/packages/refactor-bot/src/child-process/spawnResult.ts +++ b/packages/refactor-bot/src/child-process/spawnResult.ts @@ -1,6 +1,7 @@ import assert from 'assert'; import { logger } from '../logger/logger'; +import { ensureHasOneElement } from '../utils/hasOne'; import { UnreachableError } from '../utils/UnreachableError'; import type { SpawnParameterMix, SpawnToPromiseOpts } from './spawnToPromise'; import { spawnToPromise, spawnWithSpawnParameters } from './spawnToPromise'; @@ -25,6 +26,7 @@ export type SpawnResultReturn = { status: number | null; signal: NodeJS.Signals | null; error?: Error | undefined; + args: [string, ...string[]]; }; export async function spawnResult( @@ -85,6 +87,9 @@ export async function spawnResult( pid: child.pid, signal: child.signalCode, status: child.exitCode, + get args(): [string, ...string[]] { + return ensureHasOneElement(child.spawnargs); + }, get output() { return combinedData; }, diff --git a/packages/refactor-bot/src/evaluate/evaluateFileChanges.ts b/packages/refactor-bot/src/evaluate/evaluateFileChanges.ts new file mode 100644 index 0000000..cc96ce6 --- /dev/null +++ b/packages/refactor-bot/src/evaluate/evaluateFileChanges.ts @@ -0,0 +1,215 @@ +import { z } from 'zod'; + +import { makeCachedFunction } from '../cache/makeCachedFunction'; +import type { CacheStateRef } from '../cache/types'; +import type { RegularAssistantMessage } from '../chat-gpt/api'; +import { markdown } from '../markdown/markdown'; +import { formatBulletList } from '../prompt-formatters/formatBulletList'; +import { formatFileContents } from '../prompt-formatters/formatFileContents'; +import { formatFileDiff } from '../prompt-formatters/formatFileDiff'; +import { formatOptional } from '../prompt-formatters/formatOptional'; +import { formatZodError } from '../prompt-formatters/formatZodError'; +import { + prompt, + promptParametersFrom, + refactorConfigPromptOptsSchema, +} from '../refactor/prompt'; +import { parseJsonResponse } from '../response-parsers/parseJsonResponse'; +import { format } from '../text/format'; +import { line } from '../text/line'; +import { ensureHasOneElement } from '../utils/hasOne'; + +export const evaluateFileChangesInput = refactorConfigPromptOptsSchema.augment({ + requirements: z.array(z.string()).nonempty(), + filePath: z.string(), + fileContentsBefore: z.string().optional(), + fileContentsAfter: z.string().optional(), + fileDiff: z.string().optional(), + issues: z.array(z.string()).optional(), + index: z.number().optional(), + choices: z.number().optional(), + temperature: z.number().optional(), +}); + +const promptResponseSchema = z.object({ + summary: z.string(), + requirements: z.array( + z.object({ + description: z.string(), + satisfied: z.boolean(), + }) + ), +}); + +export const evaluateFileResultSchema = z.object({ + key: z.string().optional(), + choices: z.array(promptResponseSchema).nonempty(), +}); + +const systemPromptText = markdown` + Think step by step. Be concise and to the point. Do not make assumptions and + follow instructions exactly. +`; + +const promptText = (opts: { + requirements: string[]; + filePath: string; + fileContentsBefore: string; + fileContentsAfter: string; + fileDiff: string; + issues: string[]; +}) => + format( + markdown` + + %requirements% + + + Given above requirements, an algorithm has made modifications to the + file at \`%filePath%\`. We now want to assess changes in that file. + Strictly focus only on the file mentioned. + + Below information is available to you and applies only to the file + \`%filePath%\`: + + %fileDiff% + + %fileContentsAfter% + + %issues% + + Assess the changes made by the algorithm and determine whether the + requirement was satisfied or not. Do not add new requirements. Do + not rephrase requirements. Do not change order of the requirements. + Do not assess the quality of the implementation in relation to the + requirement, but focus on the outcome. The number of requirements + returned should be the same as the number of requirements given. + + Return the following JSON result when you are done: + + ~~~json + { + "summary": "", + "requirements": [ + { + "description": "", + "satisfied": + } + ] + } + ~~~ + + In your final response, only include the JSON in the above format + wrapped with the code block. Do not include any other text or + output. + `, + { + requirements: formatBulletList({ + items: ensureHasOneElement(opts.requirements), + heading: `List of requirements:`, + }), + filePath: opts.filePath, + fileDiff: formatOptional({ + heading: 'Following changes were made to the file:', + text: formatFileDiff({ + fileDiff: opts.fileDiff, + }), + }), + fileContentsAfter: formatOptional({ + heading: 'File contents after the changes:', + text: formatFileContents({ + fileContents: opts.fileContentsAfter, + filePath: opts.filePath, + }), + }), + issues: formatBulletList({ + heading: 'Following issues were found after the changes:', + empty: line` + We've checked the code for issues - no issues were found. + `, + items: opts.issues, + }), + } + ); + +export const evaluateFileChanges = makeCachedFunction({ + name: 'eval-file', + inputSchema: evaluateFileChangesInput, + resultSchema: evaluateFileResultSchema, + transform: async ( + input: z.input, + ctx?: CacheStateRef + ) => { + const { + filePath, + requirements, + fileDiff, + fileContentsBefore, + fileContentsAfter, + issues = [], + } = input; + + const validateResponse = (message: RegularAssistantMessage) => + parseJsonResponse( + message.content, + z.object({ + summary: z.string(), + requirements: z.array( + z.object({ + description: z.string(), + satisfied: z.boolean(), + }) + ), + }) + ); + + const promptParams = promptParametersFrom( + { + ...input, + allowedFunctions: [], + }, + ctx + ); + + const result = await prompt( + { + preface: systemPromptText, + prompt: promptText({ + filePath, + requirements, + fileDiff: fileDiff || '', + fileContentsBefore: fileContentsBefore || '', + fileContentsAfter: fileContentsAfter || '', + issues, + }), + temperature: input.temperature ?? 0.2, + choices: input.choices, + shouldStop: (message) => { + try { + validateResponse(message); + return true; + } catch (err) { + if (err instanceof z.ZodError) { + return formatZodError({ + error: err, + }); + } + return String(err); + } + }, + ...promptParams, + seed: input.index?.toString(), + }, + ctx + ); + + return { + key: result.key, + choices: ensureHasOneElement( + result.choices.map((choice) => + validateResponse(choice.resultingMessage) + ) + ), + }; + }, +}); diff --git a/packages/refactor-bot/src/evaluate/evaluateFileScore.ts b/packages/refactor-bot/src/evaluate/evaluateFileScore.ts new file mode 100644 index 0000000..3ba8e4b --- /dev/null +++ b/packages/refactor-bot/src/evaluate/evaluateFileScore.ts @@ -0,0 +1,85 @@ +import { z } from 'zod'; + +import type { CacheStateRef } from '../cache/types'; +import { gitShowFile } from '../git/gitShowFile'; +import { gitShowFileCommitRangeSummary } from '../git/gitShowFileCommitSummary'; +import { + evaluateFileChanges, + evaluateFileChangesInput, +} from './evaluateFileChanges'; +import { evaluateUnchangedFile } from './evaluateUnchangedFile'; +import { avg } from './utils/avg'; + +export const evaluateFileScoreSchema = evaluateFileChangesInput + .omit({ + fileContentsAfter: true, + fileContentsBefore: true, + fileDiff: true, + }) + .augment({ + commit: z.string().optional(), + commitBeforeChanges: z.string(), + }); + +export const evaluateFileScoreResultSchema = z.object({ + key: z.string().optional(), + score: z.number(), +}); + +/** + * Determines if the file has changed since the last commit and evaluates the + * changes if so, then aggregates the results into a single score. + */ +export const evaluateFileScore = async ( + input: z.input, + ctx?: CacheStateRef +) => { + const { sandboxDirectoryPath, commit, commitBeforeChanges, filePath } = + input; + + const changeInfo = commit + ? await gitShowFileCommitRangeSummary({ + location: sandboxDirectoryPath, + filePath: filePath, + from: commitBeforeChanges, + to: commit, + }) + : undefined; + + const evaluationResults = + changeInfo && changeInfo.fileDiff + ? await evaluateFileChanges( + { + ...input, + ...changeInfo, + }, + ctx + ) + : await evaluateUnchangedFile( + { + ...input, + fileContents: await gitShowFile({ + location: sandboxDirectoryPath, + filePath: input.filePath, + ref: input.commitBeforeChanges, + }), + }, + ctx + ); + + const scoreChoices = evaluationResults.choices.map((choice) => { + const hit = choice.requirements.filter((r) => r.satisfied).length; + + const score = hit / choice.requirements.length; + + return { + score, + summary: choice.summary, + }; + }); + + return { + key: evaluationResults.key, + score: avg(scoreChoices.map((choice) => choice.score)), + }; +}; diff --git a/packages/refactor-bot/src/evaluate/evaluateRefactorResult.ts b/packages/refactor-bot/src/evaluate/evaluateRefactorResult.ts new file mode 100644 index 0000000..21ee9e5 --- /dev/null +++ b/packages/refactor-bot/src/evaluate/evaluateRefactorResult.ts @@ -0,0 +1,267 @@ +import assert from 'assert'; +import type { ObservedValueOf } from 'rxjs'; +import { + concatMap, + from, + lastValueFrom, + map, + mergeMap, + range, + scan, + startWith, + toArray, + zip, +} from 'rxjs'; +import { z } from 'zod'; + +import { refactorResultSchema } from '../benchmark/refactorResultSchema'; +import { makeCachedFunction } from '../cache/makeCachedFunction'; +import type { CacheStateRef } from '../cache/types'; +import { modelsSchema } from '../chat-gpt/api'; +import { summarizeRefactorFilesResult } from '../refactor/types'; +import { evaluateFileScore } from './evaluateFileScore'; +import { extractRequirements } from './extractRequirements'; +import { avg } from './utils/avg'; + +const evaluateRefactorResultOptsSchema = z.object({ + /** + * Model to use for evaluation + */ + model: modelsSchema.optional(), + + /** + * Temperature to use with the LLM for evaluation + */ + temperature: z.number().optional(), + + /** + * Number of choices to request from the model + */ + choices: z.number(), + + /** + * Results of the refactor we are evaluating + */ + result: refactorResultSchema, + + index: z.number().optional(), +}); + +const evaluateRefactorResultOnceSchema = + evaluateRefactorResultOptsSchema.augment({ + /** + * Objective split into requirements, we do the splitting + * beforehand to avoid LLM trying to do multiple things at once and/or + * producing extra unnecessary variance + */ + requirements: z.array(z.string()).nonempty(), + }); + +const evaluateRefactorResultOnce = makeCachedFunction({ + inputSchema: evaluateRefactorResultOnceSchema, + resultSchema: z.object({ + key: z.string().optional(), + score: z.number(), + scorePerFile: z.record(z.number()), + }), + name: 'eval-refactor', + transform: async ( + opts: z.output, + ctx?: CacheStateRef + ) => { + const { + requirements, + result: { sandboxDirectoryPath, startCommit }, + model, + } = opts; + + const { accepted, discarded } = summarizeRefactorFilesResult({ + accepted: opts.result.accepted, + discarded: opts.result.discarded, + }); + + const stepsPerFile = from([ + ...Object.entries({ + ...discarded.resultsByFilePaths, + ...accepted.resultsByFilePaths, + }), + ]).pipe( + mergeMap(([filePath, results]) => + from(results).pipe( + mergeMap(({ file: result }) => + result.steps.map((step) => ({ + filePath, + task: step.task, + timestamp: step.timestamp, + commit: step.commit, + checkSummary: step.checkSummary, + })) + ), + (stream) => + zip( + stream, + stream.pipe( + scan( + (commitBeforeChanges, next) => + /** + * When an attempt to refactor a file + * resulted in no commits, take previous + * commit as a commitBeforeChanges. + */ next.commit + ? next.commit + : commitBeforeChanges, + startCommit + ), + // make this one lag one step behind + startWith(startCommit) + ) + ), + map(([step, commitBeforeChanges]) => ({ + ...step, + commitBeforeChanges, + })), + toArray(), + map((steps) => ({ + filePath, + steps, + })) + ) + ), + map(({ filePath, steps }) => { + const first = steps[0]; + const last = steps[steps.length - 1]; + if (first === last) { + return { + filePath, + steps, + }; + } else { + return { + filePath, + steps: [ + { + ...last, + commitBeforeChanges: + first?.commitBeforeChanges || startCommit, + filePath, + }, + ], + }; + } + }) + ); + + const evaluateStep = async ( + step: ObservedValueOf['steps'][number] + ) => { + const issues = [ + ...(step.checkSummary?.newIssues || []), + ...(step.checkSummary?.remainingIssues || []), + ].map((issue) => issue.issue); + + return { + ...(await evaluateFileScore( + { + sandboxDirectoryPath, + index: opts.index, + requirements, + filePath: step.filePath, + commitBeforeChanges: step.commitBeforeChanges, + commit: step.commit, + issues, + model, + }, + ctx + )), + filePath: step.filePath, + }; + }; + + const scorePerFile = await lastValueFrom( + stepsPerFile.pipe( + mergeMap(({ steps, filePath }) => { + return from(steps).pipe( + // + concatMap(evaluateStep), + toArray(), + map((steps) => { + assert( + steps.every( + (step) => step.filePath === filePath + ) + ); + return { + filePath, + score: avg(steps.map((r) => r.score)), + steps, + }; + }) + ); + }, 2), + toArray() + ) + ); + + const score = avg(scorePerFile.map((r) => r.score)); + + return { + key: ctx?.location, + score, + scorePerFile: Object.fromEntries( + scorePerFile.map((r) => [r.filePath, r.score]) + ), + }; + }, +}); + +export async function evaluateRefactorResult( + opts: z.output & { + times: number; + }, + ctx?: CacheStateRef +) { + const extractRequirementsResult = await extractRequirements({ + objective: opts.result.objective, + sandboxDirectoryPath: opts.result.sandboxDirectoryPath, + choices: 2, + temperature: opts.temperature, + }); + + const requirements = extractRequirementsResult.choices.reduce( + (acc, choice) => + choice.requirements.length > acc.length ? acc : choice.requirements, + extractRequirementsResult.choices[0].requirements + ); + + return await lastValueFrom( + range(0, opts.times).pipe( + mergeMap( + (index) => + evaluateRefactorResultOnce( + { ...opts, requirements, index }, + ctx + ), + 4 + ), + toArray(), + map((evaluations) => { + const score = avg(evaluations.map((r) => r.score)); + const filePaths = new Set( + evaluations.flatMap((r) => Object.keys(r.scorePerFile)) + ); + return { + score, + evaluations, + scorePerFile: Object.fromEntries( + [...filePaths].map((filePath) => { + const scores = evaluations + .map((r) => r.scorePerFile[filePath]) + .filter((s) => s !== undefined) as number[]; + return [filePath, avg(scores)]; + }) + ), + }; + }) + ) + ); +} diff --git a/packages/refactor-bot/src/evaluate/evaluateUnchangedFile.ts b/packages/refactor-bot/src/evaluate/evaluateUnchangedFile.ts new file mode 100644 index 0000000..a118a5a --- /dev/null +++ b/packages/refactor-bot/src/evaluate/evaluateUnchangedFile.ts @@ -0,0 +1,181 @@ +import { z } from 'zod'; + +import { makeCachedFunction } from '../cache/makeCachedFunction'; +import type { CacheStateRef } from '../cache/types'; +import type { RegularAssistantMessage } from '../chat-gpt/api'; +import { markdown } from '../markdown/markdown'; +import { formatBulletList } from '../prompt-formatters/formatBulletList'; +import { formatFileContents } from '../prompt-formatters/formatFileContents'; +import { formatOptional } from '../prompt-formatters/formatOptional'; +import { + prompt, + promptParametersFrom, + refactorConfigPromptOptsSchema, +} from '../refactor/prompt'; +import { parseJsonResponse } from '../response-parsers/parseJsonResponse'; +import { format } from '../text/format'; +import { line } from '../text/line'; +import { ensureHasOneElement } from '../utils/hasOne'; +import { evaluateFileResultSchema } from './evaluateFileChanges'; + +export const evaluateUnchangedFileInput = + refactorConfigPromptOptsSchema.augment({ + requirements: z.array(z.string()).nonempty(), + filePath: z.string(), + fileContents: z.string(), + issues: z.array(z.string()).optional(), + index: z.number().optional(), + choices: z.number().optional(), + temperature: z.number().optional(), + }); + +const systemPromptText = markdown` + Think step by step. Be concise and to the point. Do not make assumptions + other than what was given in the instructions. +`; + +const promptText = (opts: { + requirements: [string, ...string[]]; + filePath: string; + fileContents: string; + issues?: string[]; +}) => + format( + markdown` + + %requirements% + + + Given above requirements, an algorithm has decided to include the + file \`%filePath%\` into refactoring, but after that, decided not to + make modifications to that file. We now want to assess whether the + decision aligns with the requirements. + + Below information is available to you and applies only to the file + \`%filePath%\`: + + %fileContents% + + %issues% + + Assess the decision made by the algorithm and determine whether the + requirement was satisfied or not. Do not add new requirements. Do + not rephrase requirements. Do not change order of the requirements. + Do not assess the quality of the implementation in relation to the + requirement, but focus on the outcome. The number of requirements + returned should be the same as the number of requirements given. + + Return the following JSON result when you are done: + + ~~~json + { + "summary": "", + "requirements": [ + { + "description": "", + "satisfied": true + } + ] + } + ~~~ + + In your final response, only include the JSON in the above format + wrapped with the code block. Do not include any other text or + output. + `, + { + requirements: formatBulletList({ + items: ensureHasOneElement(opts.requirements), + heading: `List of requirements:`, + }), + filePath: opts.filePath, + fileContents: formatOptional({ + heading: 'File contents:', + text: formatFileContents({ + fileContents: opts.fileContents, + filePath: opts.filePath, + }), + }), + issues: opts.issues + ? formatBulletList({ + heading: 'Following issues were found in the file:', + empty: line` + We've checked the file for issues - no issues were + found. + `, + items: opts.issues, + }) + : '', + } + ); + +export const evaluateUnchangedFile = makeCachedFunction({ + name: 'eval-file', + inputSchema: evaluateUnchangedFileInput, + resultSchema: evaluateFileResultSchema, + transform: async ( + input: z.input, + ctx?: CacheStateRef + ) => { + const { filePath, requirements, fileContents, issues } = input; + + const validateResponse = (message: RegularAssistantMessage) => + parseJsonResponse( + message.content, + z.object({ + summary: z.string(), + requirements: z.array( + z.object({ + description: z.string(), + satisfied: z.boolean(), + }) + ), + }) + ); + + const promptParams = promptParametersFrom( + { + ...input, + allowedFunctions: [], + }, + ctx + ); + + const result = await prompt( + { + preface: systemPromptText, + prompt: promptText({ + filePath, + requirements, + fileContents, + issues, + }), + temperature: input.temperature ?? 0.2, + choices: input.choices, + shouldStop: (message) => { + try { + validateResponse(message); + return true; + } catch (err) { + if (err instanceof z.ZodError) { + return err.message; + } + return String(err); + } + }, + ...promptParams, + seed: input.index?.toString(), + }, + ctx + ); + + return { + key: result.key, + choices: ensureHasOneElement( + result.choices.map((choice) => + validateResponse(choice.resultingMessage) + ) + ), + }; + }, +}); diff --git a/packages/refactor-bot/src/evaluate/extractRequirements.ts b/packages/refactor-bot/src/evaluate/extractRequirements.ts new file mode 100644 index 0000000..836322b --- /dev/null +++ b/packages/refactor-bot/src/evaluate/extractRequirements.ts @@ -0,0 +1,115 @@ +import { z } from 'zod'; + +import type { CacheStateRef } from '../cache/types'; +import type { RegularAssistantMessage } from '../chat-gpt/api'; +import { markdown } from '../markdown/markdown'; +import { formatZodError } from '../prompt-formatters/formatZodError'; +import { + prompt, + promptParametersFrom, + refactorConfigPromptOptsSchema, +} from '../refactor/prompt'; +import { parseJsonResponse } from '../response-parsers/parseJsonResponse'; +import { format } from '../text/format'; +import { ensureHasOneElement } from '../utils/hasOne'; + +export const extractRequirementsInput = refactorConfigPromptOptsSchema.augment({ + objective: z.string(), + choices: z.number().optional(), + temperature: z.number().optional(), +}); + +const requirementsArraySchema = z.array(z.string()).nonempty(); + +export const extractRequirementsResult = z.object({ + key: z.string().optional(), + choices: z.array( + z.object({ + requirements: requirementsArraySchema, + }) + ), +}); + +const systemPromptText = markdown` + Think step by step. Be concise and to the point. Do not make assumptions and + follow instructions exactly. +`; + +const promptText = (opts: { objective: string }) => + format( + markdown` + + %objective% + + + Given above objective, split it into requirements. + + Do not modify the requirements. Do not add new requirements. Do not + try to guess what the implementation would look like. Do not look + too far ahead. Do not rephrase the requirements. Only extract the + minimum which is explicitly specified. Do not include implied + requirements. There should be a minimum of 1 requirements as a + result. Strive to minimize the number of requirements. Requirements + should not be redundant. There should be no duplicate requirements. + + Return list of requirements in the following format: + + ~~~json + ["Requirement 1", "Requirement 2"] + ~~~ + `, + { + objective: opts.objective, + } + ); + +export const extractRequirements = async ( + input: z.input, + ctx?: CacheStateRef +) => { + const { objective } = input; + + const validateResponse = (message: RegularAssistantMessage) => + parseJsonResponse(message.content, requirementsArraySchema); + + const promptParams = promptParametersFrom(input, ctx); + + const result = await prompt( + { + preface: systemPromptText, + prompt: promptText({ + objective, + }), + temperature: input.temperature ?? 0.2, + choices: input.choices, + shouldStop: (message) => { + try { + validateResponse(message); + return true; + } catch (err) { + if (err instanceof z.ZodError) { + return formatZodError({ + error: err, + }); + } + return String(err); + } + }, + ...promptParams, + functionsConfig: { + ...promptParams.functionsConfig, + allowedFunctions: [], + }, + }, + ctx + ); + + return { + key: result.key, + choices: ensureHasOneElement( + result.choices.map((choice) => ({ + requirements: validateResponse(choice.resultingMessage), + })) + ), + }; +}; diff --git a/packages/refactor-bot/src/evaluate/utils/avg.ts b/packages/refactor-bot/src/evaluate/utils/avg.ts new file mode 100644 index 0000000..2afaa8e --- /dev/null +++ b/packages/refactor-bot/src/evaluate/utils/avg.ts @@ -0,0 +1,5 @@ +import { sum } from './sum'; + +export function avg(values: number[]) { + return sum(values) / values.length; +} diff --git a/packages/refactor-bot/src/evaluate/utils/sum.ts b/packages/refactor-bot/src/evaluate/utils/sum.ts new file mode 100644 index 0000000..7ab4fcd --- /dev/null +++ b/packages/refactor-bot/src/evaluate/utils/sum.ts @@ -0,0 +1,3 @@ +export function sum(values: number[]) { + return values.reduce((acc, value) => acc + value, 0); +} diff --git a/packages/refactor-bot/src/git/gitShowFileCommitSummary.ts b/packages/refactor-bot/src/git/gitShowFileCommitSummary.ts new file mode 100644 index 0000000..8c91357 --- /dev/null +++ b/packages/refactor-bot/src/git/gitShowFileCommitSummary.ts @@ -0,0 +1,41 @@ +import { gitFilesDiff } from './gitFilesDiff'; +import { gitShowFile } from './gitShowFile'; + +export async function gitShowFileCommitSummary(opts: { + location: string; + filePath: string; + ref: string; +}) { + return await gitShowFileCommitRangeSummary({ + ...opts, + from: `${opts.ref}~1`, + to: opts.ref, + }); +} + +export async function gitShowFileCommitRangeSummary(opts: { + location: string; + filePath: string; + from: string; + to: string; +}) { + const { location, filePath, from, to } = opts; + return { + filePath, + fileContentsBefore: await gitShowFile({ + location, + filePath, + ref: from, + }), + fileContentsAfter: await gitShowFile({ + location, + filePath, + ref: to, + }), + fileDiff: await gitFilesDiff({ + location, + filePaths: [filePath], + ref: `${from}...${to}`, + }), + }; +} diff --git a/packages/refactor-bot/src/logger/asyncFormatter.ts b/packages/refactor-bot/src/logger/asyncFormatter.ts index 42d78aa..ca7de8d 100644 --- a/packages/refactor-bot/src/logger/asyncFormatter.ts +++ b/packages/refactor-bot/src/logger/asyncFormatter.ts @@ -4,7 +4,7 @@ import type { TransformCallback } from 'stream'; import { onceAsync } from '../utils/onceAsync'; -export type FormatterMethod = (value: Y) => string | Y | Promise; +export type FormatterMethod = (value: unknown) => unknown; export type Formatters = { [key: string]: FormatterMethod; diff --git a/packages/refactor-bot/src/logger/formatters.ts b/packages/refactor-bot/src/logger/formatters.ts index 89a83ae..1e49f18 100644 --- a/packages/refactor-bot/src/logger/formatters.ts +++ b/packages/refactor-bot/src/logger/formatters.ts @@ -1,7 +1,27 @@ +import { relative } from 'path'; + import { glowFormat } from '../markdown/glowFormat'; import type { Formatters } from './asyncFormatter'; import { formatObject } from './formatObject'; +function formatLocation(input: string) { + const cwd = process.env['LOG_RELATIVE_TO_CWD'] ?? process.cwd(); + if (input.startsWith(cwd)) { + return relative(cwd, input); + } + return input; +} + +function formatLocationArray(input: unknown) { + if (Array.isArray(input)) { + return input.map((item) => + typeof item === 'string' ? formatLocation(item) : (item as unknown) + ); + } else { + return input; + } +} + export const formatters: Formatters = { objective: (input) => typeof input === 'string' ? glowFormat({ input }) : input, @@ -9,4 +29,13 @@ export const formatters: Formatters = { typeof input === 'object' && input !== null ? formatObject(input) : input, + location: (input) => { + if (typeof input === 'string') { + return formatLocation(input); + } else { + return input; + } + }, + executionLog: (input) => formatLocationArray(input), + resultFilePaths: (input) => formatLocationArray(input), }; diff --git a/packages/refactor-bot/src/package-manager/runCheckCommand.ts b/packages/refactor-bot/src/package-manager/runCheckCommand.ts index 3e483d2..d017fee 100644 --- a/packages/refactor-bot/src/package-manager/runCheckCommand.ts +++ b/packages/refactor-bot/src/package-manager/runCheckCommand.ts @@ -123,7 +123,7 @@ export async function runCheckCommandWithParser(opts: { outputParser: (output: string) => Issue[]; location: string; }) { - const { stdout, stderr } = await runPackageManagerScript({ + const { stdout, stderr, args } = await runPackageManagerScript({ packageManager: opts.packageManager, script: opts.script.args[0], args: opts.script.args.slice(1), @@ -151,16 +151,19 @@ export async function runCheckCommandWithParser(opts: { () => opts.location ); - return opts.outputParser(chooseOutput()).map((data) => { - const issue = data.issue.replaceAll(parentDirectoryRegex, ''); - return { - ...data, - issue, - filePath: isAbsolute(data.filePath) - ? relative(realLocation, data.filePath) - : data.filePath, - }; - }); + return { + args, + issues: opts.outputParser(chooseOutput()).map((data) => { + const issue = data.issue.replaceAll(parentDirectoryRegex, ''); + return { + ...data, + issue, + filePath: isAbsolute(data.filePath) + ? relative(realLocation, data.filePath) + : data.filePath, + }; + }), + }; } export async function runCheckCommand(opts: { diff --git a/packages/refactor-bot/src/playground.ts b/packages/refactor-bot/src/playground.ts index 6fee480..4f26e99 100644 --- a/packages/refactor-bot/src/playground.ts +++ b/packages/refactor-bot/src/playground.ts @@ -1,45 +1,56 @@ -import { gitFilesDiff } from './git/gitFilesDiff'; -import { gitShowFile } from './git/gitShowFile'; +import { join } from 'path'; +import { ignoreElements, tap } from 'rxjs'; + +import { bootstrap } from './bootstrap'; +import { evaluateFileChanges } from './evaluate/evaluateFileChanges'; +import { runEpic } from './event-bus'; +import { gitShowFileCommitSummary } from './git/gitShowFileCommitSummary'; import { logger } from './logger/logger'; -import { changeInfo } from './ts-morph/quick-info/changeInfo'; /** * @note this is a playground file, it is not used in production, I basically * run this file in watch mode to see if the changes I make to the codebase - * are working as expected as I don't have a good way to test with "ts-morph" - * in dependencies + * are working as expected - because sometimes you just don't want to commit to + * maintaining tests (especially if they require a lot of "test-cases" and + * setup "data" to go along with them - this is especially true for functions + * that work with codebase) * * @note want to test something? try it yourself: * pnpm tsx --watch ./src/playground.ts - * - * @note try Quokka VSCode extension for a better playground experience */ const location = - '/private/var/folders/kk/cj4kd1wx1rg16xq_jhdw75kc0000gn/T/.refactor-bot/sandboxes/replace-read-file-sync-unr87ijk'; + '/var/folders/kk/cj4kd1wx1rg16xq_jhdw75kc0000gn/T/.refactor-bot/sandboxes/test-likely-failure-oTH22sfc'; -const filePath = 'src/pipeline/dependencies.ts'; - -const fileDiff = await gitFilesDiff({ - location, - filePaths: [filePath], - ref: 'HEAD~1', -}); +runEpic((stream) => + stream.pipe( + tap((event) => { + logger.trace(event); + }), + ignoreElements() + ) +); logger.info( - await changeInfo({ - location, - filePath, - oldFileContents: await gitShowFile({ - location, - filePath, - ref: 'HEAD~1', - }), - newFileContents: await gitShowFile({ - location, - filePath, - ref: 'HEAD', - }), - fileDiff, + await bootstrap(async () => { + return await evaluateFileChanges( + { + sandboxDirectoryPath: location, + requirements: [ + 'Replace all usages of `readFile` from `fs/promises` module with `readFileSync` from `fs` module in packages/refactor-bot/src/cache/dependencies.ts`.', + ], + ...(await gitShowFileCommitSummary({ + location, + filePath: 'packages/refactor-bot/src/cache/dependencies.ts', + ref: '28d5f0d1f7985bd16e4d3cc4f26ffd53c1a6f94b', + })), + }, + { + location: join( + '.refactor-bot/playground-cache', + 'evaluateFileChanges' + ), + } + ); }) ); diff --git a/packages/refactor-bot/src/prompt-formatters/formatBulletList.ts b/packages/refactor-bot/src/prompt-formatters/formatBulletList.ts new file mode 100644 index 0000000..2e767ac --- /dev/null +++ b/packages/refactor-bot/src/prompt-formatters/formatBulletList.ts @@ -0,0 +1,17 @@ +export function formatBulletList(opts: { + items: string[]; + empty?: string; + heading?: string; +}) { + if (opts.items.length === 0) { + return opts.empty || ''; + } + + const list = opts.items.map((item) => `- ${item}`).join('\n'); + + if (opts.heading) { + return `${opts.heading}\n${list}`; + } + + return list; +} diff --git a/packages/refactor-bot/src/prompt-formatters/formatFencedCodeBlock.ts b/packages/refactor-bot/src/prompt-formatters/formatFencedCodeBlock.ts new file mode 100644 index 0000000..1223431 --- /dev/null +++ b/packages/refactor-bot/src/prompt-formatters/formatFencedCodeBlock.ts @@ -0,0 +1,26 @@ +import { markdown } from '../markdown/markdown'; +import { escapeRegExp } from '../utils/escapeRegExp'; + +export function formatFencedCodeBlock(opts: { + code: string; + language?: string; + marker?: '```' | '~~~'; +}) { + if (!opts.code) { + return ''; + } + + const marker = + opts.marker ?? opts.code.search(/^```/g) >= 0 ? '~~~' : '```'; + const candidateMarker = new RegExp(`^${escapeRegExp(marker)}`, 'g'); + + if (opts.code.search(candidateMarker) >= 0) { + throw new Error(`The code block contains the marker "${marker}"`); + } + + return markdown` + ${marker}${opts.language ?? ''} + ${opts.code} + ${marker} + `; +} diff --git a/packages/refactor-bot/src/prompt-formatters/formatFileContents.ts b/packages/refactor-bot/src/prompt-formatters/formatFileContents.ts new file mode 100644 index 0000000..90d8514 --- /dev/null +++ b/packages/refactor-bot/src/prompt-formatters/formatFileContents.ts @@ -0,0 +1,42 @@ +import { extname } from 'path'; + +import { formatFencedCodeBlock } from './formatFencedCodeBlock'; + +export function formatFileContents(opts: { + fileContents: string; + language?: string; + filePath?: string; +}) { + let { language } = opts; + + if (!language && opts.filePath) { + const ext = extname(opts.filePath); + + switch (ext) { + case '.js': + language = 'js'; + break; + case '.ts': + language = 'ts'; + break; + case '.json': + language = 'json'; + break; + case '.md': + language = 'markdown'; + break; + case '.sh': + language = 'sh'; + break; + case '.yml': + case '.yaml': + language = 'yaml'; + break; + } + } + + return formatFencedCodeBlock({ + code: opts.fileContents, + language, + }); +} diff --git a/packages/refactor-bot/src/prompt-formatters/formatFileDiff.ts b/packages/refactor-bot/src/prompt-formatters/formatFileDiff.ts new file mode 100644 index 0000000..e92080d --- /dev/null +++ b/packages/refactor-bot/src/prompt-formatters/formatFileDiff.ts @@ -0,0 +1,8 @@ +import { formatFencedCodeBlock } from './formatFencedCodeBlock'; + +export function formatFileDiff(opts: { fileDiff: string }) { + return formatFencedCodeBlock({ + code: opts.fileDiff, + language: 'diff', + }); +} diff --git a/packages/refactor-bot/src/prompt-formatters/formatOptional.ts b/packages/refactor-bot/src/prompt-formatters/formatOptional.ts new file mode 100644 index 0000000..8ff1a58 --- /dev/null +++ b/packages/refactor-bot/src/prompt-formatters/formatOptional.ts @@ -0,0 +1,12 @@ +export function formatOptional(opts: { + text?: string; + empty?: string; + heading?: string; + footer?: string; +}) { + if (!opts.text || opts.text.length === 0) { + return opts.empty || ''; + } + + return [opts.heading, opts.text, opts.footer].filter(Boolean).join('\n'); +} diff --git a/packages/refactor-bot/src/prompt-formatters/formatZodError.ts b/packages/refactor-bot/src/prompt-formatters/formatZodError.ts new file mode 100644 index 0000000..0ea189b --- /dev/null +++ b/packages/refactor-bot/src/prompt-formatters/formatZodError.ts @@ -0,0 +1,19 @@ +import type { z } from 'zod'; + +export function formatZodError(opts: { + error: z.ZodError; + heading?: string; + footer?: string; +}) { + const issues = opts.error.errors + .map((error) => `- ${error.message}`) + .join('\n'); + + return [ + opts.heading || 'Parsing failed, following issues found:', + issues, + opts.footer, + ] + .filter(Boolean) + .join('\n'); +} diff --git a/packages/refactor-bot/src/prompt/cli.ts b/packages/refactor-bot/src/prompt/cli.ts index b2cbc49..295389a 100644 --- a/packages/refactor-bot/src/prompt/cli.ts +++ b/packages/refactor-bot/src/prompt/cli.ts @@ -29,6 +29,12 @@ export const promptCommand: CommandModule< describe: 'Prompt for next action confirmation when new messages are received', default: false, + }) + .option('functions', { + type: 'string', + array: true, + describe: + 'Names of functions to allow in the prompt. If not specified, all functions are allowed.', }), handler: async (opts) => { try { diff --git a/packages/refactor-bot/src/prompt/run.ts b/packages/refactor-bot/src/prompt/run.ts index a7b4b79..b146efe 100644 --- a/packages/refactor-bot/src/prompt/run.ts +++ b/packages/refactor-bot/src/prompt/run.ts @@ -199,6 +199,7 @@ export const run = async (opts: { model?: Models; watch?: boolean; manual?: boolean; + functions?: string[]; }) => { const spinner = ora(); @@ -246,7 +247,7 @@ export const run = async (opts: { const convo = conversationState(conversationFile); await convo.load(); - const functions = await includeFunctions('all'); + const functions = await includeFunctions(opts.functions ?? 'all'); await goToEndOfFile(conversationFile); diff --git a/packages/refactor-bot/src/refactor/actions/acceptedEdit.ts b/packages/refactor-bot/src/refactor/actions/acceptedEdit.ts index a4704eb..88b2697 100644 --- a/packages/refactor-bot/src/refactor/actions/acceptedEdit.ts +++ b/packages/refactor-bot/src/refactor/actions/acceptedEdit.ts @@ -1,7 +1,7 @@ import type { z } from 'zod'; import { declareAction } from '../../event-bus'; -import type { refactorResultSchema } from '../types'; +import type { refactorFileResultSchema } from '../types'; /** * When result of a file edit is accepted, an accepted edit will @@ -10,5 +10,5 @@ import type { refactorResultSchema } from '../types'; */ export const acceptedEdit = declareAction( 'acceptedEdit', - (result: z.infer) => result + (result: z.infer) => result ); diff --git a/packages/refactor-bot/src/refactor/actions/discardedEdit.ts b/packages/refactor-bot/src/refactor/actions/discardedEdit.ts index a9816b1..39b33f6 100644 --- a/packages/refactor-bot/src/refactor/actions/discardedEdit.ts +++ b/packages/refactor-bot/src/refactor/actions/discardedEdit.ts @@ -1,12 +1,12 @@ import type { z } from 'zod'; import { declareAction } from '../../event-bus'; -import type { refactorResultSchema } from '../types'; +import type { refactorFileResultSchema } from '../types'; /** * When result of a file edit is discarded */ export const discardedEdit = declareAction( 'discardedEdit', - (result: z.infer) => result + (result: z.infer) => result ); diff --git a/packages/refactor-bot/src/refactor/check.ts b/packages/refactor-bot/src/refactor/check.ts index 12badff..359ba19 100644 --- a/packages/refactor-bot/src/refactor/check.ts +++ b/packages/refactor-bot/src/refactor/check.ts @@ -53,7 +53,11 @@ export const check = makeCachedFunction({ ); return { location: opts.location, - issues: results.reduce((acc, result) => acc.concat(result), []), + commands: results.map((result) => result.args.join(' ')), + issues: results.reduce( + (acc, result) => acc.concat(result.issues), + [] as (typeof results)[number]['issues'] + ), checkedFiles: opts.filePaths, }; }, @@ -140,6 +144,7 @@ export const checksSummary = (opts: { newIssues, remainingIssues, resolvedIssues, + commands: opts.checkResult.commands, totalNumberOfIssues: newIssues.length + remainingIssues.length, }; }; diff --git a/packages/refactor-bot/src/refactor/checkoutSandbox.ts b/packages/refactor-bot/src/refactor/checkoutSandbox.ts index 79f7402..eee1b19 100644 --- a/packages/refactor-bot/src/refactor/checkoutSandbox.ts +++ b/packages/refactor-bot/src/refactor/checkoutSandbox.ts @@ -78,11 +78,6 @@ export const checkoutSandbox = makeCachedFunction({ } else { logger.trace(`Creating sandbox from "${root}"`); - /** - * @todo: this might copy some files that might be - * sensitive, we should probably introduce a way to - * ignore some files (ie .env) - */ await createSandbox({ tag: config.name, source: root, diff --git a/packages/refactor-bot/src/refactor/cli.ts b/packages/refactor-bot/src/refactor/cli.ts index f2d7b35..af59c7b 100644 --- a/packages/refactor-bot/src/refactor/cli.ts +++ b/packages/refactor-bot/src/refactor/cli.ts @@ -22,24 +22,37 @@ export const refactorCommand: CommandModule< type: 'string', describe: line` Unique id of the refactoring that was previously run but - didn't finish to start from last successful point + didn't finish - use this to start from last successful step `, }) .option('save-to-cache', { type: 'boolean', describe: line` Whether to enable saving results to the cache, by default - it's enabled + it's enabled. `, default: true, + hidden: true, }) .option('enable-cache-for', { type: 'string', array: true, describe: line` - Enable cache for specific steps only, can be useful if we - want to disable cache for all other steps and replay them + Enable cache for specific steps - you can specify the name + of the step or a name followed by a hash of the cache entry. + This is for debugging purposes only. `, + hidden: true, + }) + .option('disable-cache-for', { + type: 'string', + array: true, + describe: line` + Disable cache for specific steps - you can specify the name + of the step or a name followed by a hash of the cache entry. + This is for debugging purposes only. + `, + hidden: true, }) .option('costs', { type: 'boolean', diff --git a/packages/refactor-bot/src/refactor/collectLlmUsage.ts b/packages/refactor-bot/src/refactor/collectLlmUsage.ts new file mode 100644 index 0000000..70bc800 --- /dev/null +++ b/packages/refactor-bot/src/refactor/collectLlmUsage.ts @@ -0,0 +1,145 @@ +import type { ObservedValueOf } from 'rxjs'; +import { filter, type Observable, scan, startWith } from 'rxjs'; +import type { z } from 'zod'; + +import { explainCacheKey } from '../cache/cache'; +import { calculatePrice } from '../chat-gpt/api'; +import { actions, type AnyAction } from '../event-bus'; +import { ofTypes } from '../event-bus/operators'; +import { gptRequestSuccess } from './actions/gptRequestSuccess'; +import type { llmUsageEntrySchema } from './types'; + +type Usage = z.output; + +export const collectLlmUsage = + (opts?: { key?: string }) => (input: Observable) => + input.pipe( + ofTypes(gptRequestSuccess), + filter((event) => { + if (!opts?.key) { + return true; + } + + if (event.data.key?.startsWith(opts.key)) { + return true; + } + return false; + }), + scan((usageByStep, event) => { + const steps = explainCacheKey(event.data.key) ?? [ + { name: 'unknown', hash: 'unknown' }, + ]; + + const usage = event.data.response.usage; + + usageByStep.push({ + model: event.data.model, + steps: steps.map((x) => x.name), + usage, + }); + + return usageByStep; + }, [] as Array), + startWith([] as Array) + ); + +export function startCollectingLlmUsage( + opts?: { key?: string }, + deps = { actions } +) { + let result: ObservedValueOf>>; + const subscription = deps + .actions() + .pipe(collectLlmUsage(opts)) + .subscribe({ + next: (usage) => { + result = usage; + }, + }); + + return { + getUsage() { + return result; + }, + finishCollecting() { + subscription.unsubscribe(); + return result; + }, + }; +} + +export function summarizeLlmUsagePrice(params: { usage: Array }) { + const priceBySteps = new Map< + string, + { + promptPrice: number; + completionPrice: number; + totalPrice: number; + } + >(); + + let totalPrice = 0; + + for (const { model, usage, steps } of params.usage) { + const price = calculatePrice({ model, usage }); + + for (const step of steps) { + const current = priceBySteps.get(step) ?? { + promptPrice: 0, + completionPrice: 0, + totalPrice: 0, + }; + + priceBySteps.set(step, { + promptPrice: current.promptPrice + price.promptPrice, + completionPrice: + current.completionPrice + price.completionPrice, + totalPrice: current.totalPrice + price.totalPrice, + }); + } + + totalPrice += price.totalPrice; + } + + return { + priceBySteps, + totalPrice, + }; +} + +export function summarizeLlmUsageTokens(params: { usage: Array }) { + const tokensBySteps = new Map< + string, + { + promptTokens: number; + completionTokens: number; + totalTokens: number; + } + >(); + + let totalTokens = 0; + + for (const { usage, steps } of params.usage) { + for (const step of steps) { + const current = tokensBySteps.get(step) ?? { + promptTokens: 0, + completionTokens: 0, + totalTokens: 0, + }; + + tokensBySteps.set(step, { + promptTokens: current.promptTokens + usage.promptTokens, + completionTokens: + current.completionTokens + usage.completionTokens, + totalTokens: current.totalTokens + usage.totalTokens, + }); + } + + totalTokens += usage.totalTokens; + } + + return { + tokensBySteps, + totalTokens, + }; +} diff --git a/packages/refactor-bot/src/refactor/determineModelParameters.ts b/packages/refactor-bot/src/refactor/determineModelParameters.ts index 867306c..04e8e8e 100644 --- a/packages/refactor-bot/src/refactor/determineModelParameters.ts +++ b/packages/refactor-bot/src/refactor/determineModelParameters.ts @@ -1,18 +1,29 @@ import micromatch from 'micromatch'; -import type { z } from 'zod'; +import { z } from 'zod'; import { logger } from '../logger/logger'; import { hasOneElement } from '../utils/hasOne'; -import type { refactorConfigSchema } from './types'; +import { refactorConfigSchema } from './types'; -type ModelOpts = Pick< - z.output, - 'model' | 'modelByStepCode' | 'useMoreExpensiveModelsOnRetry' -> & { - attempt?: number; -}; +export const refactorConfigModelParamsSchema = refactorConfigSchema + .pick({ + model: true, + modelByStepCode: true, + useMoreExpensiveModelsOnRetry: true, + }) + .augment({ + attempt: z.number().optional(), + }); + +export type RefactorConfigModelParams = z.input< + typeof refactorConfigModelParamsSchema +>; -function determineModel(input: ModelOpts, ctx?: { location?: string }) { +function determineModel( + inputRaw: RefactorConfigModelParams, + ctx?: { location?: string } +) { + const input = refactorConfigModelParamsSchema.parse(inputRaw); if (ctx?.location) { const location = ctx.location; const matchingKeys = [...Object.keys(input.modelByStepCode)].filter( @@ -30,9 +41,10 @@ function determineModel(input: ModelOpts, ctx?: { location?: string }) { } export function determineModelParameters( - input: ModelOpts, + inputRaw: RefactorConfigModelParams, ctx?: { location?: string } ) { + const input = refactorConfigModelParamsSchema.parse(inputRaw); const model = determineModel(input, ctx); const result = diff --git a/packages/refactor-bot/src/refactor/filesToEdit.ts b/packages/refactor-bot/src/refactor/filesToEdit.ts new file mode 100644 index 0000000..a6166ec --- /dev/null +++ b/packages/refactor-bot/src/refactor/filesToEdit.ts @@ -0,0 +1,110 @@ +import { z } from 'zod'; + +import { makeCachedFunction } from '../cache/makeCachedFunction'; +import { markdown } from '../markdown/markdown'; +import { format } from '../text/format'; +import { validateAndParseListOfFiles } from './parsers/validateAndParseListOfFiles'; +import { + prompt, + promptParametersFrom, + refactorConfigPromptOptsSchema, +} from './prompt'; + +export const determineFilesToEditInputSchema = + refactorConfigPromptOptsSchema.augment({ + objective: z.string(), + }); + +export const determineFilesToEditResultSchema = z.object({ + /** + * List of files allowed to be edited or refactored when mentioned in the + * objective, in the same order as mentioned in the objective. + * + * If the objective does not mention any files, this field is empty. + */ + filesToEdit: z.array(z.string()), +}); + +const systemPrompt = markdown` + Think step by step. Be concise and to the point. Do not make assumptions and + follow instructions exactly. +`; + +const promptText = (opts: { objective: string }) => + format( + markdown` + + %objective% + + + Given the above objective that implies modifications to source code, + determine the list of files the user wants us to edit or refactor. + + 1. Check the objective for mentions of files to be changed, edited + or refactored. Could be a single file or multiple files. + + 2. If there is no mention of files respond "No mention of files to + be edited or refactored". + + 3. If the resulting list from step #1 is not empty, format it - + return a numbered bullet list as shown below. File paths should + be surrounded by a backtick. Retain the order of files as they + appear in the objective. + + + #. \`path/to/file.ts\` + #. \`path/to/another/file.ts\` + + `, + { + objective: opts.objective, + } + ); + +export const determineFilesToEdit = makeCachedFunction({ + name: 'files-to-edit', + inputSchema: determineFilesToEditInputSchema, + resultSchema: determineFilesToEditResultSchema, + transform: async (input, ctx) => { + const promptParams = promptParametersFrom( + { + ...input, + /** + * @note we do not want to allow any functions for this prompt + */ + allowedFunctions: [], + }, + ctx + ); + + const allowedFilesResult = await prompt( + { + preface: systemPrompt, + prompt: promptText({ + objective: input.objective, + }), + temperature: 0.2, + ...promptParams, + shouldStop: async (message) => { + await validateAndParseListOfFiles({ + sandboxDirectoryPath: input.sandboxDirectoryPath, + text: message.content, + sortBySize: false, + }); + return true as const; + }, + }, + ctx + ); + + const filesToEdit = await validateAndParseListOfFiles({ + sandboxDirectoryPath: input.sandboxDirectoryPath, + text: allowedFilesResult.choices[0].resultingMessage.content, + sortBySize: false, + }); + + return { + filesToEdit, + }; + }, +}); diff --git a/packages/refactor-bot/src/refactor/findRefactorStateLocation.ts b/packages/refactor-bot/src/refactor/findRefactorStateLocation.ts new file mode 100644 index 0000000..06bddc5 --- /dev/null +++ b/packages/refactor-bot/src/refactor/findRefactorStateLocation.ts @@ -0,0 +1,44 @@ +import { globby } from 'globby'; +import { basename, dirname } from 'path'; + +import { ConfigurationError } from '../errors/configurationError'; +import { findRepositoryRoot } from '../file-system/findRepositoryRoot'; +import { hasOneElement } from '../utils/hasOne'; + +/** + * Finds the location of the state files for a given refactor id, the state + * files will store temporary information about the refactor, cached data and + * other information that is not needed to be stored in the git repository. + */ +export async function findRefactorStateLocation(opts: { + id: string; + location?: string; +}) { + if (!opts.id) { + throw new Error('id is required'); + } + + const repoRoot = await findRepositoryRoot(opts.location); + + const location = await globby( + `.refactor-bot/refactors/*/state/${opts.id}/`, + { + cwd: repoRoot, + onlyDirectories: true, + } + ); + + if (!hasOneElement(location)) { + throw new ConfigurationError( + `Cannot find files to load state from for id "${opts.id}"` + ); + } + + const name = basename(dirname(dirname(location[0]))); + + return { + id: opts.id, + name, + location: location[0], + }; +} diff --git a/packages/refactor-bot/src/refactor/planAndRefactor.ts b/packages/refactor-bot/src/refactor/planAndRefactor.ts index 982283b..f403985 100644 --- a/packages/refactor-bot/src/refactor/planAndRefactor.ts +++ b/packages/refactor-bot/src/refactor/planAndRefactor.ts @@ -2,19 +2,17 @@ import { z } from 'zod'; import { CycleDetectedError } from '../errors/cycleDetectedError'; import { logger } from '../logger/logger'; +import { line } from '../text/line'; import { scriptSchema } from './check'; import { planFiles, planFilesResultSchema } from './planFiles'; import { refactorBatch } from './refactorBatch'; import { resetToLastAcceptedCommit } from './resetToLastAcceptedCommit'; import type { RefactorFilesResult } from './types'; -import { - mutateToMergeRefactorFilesResults, - refactorConfigSchema, - refactorFilesResultSchema, -} from './types'; +import { refactorConfigSchema, refactorFilesResultSchema } from './types'; export const planAndRefactorInputSchema = refactorConfigSchema.augment({ objective: z.string(), + requirements: z.array(z.string()).nonempty(), startCommit: z.string(), sandboxDirectoryPath: z.string(), scripts: z.array(scriptSchema), @@ -32,8 +30,8 @@ export const planAndRefactor = async ( input: z.input ) => { const files: RefactorFilesResult = { - accepted: {}, - discarded: {}, + accepted: [], + discarded: [], }; const planFilesResults: Array> = []; @@ -45,7 +43,7 @@ export const planAndRefactor = async ( rawResponse: planResult.rawResponse, }); - const { plannedFiles } = planResult; + const plannedFiles = [...planResult.plannedFiles]; while (plannedFiles.length > 0) { const result = await refactorBatch({ @@ -58,10 +56,8 @@ export const planAndRefactor = async ( result, }); - mutateToMergeRefactorFilesResults({ - from: result, - into: files, - }); + files.accepted.push(...result.accepted); + files.discarded.push(...result.discarded); const repeatedPlanResult = await planFiles(input).catch((err) => { if (err instanceof CycleDetectedError) { @@ -71,7 +67,11 @@ export const planAndRefactor = async ( * list instead. */ logger.warn( - 'Cycle detected when planning files to change, this is likely result of the last batch of changes not producing any changes.', + line` + Cycle detected when planning files to change, this is + likely result of the last batch of file edits not + producing any changes + `, { error: err, result, diff --git a/packages/refactor-bot/src/refactor/prompt.ts b/packages/refactor-bot/src/refactor/prompt.ts index 1cae15c..0b47d35 100644 --- a/packages/refactor-bot/src/refactor/prompt.ts +++ b/packages/refactor-bot/src/refactor/prompt.ts @@ -1,11 +1,20 @@ import hash from 'object-hash'; import type { Observable } from 'rxjs'; -import { defer, EMPTY, from, lastValueFrom } from 'rxjs'; -import { expand, mergeAll, mergeMap, toArray } from 'rxjs/operators'; +import { defer, EMPTY, from, lastValueFrom, of } from 'rxjs'; +import { + catchError, + expand, + map, + mergeAll, + mergeMap, + switchMap, + toArray, +} from 'rxjs/operators'; import type { TypeOf } from 'zod'; import { z } from 'zod'; import { makeCachedFunction } from '../cache/makeCachedFunction'; +import type { CacheStateRef } from '../cache/types'; import type { Message } from '../chat-gpt/api'; import { calculatePrice, @@ -27,6 +36,11 @@ import { ensureHasOneElement } from '../utils/hasOne'; import { isTruthy } from '../utils/isTruthy'; import { gptRequestFailed } from './actions/gptRequestFailed'; import { gptRequestSuccess } from './actions/gptRequestSuccess'; +import { + determineModelParameters, + refactorConfigModelParamsSchema, +} from './determineModelParameters'; +import { refactorConfigSchema } from './types'; export const promptInputSchema = z.object({ preface: z.string().optional(), @@ -46,9 +60,12 @@ export const promptInputSchema = z.object({ .optional(), model: modelsSchema.optional().default('gpt-3.5-turbo'), choices: z.number().optional(), + dedupe: z.boolean().optional(), + seed: z.string().optional(), }); export const promptResultSchema = z.object({ + key: z.string().optional(), choices: z .array( z.object({ @@ -210,6 +227,39 @@ function removeFunctionFromState( }); } +export type RefactorConfigPromptOpts = z.input< + typeof refactorConfigPromptOptsSchema +>; + +export const refactorConfigPromptOptsSchema = refactorConfigSchema + .pick({ + budgetCents: true, + scope: true, + tsConfigJsonFileName: true, + allowedFunctions: true, + }) + .merge(refactorConfigModelParamsSchema) + .augment({ + sandboxDirectoryPath: z.string(), + }); + +export const promptParametersFrom = ( + inputRaw: RefactorConfigPromptOpts, + ctx?: CacheStateRef +): Omit, 'temperature' | 'prompt'> => { + const input = refactorConfigPromptOptsSchema.parse(inputRaw); + return { + budgetCents: input.budgetCents, + functionsConfig: { + repositoryRoot: input.sandboxDirectoryPath, + scope: input.scope, + tsConfigJsonFileName: input.tsConfigJsonFileName, + allowedFunctions: input.allowedFunctions, + }, + ...determineModelParameters(input, ctx), + }; +}; + export const prompt = makeCachedFunction({ name: 'prompt', inputSchema: promptInputSchema, @@ -234,7 +284,7 @@ export const prompt = makeCachedFunction({ }; const stream = from([initialState()]).pipe( - expand((state, i) => { + expand((state) => { const lastMessage = state.messages[state.messages.length - 1]; if (!lastMessage) { throw new Error('Invalid state, no last message found'); @@ -254,18 +304,13 @@ export const prompt = makeCachedFunction({ opts.functionsConfig.allowedFunctions, model: opts.model, temperature: opts.temperature, - /** - * @note if this is the first prompt, we - * ask for multiple choices answer to start - * multiple branches of conversation - */ - ...(i === 0 && { - choices: opts.choices, - }), + choices: opts.choices, }, ctx ); + const shouldDedupe = opts.dedupe ?? false; + const unique = new Map( result.response.choices.map( (choice) => @@ -273,12 +318,15 @@ export const prompt = makeCachedFunction({ ) ); - const nextStateChoices = [...unique.values()].map( - (choice) => ({ - status: choice.finishReason, - messages: [...state.messages, choice.message], - }) - ); + const nextStateChoices = shouldDedupe + ? [...unique.values()].map((choice) => ({ + status: choice.finishReason, + messages: [...state.messages, choice.message], + })) + : result.response.choices.map((choice) => ({ + status: choice.finishReason, + messages: [...state.messages, choice.message], + })); if ( nextStateChoices.length === 1 && @@ -337,9 +385,9 @@ export const prompt = makeCachedFunction({ ); return defer(async () => { - const result = await Promise.resolve( - shouldStop(lastMessage) - ).catch((e) => + const result = await new Promise((resolve) => { + resolve(shouldStop(lastMessage)); + }).catch((e) => e instanceof Error ? e.message : String(e) ); @@ -370,24 +418,52 @@ export const prompt = makeCachedFunction({ } }), mergeMap((state) => { - // everything that gets into expand gets out of it, so we must - // filter out the initial state + // everything that gets into expand and what gets out of it + // will be result of the expand operator, so we must + // filter out only the stop messages that are not failed if (state.status !== 'stop') { return EMPTY; } - return [ - { - resultingMessage: regularAssistantMessageSchema.parse( + const shouldStop = opts.shouldStop; + if (shouldStop) { + // we also need to filter out the stop messages that are + // failed when shouldStop was called with their content + return defer(async () => { + const lastMessage = regularAssistantMessageSchema.parse( state.messages[state.messages.length - 1], { errorMap: () => ({ message: `Invalid algorithm, the last message in conversation doesn't conform to the expected schema`, }), } - ), - }, - ]; + ); + + const shouldUseStopMessage = + await shouldStop(lastMessage); + + return shouldUseStopMessage === true; + }).pipe( + catchError(() => of(false)), + switchMap((shouldUseStopMessage) => + shouldUseStopMessage ? of(state) : EMPTY + ) + ); + } + + return of(state); + }), + map((state) => { + return { + resultingMessage: regularAssistantMessageSchema.parse( + state.messages[state.messages.length - 1], + { + errorMap: () => ({ + message: `Invalid algorithm, the last message in conversation doesn't conform to the expected schema`, + }), + } + ), + }; }), toArray() ); @@ -395,6 +471,7 @@ export const prompt = makeCachedFunction({ const choices = ensureHasOneElement(await lastValueFrom(stream)); return { + key: ctx.location, choices, }; }, diff --git a/packages/refactor-bot/src/refactor/refactor.ts b/packages/refactor-bot/src/refactor/refactor.ts index 6de06d8..fc41bc4 100644 --- a/packages/refactor-bot/src/refactor/refactor.ts +++ b/packages/refactor-bot/src/refactor/refactor.ts @@ -1,3 +1,5 @@ +import { writeFile } from 'fs/promises'; +import { dump } from 'js-yaml'; import { join } from 'path'; import { createCachedPipeline } from '../cache/state'; @@ -13,19 +15,41 @@ import { randomText } from '../utils/randomText'; import { checkoutSandbox } from './checkoutSandbox'; import { enrichObjective } from './enrichObjective'; import { refactorGoal } from './refactorGoal'; -import { resultsCollector } from './resultsCollector'; +import { + collectedRefactorResultSchema, + resultsCollector, +} from './resultsCollector'; import { retrieveParameters } from './retrieveParameters'; import { type RefactorConfig } from './types'; -const createPipeline = (opts: { - location: string; - saveToCache: boolean; +export async function refactor(opts: { + config: RefactorConfig; + saveToCache?: boolean; enableCacheFor?: string[]; -}) => - createCachedPipeline({ - location: opts.location, + disableCacheFor?: string[]; +}) { + if ( + opts.enableCacheFor && + opts.enableCacheFor.filter(Boolean).length === 0 + ) { + throw new Error('enableCacheFor cannot be empty'); + } + + const root = process.env['CACHE_ROOT'] ?? (await findRepositoryRoot()); + + const id = opts.config.id ?? randomText(8); + + const location = join( + root, + `.refactor-bot/refactors/${opts.config.name}/state/`, + id + ); + + const { execute, abort } = createCachedPipeline({ + location, enableCacheFor: opts.enableCacheFor, - saveToCache: opts.saveToCache, + saveToCache: opts.saveToCache ?? true, + disableCacheFor: opts.disableCacheFor, pipeline: async (input: RefactorConfig) => { const checkoutResult = await checkoutSandbox(input); @@ -58,46 +82,6 @@ const createPipeline = (opts: { }, }); -async function loadRefactorState(opts: { - config: RefactorConfig; - saveToCache?: boolean; - enableCacheFor?: string[]; -}) { - const root = await findRepositoryRoot(); - - const id = opts.config.id ?? randomText(8); - - const location = join( - root, - `.refactor-bot/refactors/${opts.config.name}/state/`, - id - ); - - return { - ...createPipeline({ - location, - saveToCache: opts.saveToCache ?? true, - enableCacheFor: opts.enableCacheFor, - }), - location, - id, - }; -} - -export async function refactor(opts: { - config: RefactorConfig; - saveToCache?: boolean; - enableCacheFor?: string[]; -}) { - if ( - opts.enableCacheFor && - opts.enableCacheFor.filter(Boolean).length === 0 - ) { - throw new Error('enableCacheFor cannot be empty'); - } - - const { execute, abort, id } = await loadRefactorState(opts); - logger.info( format( line` @@ -152,7 +136,13 @@ export async function refactor(opts: { teardown(); } - const result = finalizeResults(error); + const result = finalizeResults(opts.config, error); + + await writeFile( + join(location, 'result.yaml'), + dump(collectedRefactorResultSchema.parse(result)), + 'utf-8' + ); const lastCommit = await gitRevParse({ location: result.sandboxDirectoryPath, diff --git a/packages/refactor-bot/src/refactor/refactorBatch.ts b/packages/refactor-bot/src/refactor/refactorBatch.ts index 78aed6a..d30e8cc 100644 --- a/packages/refactor-bot/src/refactor/refactorBatch.ts +++ b/packages/refactor-bot/src/refactor/refactorBatch.ts @@ -1,8 +1,7 @@ +import assert from 'assert'; import { z } from 'zod'; -import { AbortError } from '../errors/abortError'; -import { CycleDetectedError } from '../errors/cycleDetectedError'; -import { OutOfContextBoundsError } from '../errors/outOfContextBoundsError'; +import { evaluateFileScore } from '../evaluate/evaluateFileScore'; import { dispatch } from '../event-bus'; import { gitResetHard } from '../git/gitResetHard'; import { gitRevParse } from '../git/gitRevParse'; @@ -11,12 +10,8 @@ import { acceptedEdit } from './actions/acceptedEdit'; import { discardedEdit } from './actions/discardedEdit'; import { scriptSchema } from './check'; import { refactorFile } from './refactorFile'; -import type { RefactorFileResult, RefactorFilesResult } from './types'; -import { - pushRefactorFileResults, - refactorConfigSchema, - refactorResultSchema, -} from './types'; +import type { RefactorFilesResult } from './types'; +import { refactorConfigSchema } from './types'; export const refactorBatchInputSchema = refactorConfigSchema .pick({ @@ -27,21 +22,15 @@ export const refactorBatchInputSchema = refactorConfigSchema modelByStepCode: true, useMoreExpensiveModelsOnRetry: true, allowedFunctions: true, + evaluate: true, + evaluateMinScore: true, }) .augment({ objective: z.string(), + requirements: z.array(z.string()).nonempty(), plannedFiles: z.array(z.string()), startCommit: z.string(), sandboxDirectoryPath: z.string(), - /** - * Controls whether to accept the result of a refactor or not. - */ - shouldAcceptResult: z - .function( - z.tuple([refactorResultSchema]), - z.promise(z.boolean()).or(z.boolean()) - ) - .optional(), scripts: z.array(scriptSchema), prettierScriptLocation: z.string().optional(), }); @@ -52,12 +41,8 @@ export const refactorBatch = async ( ) => { const { plannedFiles } = await refactorBatchInputSchema.parseAsync(input); - const shouldAcceptResult = - input.shouldAcceptResult ?? - ((result) => result.status === 'success' && Boolean(result.lastCommit)); - - const accepted: RefactorFilesResult['accepted'] = {}; - const discarded: RefactorFilesResult['discarded'] = {}; + const accepted: RefactorFilesResult['accepted'] = []; + const discarded: RefactorFilesResult['discarded'] = []; for (const filePath of plannedFiles) { const beforeRefactorCommit = await gitRevParse({ @@ -65,36 +50,32 @@ export const refactorBatch = async ( ref: 'HEAD', }); - const { file } = await refactorFile({ + const refactorFileResult = await refactorFile({ filePath, ...input, - }).catch((err) => { - if ( - err instanceof AbortError && - !(err instanceof CycleDetectedError) && - !(err instanceof OutOfContextBoundsError) - ) { - return Promise.reject(err); - } - - /** - * @note this is temporary, ideally the refactorFile - * function should not throw and return a failure result - */ - return { - file: { - status: 'failure', - failureDescription: - err instanceof Error ? err.message : String(err), - filePath, - issues: [], - steps: [], - timestamp: performance.now(), - }, - } as RefactorFileResult; }); - const shouldAccept = await Promise.resolve(shouldAcceptResult(file)); + const { file } = refactorFileResult; + + let shouldAccept = + file.status === 'success' && Boolean(file.lastCommit); + + if (shouldAccept && input.evaluate) { + const lastCommit = file.lastCommit; + assert(lastCommit); + + refactorFileResult.evaluation = await evaluateFileScore({ + ...input, + filePath: file.filePath, + commitBeforeChanges: beforeRefactorCommit, + commit: lastCommit, + choices: 3, + }); + + shouldAccept = + refactorFileResult.evaluation.score >= + (input.evaluateMinScore ?? 0.5); + } if (shouldAccept) { if (file.lastCommit) { @@ -112,12 +93,10 @@ export const refactorBatch = async ( }); } } - pushRefactorFileResults({ - into: accepted, - result: file, - }); - deps.dispatch(acceptedEdit(file)); + accepted.push(refactorFileResult); + + deps.dispatch(acceptedEdit(refactorFileResult)); } else { const currentCommit = await gitRevParse({ location: input.sandboxDirectoryPath, @@ -136,14 +115,9 @@ export const refactorBatch = async ( }); } - if (file.status === 'failure') { - pushRefactorFileResults({ - into: discarded, - result: file, - }); - } + discarded.push(refactorFileResult); - deps.dispatch(discardedEdit(file)); + deps.dispatch(discardedEdit(refactorFileResult)); } } diff --git a/packages/refactor-bot/src/refactor/refactorFile.ts b/packages/refactor-bot/src/refactor/refactorFile.ts index 3457bff..20fc14d 100644 --- a/packages/refactor-bot/src/refactor/refactorFile.ts +++ b/packages/refactor-bot/src/refactor/refactorFile.ts @@ -7,6 +7,7 @@ import { z } from 'zod'; import { makeCachedFunction } from '../cache/makeCachedFunction'; import { AbortError } from '../errors/abortError'; import { CycleDetectedError } from '../errors/cycleDetectedError'; +import { OutOfContextBoundsError } from '../errors/outOfContextBoundsError'; import { gitDiffRange } from '../git/gitDiffRange'; import { gitFilesDiff } from '../git/gitFilesDiff'; import { gitResetHard } from '../git/gitResetHard'; @@ -19,6 +20,7 @@ import { ensureHasOneElement, hasTwoElements } from '../utils/hasOne'; import { UnreachableError } from '../utils/UnreachableError'; import { applyChanges } from './applyChanges'; import { check, checksSummary, scriptSchema } from './check'; +import { startCollectingLlmUsage } from './collectLlmUsage'; import { edit } from './edit'; import { formatCommitMessage } from './prompts/formatCommitMessage'; import { formatFileContents } from './prompts/formatFileContents'; @@ -164,353 +166,427 @@ export const refactorFile = makeCachedFunction({ transform: async (input, ctx) => { const { filePath, sandboxDirectoryPath } = input; - const packageManager = await determinePackageManager({ - directory: input.sandboxDirectoryPath, + const refactorFileLlmUsage = startCollectingLlmUsage({ + key: ctx.location, }); - const steps = new Array<{ - commit: string; - task: 'refactor' | 'fix-issues' | 'fix-export-issues'; - key: string; - patch: string; - fileContents: string; - checkSummary: ReturnType; - timestamp: number; - }>(); - - const initialCheck = await check( - { - packageManager, - location: input.sandboxDirectoryPath, - startCommit: input.startCommit, - filePaths: [filePath], - scripts: input.scripts, - }, - ctx - ); - if (initialCheck.issues.length > 0) { - throw new AbortError( - 'We should have no errors initially, this should be guaranteed by the initial pre-check made in refactorGoal function. If we got here there must be a bug in the code.' + try { + const packageManager = await determinePackageManager({ + directory: input.sandboxDirectoryPath, + }); + + const steps = new Array<{ + commit?: string; + task: 'refactor' | 'fix-issues' | 'fix-export-issues'; + key: string; + patch?: string; + fileContents: string; + checkSummary?: ReturnType; + timestamp: number; + }>(); + + const initialCheck = await check( + { + packageManager, + location: input.sandboxDirectoryPath, + startCommit: input.startCommit, + filePaths: [filePath], + scripts: input.scripts, + }, + ctx ); - } + if (initialCheck.issues.length > 0) { + throw new AbortError( + 'We should have no errors initially, this should be guaranteed by the initial pre-check made in refactorGoal function. If we got here there must be a bug in the code.' + ); + } - const issues: Issue[] = []; - const commonEditOpts = { - ...input, - filePath, - sandboxDirectoryPath: input.sandboxDirectoryPath, - budgetCents: input.budgetCents, - eslintAutoFixScriptArgs: input.scripts.find((script) => - script.args.includes('eslint') - )?.args, - prettierScriptLocation: input.prettierScriptLocation, - }; - - const advice: string[] = []; - - const buildResult = () => { - return { + const issues: Issue[] = []; + const commonEditOpts = { + ...input, filePath, - issues, - steps: steps.map((step) => ({ - commit: step.commit, - fileContents: step.fileContents, - task: step.task, - timestamp: step.timestamp, - })), - lastCommit: lastCommit(steps), - timestamp: lastTimestamp(steps) ?? performance.now(), + sandboxDirectoryPath: input.sandboxDirectoryPath, + budgetCents: input.budgetCents, + eslintAutoFixScriptArgs: input.scripts.find((script) => + script.args.includes('eslint') + )?.args, + prettierScriptLocation: input.prettierScriptLocation, }; - }; - const startCommit = await gitRevParse({ - location: input.sandboxDirectoryPath, - ref: 'HEAD', - }); - const initialFileContents = await readFile( - join(input.sandboxDirectoryPath, filePath), - 'utf-8' - ); - do { - try { - const fileContents = await readFile( - join(input.sandboxDirectoryPath, filePath), - 'utf-8' - ); + const advice: string[] = []; - const fileDiff = await gitFilesDiff({ - location: input.sandboxDirectoryPath, - filePaths: [filePath], - ref: startCommit, - }); + const buildResult = () => { + return { + filePath, + issues, + steps: steps.map((step) => ({ + commit: step.commit, + fileContents: step.fileContents, + task: step.task, + checkSummary: step.checkSummary, + timestamp: step.timestamp, + })), + lastCommit: lastCommit(steps), + timestamp: lastTimestamp(steps) ?? performance.now(), + }; + }; - const localIssues = issues.filter( - (issue) => issue.filePath === filePath - ); + const startCommit = await gitRevParse({ + location: input.sandboxDirectoryPath, + ref: 'HEAD', + }); + const initialFileContents = await readFile( + join(input.sandboxDirectoryPath, filePath), + 'utf-8' + ); + do { + try { + const fileContents = await readFile( + join(input.sandboxDirectoryPath, filePath), + 'utf-8' + ); - const externalIssues = issues.filter( - (issue) => issue.filePath !== filePath - ); + const fileDiff = await gitFilesDiff({ + location: input.sandboxDirectoryPath, + filePaths: [filePath], + ref: startCommit, + }); - let task: 'refactor' | 'fix-issues' | 'fix-export-issues' = - 'refactor'; - if (issues.length > 0) { - if (localIssues.length > 0) { - task = 'fix-issues'; - } else { - task = 'fix-export-issues'; + const localIssues = issues.filter( + (issue) => issue.filePath === filePath + ); + + const externalIssues = issues.filter( + (issue) => issue.filePath !== filePath + ); + + let task: 'refactor' | 'fix-issues' | 'fix-export-issues' = + 'refactor'; + if (issues.length > 0) { + if (localIssues.length > 0) { + task = 'fix-issues'; + } else { + task = 'fix-export-issues'; + } } - } - let objective: string; - switch (task) { - case 'refactor': - objective = performFileRefactoringPromptText({ - fileContents, - filePath, - language: 'TypeScript', - objective: input.objective, - }); - break; - case 'fix-issues': - objective = fixLocalIssuesPromptText({ - fileContents, - filePath, - language: 'TypeScript', - objective: input.objective, - issues: localIssues.map(({ issue }) => issue), - fileDiff, - advice, - }); - break; - case 'fix-export-issues': - objective = fixExportIssuesPromptText({ - fileContents, - filePath, - language: 'TypeScript', - objective: input.objective, - issues: externalIssues.map(({ issue }) => issue), - fileDiff, - advice, - ...(await changeInfo({ - location: input.sandboxDirectoryPath, - oldFileContents: initialFileContents, - newFileContents: fileContents, + let objective: string; + switch (task) { + case 'refactor': + objective = performFileRefactoringPromptText({ + fileContents, filePath, + language: 'TypeScript', + objective: input.objective, + }); + break; + case 'fix-issues': + objective = fixLocalIssuesPromptText({ + fileContents, + filePath, + language: 'TypeScript', + objective: input.objective, + issues: localIssues.map(({ issue }) => issue), fileDiff, - })), - }); - break; - default: - throw new UnreachableError(task); - } - - const { choices } = await edit( - { - ...commonEditOpts, - objective, - fileContents, - }, - ctx - ); - - const editResult = choices[0]; - - if (editResult.status !== 'success') { - if (issues.length > 0) { - const file = { - status: 'failure' as const, - failureDescription: - 'The model has failed to fix the issues in the file', - ...buildResult(), - }; - - return { - file, - }; + advice, + }); + break; + case 'fix-export-issues': + objective = fixExportIssuesPromptText({ + fileContents, + filePath, + language: 'TypeScript', + objective: input.objective, + issues: externalIssues.map( + ({ issue }) => issue + ), + fileDiff, + advice, + ...(await changeInfo({ + location: input.sandboxDirectoryPath, + oldFileContents: initialFileContents, + newFileContents: fileContents, + filePath, + fileDiff, + })), + }); + break; + default: + throw new UnreachableError(task); } - continue; - } - const commitMessage = - issues.length === 0 - ? formatCommitMessage({ - type: 'refactor', - filePath, - description: input.objective, - }) - : formatCommitMessage({ - type: 'fix', - filePath, - description: issues - .map((issue) => issue.issue) - .join('\n'), - }); - - const { commit } = await applyChanges({ - commitMessage, - filePath, - sandboxDirectoryPath, - fileContents: editResult.fileContents, - fileContentsHash: editResult.fileContentsHash, - }); - - const checkResult = await check( - { - packageManager, - location: input.sandboxDirectoryPath, - startCommit: input.startCommit, - scripts: input.scripts, - }, - ctx - ); + const { choices } = await edit( + { + ...commonEditOpts, + objective, + fileContents, + }, + ctx + ); - const checkSummary = checksSummary({ - issues, - checkResult, - checkCommit: commit, - }); - - issues.splice( - 0, - issues.length, - ...checkSummary.newIssues, - ...checkSummary.remainingIssues - ); + const editResult = choices[0]; + + if (editResult.status !== 'success') { + if (issues.length > 0) { + const file = { + status: 'failure' as const, + failureDescription: + 'The model has failed to fix the issues in the file', + ...buildResult(), + }; + + return { + ...(ctx.location && { + key: ctx.location, + }), + file, + usage: refactorFileLlmUsage.finishCollecting(), + }; + } else { + const key = editResult.key; + assert(key); + steps.push({ + task, + key, + fileContents, + timestamp: performance.now(), + }); + } + continue; + } - const key = editResult.key; - assert(key); + const commitMessage = + issues.length === 0 + ? formatCommitMessage({ + type: 'refactor', + filePath, + description: input.objective, + }) + : formatCommitMessage({ + type: 'fix', + filePath, + description: issues + .map((issue) => issue.issue) + .join('\n'), + }); + + const { commit } = await applyChanges({ + commitMessage, + filePath, + sandboxDirectoryPath, + fileContents: editResult.fileContents, + fileContentsHash: editResult.fileContentsHash, + }); + + const checkResult = await check( + { + packageManager, + location: input.sandboxDirectoryPath, + startCommit: input.startCommit, + scripts: input.scripts, + }, + ctx + ); - steps.push({ - task, - key, - patch: await gitFilesDiff({ - location: input.sandboxDirectoryPath, - filePaths: [filePath], - ref: lastCommit(steps) || startCommit, - }), - fileContents: editResult.fileContents, - commit, - checkSummary, - timestamp: performance.now(), - }); - } catch (exc) { - if (exc instanceof CycleDetectedError) { - const error = exc; - const index = steps.findIndex( - (step) => step.key === error.key + const checkSummary = checksSummary({ + issues, + checkResult, + checkCommit: commit, + }); + + issues.splice( + 0, + issues.length, + ...checkSummary.newIssues, + ...checkSummary.remainingIssues ); - const stepsLeadingToCycle = steps.slice(index); - const leastProblematicStep = ensureHasOneElement( - orderBy( - stepsLeadingToCycle, - (step) => step.checkSummary.totalNumberOfIssues, - 'asc' - ) - )[0]; - const leastProblematicStepIndex = - stepsLeadingToCycle.findIndex( - (value) => value === leastProblematicStep - ); - /** - * @todo haven't seen more complex cycles at this - * moment, but it could be possible, so let's double - * check that we are hitting the expected case - */ - if (hasTwoElements(stepsLeadingToCycle)) { - const [startRef, endRef] = - leastProblematicStepIndex === 0 - ? [ - leastProblematicStep.commit, - stepsLeadingToCycle[1].commit, - ] - : [ - leastProblematicStep.commit, - stepsLeadingToCycle[0].commit, - ]; - const mostIssues = stepsLeadingToCycle.reduce( - (a, b) => - Math.max(a, b.checkSummary.totalNumberOfIssues), - 0 - ); - const patch = await gitDiffRange({ + const key = editResult.key; + assert(key); + + steps.push({ + task, + key, + patch: await gitFilesDiff({ location: input.sandboxDirectoryPath, filePaths: [filePath], - startRef, - endRef, - }); - // - advice.push( - formatFileDiff({ - fileDiff: patch, - filePath, - headline: `Do not make changes represented by the following diff as that would cause ${mostIssues} lint and compilation issues:`, - }) - ); - await gitResetHard({ - location: input.sandboxDirectoryPath, - ref: leastProblematicStep.commit, - }); - const leastProblematicStepGlobalIndex = steps.findIndex( - (value) => value === leastProblematicStep - ); - steps.splice( - leastProblematicStepGlobalIndex + 1, - steps.length - leastProblematicStepGlobalIndex + 1 + ref: lastCommit(steps) || startCommit, + }), + fileContents: editResult.fileContents, + commit, + checkSummary, + timestamp: performance.now(), + }); + } catch (exc) { + if (exc instanceof CycleDetectedError) { + const error = exc; + const index = steps.findIndex( + (step) => step.key === error.key ); - issues.splice( - 0, - issues.length, - ...leastProblematicStep.checkSummary.newIssues, - ...leastProblematicStep.checkSummary.remainingIssues - ); - - logger.debug('Handling cycle', { - leastProblematicStepIndex, - leastProblematicStepCommit: - leastProblematicStep.commit, - key: error.key, - advice, - steps: steps.map((step) => ({ - task: step.task, - key: basename(step.key), - commit: step.commit, - totalNumberOfIssues: - step.checkSummary.totalNumberOfIssues, - })), - }); + const stepsLeadingToCycle = steps + .slice(index) + .filter((step) => step.commit !== undefined); + const leastProblematicStep = ensureHasOneElement( + orderBy( + stepsLeadingToCycle, + (step) => + step.checkSummary?.totalNumberOfIssues ?? 0, + 'asc' + ) + )[0]; + const leastProblematicStepIndex = + stepsLeadingToCycle.findIndex( + (value) => value === leastProblematicStep + ); + + /** + * @todo haven't seen more complex cycles at this + * moment, but it could be possible, so let's double + * check that we are hitting the expected case + */ + if (hasTwoElements(stepsLeadingToCycle)) { + const [startRef, endRef] = + leastProblematicStepIndex === 0 + ? [ + leastProblematicStep.commit, + stepsLeadingToCycle[1].commit, + ] + : [ + leastProblematicStep.commit, + stepsLeadingToCycle[0].commit, + ]; + const mostIssues = stepsLeadingToCycle.reduce( + (a, b) => + Math.max( + a, + b.checkSummary?.totalNumberOfIssues ?? 0 + ), + 0 + ); + + // we check for commit !== undefined above + assert(startRef); + assert(endRef); + assert(leastProblematicStep.commit); + + const patch = await gitDiffRange({ + location: input.sandboxDirectoryPath, + filePaths: [filePath], + startRef, + endRef, + }); + // + advice.push( + formatFileDiff({ + fileDiff: patch, + filePath, + headline: `Do not make changes represented by the following diff as that would cause ${mostIssues} lint and compilation issues:`, + }) + ); + await gitResetHard({ + location: input.sandboxDirectoryPath, + ref: leastProblematicStep.commit, + }); + const leastProblematicStepGlobalIndex = + steps.findIndex( + (value) => value === leastProblematicStep + ); + steps.splice( + leastProblematicStepGlobalIndex + 1, + steps.length - + leastProblematicStepGlobalIndex + + 1 + ); + issues.splice( + 0, + issues.length, + ...(leastProblematicStep.checkSummary + ?.newIssues || []), + ...(leastProblematicStep.checkSummary + ?.remainingIssues || []) + ); + + logger.debug('Handling cycle', { + leastProblematicStepIndex, + leastProblematicStepCommit: + leastProblematicStep.commit, + key: error.key, + advice, + steps: steps.map((step) => ({ + task: step.task, + key: basename(step.key), + commit: step.commit, + totalNumberOfIssues: + step.checkSummary + ?.totalNumberOfIssues ?? 0, + })), + }); + } else { + logger.log(stepsLeadingToCycle); + logger.log({ + leastProblematicStepIndex, + leastProblematicStepCommit: + leastProblematicStep.commit, + key: error.key, + advice, + steps: steps.map((step) => ({ + task: step.task, + key: basename(step.key), + commit: step.commit, + totalNumberOfIssues: + step.checkSummary + ?.totalNumberOfIssues ?? 0, + })), + }); + throw exc; + } } else { - logger.log(stepsLeadingToCycle); - logger.log({ - leastProblematicStepIndex, - leastProblematicStepCommit: - leastProblematicStep.commit, - key: error.key, - advice, - steps: steps.map((step) => ({ - task: step.task, - key: basename(step.key), - commit: step.commit, - totalNumberOfIssues: - step.checkSummary.totalNumberOfIssues, - })), - }); throw exc; } - } else { - throw exc; } - } - } while (issues.length > 0); + } while (issues.length > 0); - const file = { - status: 'success' as const, - ...buildResult(), - }; + const file = { + status: 'success' as const, + ...buildResult(), + }; + + return { + ...(ctx.location && { + key: ctx.location, + }), + file, + usage: refactorFileLlmUsage.finishCollecting(), + }; + } catch (err) { + /** + * If there was any error - do not save to the cache + */ + ctx.skipSavingToCache(err); + + if ( + err instanceof AbortError && + !(err instanceof CycleDetectedError) && + !(err instanceof OutOfContextBoundsError) + ) { + return Promise.reject(err); + } - return { - file, - }; + return { + ...(ctx.location && { + key: ctx.location, + }), + file: { + status: 'failure' as const, + failureDescription: + err instanceof Error ? err.message : String(err), + filePath: input.filePath, + issues: [], + steps: [], + timestamp: performance.now(), + }, + usage: refactorFileLlmUsage.finishCollecting(), + }; + } }, }); diff --git a/packages/refactor-bot/src/refactor/refactorGoal.ts b/packages/refactor-bot/src/refactor/refactorGoal.ts index e89bc62..9a72f74 100644 --- a/packages/refactor-bot/src/refactor/refactorGoal.ts +++ b/packages/refactor-bot/src/refactor/refactorGoal.ts @@ -17,6 +17,7 @@ import { refactorConfigSchema } from './types'; export const refactorGoalInputSchema = refactorConfigSchema.augment({ objective: z.string(), + requirements: z.array(z.string()).nonempty(), sandboxDirectoryPath: z.string(), startCommit: z.string(), filesToEdit: z.array(z.string()), diff --git a/packages/refactor-bot/src/refactor/resetToLastAcceptedCommit.ts b/packages/refactor-bot/src/refactor/resetToLastAcceptedCommit.ts index 5e9fbbe..86885d9 100644 --- a/packages/refactor-bot/src/refactor/resetToLastAcceptedCommit.ts +++ b/packages/refactor-bot/src/refactor/resetToLastAcceptedCommit.ts @@ -11,10 +11,8 @@ export async function resetToLastAcceptedCommit(opts: { result: RefactorFilesResult; }) { const accepted = orderBy( - Object.entries(opts.result.accepted).flatMap(([_, entry]) => - entry.filter((file) => file.lastCommit).map((file) => file) - ), - ['timestamp'], + opts.result.accepted.filter((entry) => entry.file.lastCommit), + ['file.timestamp'], ['desc'] ); @@ -29,7 +27,8 @@ export async function resetToLastAcceptedCommit(opts: { location: opts.location, ref: 'HEAD', }); - const lastCommit = accepted[0].lastCommit; + + const lastCommit = accepted[0].file.lastCommit; if (lastCommit && lastCommit !== currentCommit) { logger.info(`Resetting to last successful commit ${lastCommit}`); diff --git a/packages/refactor-bot/src/refactor/resultsCollector.ts b/packages/refactor-bot/src/refactor/resultsCollector.ts index b73acea..ad52fb6 100644 --- a/packages/refactor-bot/src/refactor/resultsCollector.ts +++ b/packages/refactor-bot/src/refactor/resultsCollector.ts @@ -1,21 +1,73 @@ -import type { z } from 'zod'; +import { z } from 'zod'; -import { executionTiming } from '../cache/actions/executionStatus'; +import { + executionStarted, + executionTiming, +} from '../cache/actions/executionStatus'; import { explainCacheKey } from '../cache/cache'; -import { calculatePrice } from '../chat-gpt/api'; import { actions } from '../event-bus'; import { ofTypes } from '../event-bus/operators'; +import { extractErrorInfo } from '../logger/extractErrorInfo'; +import { logger } from '../logger/logger'; +import { line } from '../text/line'; import { ensureHasOneElement, hasOneElement } from '../utils/hasOne'; import { UnreachableError } from '../utils/UnreachableError'; import { acceptedEdit } from './actions/acceptedEdit'; import { checkoutSandboxCompleted } from './actions/checkoutSandboxCompleted'; import { discardedEdit } from './actions/discardedEdit'; -import { gptRequestSuccess } from './actions/gptRequestSuccess'; import { planFilesCompleted } from './actions/planFilesCompleted'; -import type { checkoutSandboxResultSchema } from './checkoutSandbox'; -import type { planFilesResultSchema } from './planFiles'; -import type { RefactorFilesResult } from './types'; -import { mutateToMergeRefactorRecords } from './types'; +import { checkoutSandboxResultSchema } from './checkoutSandbox'; +import { startCollectingLlmUsage } from './collectLlmUsage'; +import { planFilesResultSchema } from './planFiles'; +import type { RefactorConfig, refactorFileResultSchema } from './types'; +import { llmUsageEntrySchema, refactorFilesResultSchema } from './types'; + +export const collectedRefactorResultSchema = z + .object({ + objective: z.string(), + status: z.enum(['success', 'failure']), + error: z.record(z.unknown()).optional(), + }) + .merge(checkoutSandboxResultSchema) + .merge(refactorFilesResultSchema) + .augment({ + planFilesResults: z.array(planFilesResultSchema), + usage: z.array(llmUsageEntrySchema), + performance: z.object({ + totalDurationMs: z.number(), + durationMsByStep: z.record( + z.object({ + durationMs: z.number(), + }) + ), + }), + }); + +const commitHashRegex = /^[a-f0-9]{40}$/; + +function scanForCommitHashes(data: unknown, ignore: string[]): string[] { + if (typeof data === 'object' && data !== null) { + const entries = Object.entries(data); + for (const [key, value] of entries) { + if (ignore.includes(key)) { + continue; + } + + const result = scanForCommitHashes(value, ignore); + if (result.length > 0) { + return result; + } + } + } + + if (typeof data === 'string') { + if (commitHashRegex.test(data) && !ignore.includes(data)) { + return [data]; + } + } + + return []; +} /** * Results collector allows us to collect all the results of a refactor @@ -33,29 +85,17 @@ import { mutateToMergeRefactorRecords } from './types'; export function resultsCollector(deps = { actions }) { const timestamp = performance.now(); - const files: RefactorFilesResult = { - accepted: {}, - discarded: {}, - }; - const checkoutResults: z.infer[] = []; const planFilesResults: z.infer[] = []; - const costsByStep: { - total: ReturnType; - unknown: ReturnType; - } & Record> = { - total: { - completionPrice: 0, - promptPrice: 0, - totalPrice: 0, - }, - unknown: { - completionPrice: 0, - promptPrice: 0, - totalPrice: 0, - }, + const accepted: z.infer[] = []; + + const discarded: z.infer[] = []; + + const files = { + accepted, + discarded, }; type DurationSample = { @@ -66,6 +106,8 @@ export function resultsCollector(deps = { actions }) { const samples: DurationSample[] = []; + const usage = startCollectingLlmUsage(); + const subscription = deps .actions() .pipe( @@ -74,7 +116,7 @@ export function resultsCollector(deps = { actions }) { acceptedEdit, discardedEdit, planFilesCompleted, - gptRequestSuccess, + executionStarted, executionTiming ) ) @@ -85,53 +127,40 @@ export function resultsCollector(deps = { actions }) { checkoutResults.push(event.data); break; case 'acceptedEdit': - mutateToMergeRefactorRecords({ - from: { - [event.data.filePath]: [event.data], - }, - into: files.accepted, - }); + accepted.push(event.data); break; case 'discardedEdit': - mutateToMergeRefactorRecords({ - from: { - [event.data.filePath]: [event.data], - }, - into: files.discarded, - }); + discarded.push(event.data); break; case 'planFilesCompleted': planFilesResults.push(event.data); break; - case 'gptRequestSuccess': + case 'executionStarted': { - const price = calculatePrice({ - ...event.data.response, - model: event.data.model, - }); + if (event.data.name === 'checkout-sandbox') { + return; + } - const steps = explainCacheKey(event.data.key) ?? [ - { name: 'unknown', hash: 'unknown' }, - ]; + const hashes = scanForCommitHashes( + event.data, + checkoutResults + .map((r) => r.startCommit) + .concat(['diffHash', 'filesDiffHash']) + ); - steps.push({ name: 'total', hash: 'unknown' }); - steps.forEach(({ name }) => { - const current = costsByStep[name] ?? { - completionPrice: 0, - promptPrice: 0, - totalPrice: 0, - }; - - costsByStep[name] = { - completionPrice: - current.completionPrice + - price.completionPrice, - promptPrice: - current.promptPrice + price.promptPrice, - totalPrice: - current.totalPrice + price.totalPrice, - }; - }); + if (hashes.length > 0) { + logger.warn( + line` + Found commit hash in the input of a + cached function "${event.data.name}": + "${hashes.join(', ')}", this is likely + a mistake, because commits made at a + different time will result in different + hashes, which will invalidate the cache + and cause the function to run again. + ` + ); + } } break; case 'executionTiming': @@ -161,72 +190,78 @@ export function resultsCollector(deps = { actions }) { }); const finalizePerformance = () => { - const totalDuration = performance.now() - timestamp; + const totalDurationMs = performance.now() - timestamp; type DurationSummary = { - duration: number; + durationMs: number; }; - const durationByStep = samples.reduce< + const durationMsByStep = samples.reduce< { total: DurationSummary } & Record >( (acc, { duration, name, level }) => { const existing = acc[name] ?? { - duration: 0, + durationMs: 0, }; if (level === 1) { - acc.total.duration += duration; + acc.total.durationMs += duration; } return { ...acc, [name]: { - duration: duration + existing.duration, + durationMs: duration + existing.durationMs, }, }; }, { total: { - duration: 0, + durationMs: 0, }, unknown: { - duration: 0, + durationMs: 0, }, } ); return { - totalDuration, - durationByStep, + totalDurationMs, + durationMsByStep, }; }; return { teardown: () => { subscription.unsubscribe(); + usage.finishCollecting(); }, - finalizeResults: (error?: Error) => { + finalizeResults: ( + config: RefactorConfig, + error?: Error + ): z.output => { if (error) { if (!hasOneElement(checkoutResults)) { throw error; } return { status: 'failure' as const, - error, + error: extractErrorInfo(error), + objective: config.objective, ...ensureHasOneElement(checkoutResults)[0], ...files, planFilesResults, - costsByStep, + usage: usage.getUsage(), performance: finalizePerformance(), }; } else { return { status: 'success' as const, + objective: config.objective, ...ensureHasOneElement(checkoutResults)[0], ...files, planFilesResults, - costsByStep, + usage: usage.getUsage(), performance: finalizePerformance(), }; } diff --git a/packages/refactor-bot/src/refactor/retrieveParameters.ts b/packages/refactor-bot/src/refactor/retrieveParameters.ts index 8cd7e58..118f57f 100644 --- a/packages/refactor-bot/src/refactor/retrieveParameters.ts +++ b/packages/refactor-bot/src/refactor/retrieveParameters.ts @@ -1,123 +1,50 @@ import { z } from 'zod'; -import { makeCachedFunction } from '../cache/makeCachedFunction'; -import { markdown } from '../markdown/markdown'; -import { format } from '../text/format'; -import { determineModelParameters } from './determineModelParameters'; -import { validateAndParseListOfFiles } from './parsers/validateAndParseListOfFiles'; -import { prompt } from './prompt'; -import { refactorConfigSchema } from './types'; - -export const retrieveParametersInputSchema = refactorConfigSchema - .pick({ - objective: true, - budgetCents: true, - model: true, - modelByStepCode: true, - useMoreExpensiveModelsOnRetry: true, - scope: true, - tsConfigJsonFileName: true, - allowedFunctions: true, - }) - .augment({ - sandboxDirectoryPath: z.string(), +import type { CacheStateRef } from '../cache/types'; +import { extractRequirements } from '../evaluate/extractRequirements'; +import { + determineFilesToEdit, + determineFilesToEditResultSchema, +} from './filesToEdit'; +import { refactorConfigPromptOptsSchema } from './prompt'; + +export const retrieveParametersInputSchema = + refactorConfigPromptOptsSchema.augment({ + objective: z.string(), + filesToEdit: z.array(z.string()).nonempty().optional(), }); -export const retrieveParametersResultSchema = z.object({ - /** - * List of files allowed to be edited or refactored when mentioned in the - * objective, in the same order as mentioned in the objective. - * - * If the objective does not mention any files, this field is empty. - */ - filesToEdit: z.array(z.string()), -}); - -export type RetrieveParametersResponse = z.infer< - typeof retrieveParametersResultSchema ->; - -const systemPrompt = markdown` - Think step by step. Be concise and to the point. Do not make assumptions - other than what was given in the instructions. -`; - -const listFilesToEditPromptText = (objective: string) => - format( - markdown` - %objective% - - Given the above objective that implies modifications to source code, - follow the steps below to determine the list of files the user wants - us to edit or refactor. - - 1. Check the objective for mentions of files to be changed, edited - or refactored. Could be a single file or multiple files. - - 2. If there is no mention of files respond "No mention of files to - be edited or refactored". - - 3. If the resulting list from step #1 is not empty, format it - - return one file path per line in your response. File paths should - be surrounded by a backtick. File paths should be relative to - repository root. The result must be a numbered list in the - format: - - #. \`path/to/file.ts\` - - #. \`path/to/another/file.ts\` +export const retrieveParametersResultSchema = + determineFilesToEditResultSchema.augment({ + requirements: z.array(z.string()).nonempty(), + }); - The number of each entry must be followed by a period. Do not prefix - the list of files with any text. Do not follow the list of files - with any other text. If the objective asks to edit in a specific - order, retain the order. - `, +export const retrieveParameters = async ( + input: z.input, + ctx?: CacheStateRef +) => { + const { filesToEdit } = input.filesToEdit + ? { + filesToEdit: input.filesToEdit, + } + : await determineFilesToEdit(input, ctx); + + const { choices } = await extractRequirements( { - objective, - } + ...input, + choices: 2, + }, + ctx ); -export const retrieveParameters = makeCachedFunction({ - name: 'retrieve-parameters', - inputSchema: retrieveParametersInputSchema, - resultSchema: retrieveParametersResultSchema, - transform: async (input, ctx) => { - const allowedFilesResult = await prompt( - { - preface: systemPrompt, - prompt: listFilesToEditPromptText(input.objective), - temperature: 0.2, - budgetCents: input.budgetCents, - functionsConfig: { - repositoryRoot: input.sandboxDirectoryPath, - scope: input.scope, - tsConfigJsonFileName: input.tsConfigJsonFileName, - /** - * @note we do not want to allow any functions for this prompt - */ - allowedFunctions: [], - }, - ...determineModelParameters(input, ctx), - shouldStop: async (message) => { - await validateAndParseListOfFiles({ - sandboxDirectoryPath: input.sandboxDirectoryPath, - text: message.content, - sortBySize: false, - }); - return true as const; - }, - }, - ctx - ); - - const filesToEdit = await validateAndParseListOfFiles({ - sandboxDirectoryPath: input.sandboxDirectoryPath, - text: allowedFilesResult.choices[0].resultingMessage.content, - sortBySize: false, - }); + const requirements = choices.reduce( + (acc, choice) => + choice.requirements.length > acc.length ? acc : choice.requirements, + choices[0].requirements + ); - return { - filesToEdit, - }; - }, -}); + return { + filesToEdit, + requirements, + }; +}; diff --git a/packages/refactor-bot/src/refactor/runRefactor.ts b/packages/refactor-bot/src/refactor/runRefactor.ts index d522206..74ff7aa 100644 --- a/packages/refactor-bot/src/refactor/runRefactor.ts +++ b/packages/refactor-bot/src/refactor/runRefactor.ts @@ -12,12 +12,14 @@ import { formatObject } from '../logger/formatObject'; import { glowFormat } from '../markdown/glowFormat'; import { markdown } from '../markdown/markdown'; import { goToEndOfFile } from '../prompt/editor'; +import { formatFencedCodeBlock } from '../prompt-formatters/formatFencedCodeBlock'; +import { formatOptional } from '../prompt-formatters/formatOptional'; import { format } from '../text/format'; -import { line } from '../text/line'; import { hasOneElement } from '../utils/hasOne'; +import { summarizeLlmUsagePrice } from './collectLlmUsage'; import { loadRefactorConfigs } from './loadRefactors'; import { refactor } from './refactor'; -import type { RefactorConfig } from './types'; +import { type RefactorConfig, summarizeRefactorFilesResult } from './types'; const validateUsingSchema = (schema: T) => @@ -128,6 +130,15 @@ async function determineConfig(opts: { ); } + if (opts.id) { + return { + config: { + ...config, + id: opts.id, + }, + }; + } + return { config, }; @@ -177,14 +188,19 @@ const currentRepositoryRefactoringReport = async ( successBranch: string; } ) => { - const { accepted, discarded, successBranch, sandboxDirectoryPath } = opts; + const { successBranch, sandboxDirectoryPath } = opts; - const perFile = Object.entries(accepted) + const { accepted, discarded } = summarizeRefactorFilesResult(opts); + + const perFile = Object.entries(accepted.resultsByFilePaths) .flatMap(([file, results]) => { - const firstCommit = results[0]?.steps[0]?.commit?.substring(0, 7); + const firstCommit = results[0]?.file.steps[0]?.commit?.substring( + 0, + 7 + ); const lastCommit = results[ results.length - 1 - ]?.lastCommit?.substring(0, 7); + ]?.file.lastCommit?.substring(0, 7); if (!firstCommit || !lastCommit) { return []; } @@ -194,22 +210,25 @@ const currentRepositoryRefactoringReport = async ( }) .join('\n'); - const firstCommits = Object.entries(discarded) + const firstCommits = Object.entries(discarded.resultsByFilePaths) .flatMap(([file, results]) => { - const firstCommit = results[0]?.steps[0]?.commit?.substring(0, 7); + const firstCommit = results[0]?.file.steps[0]?.commit?.substring( + 0, + 7 + ); if (!firstCommit) { - return [`# ${file}\n# No commits found for this file`]; + return []; } return [`# ${file}\ngit cherry-pick -n ${firstCommit}`]; }) .join('\n'); const failedToRefactor = - Object.keys(discarded).length > 0 + Object.keys(discarded.resultsByFilePaths).length > 0 ? markdown` - Failed to refactor: + Attempted to refactor: - ${Object.keys(discarded) + ${Object.keys(discarded.resultsByFilePaths) .map((file, i) => `${i + 1}. \`${file}\``) .join('\n')} ` @@ -219,9 +238,9 @@ const currentRepositoryRefactoringReport = async ( if (Object.keys(discarded).length > 0 && firstCommits) { firstCommitsOfFailures = format( markdown` - ## First commits of failed files + ## Change attempts that failed - These are least invasive commits focused on the goal which + These should be least invasive commits focused on the goal which didn't pass checks. You can try to fix them manually. ~~~sh @@ -232,7 +251,7 @@ const currentRepositoryRefactoringReport = async ( ); } - const successfullyRefactored = Object.keys(accepted) + const successfullyRefactored = Object.keys(accepted.resultsByFilePaths) .map((file, i) => `${i + 1}. \`${file}\``) .join('\n'); @@ -300,20 +319,45 @@ const currentRepositoryRefactoringReport = async ( const currentRepositoryFailedRefactoringReport = async ( opts: RefactorResult ) => { - const { discarded, sandboxDirectoryPath } = opts; + const { sandboxDirectoryPath } = opts; - const firstCommits = Object.entries(discarded) + const { discarded } = summarizeRefactorFilesResult(opts); + + const firstCommits = Object.entries(discarded.resultsByFilePaths) .flatMap(([file, results]) => { - const firstCommit = results[0]?.steps[0]?.commit?.substring(0, 7); + const firstCommit = results[0]?.file.steps[0]?.commit?.substring( + 0, + 7 + ); if (!firstCommit) { - return [`# ${file}\n# No commits found for this file`]; + return []; } return [`# ${file}\ngit cherry-pick -n ${firstCommit}`]; }) .join('\n'); - const failedToRefactor = Object.keys(discarded) - .map((file, i) => `${i + 1}. \`${file}\``) + const failedToRefactor = Object.entries(discarded.resultsByFilePaths) + .map(([file, results], i) => { + const lastResult = results[results.length - 1]?.file; + if (lastResult?.status === 'failure') { + return `${i + 1}. \`${file}\` - ${ + lastResult.failureDescription + }`; + } else if (lastResult?.status === 'success') { + if ( + lastResult.steps.length === 1 && + !lastResult.steps[0]?.commit + ) { + return `${ + i + 1 + }. \`${file}\` - the file appears to not require any changes`; + } else { + return `${i + 1}. \`${file}\``; + } + } else { + return `${i + 1}. \`${file}\``; + } + }) .join('\n'); return format( @@ -326,22 +370,27 @@ const currentRepositoryFailedRefactoringReport = async ( \`$sandboxDirectoryPath$\` - Failed to refactor: + Attempted to refactor: %failedToRefactor% - ## First commits of failed files - - These are least invasive commits focused on the goal which - didn't pass checks. You can try to fix them manually. - - ~~~sh %firstCommits% - ~~~ `, { failedToRefactor, - firstCommits, + firstCommits: formatOptional({ + text: formatFencedCodeBlock({ + code: firstCommits, + language: 'sh', + }), + heading: markdown` + ## Change attempts that failed + + These should be least invasive commits focused on + the goal which didn't pass checks. You can try to + fix them manually. + `, + }), } ), }), @@ -395,7 +444,7 @@ const currentRepositoryPlanningEmptyReasonReport = async ( ); }; -const unhandledErrorReport = async (opts: { error: Error }) => { +const unhandledErrorReport = async (opts: { error: unknown }) => { console.log( await glowFormat({ input: format( @@ -407,9 +456,14 @@ const unhandledErrorReport = async (opts: { error: Error }) => { ~~~ `, { - errorDetails: formatObject(extractErrorInfo(opts.error), { - indent: '', - }), + errorDetails: formatObject( + opts.error instanceof Error + ? extractErrorInfo(opts.error) + : (opts.error as object), + { + indent: '', + } + ), } ), }) @@ -448,26 +502,18 @@ const analyticsReport = async (opts: { performance: boolean; }) => { if (opts.costs) { - const total = opts.result.costsByStep.total.totalPrice; - const costs = Object.entries(opts.result.costsByStep) - .map(([step, costs]) => ({ + const totalCost = summarizeLlmUsagePrice({ + usage: opts.result.usage, + }); + const costs = Array.from(totalCost.priceBySteps).map( + ([step, costs]) => ({ step, costs, - })) - .filter((data) => data.step !== 'total' && data.step !== 'unknown'); + }) + ); const orderedCosts = orderBy(costs, ['costs.totalPrice'], ['desc']); - const warning = - total === 0 - ? '\n' + - line` - **NOTE** The costs cannot be collected if we cache high - level steps. Try running the refactor with following - options: \`--enable-cache-for chat exec check auto-fix\` - ` - : ``; - console.log( await glowFormat({ input: format( @@ -477,11 +523,9 @@ const analyticsReport = async (opts: { Total cost: %total% USD %costByStep% - - %warning% `, { - total: total.toFixed(3), + total: totalCost.totalPrice.toFixed(3), costByStep: orderedCosts .filter( /** @@ -500,7 +544,6 @@ const analyticsReport = async (opts: { }) ) .join('\n'), - warning, } ), }) @@ -510,14 +553,14 @@ const analyticsReport = async (opts: { if (opts.performance) { const perf = opts.result.performance; - const perfEntries = Object.entries(perf.durationByStep) + const perfEntries = Object.entries(perf.durationMsByStep) .map(([step, data]) => ({ step, - duration: data.duration, + durationMs: data.durationMs, })) .filter((data) => data.step !== 'total' && data.step !== 'unknown'); - const orderedDurations = orderBy(perfEntries, ['duration'], ['desc']); + const orderedDurations = orderBy(perfEntries, ['durationMs'], ['desc']); console.log( await glowFormat({ @@ -531,12 +574,7 @@ const analyticsReport = async (opts: { `, { totalDuration: - (perf.totalDuration / 1000).toFixed(3) + ' sec', - - measuredDuration: - (perf.durationByStep.total.duration / 1000).toFixed( - 3 - ) + ' sec', + (perf.totalDurationMs / 1000).toFixed(3) + ' sec', durationByStep: orderedDurations .filter( @@ -553,7 +591,7 @@ const analyticsReport = async (opts: { format(`- %step% - %duration%`, { step: data.step.padEnd(20, ' '), duration: - (data.duration / 1000).toFixed(3) + + (data.durationMs / 1000).toFixed(3) + ' sec', }) ) @@ -570,9 +608,9 @@ export async function runRefactor(opts: { name?: string; saveToCache?: boolean; enableCacheFor?: string[]; + disableCacheFor?: string[]; costs?: boolean; performance?: boolean; - // }) { try { const configs = await loadRefactorConfigs(); @@ -586,6 +624,7 @@ export async function runRefactor(opts: { config, saveToCache: opts.saveToCache, enableCacheFor: opts.enableCacheFor, + disableCacheFor: opts.disableCacheFor, }); if (result.status === 'failure') { @@ -607,6 +646,7 @@ export async function runRefactor(opts: { ); } } + await analyticsReport({ result, costs: opts.costs ?? true, diff --git a/packages/refactor-bot/src/refactor/types.ts b/packages/refactor-bot/src/refactor/types.ts index 916d322..094b8f3 100644 --- a/packages/refactor-bot/src/refactor/types.ts +++ b/packages/refactor-bot/src/refactor/types.ts @@ -23,6 +23,18 @@ export const refactorConfigSchema = z.object({ */ objective: z.string(), + /** + * List of files to edit or refactor - the refactoring will be done in the + * same order as the files are specified here. File names should be relative + * to the repository root. + * + * When not explicitly specified, the files are determined from the + * objective. When the objective doesn't explicitly state the files to be + * edited or refactored, the files are determined automatically by the LLM + * by analyzing the objective and contents of the repository. + */ + filesToEdit: z.array(z.string()).nonempty().optional(), + /** * Name of the `tsconfig.json` file to use for the refactor, defaults * to `tsconfig.json`. In mono-repos scenarios this will affect the name @@ -168,6 +180,22 @@ export const refactorConfigSchema = z.object({ }) .optional(), + /** + * Whether to evaluate every file before accepting it as success and + * committing it to the repository. + * + * This will lead to a slower refactor, but will ensure that the files + * reported as refactored are actually valid and lead to a reported outcome + * that we can rely on. + */ + evaluate: z.boolean().optional().default(true), + + /** + * Minimal score to accept a file as refactored, only used when the + * `evaluate` option is enabled. Defaults to 0.5. + */ + evaluateMinScore: z.number().optional().default(0.5), + /** * Unique identifier of the refactor, used to identify and restore * the refactor state and finding the right sandbox when running @@ -178,23 +206,39 @@ export const refactorConfigSchema = z.object({ export type RefactorConfig = z.input; +export const issueSchema = z.object({ + command: z.string(), + issue: z.string(), + filePath: z.string(), + commit: z.string(), + code: z.string().optional(), +}); + +export const checkSummarySchema = z.object({ + newIssues: z.array(issueSchema), + remainingIssues: z.array(issueSchema), + resolvedIssues: z.array(issueSchema), + totalNumberOfIssues: z.number(), +}); + export const refactorStepResultSchema = z.object({ task: z.string(), + key: z.string().optional(), fileContents: z.string(), - commit: z.string(), + commit: z.string().optional(), timestamp: z.number(), + checkSummary: checkSummarySchema.optional(), }); -export type RefactorStepResult = z.infer; - -export const issueSchema = z.object({ - command: z.string(), - issue: z.string(), - filePath: z.string(), +export const refactorStepEmptyResultSchema = z.object({ + task: z.string(), commit: z.string(), - code: z.string().optional(), + timestamp: z.number(), + checkSummary: checkSummarySchema, }); +export type RefactorStepResult = z.infer; + export const refactorSuccessResultSchema = z.object({ status: z.literal('success'), filePath: z.string(), @@ -221,107 +265,70 @@ export const refactorResultSchema = z.discriminatedUnion('status', [ export type RefactorResult = z.infer; +export const llmUsageEntrySchema = z.object({ + model: z.string(), + steps: z.array(z.string()), + usage: z.object({ + promptTokens: z.number(), + completionTokens: z.number(), + totalTokens: z.number(), + }), +}); + +export const evaluateFileScoreSchema = z.object({ + key: z.string().optional(), + score: z.number(), +}); + export const refactorFileResultSchema = z.object({ + /** + * Optional key to identify the file in the cache - for diagnostic + * purposes only. + */ + key: z.string().optional(), file: refactorResultSchema, + usage: z.array(llmUsageEntrySchema), + evaluation: evaluateFileScoreSchema.optional(), }); export type RefactorFileResult = z.infer; -export const refactorFilesRecordSchema = z.record( - z.string(), - z.array(refactorResultSchema) -); - -export type RefactorResultByFilePathRecord = z.infer< - typeof refactorFilesRecordSchema ->; - export const refactorFilesResultSchema = z.object({ - accepted: z.record(z.string(), z.array(refactorResultSchema)), - discarded: z.record(z.string(), z.array(refactorResultSchema)), + accepted: z.array(refactorFileResultSchema), + discarded: z.array(refactorFileResultSchema), }); export type RefactorFilesResult = z.infer; -export const lastCommit = (steps: T[]) => { - return steps[steps.length - 1]?.commit; -}; - -export const lastTimestamp = (steps: T[]) => { - return steps[steps.length - 1]?.timestamp; -}; - -export const pushRefactorFileResults = (opts: { - result: RefactorResult; - into: RefactorResultByFilePathRecord; -}) => { - const array = opts.into[opts.result.filePath]; - if (array) { - array.push(opts.result); - } else { - opts.into[opts.result.filePath] = [opts.result]; +export const firstCommit = (steps: T[]) => { + const first = steps[0]; + if (!first) { + return undefined; } + return first.commit; }; -export const mutateToMergeRefactorRecords = (opts: { - from: RefactorResultByFilePathRecord; - into: RefactorResultByFilePathRecord; -}) => { - for (const [file, tasks] of Object.entries(opts.from)) { - const existing = opts.into[file]; - if (existing) { - opts.into[file] = existing.concat(tasks); - } else { - opts.into[file] = tasks; - } +export const lastCommit = ( + steps: T[] +) => { + const last = steps.filter((step) => step.commit || step.lastCommit)[ + steps.length - 1 + ]; + if (!last) { + return undefined; } + return last.commit || last.lastCommit; }; -const moveRefactorFileResults = (opts: { - filePath: string; - into: RefactorResultByFilePathRecord; - from: RefactorResultByFilePathRecord; -}) => { - opts.into[opts.filePath] = (opts.from[opts.filePath] || []).concat( - opts.into[opts.filePath] || [] - ); - delete opts.from[opts.filePath]; -}; - -export const mutateToMergeRefactorFilesResults = (opts: { - from: RefactorFilesResult; - into: RefactorFilesResult; -}) => { - mutateToMergeRefactorRecords({ - from: opts.from.accepted, - into: opts.into.accepted, - }); - mutateToMergeRefactorRecords({ - from: opts.from.discarded, - into: opts.into.discarded, - }); - - /** - * @note if we get a file in discarded list after previously - * getting a file in accepted list, we move the file to the - * discarded list - */ - for (const filePath of Object.keys(opts.into.discarded)) { - const accepted = opts.into.accepted[filePath]; - if (accepted) { - moveRefactorFileResults({ - filePath, - from: opts.into.accepted, - into: opts.into.discarded, - }); - } - } +export const lastTimestamp = (steps: T[]) => { + return steps[steps.length - 1]?.timestamp; }; export type Issue = z.infer; export const checkIssuesResultSchema = z.object({ checkedFiles: z.array(z.string()).optional(), + commands: z.array(z.string()), issues: z.array( z.object({ command: z.string(), @@ -333,3 +340,64 @@ export const checkIssuesResultSchema = z.object({ }); export type CheckIssuesResult = z.infer; + +export const summarizeRefactorFileResultArray = ( + results: Array +) => { + const filePaths = results.map((result) => result.file.filePath); + return { + resultsByFilePaths: Object.fromEntries( + filePaths.map((filePath) => [ + filePath, + results.filter((result) => result.file.filePath === filePath), + ]) + ), + usageByFilePaths: Object.fromEntries( + filePaths.map((filePath) => [ + filePath, + results + .filter((result) => result.file.filePath === filePath) + .flatMap((result) => result.usage), + ]) + ), + }; +}; + +export const summarizeRefactorFilesResult = ( + results: RefactorFilesResult, + opts?: { + filterOutDuplicatesFromDiscarded?: boolean; + } +) => { + const accepted = summarizeRefactorFileResultArray(results.accepted); + const discarded = summarizeRefactorFileResultArray(results.discarded); + + /** + * The algorithm is built in a way that it relies on "planFiles" to + * return a list of files to be refactored. This means that if a file + * was already refactored, but for some reason the refactoring produced + * an outcome that "planFiles" wants to still refactor, the file will + * be refactored again - which will cause an infinite loop, which will be + * safely detected by the algorithm and the refactoring for that file will + * be aborted. + * + * This will cause some "accepted" files to be duplicated in the "discarded" + * list, which we filter out here for cleaner UX purposes - the users do not + * see the "discarded" commits anyway. + */ + if (opts?.filterOutDuplicatesFromDiscarded ?? true) { + const discardedFilePaths = Object.keys(discarded.resultsByFilePaths); + const duplicatedFilePaths = discardedFilePaths.filter( + (filePath) => filePath in accepted.resultsByFilePaths + ); + duplicatedFilePaths.forEach((filePath) => { + delete discarded.resultsByFilePaths[filePath]; + delete discarded.usageByFilePaths[filePath]; + }); + } + + return { + accepted, + discarded, + }; +}; diff --git a/packages/refactor-bot/src/response-parsers/parseFencedCodeBlocks.test.ts b/packages/refactor-bot/src/response-parsers/parseFencedCodeBlocks.test.ts new file mode 100644 index 0000000..a2f2def --- /dev/null +++ b/packages/refactor-bot/src/response-parsers/parseFencedCodeBlocks.test.ts @@ -0,0 +1,142 @@ +import { expect, it } from '@jest/globals'; +import dedent from 'dedent'; + +import { markdown } from '../markdown/markdown'; +import { parseFencedCodeBlocks } from './parseFencedCodeBlocks'; + +it('should return empty array when no blocks', () => { + expect(parseFencedCodeBlocks(`Hello world`)).toEqual([]); +}); + +it('should parse single block', () => { + expect( + parseFencedCodeBlocks(markdown` + ~~~TypeScript + // some code + ~~~ + `) + ).toEqual([ + { + code: ` // some code\n`, + language: 'TypeScript', + }, + ]); +}); + +it('should be allowed without language tag', () => { + expect( + parseFencedCodeBlocks(markdown` + ~~~ + unknown language + ~~~ + `) + ).toEqual([ + { + code: 'unknown language\n', + }, + ]); +}); + +it('should allow empty code block', () => { + expect( + parseFencedCodeBlocks(dedent` + ~~~txt + ~~~ + `) + ).toEqual([ + { + code: '', + language: 'txt', + }, + ]); +}); + +it('should parse backtick block', () => { + expect( + parseFencedCodeBlocks(dedent` + \`\`\`md + Hello! + \`\`\` + `) + ).toEqual([ + { + code: `Hello!\n`, + language: 'md', + }, + ]); +}); + +it('should parse multiple blocks', () => { + expect( + parseFencedCodeBlocks(dedent` + Confirm the file is there: + \`\`\`sh + ls . + \`\`\` + + Then execute: + \`\`\`sh + cat file.txt + \`\`\` + `) + ).toEqual([ + { + code: `ls .\n`, + language: 'sh', + }, + { + code: `cat file.txt\n`, + language: 'sh', + }, + ]); +}); + +it('should not trigger internal code blocks', () => { + expect( + parseFencedCodeBlocks(markdown` + ~~~TypeScript + const value = /* yaml */\` + \`\`\`TypeScript + // some code + \`\`\` + \` + ~~~ + `) + ).toEqual([ + { + code: dedent/* ts */ ` + const value = /* yaml */\` + \`\`\`TypeScript + // some code + \`\`\` + \`\n + `, + language: 'TypeScript', + }, + ]); +}); + +it('should not trigger other internal code blocks on the same level', () => { + expect( + parseFencedCodeBlocks(dedent` + ~~~~TypeScript + const value = /* ts */\` + ~~~TypeScript + // some code + ~~~ + \` + ~~~~ + `) + ).toEqual([ + { + code: dedent` + const value = /* ts */\` + ~~~TypeScript + // some code + ~~~ + \`\n + `, + language: 'TypeScript', + }, + ]); +}); diff --git a/packages/refactor-bot/src/response-parsers/parseFencedCodeBlocks.ts b/packages/refactor-bot/src/response-parsers/parseFencedCodeBlocks.ts new file mode 100644 index 0000000..8ca8d30 --- /dev/null +++ b/packages/refactor-bot/src/response-parsers/parseFencedCodeBlocks.ts @@ -0,0 +1,60 @@ +import { line } from '../text/line'; +import { escapeRegExp } from '../utils/escapeRegExp'; +import { firstLineOf } from '../utils/firstLineOf'; + +export function parseFencedCodeBlocks(text: string) { + const results: Array<{ + code: string; + language?: string; + }> = []; + + const openingMarkerRegex = + /^(?(```*)|(~~~*))(?\w+)?\n/gm; + + let openingRes = openingMarkerRegex.exec(text); + while (openingRes) { + const { openingMarker, language } = openingRes.groups as { + openingMarker: string; + language?: string; + }; + + const closingMarkerRegex = new RegExp( + `^${escapeRegExp(openingMarker)}$`, + 'gm' + ); + + closingMarkerRegex.lastIndex = openingMarkerRegex.lastIndex; + const closingRes = closingMarkerRegex.exec(text); + + if (!closingRes) { + const firstLineOfCode = firstLineOf( + text.slice(openingMarkerRegex.lastIndex) + ); + throw new Error( + line` + Could not find closing fenced code block marker + "${openingMarker}" for opening marker at index + ${openingMarkerRegex.lastIndex} which starts as + "${openingRes[0]}↩︎${firstLineOfCode}" + ` + ); + } + + const code = text.slice( + openingMarkerRegex.lastIndex, + closingMarkerRegex.lastIndex - closingRes[0].length + ); + + results.push({ + code, + ...(language && { + language, + }), + }); + + openingMarkerRegex.lastIndex = closingMarkerRegex.lastIndex; + openingRes = openingMarkerRegex.exec(text); + } + + return results; +} diff --git a/packages/refactor-bot/src/response-parsers/parseJsonResponse.test.ts b/packages/refactor-bot/src/response-parsers/parseJsonResponse.test.ts new file mode 100644 index 0000000..6b86080 --- /dev/null +++ b/packages/refactor-bot/src/response-parsers/parseJsonResponse.test.ts @@ -0,0 +1,58 @@ +import { expect, it } from '@jest/globals'; +import dedent from 'dedent'; +import { z } from 'zod'; + +import { parseJsonResponse } from './parseJsonResponse'; + +it('should return parsed response', () => { + expect( + parseJsonResponse( + dedent` + \`\`\`json + { + "summary": "The objective was not achieved. The algorithm did not replace 'readFile' from 'fs/promises' with 'readFileSync' from 'fs'. Instead, it redefined 'readFile' within the 'defaultDeps' object to call itself recursively without any changes, which will lead to a stack overflow if executed.", + "requirements": [ + { + "description": "Replace all usages of 'readFile' from 'fs/promises' with 'readFileSync' from 'fs'", + "isObjective": true, + "satisfied": false + }, + { + "description": "The code produced is minimal and doesn't make modifications which are unnecessary or not requested by the user", + "isObjective": false, + "satisfied": false + } + ] + } + \`\`\` + `, + z.object({ + summary: z.string(), + requirements: z.array( + z.object({ + description: z.string(), + isObjective: z.boolean(), + satisfied: z.boolean(), + }) + ), + }) + ) + ).toEqual({ + summary: + "The objective was not achieved. The algorithm did not replace 'readFile' from 'fs/promises' with 'readFileSync' from 'fs'. Instead, it redefined 'readFile' within the 'defaultDeps' object to call itself recursively without any changes, which will lead to a stack overflow if executed.", + requirements: [ + { + description: + "Replace all usages of 'readFile' from 'fs/promises' with 'readFileSync' from 'fs'", + isObjective: true, + satisfied: false, + }, + { + description: + "The code produced is minimal and doesn't make modifications which are unnecessary or not requested by the user", + isObjective: false, + satisfied: false, + }, + ], + }); +}); diff --git a/packages/refactor-bot/src/response-parsers/parseJsonResponse.ts b/packages/refactor-bot/src/response-parsers/parseJsonResponse.ts new file mode 100644 index 0000000..30e07e7 --- /dev/null +++ b/packages/refactor-bot/src/response-parsers/parseJsonResponse.ts @@ -0,0 +1,52 @@ +import { z, ZodError } from 'zod'; + +import { parseJsonSchema } from '../utils/parseJsonSchema'; +import { parseFencedCodeBlocks } from './parseFencedCodeBlocks'; + +export function parseJsonResponse>( + content: string, + schema: Schema +) { + const json = parseJsonSchema(schema); + + const fencedJson = z + .string() + .transform((content, ctx) => { + try { + const blocks = parseFencedCodeBlocks(content).filter( + (block) => !block.language || block.language === 'json' + ); + + if (blocks.length === 0) { + ctx.addIssue({ + message: 'No fenced code blocks found of json type', + code: z.ZodIssueCode.custom, + }); + } + + return blocks.map((block) => block.code); + } catch (err) { + ctx.addIssue({ + message: String(err), + code: z.ZodIssueCode.custom, + }); + return z.NEVER; + } + }) + .pipe(z.array(json).nonempty()); + + const nonFencedJson = json.transform((response) => [response] as const); + + const schemas = [nonFencedJson, fencedJson]; + const results = schemas.map((schema) => schema.safeParse(content)); + + const successResult = results.find((r) => r.success); + + if (successResult && successResult.success) { + return successResult.data[0]; + } else { + throw new ZodError([ + ...results.flatMap((r) => (r.success ? [] : r.error.issues)), + ]); + } +} diff --git a/packages/refactor-bot/src/utils/hasOne.ts b/packages/refactor-bot/src/utils/hasOne.ts index c67a60b..3017644 100644 --- a/packages/refactor-bot/src/utils/hasOne.ts +++ b/packages/refactor-bot/src/utils/hasOne.ts @@ -28,14 +28,14 @@ export function hasTwoElements(arr: unknown[] | readonly unknown[]): boolean { } export function ensureHasTwoElements< - Arr extends unknown[] | readonly unknown[] + Arr extends unknown[] | readonly unknown[], >(arr: Arr): [Arr[0], Arr[1], ...Arr[number][]]; export function ensureHasTwoElements( arr: Arr ): readonly [Arr[0], Arr[1], ...Arr[number][]]; export function ensureHasTwoElements< - Arr extends unknown[] | readonly unknown[] + Arr extends unknown[] | readonly unknown[], >(arr: Arr) { - assert(hasOneElement(arr)); + assert(hasTwoElements(arr)); return arr; } diff --git a/packages/refactor-bot/src/utils/isFileNotFoundError.ts b/packages/refactor-bot/src/utils/isFileNotFoundError.ts new file mode 100644 index 0000000..52fc8f0 --- /dev/null +++ b/packages/refactor-bot/src/utils/isFileNotFoundError.ts @@ -0,0 +1,5 @@ +export function isFileNotFoundError(err: unknown) { + return Boolean( + typeof err === 'object' && err && 'code' in err && err.code === 'ENOENT' + ); +} diff --git a/packages/refactor-bot/src/utils/parseJsonSchema.ts b/packages/refactor-bot/src/utils/parseJsonSchema.ts new file mode 100644 index 0000000..ab12cb8 --- /dev/null +++ b/packages/refactor-bot/src/utils/parseJsonSchema.ts @@ -0,0 +1,22 @@ +import { z } from 'zod'; + +export function parseJsonSchema(schema: Schema) { + return z + .unknown() + .pipe(z.string()) + .transform((content: string, ctx) => { + try { + return JSON.parse(content) as unknown; + } catch (err) { + const message = + err instanceof Error ? err.message : String(err); + ctx.addIssue({ + message: `Not a valid JSON string - ${message}`, + code: z.ZodIssueCode.custom, + path: ctx.path, + }); + return z.NEVER; + } + }) + .pipe(schema); +} diff --git a/packages/refactor-bot/tsconfig.json b/packages/refactor-bot/tsconfig.json index 57e4c48..a0a2be3 100644 --- a/packages/refactor-bot/tsconfig.json +++ b/packages/refactor-bot/tsconfig.json @@ -3,7 +3,7 @@ "compilerOptions": { "outDir": ".tsc-out", "tsBuildInfoFile": ".tsc-out/.tsbuildinfo", - "noErrorTruncation": true + "noErrorTruncation": false }, "include": ["src/**/*.ts", "src/ts-morph/builtinLibs.cjs"] }