diff --git a/packages/frontend/components/evals/ResultsMatrix.tsx b/packages/frontend/components/evals/ResultsMatrix.tsx index d23f5f8a..60b97027 100644 --- a/packages/frontend/components/evals/ResultsMatrix.tsx +++ b/packages/frontend/components/evals/ResultsMatrix.tsx @@ -1,5 +1,6 @@ import { Badge, + Button, Code, Group, HoverCard, @@ -12,6 +13,7 @@ import { formatCost } from "@/utils/format" import { ChatMessage } from "../SmartViewer/Message" import SmartViewer from "../SmartViewer" import { MODELS, Provider } from "shared" +import { IconFileExport } from "@tabler/icons-react" // We create a matrix of results for each prompt, variable and model. // The matrix is a 3D array, where each dimension represents a different variable, prompt and model. @@ -63,23 +65,25 @@ function ResultDetails({ details }) { ) } -function ResultCell({ result }) { +function ResultCell({ result, showTestIndicator }) { return result ? ( <> {result.status === "success" ? ( - - - - {result.passed ? "Passed" : "Failed"} - - - - - - + {showTestIndicator && ( + + + + {result.passed ? "Passed" : "Failed"} + + + + + + + )} {(+result.duration / 1000).toFixed(2)}s -{" "} @@ -96,12 +100,12 @@ function ResultCell({ result }) { ) } -function AggregateContent({ results }) { +function AggregateContent({ results, showTestIndicator }) { const { passed, failed, duration, cost } = getAggegateForVariation(results) return ( <> - {passed + failed > 1 && ( + {passed + failed > 1 && showTestIndicator && ( JSON.stringify(result.messages))), ).map((result: any) => JSON.parse(result)) @@ -174,8 +178,72 @@ export default function ResultsMatrix({ data }) { ...prompts.map((messages) => getVariableKeysForPrompt(messages).length), ) + function exportToCsv() { + const columns = [ + "Prompt", + "Variable Variation", + "Model", + "Passed", + "Output", + ] + const rows = [] + + prompts.forEach((messages) => { + const variableVariations = getVariableVariationsForPrompt(messages) + variableVariations.forEach((variables) => { + providers.forEach((provider) => { + const result = getResultForPromptVariationProvider( + messages, + variables, + provider, + ) + if (result) { + const textResult = result.error + ? JSON.stringify(result.error) + : result.output?.content + + rows.push([ + JSON.stringify(messages), + JSON.stringify(variables), + provider.model, + result.passed ? "Yes" : "No", + `"${textResult.replace(/"/g, '""')}"`, // Escape double quotes and wrap in double quotes + ]) + } + }) + }) + }) + + const csvContent = [ + columns.join(","), + ...rows.map((row) => row.join(",")), + ].join("\n") + + const blob = new Blob([csvContent], { type: "text/csv;charset=utf-8;" }) + const link = document.createElement("a") + const url = URL.createObjectURL(blob) + link.setAttribute("href", url) + link.setAttribute("download", "results.csv") + link.style.visibility = "hidden" + document.body.appendChild(link) + link.click() + document.body.removeChild(link) + } + return ( - + <> +
@@ -203,6 +271,7 @@ export default function ResultsMatrix({ data }) { compareObjects(result.provider, provider), )} @@ -234,6 +303,7 @@ export default function ResultsMatrix({ data }) { compareObjects(result.messages, messages), )} @@ -265,7 +335,10 @@ export default function ResultsMatrix({ data }) { ) return ( ) })} @@ -275,6 +348,6 @@ export default function ResultsMatrix({ data }) {
- +
-
+ ) } diff --git a/packages/frontend/components/prompts/PromptVariableEditor.tsx b/packages/frontend/components/prompts/PromptVariableEditor.tsx index 8c44acbc..b505f5be 100644 --- a/packages/frontend/components/prompts/PromptVariableEditor.tsx +++ b/packages/frontend/components/prompts/PromptVariableEditor.tsx @@ -41,8 +41,8 @@ export default function PromptVariableEditor({ > ) => void + [key: string]: any } export default function VariableTextarea({ name, value, + w, onChange, ...props }: VariableTextareaProps) { @@ -29,7 +34,7 @@ export default function VariableTextarea({ Edit variable} + title={Edit variable content} overlayProps={{ backgroundOpacity: 0.55, blur: 3, @@ -38,6 +43,7 @@ export default function VariableTextarea({ >