diff --git a/e2e/projects.spec.ts b/e2e/projects.spec.ts index ef13735b..b46829d2 100644 --- a/e2e/projects.spec.ts +++ b/e2e/projects.spec.ts @@ -25,6 +25,6 @@ test("create new project, rename it and delete it", async ({ page }) => { await page.getByTestId("delete-project-button").click() await page.getByTestId("delete-project-popover-button").click() - // If the project was deleted successfully, it redirects to the analytics page + // // If the project was deleted successfully, it redirects to the analytics page await page.waitForURL("**/analytics") }) diff --git a/package-lock.json b/package-lock.json index f2461511..6db71b9c 100644 --- a/package-lock.json +++ b/package-lock.json @@ -5270,6 +5270,14 @@ } ] }, + "node_modules/queue-promise": { + "version": "2.2.1", + "resolved": "https://registry.npmjs.org/queue-promise/-/queue-promise-2.2.1.tgz", + "integrity": "sha512-C3eyRwLF9m6dPV4MtqMVFX+Xmc7keZ9Ievm3jJ/wWM5t3uVbFnGsJXwpYzZ4LaIEcX9bss/mdaKzyrO6xheRuA==", + "engines": { + "node": ">=8.12.0" + } + }, "node_modules/queue-tick": { "version": "1.0.1", "resolved": "https://registry.npmjs.org/queue-tick/-/queue-tick-1.0.1.tgz", @@ -6992,6 +7000,7 @@ "p-queue": "^8.0.1", "postgres": "^3.4.3", "prexit": "^2.2.0", + "queue-promise": "^2.2.1", "rouge": "^1.0.3", "samlify": "^2.8.11", "shared": "*", diff --git a/packages/backend/package.json b/packages/backend/package.json index 64e742bc..bc99d10d 100644 --- a/packages/backend/package.json +++ b/packages/backend/package.json @@ -35,6 +35,7 @@ "p-queue": "^8.0.1", "postgres": "^3.4.3", "prexit": "^2.2.0", + "queue-promise": "^2.2.1", "rouge": "^1.0.3", "samlify": "^2.8.11", "shared": "*", diff --git a/packages/backend/src/api/v1/evaluations/index.ts b/packages/backend/src/api/v1/evaluations/index.ts index bf9fc112..d44c17c8 100644 --- a/packages/backend/src/api/v1/evaluations/index.ts +++ b/packages/backend/src/api/v1/evaluations/index.ts @@ -6,7 +6,8 @@ import sql from "@/src/utils/db" import Context from 
"@/src/utils/koa" import Router from "koa-router" import { RunEvent } from "lunary/types" -import PQueue from "p-queue" + +import Queue from "queue-promise" import { PassThrough } from "stream" import { runEval } from "./utils" @@ -31,9 +32,9 @@ evaluations.post( Connection: "keep-alive", }) - const queue = new PQueue({ - concurrency: MAX_PARALLEL_EVALS, - timeout: 10000, + const queue = new Queue({ + concurrent: MAX_PARALLEL_EVALS, + start: true, }) const [{ plan }] = @@ -70,16 +71,17 @@ evaluations.post( for (const variation of variations) { for (const provider of evaluation.providers) { count++ - queue.add(() => - runEval({ + queue.enqueue(async () => { + await runEval({ evaluationId: evaluation.id, promptId: prompt.id, variation, provider, prompt: prompt.messages, checklistId, - }), - ) + }) + console.log(`Task ${count} don with model ${provider.model} done`) + }) } } } @@ -89,17 +91,24 @@ evaluations.post( ctx.status = 200 ctx.body = stream - queue.on("active", () => { - const percentDone = ((count - queue.size) / count) * 100 - console.log(`Active: ${queue.size} of ${count} (${percentDone}%)`) + let done = 0 + + queue.on("dequeue", () => { + done++ + const percentDone = (1 - (count - done) / count) * 100 + console.log(`Active: ${done} of ${count} (${percentDone}%)`) stream.write(JSON.stringify({ percentDone }) + "\n") }) - await queue.onIdle() + console.log(`Queue started with ${count} tasks`) - stream.write(JSON.stringify({ id: evaluation?.id }) + "\n") + queue.on("end", () => { + console.log("Queue is empty now") - stream.end() + stream.write(JSON.stringify({ id: evaluation?.id }) + "\n") + + stream.end() + }) }, ) diff --git a/packages/backend/src/api/v1/evaluations/utils.ts b/packages/backend/src/api/v1/evaluations/utils.ts index 1ffa8d84..fc294fc3 100644 --- a/packages/backend/src/api/v1/evaluations/utils.ts +++ b/packages/backend/src/api/v1/evaluations/utils.ts @@ -117,6 +117,8 @@ export async function runEval({ duration, })} ` + + console.log(`Eval for 
${provider.model} passed: ${passed}`) } catch (error: any) { await sql` insert into evaluation_result ${sql({ diff --git a/packages/backend/src/api/v1/radars.ts b/packages/backend/src/api/v1/radars.ts index 8ff2babf..f2e4d0b0 100644 --- a/packages/backend/src/api/v1/radars.ts +++ b/packages/backend/src/api/v1/radars.ts @@ -36,55 +36,55 @@ const DEFAULT_RADARS = [ }, ], }, - { - description: - "Answer potentially contains PII (Personal Identifiable Information)", - negative: true, - view: [ - "AND", - { - id: "type", - params: { - type: "llm", - }, - }, - ], - checks: [ - "AND", - { - id: "pii", - params: { - field: "input", - type: "contains", - entities: ["person", "location", "email", "cc", "phone", "ssn"], - }, - }, - ], - }, - { - description: "Prompt contains PII (Personal Identifiable Information)", - negative: true, - view: [ - "AND", - { - id: "type", - params: { - type: "llm", - }, - }, - ], - checks: [ - "AND", - { - id: "pii", - params: { - field: "input", - type: "contains", - entities: ["person", "location", "email", "cc", "phone", "ssn"], - }, - }, - ], - }, + // { + // description: + // "Answer potentially contains PII (Personal Identifiable Information)", + // negative: true, + // view: [ + // "AND", + // { + // id: "type", + // params: { + // type: "llm", + // }, + // }, + // ], + // checks: [ + // "AND", + // { + // id: "pii", + // params: { + // field: "input", + // type: "contains", + // entities: ["person", "location", "email", "cc", "phone", "ssn"], + // }, + // }, + // ], + // }, + // { + // description: "Prompt contains PII (Personal Identifiable Information)", + // negative: true, + // view: [ + // "AND", + // { + // id: "type", + // params: { + // type: "llm", + // }, + // }, + // ], + // checks: [ + // "AND", + // { + // id: "pii", + // params: { + // field: "input", + // type: "contains", + // entities: ["person", "location", "email", "cc", "phone", "ssn"], + // }, + // }, + // ], + // }, { description: "Contains profanity or toxic 
language", negative: true, diff --git a/packages/backend/src/checks/index.ts b/packages/backend/src/checks/index.ts index 087da3e8..9a67c693 100644 --- a/packages/backend/src/checks/index.ts +++ b/packages/backend/src/checks/index.ts @@ -348,6 +348,7 @@ export const CHECK_RUNNERS: CheckRunner[] = [ return sql`${field} ${operator} ${textParam}` }, }, + { id: "assertion", async evaluator(run, params) { diff --git a/packages/backend/src/utils/db.ts b/packages/backend/src/utils/db.ts index 79044525..bacff10a 100644 --- a/packages/backend/src/utils/db.ts +++ b/packages/backend/src/utils/db.ts @@ -14,8 +14,8 @@ const sql = postgres(process.env.DATABASE_URL!, { connection: { application_name: `backend-${isProduction ? "production" : "development"}-${new Date().getTime()}`, }, - debug: process.env.DEBUG ? debugFn : () => {}, - onnotice: process.env.DEUG ? console.log : () => {},// TODO: replace `() => {}` by false when porsager/postgres PR is merged + // debug: process.env.DEBUG ? debugFn : () => {}, + // onnotice: process.env.DEUG ? 
console.log : () => {},// TODO: replace `() => {}` by false when porsager/postgres PR is merged }) function debugFn( diff --git a/packages/backend/src/utils/license.ts b/packages/backend/src/utils/license.ts index 7654941a..cdf38f6f 100644 --- a/packages/backend/src/utils/license.ts +++ b/packages/backend/src/utils/license.ts @@ -22,7 +22,6 @@ async function licenseMiddleware(ctx: Context, next: Next) { try { if (Date.now() - cache.lastFetch > TWO_HOURS) { - console.log("Fetching") const licenseData = await fetch( `https://license.lunary.ai/v1/licenses/${LICENSE_KEY}`, ).then((res) => res.json()) diff --git a/packages/frontend/components/blocks/SettingsCard.tsx b/packages/frontend/components/blocks/SettingsCard.tsx index ca424389..6f52182b 100644 --- a/packages/frontend/components/blocks/SettingsCard.tsx +++ b/packages/frontend/components/blocks/SettingsCard.tsx @@ -1,17 +1,41 @@ import { Card, Stack, Title } from "@mantine/core" +import Paywall from "../layout/Paywall" // so we can have an harmonized title for all cards export function SettingsCard({ title, children, align, + paywallConfig, gap = "lg", }: { title children: React.ReactNode + paywallConfig?: any align?: string gap?: string }) { + if (paywallConfig?.enabled) { + return ( + + + {title} + + {children} + + + + ) + } + return ( diff --git a/packages/frontend/components/checks/ChecksUIData.tsx b/packages/frontend/components/checks/ChecksUIData.tsx index af7e1567..4b9e8444 100644 --- a/packages/frontend/components/checks/ChecksUIData.tsx +++ b/packages/frontend/components/checks/ChecksUIData.tsx @@ -3,15 +3,18 @@ import { IconBiohazard, IconBraces, IconBracketsContainStart, + IconBrain, IconBrandOpenai, IconCalendar, IconCheck, + IconCheckbox, IconCircleLetterT, IconClock, IconCoin, IconCreditCard, IconEyeCheck, IconFilter, + IconFocus, IconHelpCircle, IconHtml, IconIdBadge, @@ -27,6 +30,8 @@ import { IconSearch, IconShieldBolt, IconTag, + IconTarget, + IconTextWrap, IconThumbUp, IconTools, IconUser, @@ 
-210,6 +215,22 @@ const CHECKS_UI_DATA: ChecksUIData = { icon: IconIdBadge, color: "orange", }, + summarization: { + icon: IconTextWrap, + color: "blue", + }, + "context-recall": { + icon: IconBrain, + color: "blue", + }, + "context-precision": { + icon: IconTarget, + color: "blue", + }, + relevancy: { + icon: IconCheckbox, + color: "green", + }, other: { icon: IconFilter, color: "gray", diff --git a/packages/frontend/components/checks/SmartSelectInput.tsx b/packages/frontend/components/checks/SmartSelectInput.tsx index 2287f620..9d7f22ba 100644 --- a/packages/frontend/components/checks/SmartSelectInput.tsx +++ b/packages/frontend/components/checks/SmartSelectInput.tsx @@ -100,18 +100,23 @@ export default function SmartCheckSelect({ .toLowerCase() .includes(search.trim().toLowerCase()) } - const renderedOptions = data?.filter(optionsFilter).map((item) => ( - - - {value?.includes(getItemValue(item)) ? : null} - {renderListItem ? renderListItem(item) : renderLabel(item)} - - - )) + const renderedOptions = data?.filter(optionsFilter).map((item) => { + const active = multiple + ? fixedValue.includes(getItemValue(item)) + : getItemValue(item) === value + return ( + + + {active ? : null} + {renderListItem ? renderListItem(item) : renderLabel(item)} + + + ) + }) useEffect(() => { if (!value) { @@ -125,6 +130,7 @@ export default function SmartCheckSelect({ combobox.openDropdown()} variant="unstyled" + size="xs" miw={width} w="min-content" > diff --git a/packages/frontend/components/evals/ResultsMatrix.tsx b/packages/frontend/components/evals/ResultsMatrix.tsx index b9dbbe07..d23f5f8a 100644 --- a/packages/frontend/components/evals/ResultsMatrix.tsx +++ b/packages/frontend/components/evals/ResultsMatrix.tsx @@ -68,7 +68,7 @@ function ResultCell({ result }) { <> {result.status === "success" ? 
( - + diff --git a/packages/frontend/components/layout/Paywall.tsx b/packages/frontend/components/layout/Paywall.tsx index 4334934e..3afb8394 100644 --- a/packages/frontend/components/layout/Paywall.tsx +++ b/packages/frontend/components/layout/Paywall.tsx @@ -41,27 +41,34 @@ export default function Paywall({ plan, feature, children, + enabled, list, description, Icon, + p, }: { plan: string feature: string description: string - list: string[] + enabled?: boolean + list?: string[] children: React.ReactNode Icon?: React.ComponentType + p?: number }) { const { org } = useOrg() // Automatically disable paywall in these cases if ( - ["custom", "unlimited", plan].includes(org?.plan) || - process.env.NEXT_PUBLIC_DEMO + typeof enabled !== "undefined" + ? !enabled + : ["custom", plan].includes(org?.plan) || process.env.NEXT_PUBLIC_DEMO ) { return children } + const isEnterpriseFeature = plan === "enterprise" + return ( {Icon && } - - {feature} is available in Lunary {capitalize(plan)} + <Title order={3} lh={1}> + { + <Text + span + fw={"inherit"} + fz={"inherit"} + lts={"inherit"} + variant="gradient" + gradient={{ from: "indigo", to: "cyan", deg: 45 }} + > + {feature} + </Text> + } + <span>{` is available ${isEnterpriseFeature ? 
"as an addon in" : ""} Lunary ${capitalize(plan)}`}</span> - {description} - + {description && {description}} + {list && } diff --git a/packages/frontend/components/layout/Sidebar.tsx b/packages/frontend/components/layout/Sidebar.tsx index 467ca762..712b7b5d 100644 --- a/packages/frontend/components/layout/Sidebar.tsx +++ b/packages/frontend/components/layout/Sidebar.tsx @@ -2,11 +2,15 @@ import { Box, Flex, Menu, NavLink, Stack, Text, ThemeIcon } from "@mantine/core" import { IconActivity, + IconActivityHeartbeat, IconAnalyze, IconBolt, + IconCheckbox, IconChevronRight, IconCreditCard, + IconDatabase, IconFlask2Filled, + IconFlaskFilled, IconHelpOctagon, IconListSearch, IconLogout, @@ -42,6 +46,7 @@ function NavbarLink({ soon, onClick, c, + subMenu, disabled = false, }) { const router = useRouter() @@ -67,10 +72,21 @@ function NavbarLink({ } - /> + > + {subMenu?.map((item) => )} + ) } +type MenuItem = { + label: string + icon: any + link: string + resource: ResourceName + disabled?: boolean + subMenu?: MenuItem[] +} + export default function Sidebar() { const auth = useAuth() const router = useRouter() @@ -89,13 +105,7 @@ export default function Sidebar() { const billingEnabled = process.env.NEXT_PUBLIC_STRIPE_PUBLISHABLE_KEY && !config.IS_SELF_HOSTED - const APP_MENU: { - label: string - icon: any - link: string - resource: ResourceName - disabled?: boolean - }[] = [ + const APP_MENU: MenuItem[] = [ { label: "Analytics", icon: IconTimeline, @@ -128,6 +138,32 @@ export default function Sidebar() { link: "/evaluations", resource: "evaluations", disabled: isSelfHosted ? 
org.license && !org.license.evalEnabled : false, + subMenu: [ + { + label: "Playground", + icon: IconFlaskFilled, + link: "/evaluations/new", + resource: "evaluations", + }, + { + label: "Real-time", + icon: IconActivityHeartbeat, + link: "/evaluations/realtime", + resource: "evaluations", + }, + { + label: "Datasets", + icon: IconDatabase, + link: "/datasets", + resource: "datasets", + }, + { + label: "Checklists", + icon: IconCheckbox, + link: "/evaluations/checklists", + resource: "checklists", + }, + ], }, { label: "Settings & Keys", diff --git a/packages/frontend/components/prompts/TemplateMenu.tsx b/packages/frontend/components/prompts/TemplateMenu.tsx index 2042105f..3a7a1511 100644 --- a/packages/frontend/components/prompts/TemplateMenu.tsx +++ b/packages/frontend/components/prompts/TemplateMenu.tsx @@ -34,7 +34,7 @@ export const defaultTemplateVersion = { { content: "Hi!", role: "user" }, ], extra: { - model: "gpt-4-turbo-preview", + model: "gpt-4-turbo", temperature: 1.0, max_tokens: 1000, }, diff --git a/packages/frontend/pages/evaluations/checklists.tsx b/packages/frontend/pages/evaluations/checklists.tsx index d64bbe3a..b36dd40d 100644 --- a/packages/frontend/pages/evaluations/checklists.tsx +++ b/packages/frontend/pages/evaluations/checklists.tsx @@ -189,7 +189,7 @@ export default function Checklists() { Checklists are collections of assertions that you can use in - evaluations (SDK or Dashboard). + evaluations (Playground or SDK). {loading ? ( diff --git a/packages/frontend/pages/evaluations/index.tsx b/packages/frontend/pages/evaluations/index.tsx index 6e0e1eee..1e6f3322 100644 --- a/packages/frontend/pages/evaluations/index.tsx +++ b/packages/frontend/pages/evaluations/index.tsx @@ -52,7 +52,7 @@ export default function Evaluations() { Evaluations - Alpha + Beta @@ -110,89 +110,6 @@ export default function Evaluations() { - - - History - - - {isLoading ? ( - - ) : ( - <> - {!evaluations?.length ? 
( - - ) : ( - - {evaluations.map((evaluation) => ( - - - - - - router.push(`/evaluations/${evaluation.id}`) - } - style={{ cursor: "pointer" }} - > - {evaluation.name} - - - Complete - - - - - {evaluation.models?.map((model, index) => ( - - {model} - - ))} - - - {evaluation.providers?.map((provider, index) => ( - - {provider.model} - - ))} - - - - - - - - - - - ))} - - )} - - )} diff --git a/packages/frontend/pages/evaluations/new.tsx b/packages/frontend/pages/evaluations/new.tsx index 5d4ec906..0c7a0b69 100644 --- a/packages/frontend/pages/evaluations/new.tsx +++ b/packages/frontend/pages/evaluations/new.tsx @@ -1,15 +1,24 @@ import Steps from "@/components/blocks/Steps" import Paywall from "@/components/layout/Paywall" -import { useChecklists, useDatasets, useProject } from "@/utils/dataHooks" +import { + useChecklists, + useDatasets, + useEvaluations, + useProject, + useUser, +} from "@/utils/dataHooks" import { fetcher } from "@/utils/fetcher" import { + Alert, Anchor, Badge, Button, + Card, Container, Group, InputBase, + Loader, Modal, Pill, Progress, @@ -19,13 +28,15 @@ import { Title, Tooltip, } from "@mantine/core" -import { IconFlask2Filled } from "@tabler/icons-react" +import { IconFlask2Filled, IconRefresh, IconTable } from "@tabler/icons-react" import { useRouter } from "next/router" import { useEffect, useRef, useState } from "react" import { ChecklistModal } from "./checklists" import ProviderEditor from "@/components/prompts/Provider" -import { MODELS, Provider } from "shared" +import { MODELS, Provider, hasAccess } from "shared" import { useLocalStorage } from "@mantine/hooks" +import OrgUserBadge from "@/components/blocks/OrgUserBadge" +import Link from "next/link" const FEATURE_LIST = [ "Define assertions to test variations of prompts", @@ -99,6 +110,9 @@ export default function NewEvaluation() { const { datasets, isLoading: datasetsLoading } = useDatasets() const { checklists, loading: checklistsLoading } = useChecklists("evaluation") + const { 
evaluations, isLoading } = useEvaluations() + const { user } = useUser() + // make sure to only fetch once const ref = useRef({ done: false }) @@ -156,7 +170,10 @@ export default function NewEvaluation() { setLoading(false) } - const canStartEvaluation = datasetId && providers.length > 0 + const canStartEvaluation = + datasetId && + providers.length > 0 && + hasAccess(user.role, "evaluations", "create") return ( - ← Back to Evaluations Evaluation Playground - Alpha + Beta no-code @@ -325,6 +341,89 @@ export default function NewEvaluation() { Start Evaluation + + + History + + + {isLoading ? ( + + ) : ( + <> + {!evaluations?.length ? ( + + ) : ( + + {evaluations.map((evaluation) => ( + + + + + + router.push(`/evaluations/${evaluation.id}`) + } + style={{ cursor: "pointer" }} + > + {evaluation.name} + + + Complete + + + + + {evaluation.models?.map((model, index) => ( + + {model} + + ))} + + + {evaluation.providers?.map((provider, index) => ( + + {provider.model} + + ))} + + + + + + + + + + + ))} + + )} + + )} diff --git a/packages/frontend/pages/evaluations/realtime.tsx b/packages/frontend/pages/evaluations/realtime.tsx new file mode 100644 index 00000000..5d3c574b --- /dev/null +++ b/packages/frontend/pages/evaluations/realtime.tsx @@ -0,0 +1,60 @@ +import Paywall from "@/components/layout/Paywall" +import { useOrg } from "@/utils/dataHooks" + +import { + Badge, + Button, + Container, + Group, + Stack, + Text, + Title, +} from "@mantine/core" +import { IconActivityHeartbeat, IconPlus } from "@tabler/icons-react" + +const FEATURE_LIST = [ + "Real-time LLM-based evaluations on production data", + "Enrich logs, with sentiment analysis, topic recognition, PII detection, and more", + "Use local models like Llama 3 or connect to external APIs", +] + +export default function Checklists() { + const { org } = useOrg() + + return ( + + + + + + Realtime Evaluations + + Enterprise + + + + + + + + + + Run evaluations on your production data in real-time. 
They can be + used to enrich your data with additional information, such as + sentiment analysis, topic recognition, and more. + + + + + ) +} diff --git a/packages/frontend/pages/radars/index.tsx b/packages/frontend/pages/radars/index.tsx index 35992020..fed57b9f 100644 --- a/packages/frontend/pages/radars/index.tsx +++ b/packages/frontend/pages/radars/index.tsx @@ -385,6 +385,7 @@ export default function Radar() { } - {/* {org.plan === "custom" && ( - Smart Data Exclusion ✨} align="start"> - - Smart Data Exclusion allows you to filter out sensitive data from - your project. Data that matches the filters will not be ingested. - - - ["tools", "tags", "metadata", "users", "pii", "regex"].includes( - f.id, - ) - } - /> + Smart Data Masking ✨} + align="start" + paywallConfig={{ + Icon: IconFilter, + feature: "Smart Data Masking", + p: 12, + plan: "enterprise", + list: [ + "Mask or filter out sensitive data", + "LLM-powered detection or custom regex patterns", + ], + enabled: true, + }} + > + + Smart Data Masking allows you to filter out sensitive data from your + project. + + + ["tools", "tags", "metadata", "users", "pii", "regex"].includes( + f.id, + ) + } + /> - - - - - )} */} + + + + + + Custom Models 🧠} + align="start" + paywallConfig={{ + Icon: IconFilter, + feature: "Custom Models", + p: 12, + plan: "enterprise", + list: [ + "Use custom models for evaluations", + "Add and overwrite costs mappings", + ], + + enabled: true, + }} + > + Add custom models and costs mappings to your project. 
+ + + + + {user && hasAccess(user.role, "projects", "delete") && ( diff --git a/packages/frontend/pages/team.tsx b/packages/frontend/pages/team.tsx index 199fd642..7294d971 100644 --- a/packages/frontend/pages/team.tsx +++ b/packages/frontend/pages/team.tsx @@ -31,6 +31,7 @@ import { IconCopy, IconDotsVertical, IconDownload, + IconLogin, IconTrash, } from "@tabler/icons-react" import { NextSeo } from "next-seo" @@ -56,6 +57,7 @@ import { SettingsCard } from "@/components/blocks/SettingsCard" import { SEAT_ALLOWANCE } from "@/utils/pricing" import { openUpgrade } from "@/components/layout/UpgradeModal" import config from "@/utils/config" +import Paywall from "@/components/layout/Paywall" function SAMLConfig() { const { org, updateOrg, mutate } = useOrg() @@ -106,91 +108,102 @@ function SAMLConfig() { setSpLoading(false) } - return ( - - - SAML configuration - - - 1. Provider your Identity Provider (IDP) Metadata XML. - - - setIdpXml(e.currentTarget.value)} - /> + const samlEnabled = config.IS_SELF_HOSTED + ? org.license.samlEnabled + : org.samlEnabled - - - - - 2. Setup the configuration in your Identity Provider (IDP) - - - - - - Identifier (Entity ID): - - - - - - Assertion Consumer Service (ACS) URL: - - - - - - Single Logout Service (SLO) URL: - - - - - - Sign on URL: - - - - - - Single Logout URL: - - - - - -
+ return ( + + + 1. Provide your Identity Provider (IDP) Metadata XML. + + + setIdpXml(e.currentTarget.value)} + /> 
-
+ + + + 2. Setup the configuration in your Identity Provider (IDP) + + + + + + Identifier (Entity ID): + + + + + + Assertion Consumer Service (ACS) URL: + + + + + + Single Logout Service (SLO) URL: + + + + + + Sign on URL: + + + + + + Single Logout URL: + + + + + +
+ + +
) } @@ -789,9 +802,7 @@ export default function Team() { {hasAccess(user.role, "teamMembers", "create") && } - {samlEnabled && ["admin", "owner"].includes(user.role) && ( - - )} + {["admin", "owner"].includes(user.role) && } ) diff --git a/packages/shared/checks/index.ts b/packages/shared/checks/index.ts index cd952181..737c81e2 100644 --- a/packages/shared/checks/index.ts +++ b/packages/shared/checks/index.ts @@ -582,6 +582,7 @@ export const CHECKS: Check[] = [ }, { id: "pii", + soon: true, name: "PII", uiType: "ai", @@ -635,7 +636,7 @@ export const CHECKS: Check[] = [ name: "Assertion", uiType: "ai", description: - "Checks if the output matches the given requirement, using GPT-4 to grade the output.", + "Checks if the output matches the given requirement, using an LLM to grade the output.", onlyInEvals: true, params: [ { @@ -650,12 +651,58 @@ export const CHECKS: Check[] = [ }, ], }, + { + id: "geval", + name: "G-Eval", + uiType: "ai", + description: + "G-Eval is a framework that uses LLMs with chain-of-thoughts (CoT) to evaluate LLM outputs based on ANY custom criteria", + soon: true, + params: [ + { + type: "label", + label: "G-Eval", + }, + { + type: "text", + id: "criteria", + placeholder: "Is spoken like a pirate", + width: 140, + }, + ], + }, + { + id: "context-precision", + name: "Contextual Precision", + uiType: "ai", + description: + "The contextual precision metric measures your RAG pipeline's retriever by evaluating whether nodes in your context that are relevant to the given input are ranked higher than irrelevant ones.", + soon: true, + params: [], + }, + { + id: "context-recall", + name: "Contextual Recall", + uiType: "ai", + description: + "The contextual recall metric measures the quality of your RAG pipeline's retriever by evaluating the extent of which the context aligns with the expected_output.", + soon: true, + params: [], + }, + { + id: "summarization", + name: "Summarization", + uiType: "ai", + soon: true, + description: + "The 
summarization metric uses LLMs to determine whether your agent is generating factually correct summaries while including the necessary details from the original text.", + params: [], + }, { id: "sentiment", name: "Sentiment", uiType: "ai", - description: - "Uses AI to check if content is positive, neutral, or negative.", + description: "Uses AI to detect the sentiment of the given field.", params: [ FIELD_PARAM, { @@ -690,7 +737,7 @@ export const CHECKS: Check[] = [ uiType: "ai", onlyInEvals: true, description: - "Assesses if the tone of the LLM response matches with the desired persona.", + "Assesses if the tone of the response matches with the desired persona.", params: [ { type: "label",