diff --git a/.github/workflows/backend_formatting_check.yml b/.github/workflows/backend_formatting_check.yml new file mode 100644 index 0000000000..d93f108b4e --- /dev/null +++ b/.github/workflows/backend_formatting_check.yml @@ -0,0 +1,36 @@ +name: "Backend Formatting Check" +run-name: "Backend Formatting Check on ${{ github.ref_name }} by @${{ github.actor }}" + +on: + pull_request: + paths: + - "apps/opik-backend/**/*.java" + push: + branches: + - "main" + paths: + - "apps/opik-backend/**/*.java" + + workflow_dispatch: + +jobs: + run-backend-formatting-check: + runs-on: ubuntu-latest + defaults: + run: + working-directory: apps/opik-backend/ + steps: + - name: Checkout + uses: actions/checkout@v4.1.1 + with: + fetch-depth: 1 + + - name: Set up JDK 21 + uses: actions/setup-java@v4 + with: + java-version: "21" + distribution: "corretto" + cache: maven + + - name: Run Formatting Check for backend + run: mvn clean spotless:check diff --git a/.github/workflows/documentation_codeblock_tests.yml b/.github/workflows/documentation_codeblock_tests.yml new file mode 100644 index 0000000000..c9baa2f40f --- /dev/null +++ b/.github/workflows/documentation_codeblock_tests.yml @@ -0,0 +1,87 @@ +name: Documentation - Test codeblocks +on: + workflow_dispatch: + inputs: + install_opik: + description: 'Enable opik installation from source files' + required: false + default: 'false' + type: choice + options: + - 'false' + - 'true' + pull_request: + paths: + - 'apps/opik-documentation/documentation/docs/*.md' + - 'apps/opik-documentation/documentation/docs/*.mdx' + - 'apps/opik-documentation/documentation/docs/**/*.md' + - 'apps/opik-documentation/documentation/docs/**/*.mdx' + +jobs: + collect_test_paths: + runs-on: ubuntu-latest + outputs: + test_paths: ${{ steps.paths.outputs.paths }} + steps: + - uses: actions/checkout@v3 + with: + fetch-depth: 0 # Fetch all history for git diff + + - id: paths + working-directory: apps/opik-documentation/documentation + run: | + # Get list of changed files in docs directory + if [[ "${{ github.event_name }}" == "pull_request" ]]; then + # For pull requests, compare with base branch + echo "paths=$( + git diff --name-only origin/${{ github.base_ref }} | + grep -E '^apps/opik-documentation/documentation/docs/.*\.(md|mdx)$' | + sed 's|apps/opik-documentation/documentation/||' | + jq -R -s -c 'split("\n")[:-1]' + )" >> $GITHUB_OUTPUT + else + # For manual runs and scheduled runs, check all files + echo "paths=$( + ( + ls -d docs/*/ 2>/dev/null; + find docs -maxdepth 1 -type f -name "*.md" -o -name "*.mdx" + ) | jq -R -s -c 'split("\n")[:-1]' + )" >> $GITHUB_OUTPUT + fi + + test: + needs: collect_test_paths + runs-on: ubuntu-latest + env: + OPENAI_API_KEY: ${{ secrets.DOCS_OPENAI_API_KEY }} + ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }} + OPIK_WORKSPACE: ${{ secrets.COMET_WORKSPACE }} + OPIK_API_KEY: ${{ secrets.COMET_API_KEY }} + strategy: + matrix: + path: ${{ fromJson(needs.collect_test_paths.outputs.test_paths) }} + fail-fast: false + steps: + - uses: actions/checkout@v3 + + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version: '3.10' + + - name: Install dependencies + working-directory: apps/opik-documentation/documentation + run: | + python -m pip install --upgrade pip + pip install pytest + pip install -r requirements.txt + if [ "${{ github.event.inputs.install_opik }}" = "true" ]; then + pip install -e . 
+ fi + + - name: Run tests + working-directory: apps/opik-documentation/documentation + run: | + if [ -n "${{ matrix.path }}" ]; then + pytest ${{ matrix.path }} -v --suppress-no-test-exit-code + fi diff --git a/.github/workflows/documentation_cookbook_tests.yml b/.github/workflows/documentation_cookbook_tests.yml index 302a467c47..6da2f444f6 100644 --- a/.github/workflows/documentation_cookbook_tests.yml +++ b/.github/workflows/documentation_cookbook_tests.yml @@ -30,6 +30,7 @@ jobs: - apps/opik-documentation/documentation/docs/cookbook/openai.ipynb - apps/opik-documentation/documentation/docs/cookbook/litellm.ipynb - apps/opik-documentation/documentation/docs/cookbook/ragas.ipynb + - apps/opik-documentation/documentation/docs/cookbook/dspy.ipynb env: NOTEBOOK_TO_TEST: ${{ matrix.notebooks }} steps: diff --git a/.gitignore b/.gitignore index d4500de3d1..93c4d35c8e 100644 --- a/.gitignore +++ b/.gitignore @@ -9,6 +9,9 @@ target/ **/dependency-reduced-pom.xml +# BE related +/apps/opik-backend/redoc/openapi.yaml + # FE related /apps/opik-frontend/dist /apps/opik-frontend/build diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 7ba8fa6279..74486ca3b9 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -283,6 +283,23 @@ Replace `{project.pom.version}` with the version of the project in the pom file. Once the backend is running, you can access the Opik API at `http://localhost:8080`. +#### Formatting the code + +Before submitting a PR, please ensure that your code is formatted correctly. +Run the following command to automatically format your code: + +```bash +mvn spotless:apply +``` + +Our CI checks that the code is formatted correctly by running the following command, and will fail if it is not: + +```bash +mvn spotless:check +``` + +#### Testing the backend + Before submitting a PR, please ensure that your code passes the test suite: ```bash diff --git a/README.md b/README.md index 66d20d46be..7b9c39014b 100644 --- a/README.md +++ b/README.md @@ -127,9 +127,10 @@ The easiest way to get started is to use one of our integrations. 
Opik supports: | OpenAI | Log traces for all OpenAI LLM calls | [Documentation](https://www.comet.com/docs/opik/tracing/integrations/openai/?utm_source=opik&utm_medium=github&utm_content=openai_link&utm_campaign=opik) | [![Open Quickstart In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/comet-ml/opik/blob/master/apps/opik-documentation/documentation/docs/cookbook/openai.ipynb) | | LiteLLM | Call any LLM model using the OpenAI format | [Documentation](/tracing/integrations/litellm.md) | [![Open Quickstart In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/comet-ml/opik/blob/master/apps/opik-documentation/documentation/docs/cookbook/litellm.ipynb) | | LangChain | Log traces for all LangChain LLM calls | [Documentation](https://www.comet.com/docs/opik/tracing/integrations/langchain/?utm_source=opik&utm_medium=github&utm_content=langchain_link&utm_campaign=opik) | [![Open Quickstart In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/comet-ml/opik/blob/master/apps/opik-documentation/documentation/docs/cookbook/langchain.ipynb) | -| Haystack | Log traces for all Haystack calls | [Documentation](https://www.comet.com/docs/opik/tracing/integrations/haystack/?utm_source=opik&utm_medium=github&utm_content=haystack_link&utm_campaign=opik) | [![Open Quickstart In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/comet-ml/opik/blob/master/apps/opik-documentation/documentation/docs/cookbook/haystack.ipynb) | -| Bedrock | Log traces for all Bedrock LLM calls | [Documentation](https://www.comet.com/docs/opik/tracing/integrations/bedrock?utm_source=opik&utm_medium=github&utm_content=bedrock_link&utm_campaign=opik) | [![Open Quickstart In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/comet-ml/opik/blob/master/apps/opik-documentation/documentation/docs/cookbook/bedrock.ipynb) | +| Haystack | Log traces for all Haystack calls | [Documentation](https://www.comet.com/docs/opik/tracing/integrations/haystack/?utm_source=opik&utm_medium=github&utm_content=haystack_link&utm_campaign=opik) | [![Open Quickstart In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/comet-ml/opik/blob/master/apps/opik-documentation/documentation/docs/cookbook/haystack.ipynb) | | Anthropic | Log traces for all Anthropic LLM calls | [Documentation](https://www.comet.com/docs/opik/tracing/integrations/anthropic?utm_source=opik&utm_medium=github&utm_content=anthropic_link&utm_campaign=opik) | [![Open Quickstart In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/comet-ml/opik/blob/master/apps/opik-documentation/documentation/docs/cookbook/anthropic.ipynb) | +| Bedrock | Log traces for all Bedrock LLM calls | [Documentation](https://www.comet.com/docs/opik/tracing/integrations/bedrock?utm_source=opik&utm_medium=github&utm_content=bedrock_link&utm_campaign=opik) | [![Open Quickstart In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/comet-ml/opik/blob/master/apps/opik-documentation/documentation/docs/cookbook/bedrock.ipynb) | +| DSPy | Log traces for all DSPy runs | 
[Documentation](https://www.comet.com/docs/opik/tracing/integrations/dspy?utm_source=opik&utm_medium=github&utm_content=dspy_link&utm_campaign=opik) | [![Open Quickstart In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/comet-ml/opik/blob/master/apps/opik-documentation/documentation/docs/cookbook/dspy.ipynb) | | Gemini | Log traces for all Gemini LLM calls | [Documentation](https://www.comet.com/docs/opik/tracing/integrations/gemini?utm_source=opik&utm_medium=github&utm_content=gemini_link&utm_campaign=opik) | [![Open Quickstart In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/comet-ml/opik/blob/master/apps/opik-documentation/documentation/docs/cookbook/gemini.ipynb) | | Groq | Log traces for all Groq LLM calls | [Documentation](https://www.comet.com/docs/opik/tracing/integrations/groq?utm_source=opik&utm_medium=github&utm_content=groq_link&utm_campaign=opik) | [![Open Quickstart In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/comet-ml/opik/blob/master/apps/opik-documentation/documentation/docs/cookbook/groq.ipynb) | | LangGraph | Log traces for all LangGraph executions | [Documentation](https://www.comet.com/docs/opik/tracing/integrations/langgraph/?utm_source=opik&utm_medium=github&utm_content=langchain_link&utm_campaign=opik) | [![Open Quickstart In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/comet-ml/opik/blob/master/apps/opik-documentation/documentation/docs/cookbook/langgraph.ipynb) | diff --git a/apps/opik-backend/config.yml b/apps/opik-backend/config.yml index abfa13ed74..c4ed04eb61 100644 --- a/apps/opik-backend/config.yml +++ b/apps/opik-backend/config.yml @@ -224,8 +224,22 @@ llmProviderClient: # Default: 60s # Description: Write timeout for LLM providers writeTimeout: ${LLM_PROVIDER_CLIENT_WRITE_TIMEOUT:-60s} + # Default: false + # Description: Whether or not to log requests + logRequests: ${LLM_PROVIDER_CLIENT_LOG_REQUESTS:-false} + # Default: false + # Description: Whether or not to log responses + logResponses: ${LLM_PROVIDER_CLIENT_LOG_RESPONSES:-false} # Configuration for OpenAI client openAiClient: # Default: # Description: OpenAI API URL url: ${LLM_PROVIDER_OPENAI_URL:-} + # Configuration for Anthropic client + anthropicClient: + # Default: https://api.anthropic.com/v1/ + # Description: Anthropic API URL + url: ${LLM_PROVIDER_ANTHROPIC_URL:-https://api.anthropic.com/v1/} + # Default: 2023-06-01 + # Description: Anthropic API version https://docs.anthropic.com/en/api/versioning + version: ${LLM_PROVIDER_ANTHROPIC_VERSION:-'2023-06-01'} diff --git a/apps/opik-backend/pom.xml b/apps/opik-backend/pom.xml index 0f5ea17287..b39e700a90 100644 --- a/apps/opik-backend/pom.xml +++ b/apps/opik-backend/pom.xml @@ -32,6 +32,7 @@ 3.41.0 2.10.0 2.29.9 + 2.9.0 com.comet.opik.OpikApplication @@ -207,10 +208,19 @@ java-uuid-generator ${uuid.java.generator.version} + + com.jayway.jsonpath + json-path + ${json-path.version} + dev.langchain4j langchain4j-open-ai + + dev.langchain4j + langchain4j-anthropic + diff --git a/apps/opik-backend/src/main/java/com/comet/opik/OpikApplication.java b/apps/opik-backend/src/main/java/com/comet/opik/OpikApplication.java index c85c0dbdfd..3c025b9d02 100644 --- a/apps/opik-backend/src/main/java/com/comet/opik/OpikApplication.java +++ b/apps/opik-backend/src/main/java/com/comet/opik/OpikApplication.java @@ -82,6 +82,7 
@@ public void initialize(Bootstrap bootstrap) { @Override public void run(OpikConfiguration configuration, Environment environment) { EncryptionUtils.setConfig(configuration); + // Resources var jersey = environment.jersey(); diff --git a/apps/opik-backend/src/main/java/com/comet/opik/api/AutomationRuleEvaluator.java b/apps/opik-backend/src/main/java/com/comet/opik/api/AutomationRuleEvaluator.java index 5f3f90ed09..7070f769da 100644 --- a/apps/opik-backend/src/main/java/com/comet/opik/api/AutomationRuleEvaluator.java +++ b/apps/opik-backend/src/main/java/com/comet/opik/api/AutomationRuleEvaluator.java @@ -3,7 +3,6 @@ import com.fasterxml.jackson.annotation.JsonSubTypes; import com.fasterxml.jackson.annotation.JsonTypeInfo; import com.fasterxml.jackson.annotation.JsonView; -import com.fasterxml.jackson.databind.JsonNode; import io.swagger.v3.oas.annotations.media.DiscriminatorMapping; import io.swagger.v3.oas.annotations.media.Schema; import jakarta.validation.constraints.NotBlank; @@ -11,11 +10,8 @@ import lombok.AllArgsConstructor; import lombok.Builder; import lombok.Data; -import lombok.EqualsAndHashCode; -import lombok.ToString; import lombok.experimental.SuperBuilder; -import java.beans.ConstructorProperties; import java.time.Instant; import java.util.List; import java.util.UUID; @@ -24,44 +20,23 @@ @SuperBuilder(toBuilder = true) @JsonTypeInfo(use = JsonTypeInfo.Id.NAME, include = JsonTypeInfo.As.EXISTING_PROPERTY, property = "type", visible = true) @JsonSubTypes({ - @JsonSubTypes.Type(value = AutomationRuleEvaluator.AutomationRuleEvaluatorLlmAsJudge.class, name = "llm_as_judge") + @JsonSubTypes.Type(value = AutomationRuleEvaluatorLlmAsJudge.class, name = "llm_as_judge") }) @Schema(name = "AutomationRuleEvaluator", discriminatorProperty = "type", discriminatorMapping = { - @DiscriminatorMapping(value = "llm_as_judge", schema = AutomationRuleEvaluator.AutomationRuleEvaluatorLlmAsJudge.class) + @DiscriminatorMapping(value = "llm_as_judge", schema = AutomationRuleEvaluatorLlmAsJudge.class) }) @AllArgsConstructor -public abstract sealed class AutomationRuleEvaluator implements AutomationRule { - - @EqualsAndHashCode(callSuper = true) - @Data - @SuperBuilder(toBuilder = true) - @ToString(callSuper = true) - public static final class AutomationRuleEvaluatorLlmAsJudge extends AutomationRuleEvaluator { - - @NotNull @JsonView({View.Public.class, View.Write.class}) - @Schema(accessMode = Schema.AccessMode.READ_WRITE) - JsonNode code; - - @ConstructorProperties({"id", "projectId", "name", "samplingRate", "code", "createdAt", "createdBy", "lastUpdatedAt", "lastUpdatedBy"}) - public AutomationRuleEvaluatorLlmAsJudge(UUID id, UUID projectId, @NotBlank String name, float samplingRate, @NotNull JsonNode code, - Instant createdAt, String createdBy, Instant lastUpdatedAt, String lastUpdatedBy) { - super(id, projectId, name, samplingRate, createdAt, createdBy, lastUpdatedAt, lastUpdatedBy); - this.code = code; - } - - @Override - public AutomationRuleEvaluatorType type() { - return AutomationRuleEvaluatorType.LLM_AS_JUDGE; - } - } +public abstract sealed class AutomationRuleEvaluator + implements + AutomationRule + permits AutomationRuleEvaluatorLlmAsJudge { @JsonView({View.Public.class}) @Schema(accessMode = Schema.AccessMode.READ_ONLY) UUID id; @JsonView({View.Public.class, View.Write.class}) - @NotNull - UUID projectId; + @NotNull UUID projectId; @JsonView({View.Public.class, View.Write.class}) @Schema(accessMode = Schema.AccessMode.READ_WRITE) @@ -100,17 +75,21 @@ public AutomationRuleAction 
getAction() { } public static class View { - public static class Write {} - public static class Public {} + public static class Write { + } + public static class Public { + } } @Builder(toBuilder = true) public record AutomationRuleEvaluatorPage( - @JsonView({View.Public.class}) int page, + @JsonView( { + View.Public.class}) int page, @JsonView({View.Public.class}) int size, @JsonView({View.Public.class}) long total, @JsonView({View.Public.class}) List content) - implements Page{ + implements + Page{ public static AutomationRuleEvaluator.AutomationRuleEvaluatorPage empty(int page) { return new AutomationRuleEvaluator.AutomationRuleEvaluatorPage(page, 0, 0, List.of()); diff --git a/apps/opik-backend/src/main/java/com/comet/opik/api/AutomationRuleEvaluatorCriteria.java b/apps/opik-backend/src/main/java/com/comet/opik/api/AutomationRuleEvaluatorCriteria.java new file mode 100644 index 0000000000..178450d4dd --- /dev/null +++ b/apps/opik-backend/src/main/java/com/comet/opik/api/AutomationRuleEvaluatorCriteria.java @@ -0,0 +1,19 @@ +package com.comet.opik.api; + +import com.fasterxml.jackson.annotation.JsonIgnoreProperties; +import lombok.Builder; + +import java.util.Set; +import java.util.UUID; + +@Builder(toBuilder = true) +@JsonIgnoreProperties(ignoreUnknown = true) +public record AutomationRuleEvaluatorCriteria( + AutomationRuleEvaluatorType type, + String name, + Set ids) { + + public AutomationRule.AutomationRuleAction action() { + return AutomationRule.AutomationRuleAction.EVALUATOR; + } +} diff --git a/apps/opik-backend/src/main/java/com/comet/opik/api/AutomationRuleEvaluatorLlmAsJudge.java b/apps/opik-backend/src/main/java/com/comet/opik/api/AutomationRuleEvaluatorLlmAsJudge.java new file mode 100644 index 0000000000..ee5d87640e --- /dev/null +++ b/apps/opik-backend/src/main/java/com/comet/opik/api/AutomationRuleEvaluatorLlmAsJudge.java @@ -0,0 +1,81 @@ +package com.comet.opik.api; + +import com.fasterxml.jackson.annotation.JsonIgnoreProperties; +import com.fasterxml.jackson.annotation.JsonView; +import dev.langchain4j.data.message.ChatMessageType; +import io.swagger.v3.oas.annotations.media.Schema; +import jakarta.validation.constraints.NotBlank; +import jakarta.validation.constraints.NotNull; +import lombok.Builder; +import lombok.Data; +import lombok.EqualsAndHashCode; +import lombok.ToString; +import lombok.experimental.SuperBuilder; + +import java.beans.ConstructorProperties; +import java.time.Instant; +import java.util.List; +import java.util.Map; +import java.util.UUID; + +@EqualsAndHashCode(callSuper = true) +@Data +@SuperBuilder(toBuilder = true) +@ToString(callSuper = true) +public final class AutomationRuleEvaluatorLlmAsJudge + extends + AutomationRuleEvaluator { + + @NotNull @JsonView({View.Public.class, View.Write.class}) + @Schema(accessMode = Schema.AccessMode.READ_WRITE) + private LlmAsJudgeCode code; + + @Builder(toBuilder = true) + @JsonIgnoreProperties(ignoreUnknown = true) + public record LlmAsJudgeCode( + @JsonView( { + View.Public.class, View.Write.class}) @NotNull LlmAsJudgeModelParameters model, + @JsonView({View.Public.class, View.Write.class}) @NotNull List messages, + @JsonView({View.Public.class, View.Write.class}) @NotNull Map variables, + @JsonView({View.Public.class, View.Write.class}) @NotNull List schema){ + } + + @Builder(toBuilder = true) + @JsonIgnoreProperties(ignoreUnknown = true) + public record LlmAsJudgeMessage( + @JsonView( { + View.Public.class, View.Write.class}) @NotNull ChatMessageType role, + @JsonView({View.Public.class, 
View.Write.class}) @NotNull String content){ + } + + @Builder(toBuilder = true) + @JsonIgnoreProperties(ignoreUnknown = true) + public record LlmAsJudgeOutputSchema( + @JsonView( { + View.Public.class, View.Write.class}) @NotNull String name, + @JsonView({View.Public.class, View.Write.class}) @NotNull LlmAsJudgeOutputSchemaType type, + @JsonView({View.Public.class, View.Write.class}) @NotNull String description){ + } + + @Builder(toBuilder = true) + @JsonIgnoreProperties(ignoreUnknown = true) + public record LlmAsJudgeModelParameters( + @JsonView( { + View.Public.class, View.Write.class}) @NotNull String name, + @JsonView({View.Public.class, View.Write.class}) @NotNull Double temperature){ + } + + @ConstructorProperties({"id", "projectId", "name", "samplingRate", "code", "createdAt", "createdBy", + "lastUpdatedAt", "lastUpdatedBy"}) + public AutomationRuleEvaluatorLlmAsJudge(UUID id, UUID projectId, @NotBlank String name, Float samplingRate, + @NotNull LlmAsJudgeCode code, + Instant createdAt, String createdBy, Instant lastUpdatedAt, String lastUpdatedBy) { + super(id, projectId, name, samplingRate, createdAt, createdBy, lastUpdatedAt, lastUpdatedBy); + this.code = code; + } + + @Override + public AutomationRuleEvaluatorType type() { + return AutomationRuleEvaluatorType.LLM_AS_JUDGE; + } +} diff --git a/apps/opik-backend/src/main/java/com/comet/opik/api/AutomationRuleEvaluatorUpdate.java b/apps/opik-backend/src/main/java/com/comet/opik/api/AutomationRuleEvaluatorUpdate.java index b81e16d08b..e1eafb8e13 100644 --- a/apps/opik-backend/src/main/java/com/comet/opik/api/AutomationRuleEvaluatorUpdate.java +++ b/apps/opik-backend/src/main/java/com/comet/opik/api/AutomationRuleEvaluatorUpdate.java @@ -1,7 +1,6 @@ package com.comet.opik.api; import com.fasterxml.jackson.annotation.JsonIgnoreProperties; -import com.fasterxml.jackson.databind.JsonNode; import com.fasterxml.jackson.databind.PropertyNamingStrategies; import com.fasterxml.jackson.databind.annotation.JsonNaming; import jakarta.validation.constraints.NotNull; @@ -12,6 +11,6 @@ @JsonNaming(PropertyNamingStrategies.SnakeCaseStrategy.class) public record AutomationRuleEvaluatorUpdate( @NotNull String name, - @NotNull JsonNode code, + @NotNull AutomationRuleEvaluatorLlmAsJudge.LlmAsJudgeCode code, @NotNull Float samplingRate) { } diff --git a/apps/opik-backend/src/main/java/com/comet/opik/api/ExperimentItemSearchCriteria.java b/apps/opik-backend/src/main/java/com/comet/opik/api/ExperimentItemSearchCriteria.java new file mode 100644 index 0000000000..eee0b1fc6f --- /dev/null +++ b/apps/opik-backend/src/main/java/com/comet/opik/api/ExperimentItemSearchCriteria.java @@ -0,0 +1,13 @@ +package com.comet.opik.api; + +import lombok.Builder; + +import java.util.UUID; + +@Builder(toBuilder = true) +public record ExperimentItemSearchCriteria( + String experimentName, + Integer limit, + UUID lastRetrievedId, + boolean truncate) { +} diff --git a/apps/opik-backend/src/main/java/com/comet/opik/api/ExperimentItemStreamRequest.java b/apps/opik-backend/src/main/java/com/comet/opik/api/ExperimentItemStreamRequest.java index 97fc11bcc2..6150d5f142 100644 --- a/apps/opik-backend/src/main/java/com/comet/opik/api/ExperimentItemStreamRequest.java +++ b/apps/opik-backend/src/main/java/com/comet/opik/api/ExperimentItemStreamRequest.java @@ -3,9 +3,11 @@ import com.fasterxml.jackson.annotation.JsonIgnoreProperties; import com.fasterxml.jackson.databind.PropertyNamingStrategies; import com.fasterxml.jackson.databind.annotation.JsonNaming; +import 
io.swagger.v3.oas.annotations.media.Schema; import jakarta.validation.constraints.Max; import jakarta.validation.constraints.Min; import jakarta.validation.constraints.NotBlank; +import jakarta.ws.rs.DefaultValue; import lombok.Builder; import java.util.UUID; @@ -16,7 +18,8 @@ public record ExperimentItemStreamRequest( @NotBlank String experimentName, @Min(1) @Max(2000) Integer limit, - UUID lastRetrievedId) { + UUID lastRetrievedId, + @Schema(description = "Truncate image included in either input, output or metadata", defaultValue = "true") @DefaultValue("true") boolean truncate) { @Override public Integer limit() { diff --git a/apps/opik-backend/src/main/java/com/comet/opik/api/LlmAsJudgeOutputSchemaType.java b/apps/opik-backend/src/main/java/com/comet/opik/api/LlmAsJudgeOutputSchemaType.java new file mode 100644 index 0000000000..0dc7544eff --- /dev/null +++ b/apps/opik-backend/src/main/java/com/comet/opik/api/LlmAsJudgeOutputSchemaType.java @@ -0,0 +1,7 @@ +package com.comet.opik.api; + +public enum LlmAsJudgeOutputSchemaType { + BOOLEAN, + INTEGER, + DOUBLE +} diff --git a/apps/opik-backend/src/main/java/com/comet/opik/api/LlmProvider.java b/apps/opik-backend/src/main/java/com/comet/opik/api/LlmProvider.java index 12d7cd45ed..36de62b825 100644 --- a/apps/opik-backend/src/main/java/com/comet/opik/api/LlmProvider.java +++ b/apps/opik-backend/src/main/java/com/comet/opik/api/LlmProvider.java @@ -10,8 +10,8 @@ @Getter @RequiredArgsConstructor public enum LlmProvider { - - OPEN_AI("openai"); + OPEN_AI("openai"), + ANTHROPIC("anthropic"); @JsonValue private final String value; diff --git a/apps/opik-backend/src/main/java/com/comet/opik/api/events/TracesCreated.java b/apps/opik-backend/src/main/java/com/comet/opik/api/events/TracesCreated.java index b47529ae6d..3e94bee9a4 100644 --- a/apps/opik-backend/src/main/java/com/comet/opik/api/events/TracesCreated.java +++ b/apps/opik-backend/src/main/java/com/comet/opik/api/events/TracesCreated.java @@ -1,20 +1,29 @@ package com.comet.opik.api.events; +import com.comet.opik.api.Trace; import com.comet.opik.infrastructure.events.BaseEvent; import lombok.Getter; import lombok.NonNull; import lombok.experimental.Accessors; +import java.util.List; import java.util.Set; import java.util.UUID; +import java.util.stream.Collectors; @Getter @Accessors(fluent = true) public class TracesCreated extends BaseEvent { - private final @NonNull Set projectIds; + private final @NonNull List traces; - public TracesCreated(@NonNull Set projectIds, @NonNull String workspaceId, @NonNull String userName) { + public TracesCreated(@NonNull List traces, @NonNull String workspaceId, @NonNull String userName) { super(workspaceId, userName); - this.projectIds = projectIds; + this.traces = traces; + } + + public Set projectIds() { + return traces.stream() + .map(Trace::projectId) + .collect(Collectors.toSet()); } } diff --git a/apps/opik-backend/src/main/java/com/comet/opik/api/resources/v1/events/LlmAsJudgeMessageRender.java b/apps/opik-backend/src/main/java/com/comet/opik/api/resources/v1/events/LlmAsJudgeMessageRender.java new file mode 100644 index 0000000000..91c49c486d --- /dev/null +++ b/apps/opik-backend/src/main/java/com/comet/opik/api/resources/v1/events/LlmAsJudgeMessageRender.java @@ -0,0 +1,136 @@ +package com.comet.opik.api.resources.v1.events; + +import com.comet.opik.api.AutomationRuleEvaluatorLlmAsJudge; +import com.comet.opik.api.Trace; +import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.ObjectMapper; +import 
com.jayway.jsonpath.JsonPath; +import dev.ai4j.openai4j.chat.Message; +import dev.ai4j.openai4j.chat.SystemMessage; +import dev.ai4j.openai4j.chat.UserMessage; +import lombok.Builder; +import lombok.experimental.UtilityClass; +import lombok.extern.slf4j.Slf4j; +import org.apache.commons.text.StringSubstitutor; + +import java.util.List; +import java.util.Map; +import java.util.Objects; +import java.util.stream.Collectors; + +@UtilityClass +@Slf4j +class LlmAsJudgeMessageRender { + + /** + * Render the rule evaluator message template using the values from an actual trace. + * + * As the rule may consist of multiple messages, we check each one of them for variables to fill. + * Then we go through every variable template and replace it with the value from the trace. + * + * @param trace the trace with the values used to replace the template variables + * @param evaluatorCode the evaluator + * @return a list of AI messages, with templates rendered + */ + public static List<Message> renderMessages(Trace trace, + AutomationRuleEvaluatorLlmAsJudge.LlmAsJudgeCode evaluatorCode) { + // prepare the map of replacements to use in all messages + var parsedVariables = variableMapping(evaluatorCode.variables()); + + // extract the actual value from the Trace + var replacements = parsedVariables.stream().map(mapper -> { + var traceSection = switch (mapper.traceSection) { + case INPUT -> trace.input(); + case OUTPUT -> trace.output(); + case METADATA -> trace.metadata(); + }; + + return mapper.toBuilder() + .valueToReplace(extractFromJson(traceSection, mapper.jsonPath())) + .build(); + }) + .filter(mapper -> mapper.valueToReplace() != null) + .collect( + Collectors.toMap(LlmAsJudgeMessageRender.MessageVariableMapping::variableName, + LlmAsJudgeMessageRender.MessageVariableMapping::valueToReplace)); + + // will convert all '{{key}}' into 'value' + // TODO: replace with Mustache Java to be consistent with the FE + var templateRenderer = new StringSubstitutor(replacements, "{{", "}}"); + + // render the message templates from evaluator rule + return evaluatorCode.messages().stream() + .map(templateMessage -> { + var renderedMessage = templateRenderer.replace(templateMessage.content()); + + return switch (templateMessage.role()) { + case USER -> UserMessage.from(renderedMessage); + case SYSTEM -> SystemMessage.from(renderedMessage); + default -> { + log.info("No mapping for message role type {}", templateMessage.role()); + yield null; + } + }; + }) + .filter(Objects::nonNull) + .toList(); + } + + /** + * Parse the evaluator's variable mappings into a usable list of mappings. + * + * @param evaluatorVariables a map from variable name to a path into the trace input/output/metadata + * @return a parsed list of mappings, easier to use for the template rendering + */ + public static List<MessageVariableMapping> variableMapping(Map<String, String> evaluatorVariables) { + return evaluatorVariables.entrySet().stream() + .map(mapper -> { + var templateVariable = mapper.getKey(); + var tracePath = mapper.getValue(); + + var builder = MessageVariableMapping.builder().variableName(templateVariable); + + if (tracePath.startsWith("input.")) { + builder.traceSection(TraceSection.INPUT) + .jsonPath("$" + tracePath.substring("input".length())); + } else if (tracePath.startsWith("output.")) { + builder.traceSection(TraceSection.OUTPUT) + .jsonPath("$" + tracePath.substring("output".length())); + } else if (tracePath.startsWith("metadata.")) { + builder.traceSection(TraceSection.METADATA) + .jsonPath("$" + tracePath.substring("metadata".length())); + } else { + log.info("Couldn't map trace path '{}' into an input/output/metadata path", tracePath); + return null; + } + + return builder.build(); + }) + .filter(Objects::nonNull) + .toList(); + }
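The renderer above leans on Apache Commons Text's `StringSubstitutor` configured with `{{`/`}}` delimiters. Here is a minimal, self-contained sketch of that same substitution, with hard-coded replacement values standing in for the ones extracted from a trace; the variable names and template text are illustrative, not from the PR:

```java
import org.apache.commons.text.StringSubstitutor;

import java.util.Map;

class TemplateRenderingSketch {
    public static void main(String[] args) {
        // stands in for the 'replacements' map built from the trace sections
        Map<String, String> replacements = Map.of(
                "input", "What is the capital of France?",
                "output", "Paris");

        // same delimiters as the renderer above: '{{' and '}}'
        var templateRenderer = new StringSubstitutor(replacements, "{{", "}}");

        String template = "Question: {{input}}\nAnswer: {{output}}\nRate the answer from 1 to 5.";
        System.out.println(templateRenderer.replace(template));
        // Question: What is the capital of France?
        // Answer: Paris
        // Rate the answer from 1 to 5.
    }
}
```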
+ + final ObjectMapper objectMapper = new ObjectMapper(); + + String extractFromJson(JsonNode json, String path) { + try { + // JsonPath didn't work with JsonNode, even explicitly using JacksonJsonProvider, so we convert to a Map + var forcedObject = objectMapper.convertValue(json, Map.class); + return JsonPath.parse(forcedObject).read(path); + } catch (Exception e) { + log.debug("Couldn't find path '{}' inside json {}: {}", path, json, e.getMessage()); + return null; + } + } + + public enum TraceSection { + INPUT, + OUTPUT, + METADATA + } + + @Builder(toBuilder = true) + public record MessageVariableMapping(TraceSection traceSection, String variableName, String jsonPath, + String valueToReplace) { + } +}
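The `extractFromJson` helper above works around JsonPath's JsonNode handling by converting the node to a `Map` first. A minimal sketch of that same round-trip follows; the payload and path are illustrative:

```java
import com.fasterxml.jackson.databind.ObjectMapper;
import com.jayway.jsonpath.JsonPath;

import java.util.Map;

class JsonPathExtractionSketch {
    public static void main(String[] args) throws Exception {
        var objectMapper = new ObjectMapper();

        // stands in for a trace section such as trace.input()
        var json = objectMapper.readTree("{\"question\": {\"text\": \"What is Opik?\"}}");

        // JsonPath reads the converted Map where the raw JsonNode would fail
        var forcedObject = objectMapper.convertValue(json, Map.class);

        // the rule path 'input.question.text' becomes the JsonPath '$.question.text'
        String value = JsonPath.parse(forcedObject).read("$.question.text");
        System.out.println(value); // What is Opik?
    }
}
```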
diff --git a/apps/opik-backend/src/main/java/com/comet/opik/api/resources/v1/events/OnlineScoringEventListener.java b/apps/opik-backend/src/main/java/com/comet/opik/api/resources/v1/events/OnlineScoringEventListener.java new file mode 100644 index 0000000000..0fa042b77c --- /dev/null +++ b/apps/opik-backend/src/main/java/com/comet/opik/api/resources/v1/events/OnlineScoringEventListener.java @@ -0,0 +1,101 @@ +package com.comet.opik.api.resources.v1.events; + +import com.comet.opik.api.AutomationRuleEvaluatorLlmAsJudge; +import com.comet.opik.api.AutomationRuleEvaluatorType; +import com.comet.opik.api.Trace; +import com.comet.opik.api.events.TracesCreated; +import com.comet.opik.domain.AutomationRuleEvaluatorService; +import com.comet.opik.domain.ChatCompletionService; +import com.comet.opik.domain.FeedbackScoreService; +import com.google.common.eventbus.EventBus; +import com.google.common.eventbus.Subscribe; +import dev.ai4j.openai4j.chat.ChatCompletionRequest; +import jakarta.inject.Inject; +import lombok.extern.slf4j.Slf4j; +import ru.vyarus.dropwizard.guice.module.installer.feature.eager.EagerSingleton; + +import java.util.List; +import java.util.Map; +import java.util.Random; +import java.util.UUID; +import java.util.stream.Collectors; + +@EagerSingleton +@Slf4j +public class OnlineScoringEventListener { + + private final AutomationRuleEvaluatorService ruleEvaluatorService; + private final ChatCompletionService aiProxyService; + private final FeedbackScoreService feedbackScoreService; + + @Inject + public OnlineScoringEventListener(EventBus eventBus, + AutomationRuleEvaluatorService ruleEvaluatorService, + ChatCompletionService aiProxyService, + FeedbackScoreService feedbackScoreService) { + this.ruleEvaluatorService = ruleEvaluatorService; + this.aiProxyService = aiProxyService; + this.feedbackScoreService = feedbackScoreService; + eventBus.register(this); + } + + /** + * Listen for trace batches to check for existing Automation Rules to score them. + * + * An Automation Rule registers the percentage of traces to score, how to score them, and so on. + * + * @param tracesBatch a traces batch with workspaceId and userName + */ + @Subscribe + public void onTracesCreated(TracesCreated tracesBatch) { + log.debug(tracesBatch.traces().toString()); + + Map<UUID, List<Trace>> tracesByProject = tracesBatch.traces().stream() + .collect(Collectors.groupingBy(Trace::projectId)); + + Map<String, Integer> countMap = tracesByProject.entrySet().stream() + .collect(Collectors.toMap(entry -> "projectId: " + entry.getKey(), + entry -> entry.getValue().size())); + + log.debug("[OnlineScoring] Received traces for workspace '{}': {}", tracesBatch.workspaceId(), countMap); + + Random random = new Random(System.currentTimeMillis()); + + // fetch automation rules per project + tracesByProject.forEach((projectId, traces) -> { + log.debug("[OnlineScoring] Fetching evaluators for {} traces, project '{}' on workspace '{}'", + traces.size(), projectId, tracesBatch.workspaceId()); + List<AutomationRuleEvaluatorLlmAsJudge> evaluators = ruleEvaluatorService.findAll( + projectId, tracesBatch.workspaceId(), AutomationRuleEvaluatorType.LLM_AS_JUDGE); + log.info("[OnlineScoring] Found {} evaluators for project '{}' on workspace '{}'", evaluators.size(), + projectId, tracesBatch.workspaceId()); + + // for each rule, sample traces and score them + evaluators.forEach(evaluator -> traces.stream() + .filter(e -> random.nextFloat() < evaluator.getSamplingRate()) + .forEach(trace -> score(trace, tracesBatch.workspaceId(), evaluator))); + }); + } + + /** + * Use AI Proxy to score the trace and store it as a FeedbackScore. + * If the evaluator has multiple score definitions, it calls the LLM once per score definition. + * + * @param trace the trace to score + * @param workspaceId the workspace the trace belongs to + * @param evaluator the automation rule to score the trace + */ + private void score(Trace trace, String workspaceId, AutomationRuleEvaluatorLlmAsJudge evaluator) { + // TODO prepare base request + var baseRequestBuilder = ChatCompletionRequest.builder() + .model(evaluator.getCode().model().name()) + .temperature(evaluator.getCode().model().temperature()) + .messages(LlmAsJudgeMessageRender.renderMessages(trace, evaluator.getCode())) + .build(); + + // TODO: call AI Proxy and parse response into 1+ FeedbackScore + + // TODO: store FeedbackScores + } + +}
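The `score` method above still leaves response parsing as a TODO. Below is a hedged sketch of one way the judge's answer could be folded into per-schema scores, assuming the LLM returns a flat JSON object keyed by the declared `LlmAsJudgeOutputSchema` names; this is an illustration, not the PR's implementation, and the `FeedbackScore` construction and storage are omitted:

```java
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;

import java.util.LinkedHashMap;
import java.util.Map;

class JudgeResponseParsingSketch {
    public static void main(String[] args) throws Exception {
        // hypothetical raw answer from the judge model, one key per output schema entry
        String llmAnswer = "{\"Relevance\": 4, \"Hallucination\": false}";

        JsonNode parsed = new ObjectMapper().readTree(llmAnswer);

        // one numeric value per schema entry; in the real flow each would become a FeedbackScore
        Map<String, Double> scores = new LinkedHashMap<>();
        parsed.fields().forEachRemaining(entry -> {
            JsonNode value = entry.getValue();
            // BOOLEAN scores map to 0/1, INTEGER and DOUBLE pass through as numbers
            double score = value.isBoolean() ? (value.asBoolean() ? 1.0 : 0.0) : value.asDouble();
            scores.put(entry.getKey(), score);
        });

        System.out.println(scores); // {Relevance=4.0, Hallucination=0.0}
    }
}
```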
diff --git a/apps/opik-backend/src/main/java/com/comet/opik/api/resources/v1/priv/AutomationRuleEvaluatorsResource.java b/apps/opik-backend/src/main/java/com/comet/opik/api/resources/v1/priv/AutomationRuleEvaluatorsResource.java index b2a5e81493..41c70c97e5 100644 --- a/apps/opik-backend/src/main/java/com/comet/opik/api/resources/v1/priv/AutomationRuleEvaluatorsResource.java +++ b/apps/opik-backend/src/main/java/com/comet/opik/api/resources/v1/priv/AutomationRuleEvaluatorsResource.java @@ -2,6 +2,7 @@ import com.codahale.metrics.annotation.Timed; import com.comet.opik.api.AutomationRuleEvaluator; +import com.comet.opik.api.AutomationRuleEvaluatorLlmAsJudge; import com.comet.opik.api.AutomationRuleEvaluatorUpdate; import com.comet.opik.api.BatchDelete; import com.comet.opik.api.Page; @@ -59,14 +60,14 @@ public class AutomationRuleEvaluatorsResource { }) @JsonView(AutomationRuleEvaluator.View.Public.class) public Response find(@PathParam("projectId") UUID projectId, - @QueryParam("name") String name, - @QueryParam("page") @Min(1) @DefaultValue("1") int page, - @QueryParam("size") @Min(1) @DefaultValue("10") int size) { + @QueryParam("name") String name, + @QueryParam("page") @Min(1) @DefaultValue("1") int page, + @QueryParam("size") @Min(1) @DefaultValue("10") int size) { String workspaceId = requestContext.get().getWorkspaceId(); log.info("Looking for automated evaluators for project id '{}' on workspaceId '{}' (page {})", projectId, workspaceId, page); - Page definitionPage = service.find(projectId, workspaceId, name, page, size); + Page definitionPage = service.find(projectId, workspaceId, name, page, size); log.info("Found {} automated evaluators for project id '{}' on workspaceId '{}' (page {}, total {})", definitionPage.size(), projectId, workspaceId, page, definitionPage.total()); @@ -99,8 +100,7 @@ public Response getEvaluator(@PathParam("projectId") UUID projectId, @PathParam( }) @RateLimited public Response createEvaluator( - @RequestBody(content = @Content(schema = @Schema(implementation = AutomationRuleEvaluator.class))) - @JsonView(AutomationRuleEvaluator.View.Write.class) @NotNull @Valid AutomationRuleEvaluator> evaluator, + @RequestBody(content = @Content(schema = @Schema(implementation = AutomationRuleEvaluator.class))) @JsonView(AutomationRuleEvaluator.View.Write.class) @NotNull @Valid AutomationRuleEvaluator> evaluator, @Context UriInfo uriInfo) { String workspaceId = requestContext.get().getWorkspaceId(); @@ -148,12 +148,15 @@ public Response updateEvaluator(@PathParam("id") UUID id, @ApiResponse(responseCode = "204", description = "No Content"), }) public Response deleteEvaluators( - @NotNull @RequestBody(content = @Content(schema = @Schema(implementation = BatchDelete.class))) @Valid BatchDelete batchDelete, @PathParam("projectId") UUID projectId) { + @NotNull @RequestBody(content = @Content(schema = @Schema(implementation = BatchDelete.class))) @Valid BatchDelete batchDelete, + @PathParam("projectId") UUID projectId) { String workspaceId = requestContext.get().getWorkspaceId(); - log.info("Deleting automation rule evaluators by ids, count '{}', on workspace_id '{}'", batchDelete.ids().size(), + log.info("Deleting automation rule evaluators by ids, count '{}', on workspace_id '{}'", + batchDelete.ids().size(), workspaceId); service.delete(batchDelete.ids(), projectId, workspaceId); - log.info("Deleted automation rule evaluators by ids, count '{}', on workspace_id '{}'", batchDelete.ids().size(), + log.info("Deleted automation rule evaluators by ids, count '{}', on workspace_id '{}'", + batchDelete.ids().size(), workspaceId); return Response.noContent().build(); } diff --git a/apps/opik-backend/src/main/java/com/comet/opik/api/resources/v1/priv/ChatCompletionsResource.java b/apps/opik-backend/src/main/java/com/comet/opik/api/resources/v1/priv/ChatCompletionsResource.java index 55aecc46dd..e9721a8ce2 100644 --- a/apps/opik-backend/src/main/java/com/comet/opik/api/resources/v1/priv/ChatCompletionsResource.java +++ b/apps/opik-backend/src/main/java/com/comet/opik/api/resources/v1/priv/ChatCompletionsResource.java @@ -3,6 +3,7 @@ import com.codahale.metrics.annotation.Timed; import com.comet.opik.domain.ChatCompletionService; import com.comet.opik.infrastructure.auth.RequestContext; +import com.comet.opik.utils.ChunkedOutputHandlers; import dev.ai4j.openai4j.chat.ChatCompletionRequest; import dev.ai4j.openai4j.chat.ChatCompletionResponse; import io.dropwizard.jersey.errors.ErrorMessage; @@ -26,6 +27,7 @@ import lombok.NonNull; import lombok.RequiredArgsConstructor; import lombok.extern.slf4j.Slf4j; +import org.glassfish.jersey.server.ChunkedOutput; @Path("/v1/private/chat/completions") @Produces(MediaType.APPLICATION_JSON) @@ -57,7 +59,10 @@ public Response create( log.info("Creating and streaming chat completions, workspaceId '{}', model '{}'", workspaceId, 
request.model()); type = MediaType.SERVER_SENT_EVENTS; - entity = chatCompletionService.createAndStreamResponse(request, workspaceId); + var chunkedOutput = new ChunkedOutput(String.class, "\r\n"); + chatCompletionService.createAndStreamResponse(request, workspaceId, + new ChunkedOutputHandlers(chunkedOutput)); + entity = chunkedOutput; } else { log.info("Creating chat completions, workspaceId '{}', model '{}'", workspaceId, request.model()); type = MediaType.APPLICATION_JSON; diff --git a/apps/opik-backend/src/main/java/com/comet/opik/api/resources/v1/priv/DatasetsResource.java b/apps/opik-backend/src/main/java/com/comet/opik/api/resources/v1/priv/DatasetsResource.java index 30e5b2c5db..35e148f2b8 100644 --- a/apps/opik-backend/src/main/java/com/comet/opik/api/resources/v1/priv/DatasetsResource.java +++ b/apps/opik-backend/src/main/java/com/comet/opik/api/resources/v1/priv/DatasetsResource.java @@ -272,7 +272,7 @@ public Response getDatasetItems( @PathParam("id") UUID id, @QueryParam("page") @Min(1) @DefaultValue("1") int page, @QueryParam("size") @Min(1) @DefaultValue("10") int size, - @QueryParam("truncate") boolean truncate) { + @QueryParam("truncate") @Schema(description = "Truncate image included in either input, output or metadata") boolean truncate) { String workspaceId = requestContext.get().getWorkspaceId(); log.info("Finding dataset items by id '{}', page '{}', size '{} on workspace_id '{}''", id, page, size, @@ -373,7 +373,7 @@ public Response findDatasetItemsWithExperimentItems( @QueryParam("size") @Min(1) @DefaultValue("10") int size, @QueryParam("experiment_ids") @NotNull @NotBlank String experimentIdsQueryParam, @QueryParam("filters") String filters, - @QueryParam("truncate") boolean truncate) { + @QueryParam("truncate") @Schema(description = "Truncate image included in either input, output or metadata") boolean truncate) { var experimentIds = IdParamsValidator.getIds(experimentIdsQueryParam); diff --git a/apps/opik-backend/src/main/java/com/comet/opik/api/resources/v1/priv/ExperimentsResource.java b/apps/opik-backend/src/main/java/com/comet/opik/api/resources/v1/priv/ExperimentsResource.java index f7121d0823..10ed2fdcc7 100644 --- a/apps/opik-backend/src/main/java/com/comet/opik/api/resources/v1/priv/ExperimentsResource.java +++ b/apps/opik-backend/src/main/java/com/comet/opik/api/resources/v1/priv/ExperimentsResource.java @@ -3,6 +3,7 @@ import com.codahale.metrics.annotation.Timed; import com.comet.opik.api.Experiment; import com.comet.opik.api.ExperimentItem; +import com.comet.opik.api.ExperimentItemSearchCriteria; import com.comet.opik.api.ExperimentItemStreamRequest; import com.comet.opik.api.ExperimentItemsBatch; import com.comet.opik.api.ExperimentItemsDelete; @@ -220,7 +221,13 @@ public ChunkedOutput streamExperimentItems( var userName = requestContext.get().getUserName(); var workspaceName = requestContext.get().getWorkspaceName(); log.info("Streaming experiment items by '{}', workspaceId '{}'", request, workspaceId); - var items = experimentItemService.getExperimentItems(request) + var criteria = ExperimentItemSearchCriteria.builder() + .experimentName(request.experimentName()) + .limit(request.limit()) + .lastRetrievedId(request.lastRetrievedId()) + .truncate(request.truncate()) + .build(); + var items = experimentItemService.getExperimentItems(criteria) .contextWrite(ctx -> ctx.put(RequestContext.USER_NAME, userName) .put(RequestContext.WORKSPACE_NAME, workspaceName) .put(RequestContext.WORKSPACE_ID, workspaceId)); diff --git 
a/apps/opik-backend/src/main/java/com/comet/opik/api/resources/v1/priv/SpansResource.java b/apps/opik-backend/src/main/java/com/comet/opik/api/resources/v1/priv/SpansResource.java index 7c9ab3a947..04095cfb74 100644 --- a/apps/opik-backend/src/main/java/com/comet/opik/api/resources/v1/priv/SpansResource.java +++ b/apps/opik-backend/src/main/java/com/comet/opik/api/resources/v1/priv/SpansResource.java @@ -87,7 +87,7 @@ public Response getSpansByProject( @QueryParam("trace_id") UUID traceId, @QueryParam("type") SpanType type, @QueryParam("filters") String filters, - @QueryParam("truncate") boolean truncate) { + @QueryParam("truncate") @Schema(description = "Truncate image included in either input, output or metadata") boolean truncate) { validateProjectNameAndProjectId(projectName, projectId); var spanFilters = filtersFactory.newFilters(filters, SpanFilter.LIST_TYPE_REFERENCE); diff --git a/apps/opik-backend/src/main/java/com/comet/opik/api/resources/v1/priv/TracesResource.java b/apps/opik-backend/src/main/java/com/comet/opik/api/resources/v1/priv/TracesResource.java index 5150619935..6632b6e612 100644 --- a/apps/opik-backend/src/main/java/com/comet/opik/api/resources/v1/priv/TracesResource.java +++ b/apps/opik-backend/src/main/java/com/comet/opik/api/resources/v1/priv/TracesResource.java @@ -84,7 +84,7 @@ public Response getTracesByProject( @QueryParam("project_name") String projectName, @QueryParam("project_id") UUID projectId, @QueryParam("filters") String filters, - @QueryParam("truncate") boolean truncate) { + @QueryParam("truncate") @Schema(description = "Truncate image included in either input, output or metadata") boolean truncate) { validateProjectNameAndProjectId(projectName, projectId); var traceFilters = filtersFactory.newFilters(filters, TraceFilter.LIST_TYPE_REFERENCE); diff --git a/apps/opik-backend/src/main/java/com/comet/opik/domain/AutomationModelEvaluatorMapper.java b/apps/opik-backend/src/main/java/com/comet/opik/domain/AutomationModelEvaluatorMapper.java index a7ef7ebcaf..4ab5557911 100644 --- a/apps/opik-backend/src/main/java/com/comet/opik/domain/AutomationModelEvaluatorMapper.java +++ b/apps/opik-backend/src/main/java/com/comet/opik/domain/AutomationModelEvaluatorMapper.java @@ -1,7 +1,8 @@ package com.comet.opik.domain; -import com.comet.opik.api.AutomationRuleEvaluator; +import com.comet.opik.api.AutomationRuleEvaluatorLlmAsJudge; import org.mapstruct.Mapper; +import org.mapstruct.Mapping; import org.mapstruct.factory.Mappers; import java.time.Instant; @@ -11,8 +12,12 @@ interface AutomationModelEvaluatorMapper { AutomationModelEvaluatorMapper INSTANCE = Mappers.getMapper(AutomationModelEvaluatorMapper.class); - AutomationRuleEvaluator.AutomationRuleEvaluatorLlmAsJudge map(LlmAsJudgeAutomationRuleEvaluatorModel model); + @Mapping(target = "code", expression = "java(map(model.code()))") + AutomationRuleEvaluatorLlmAsJudge map(LlmAsJudgeAutomationRuleEvaluatorModel model); - LlmAsJudgeAutomationRuleEvaluatorModel map(AutomationRuleEvaluator.AutomationRuleEvaluatorLlmAsJudge dto); + LlmAsJudgeAutomationRuleEvaluatorModel map(AutomationRuleEvaluatorLlmAsJudge dto); + AutomationRuleEvaluatorLlmAsJudge.LlmAsJudgeCode map(LlmAsJudgeAutomationRuleEvaluatorModel.LlmAsJudgeCode detail); + + LlmAsJudgeAutomationRuleEvaluatorModel.LlmAsJudgeCode map(AutomationRuleEvaluatorLlmAsJudge.LlmAsJudgeCode code); } diff --git a/apps/opik-backend/src/main/java/com/comet/opik/domain/AutomationRuleDAO.java b/apps/opik-backend/src/main/java/com/comet/opik/domain/AutomationRuleDAO.java index 
9b0dc2c8eb..aed0ce0a3b 100644 --- a/apps/opik-backend/src/main/java/com/comet/opik/domain/AutomationRuleDAO.java +++ b/apps/opik-backend/src/main/java/com/comet/opik/domain/AutomationRuleDAO.java @@ -23,8 +23,8 @@ @RegisterConstructorMapper(AutomationRuleEvaluator.class) interface AutomationRuleDAO { - @SqlUpdate("INSERT INTO automation_rules(id, project_id, workspace_id, `action`, name, sampling_rate) "+ - "VALUES (:rule.id, :rule.projectId, :workspaceId, :rule.action, :rule.name, :rule.samplingRate)") + @SqlUpdate("INSERT INTO automation_rules(id, project_id, workspace_id, `action`, name, sampling_rate) " + + "VALUES (:rule.id, :rule.projectId, :workspaceId, :rule.action, :rule.name, :rule.samplingRate)") void saveBaseRule(@BindMethods("rule") AutomationRuleModel rule, @Bind("workspaceId") String workspaceId); @SqlUpdate(""" @@ -34,32 +34,33 @@ interface AutomationRuleDAO { WHERE id = :id AND project_id = :projectId AND workspace_id = :workspaceId """) int updateBaseRule(@Bind("id") UUID id, - @Bind("projectId") UUID projectId, - @Bind("workspaceId") String workspaceId, - @Bind("name") String name, - @Bind("samplingRate") float samplingRate, - @Bind("lastUpdatedBy") String lastUpdatedBy); + @Bind("projectId") UUID projectId, + @Bind("workspaceId") String workspaceId, + @Bind("name") String name, + @Bind("samplingRate") float samplingRate, + @Bind("lastUpdatedBy") String lastUpdatedBy); @SqlUpdate(""" - DELETE FROM automation_rules - WHERE project_id = :projectId AND workspace_id = :workspaceId - AND id IN () - """) + DELETE FROM automation_rules + WHERE project_id = :projectId AND workspace_id = :workspaceId + AND id IN () + """) @UseStringTemplateEngine @AllowUnusedBindings - void deleteBaseRules(@Define("ids") @BindList(onEmpty = BindList.EmptyHandling.NULL_VALUE, value = "ids") Set ids, - @Bind("projectId") UUID projectId, - @Bind("workspaceId") String workspaceId); + void deleteBaseRules( + @Define("ids") @BindList(onEmpty = BindList.EmptyHandling.NULL_VALUE, value = "ids") Set ids, + @Bind("projectId") UUID projectId, + @Bind("workspaceId") String workspaceId); @SqlQuery(""" - SELECT COUNT(*) - FROM automation_rules - WHERE project_id = :projectId AND workspace_id = :workspaceId - AND `action` = :action - """) + SELECT COUNT(*) + FROM automation_rules + WHERE project_id = :projectId AND workspace_id = :workspaceId + AND `action` = :action + """) @UseStringTemplateEngine @AllowUnusedBindings long findCount(@Bind("projectId") UUID projectId, - @Bind("workspaceId") String workspaceId, - @Define("action") @Bind("action") AutomationRule.AutomationRuleAction action); + @Bind("workspaceId") String workspaceId, + @Define("action") @Bind("action") AutomationRule.AutomationRuleAction action); } diff --git a/apps/opik-backend/src/main/java/com/comet/opik/domain/AutomationRuleEvaluatorDAO.java b/apps/opik-backend/src/main/java/com/comet/opik/domain/AutomationRuleEvaluatorDAO.java index 3cc439fb6b..40c83249bf 100644 --- a/apps/opik-backend/src/main/java/com/comet/opik/domain/AutomationRuleEvaluatorDAO.java +++ b/apps/opik-backend/src/main/java/com/comet/opik/domain/AutomationRuleEvaluatorDAO.java @@ -1,7 +1,8 @@ package com.comet.opik.domain; import com.comet.opik.api.AutomationRule; -import com.comet.opik.api.AutomationRuleEvaluatorUpdate; +import com.comet.opik.api.AutomationRuleEvaluatorCriteria; +import com.comet.opik.api.AutomationRuleEvaluatorType; import com.comet.opik.infrastructure.db.JsonNodeArgumentFactory; import com.comet.opik.infrastructure.db.UUIDArgumentFactory; import 
org.jdbi.v3.sqlobject.config.RegisterArgumentFactory; @@ -26,17 +27,17 @@ @RegisterRowMapper(AutomationRuleEvaluatorRowMapper.class) public interface AutomationRuleEvaluatorDAO extends AutomationRuleDAO { - @SqlUpdate("INSERT INTO automation_rule_evaluators(id, `type`, code, created_by, last_updated_by) "+ - "VALUES (:rule.id, :rule.type, :rule.code, :rule.createdBy, :rule.lastUpdatedBy)") + @SqlUpdate("INSERT INTO automation_rule_evaluators(id, `type`, code, created_by, last_updated_by) " + + "VALUES (:rule.id, :rule.type, :rule.code, :rule.createdBy, :rule.lastUpdatedBy)") void saveEvaluator(@BindMethods("rule") AutomationRuleEvaluatorModel rule); @SqlUpdate(""" UPDATE automation_rule_evaluators SET code = :rule.code, - last_updated_by = :userName + last_updated_by = :rule.lastUpdatedBy WHERE id = :id """) - int updateEvaluator(@Bind("id") UUID id, @BindMethods("rule") AutomationRuleEvaluatorUpdate ruleUpdate, @Bind("userName") String userName); + int updateEvaluator(@Bind("id") UUID id, @BindMethods("rule") AutomationRuleEvaluatorModel rule); @SqlQuery(""" SELECT rule.id, rule.project_id, rule.action, rule.name, rule.sampling_rate, evaluator.type, evaluator.code, @@ -45,33 +46,48 @@ public interface AutomationRuleEvaluatorDAO extends AutomationRuleDAO { JOIN automation_rule_evaluators evaluator ON rule.id = evaluator.id WHERE workspace_id = :workspaceId AND project_id = :projectId - AND `action` = :action + AND rule.action = :action + AND evaluator.type = :type AND rule.id IN () - AND name like concat('%', :name, '%') - LIMIT :limit OFFSET :offset + AND rule.name like concat('%', :name, '%') + LIMIT :limit + OFFSET :offset """) @UseStringTemplateEngine @AllowUnusedBindings List> find(@Bind("workspaceId") String workspaceId, - @Bind("projectId") UUID projectId, - @Define("ids") @BindList(onEmpty = BindList.EmptyHandling.NULL_VALUE, value = "ids") Set ids, - @Define("name") @Bind("name") String name, - @Bind("action") AutomationRule.AutomationRuleAction action, - @Bind("offset") int offset, - @Bind("limit") int limit); + @Bind("projectId") UUID projectId, + @Bind("action") AutomationRule.AutomationRuleAction action, + @Define("type") @Bind("type") AutomationRuleEvaluatorType type, + @Define("ids") @BindList(onEmpty = BindList.EmptyHandling.NULL_VALUE, value = "ids") Set ids, + @Define("name") @Bind("name") String name, + @Define("offset") @Bind("offset") Integer offset, + @Define("limit") @Bind("limit") Integer limit); + + default List> find(String workspaceId, UUID projectId, + AutomationRuleEvaluatorCriteria criteria, Integer offset, Integer limit) { + return find(workspaceId, projectId, criteria.action(), criteria.type(), criteria.ids(), criteria.name(), offset, + limit); + } + + default List> find(String workspaceId, UUID projectId, + AutomationRuleEvaluatorCriteria criteria) { + return find(workspaceId, projectId, criteria, null, null); + } @SqlUpdate(""" - DELETE FROM automation_rule_evaluators - WHERE id IN ( - SELECT id - FROM automation_rules - WHERE workspace_id = :workspaceId AND project_id = :projectId - AND id IN () - ) - """) + DELETE FROM automation_rule_evaluators + WHERE id IN ( + SELECT id + FROM automation_rules + WHERE workspace_id = :workspaceId AND project_id = :projectId + AND id IN () + ) + """) @UseStringTemplateEngine @AllowUnusedBindings void deleteEvaluatorsByIds(@Bind("workspaceId") String workspaceId, - @Bind("projectId") UUID projectId, - @Define("ids") @BindList(onEmpty = BindList.EmptyHandling.NULL_VALUE, value = "ids") Set ids); + @Bind("projectId") UUID 
projectId, + @Define("ids") @BindList("ids") Set ids); + } diff --git a/apps/opik-backend/src/main/java/com/comet/opik/domain/AutomationRuleEvaluatorModel.java b/apps/opik-backend/src/main/java/com/comet/opik/domain/AutomationRuleEvaluatorModel.java index c8bcfc56c4..a630c9699e 100644 --- a/apps/opik-backend/src/main/java/com/comet/opik/domain/AutomationRuleEvaluatorModel.java +++ b/apps/opik-backend/src/main/java/com/comet/opik/domain/AutomationRuleEvaluatorModel.java @@ -7,7 +7,8 @@ public sealed interface AutomationRuleEvaluatorModel extends AutomationRuleModel permits LlmAsJudgeAutomationRuleEvaluatorModel { - @Json T code(); + @Json + T code(); AutomationRuleEvaluatorType type(); @Override diff --git a/apps/opik-backend/src/main/java/com/comet/opik/domain/AutomationRuleEvaluatorService.java b/apps/opik-backend/src/main/java/com/comet/opik/domain/AutomationRuleEvaluatorService.java index 613f04c8d3..191e9c8fd6 100644 --- a/apps/opik-backend/src/main/java/com/comet/opik/domain/AutomationRuleEvaluatorService.java +++ b/apps/opik-backend/src/main/java/com/comet/opik/domain/AutomationRuleEvaluatorService.java @@ -2,6 +2,9 @@ import com.comet.opik.api.AutomationRule; import com.comet.opik.api.AutomationRuleEvaluator; +import com.comet.opik.api.AutomationRuleEvaluatorCriteria; +import com.comet.opik.api.AutomationRuleEvaluatorLlmAsJudge; +import com.comet.opik.api.AutomationRuleEvaluatorType; import com.comet.opik.api.AutomationRuleEvaluatorUpdate; import com.comet.opik.api.error.EntityAlreadyExistsException; import com.comet.opik.api.error.ErrorMessage; @@ -28,19 +31,25 @@ @ImplementedBy(AutomationRuleEvaluatorServiceImpl.class) public interface AutomationRuleEvaluatorService { - > T save(T automationRuleEvaluator, @NonNull String workspaceId, @NonNull String userName); + > T save(T automationRuleEvaluator, @NonNull String workspaceId, + @NonNull String userName); void update(@NonNull UUID id, @NonNull UUID projectId, @NonNull String workspaceId, @NonNull String userName, - AutomationRuleEvaluatorUpdate automationRuleEvaluator); + AutomationRuleEvaluatorUpdate automationRuleEvaluator); - > T findById(@NonNull UUID id, @NonNull UUID projectId, @NonNull String workspaceId); + > T findById(@NonNull UUID id, @NonNull UUID projectId, + @NonNull String workspaceId); void delete(@NonNull Set ids, @NonNull UUID projectId, @NonNull String workspaceId); - AutomationRuleEvaluator.AutomationRuleEvaluatorPage find(@NonNull UUID projectId, @NonNull String workspaceId, String name, int page, int size); -}@NonNull + AutomationRuleEvaluator.AutomationRuleEvaluatorPage find(@NonNull UUID projectId, @NonNull String workspaceId, + String name, int page, int size); -@Singleton + List findAll(@NonNull UUID projectId, @NonNull String workspaceId, + AutomationRuleEvaluatorType automationRuleEvaluatorType); +} + +@NonNull @Singleton @RequiredArgsConstructor(onConstructor_ = @Inject) @Slf4j class AutomationRuleEvaluatorServiceImpl implements AutomationRuleEvaluatorService { @@ -53,8 +62,8 @@ class AutomationRuleEvaluatorServiceImpl implements AutomationRuleEvaluatorServi @Override public > T save(T inputRuleEvaluator, - @NonNull String workspaceId, - @NonNull String userName) { + @NonNull String workspaceId, + @NonNull String userName) { UUID id = idGenerator.generateId(); IdGenerator.validateVersion(id, "AutomationRuleEvaluator"); @@ -63,12 +72,12 @@ public > T save(T inputRuleEvaluator, var evaluatorsDAO = handle.attach(AutomationRuleEvaluatorDAO.class); AutomationRuleEvaluatorModel> evaluator = switch 
(inputRuleEvaluator) { - case AutomationRuleEvaluator.AutomationRuleEvaluatorLlmAsJudge llmAsJudge -> { + case AutomationRuleEvaluatorLlmAsJudge llmAsJudge -> { var definition = llmAsJudge.toBuilder() - .id(id) - .createdBy(userName) - .lastUpdatedBy(userName) - .build(); + .id(id) + .createdBy(userName) + .lastUpdatedBy(userName) + .build(); yield AutomationModelEvaluatorMapper.INSTANCE.map(definition); } @@ -95,18 +104,25 @@ public > T save(T inputRuleEvaluator, }); } - @Override public void update(@NonNull UUID id, @NonNull UUID projectId, @NonNull String workspaceId, - @NonNull String userName, @NonNull AutomationRuleEvaluatorUpdate evaluatorUpdate) { + @NonNull String userName, @NonNull AutomationRuleEvaluatorUpdate evaluatorUpdate) { - log.debug("Updating AutomationRuleEvaluator with id '{}' in projectId '{}' and workspaceId '{}'", id, projectId, workspaceId); + log.debug("Updating AutomationRuleEvaluator with id '{}' in projectId '{}' and workspaceId '{}'", id, projectId, + workspaceId); template.inTransaction(WRITE, handle -> { var dao = handle.attach(AutomationRuleEvaluatorDAO.class); try { - int resultBase = dao.updateBaseRule(id, projectId, workspaceId, evaluatorUpdate.name(), evaluatorUpdate.samplingRate(), userName); - int resultEval = dao.updateEvaluator(id, evaluatorUpdate, userName); + int resultBase = dao.updateBaseRule(id, projectId, workspaceId, evaluatorUpdate.name(), + evaluatorUpdate.samplingRate(), userName); + + var modelUpdate = LlmAsJudgeAutomationRuleEvaluatorModel.builder() + .code(AutomationModelEvaluatorMapper.INSTANCE.map(evaluatorUpdate.code())) + .lastUpdatedBy(userName) + .build(); + + int resultEval = dao.updateEvaluator(id, modelUpdate); if (resultEval == 0 || resultBase == 0) { throw newNotFoundException(); @@ -125,17 +141,21 @@ public void update(@NonNull UUID id, @NonNull UUID projectId, @NonNull String wo } @Override - public > T findById(@NonNull UUID id, @NonNull UUID projectId, @NonNull String workspaceId) { - log.debug("Finding AutomationRuleEvaluator with id '{}' in projectId '{}' and workspaceId '{}'", id, projectId, workspaceId); + public > T findById(@NonNull UUID id, @NonNull UUID projectId, + @NonNull String workspaceId) { + log.debug("Finding AutomationRuleEvaluator with id '{}' in projectId '{}' and workspaceId '{}'", id, projectId, + workspaceId); return (T) template.inTransaction(READ_ONLY, handle -> { var dao = handle.attach(AutomationRuleEvaluatorDAO.class); var singleIdSet = Collections.singleton(id); - return dao.find(workspaceId, projectId, singleIdSet, null, AutomationRule.AutomationRuleAction.EVALUATOR, 0, DEFAULT_PAGE_LIMIT) + var criteria = AutomationRuleEvaluatorCriteria.builder().ids(singleIdSet).build(); + return dao.find(workspaceId, projectId, criteria) .stream() .findFirst() .map(ruleEvaluator -> switch (ruleEvaluator) { - case LlmAsJudgeAutomationRuleEvaluatorModel llmAsJudge -> AutomationModelEvaluatorMapper.INSTANCE.map(llmAsJudge); + case LlmAsJudgeAutomationRuleEvaluatorModel llmAsJudge -> + AutomationModelEvaluatorMapper.INSTANCE.map(llmAsJudge); }) .orElseThrow(this::newNotFoundException); }); @@ -148,7 +168,8 @@ public void delete(@NonNull Set ids, @NonNull UUID projectId, @NonNull Str return; } - log.debug("Deleting AutomationRuleEvaluators with ids {} in projectId '{}' and workspaceId '{}'", ids, projectId, workspaceId); + log.debug("Deleting AutomationRuleEvaluators with ids {} in projectId '{}' and workspaceId '{}'", ids, + projectId, workspaceId); template.inTransaction(WRITE, handle -> { var dao = 
handle.attach(AutomationRuleEvaluatorDAO.class); @@ -167,30 +188,55 @@ private NotFoundException newNotFoundException() { @Override public AutomationRuleEvaluator.AutomationRuleEvaluatorPage find(@NonNull UUID projectId, - @NonNull String workspaceId, - String name, - int pageNum, int size) { + @NonNull String workspaceId, + String name, + int pageNum, int size) { - log.debug("Finding AutomationRuleEvaluators with name pattern '{}' in projectId '{}' and workspaceId '{}'", name, projectId, workspaceId); + log.debug("Finding AutomationRuleEvaluators with name pattern '{}' in projectId '{}' and workspaceId '{}'", + name, projectId, workspaceId); return template.inTransaction(READ_ONLY, handle -> { var dao = handle.attach(AutomationRuleEvaluatorDAO.class); var total = dao.findCount(projectId, workspaceId, AutomationRule.AutomationRuleAction.EVALUATOR); var offset = (pageNum - 1) * size; - var automationRuleEvaluators = dao.find(workspaceId, projectId, Collections.emptySet(), name, AutomationRule.AutomationRuleAction.EVALUATOR, offset, size) - .stream() - .map(evaluator -> switch (evaluator) { - case LlmAsJudgeAutomationRuleEvaluatorModel llmAsJudge -> - AutomationModelEvaluatorMapper.INSTANCE.map(llmAsJudge); - }) - .toList(); - log.info("Found {} AutomationRuleEvaluators for projectId '{}'", automationRuleEvaluators.size(), projectId); - - return new AutomationRuleEvaluator.AutomationRuleEvaluatorPage(pageNum, automationRuleEvaluators.size(), total, + var criteria = AutomationRuleEvaluatorCriteria.builder().name(name).build(); + var automationRuleEvaluators = dao.find(workspaceId, projectId, criteria, offset, size) + .stream() + .map(evaluator -> switch (evaluator) { + case LlmAsJudgeAutomationRuleEvaluatorModel llmAsJudge -> + AutomationModelEvaluatorMapper.INSTANCE.map(llmAsJudge); + }) + .toList(); + log.info("Found {} AutomationRuleEvaluators for projectId '{}'", automationRuleEvaluators.size(), + projectId); + return new AutomationRuleEvaluator.AutomationRuleEvaluatorPage(pageNum, automationRuleEvaluators.size(), + total, automationRuleEvaluators); }); } + @Override + public List findAll(@NonNull UUID projectId, @NonNull String workspaceId, + @NonNull AutomationRuleEvaluatorType type) { + log.debug("Finding AutomationRuleEvaluators with type '{}' in projectId '{}' and workspaceId '{}'", type, + projectId, workspaceId); + + return template.inTransaction(READ_ONLY, handle -> { + var dao = handle.attach(AutomationRuleEvaluatorDAO.class); + var criteria = AutomationRuleEvaluatorCriteria.builder().type(AutomationRuleEvaluatorType.LLM_AS_JUDGE) + .build(); + + return dao.find(workspaceId, projectId, criteria) + .stream() + .map(evaluator -> switch (evaluator) { + case LlmAsJudgeAutomationRuleEvaluatorModel llmAsJudge -> + AutomationModelEvaluatorMapper.INSTANCE.map(llmAsJudge); + }) + .toList(); + + }); + } + } diff --git a/apps/opik-backend/src/main/java/com/comet/opik/domain/ChatCompletionService.java b/apps/opik-backend/src/main/java/com/comet/opik/domain/ChatCompletionService.java index e196a14b7a..a565581ffe 100644 --- a/apps/opik-backend/src/main/java/com/comet/opik/domain/ChatCompletionService.java +++ b/apps/opik-backend/src/main/java/com/comet/opik/domain/ChatCompletionService.java @@ -1,185 +1,112 @@ package com.comet.opik.domain; -import com.comet.opik.api.LlmProvider; -import com.comet.opik.infrastructure.EncryptionUtils; +import com.comet.opik.domain.llmproviders.LlmProviderFactory; +import com.comet.opik.domain.llmproviders.LlmProviderService; import 
com.comet.opik.infrastructure.LlmProviderClientConfig; -import com.comet.opik.utils.JsonUtils; -import dev.ai4j.openai4j.OpenAiClient; -import dev.ai4j.openai4j.OpenAiHttpException; +import com.comet.opik.utils.ChunkedOutputHandlers; import dev.ai4j.openai4j.chat.ChatCompletionRequest; import dev.ai4j.openai4j.chat.ChatCompletionResponse; import dev.langchain4j.internal.RetryUtils; import io.dropwizard.jersey.errors.ErrorMessage; import jakarta.inject.Inject; import jakarta.inject.Singleton; -import jakarta.ws.rs.BadRequestException; import jakarta.ws.rs.ClientErrorException; import jakarta.ws.rs.InternalServerErrorException; import jakarta.ws.rs.ServerErrorException; +import jakarta.ws.rs.core.Response; import lombok.NonNull; import lombok.extern.slf4j.Slf4j; -import org.apache.commons.lang3.StringUtils; -import org.glassfish.jersey.server.ChunkedOutput; import ru.vyarus.dropwizard.guice.module.yaml.bind.Config; -import java.io.IOException; -import java.io.UncheckedIOException; import java.util.Optional; +import java.util.function.Consumer; + +import static jakarta.ws.rs.core.Response.Status.Family.familyOf; @Singleton @Slf4j public class ChatCompletionService { - - private static final String UNEXPECTED_ERROR_CALLING_LLM_PROVIDER = "Unexpected error calling LLM provider"; + public static final String UNEXPECTED_ERROR_CALLING_LLM_PROVIDER = "Unexpected error calling LLM provider"; + public static final String ERROR_EMPTY_MESSAGES = "messages cannot be empty"; + public static final String ERROR_NO_COMPLETION_TOKENS = "maxCompletionTokens cannot be null"; private final LlmProviderClientConfig llmProviderClientConfig; - private final LlmProviderApiKeyService llmProviderApiKeyService; + private final LlmProviderFactory llmProviderFactory; private final RetryUtils.RetryPolicy retryPolicy; @Inject public ChatCompletionService( @NonNull @Config LlmProviderClientConfig llmProviderClientConfig, - @NonNull LlmProviderApiKeyService llmProviderApiKeyService) { - this.llmProviderApiKeyService = llmProviderApiKeyService; + @NonNull LlmProviderFactory llmProviderFactory) { this.llmProviderClientConfig = llmProviderClientConfig; + this.llmProviderFactory = llmProviderFactory; this.retryPolicy = newRetryPolicy(); } - private RetryUtils.RetryPolicy newRetryPolicy() { - var retryPolicyBuilder = RetryUtils.retryPolicyBuilder(); - Optional.ofNullable(llmProviderClientConfig.getMaxAttempts()).ifPresent(retryPolicyBuilder::maxAttempts); - Optional.ofNullable(llmProviderClientConfig.getJitterScale()).ifPresent(retryPolicyBuilder::jitterScale); - Optional.ofNullable(llmProviderClientConfig.getBackoffExp()).ifPresent(retryPolicyBuilder::backoffExp); - return retryPolicyBuilder - .delayMillis(llmProviderClientConfig.getDelayMillis()) - .build(); - } - public ChatCompletionResponse create(@NonNull ChatCompletionRequest request, @NonNull String workspaceId) { - log.info("Creating chat completions, workspaceId '{}', model '{}'", workspaceId, request.model()); - var openAiClient = getAndConfigureOpenAiClient(request, workspaceId); + var llmProviderClient = llmProviderFactory.getService(workspaceId, request.model()); + llmProviderClient.validateRequest(request); + ChatCompletionResponse chatCompletionResponse; try { - chatCompletionResponse = retryPolicy.withRetry(() -> openAiClient.chatCompletion(request).execute()); + log.info("Creating chat completions, workspaceId '{}', model '{}'", workspaceId, request.model()); + chatCompletionResponse = retryPolicy.withRetry(() -> llmProviderClient.generate(request, 
workspaceId)); + log.info("Created chat completions, workspaceId '{}', model '{}'", workspaceId, request.model()); } catch (RuntimeException runtimeException) { log.error(UNEXPECTED_ERROR_CALLING_LLM_PROVIDER, runtimeException); - if (runtimeException.getCause() instanceof OpenAiHttpException openAiHttpException) { - if (openAiHttpException.code() >= 400 && openAiHttpException.code() <= 499) { - throw new ClientErrorException(openAiHttpException.getMessage(), openAiHttpException.code()); + llmProviderClient.getLlmProviderError(runtimeException).ifPresent(llmProviderError -> { + if (familyOf(llmProviderError.getCode()) == Response.Status.Family.CLIENT_ERROR) { + throw new ClientErrorException(llmProviderError.getMessage(), llmProviderError.getCode()); } - throw new ServerErrorException(openAiHttpException.getMessage(), openAiHttpException.code()); - } + + throw new ServerErrorException(llmProviderError.getMessage(), llmProviderError.getCode()); + }); + throw new InternalServerErrorException(UNEXPECTED_ERROR_CALLING_LLM_PROVIDER); } + log.info("Created chat completions, workspaceId '{}', model '{}'", workspaceId, request.model()); return chatCompletionResponse; } - public ChunkedOutput createAndStreamResponse( - @NonNull ChatCompletionRequest request, @NonNull String workspaceId) { + public void createAndStreamResponse( + @NonNull ChatCompletionRequest request, + @NonNull String workspaceId, + @NonNull ChunkedOutputHandlers handlers) { log.info("Creating and streaming chat completions, workspaceId '{}', model '{}'", workspaceId, request.model()); - var openAiClient = getAndConfigureOpenAiClient(request, workspaceId); - var chunkedOutput = new ChunkedOutput(String.class, "\r\n"); - openAiClient.chatCompletion(request) - .onPartialResponse(chatCompletionResponse -> send(chatCompletionResponse, chunkedOutput)) - .onComplete(() -> close(chunkedOutput)) - .onError(throwable -> handle(throwable, chunkedOutput)) - .execute(); - log.info("Created and streaming chat completions, workspaceId '{}', model '{}'", workspaceId, request.model()); - return chunkedOutput; - } - private OpenAiClient getAndConfigureOpenAiClient(ChatCompletionRequest request, String workspaceId) { - var llmProvider = getLlmProvider(request.model()); - var encryptedApiKey = getEncryptedApiKey(workspaceId, llmProvider); - return newOpenAiClient(encryptedApiKey); - } + var llmProviderClient = llmProviderFactory.getService(workspaceId, request.model()); - /** - * The agreed requirement is to resolve the LLM provider and its API key based on the model. - * Currently, only OPEN AI is supported, so model param is ignored. - * No further validation is needed on the model, as it's just forwarded in the OPEN AI request and will be rejected - * if not valid. - */ - private LlmProvider getLlmProvider(String model) { - return LlmProvider.OPEN_AI; - } + llmProviderClient.generateStream( + request, + workspaceId, + handlers::handleMessage, + handlers::handleClose, + getErrorHandler(handlers, llmProviderClient)); - /** - * Finding API keys isn't paginated at the moment, since only OPEN AI is supported. - * Even in the future, the number of supported LLM providers per workspace is going to be very low. 
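Editor's note: the provider/key resolution described in the comment above moves to the new LlmProviderFactory later in this diff, and the streaming entry point no longer builds its own ChunkedOutput; the caller now owns the output and passes callbacks in via ChunkedOutputHandlers. A minimal sketch of the new call shape, assuming a hypothetical caller class; the class, method name, and workspace wiring are illustrative only, not part of this diff:

```java
import com.comet.opik.domain.ChatCompletionService;
import com.comet.opik.utils.ChunkedOutputHandlers;
import dev.ai4j.openai4j.chat.ChatCompletionRequest;
import org.glassfish.jersey.server.ChunkedOutput;

class StreamingCallSketch { // hypothetical class, for illustration only

    ChunkedOutput<String> stream(ChatCompletionService service,
            ChatCompletionRequest request, String workspaceId) {
        // The caller builds and owns the chunked output; the service only sees the
        // write/close/error callbacks wrapped by ChunkedOutputHandlers.
        var chunkedOutput = new ChunkedOutput<String>(String.class, "\r\n");
        service.createAndStreamResponse(request, workspaceId,
                new ChunkedOutputHandlers(chunkedOutput));
        return chunkedOutput; // handed back to Jersey while chunks stream asynchronously
    }
}
```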
- */ - private String getEncryptedApiKey(String workspaceId, LlmProvider llmProvider) { - return llmProviderApiKeyService.find(workspaceId).content().stream() - .filter(providerApiKey -> llmProvider.equals(providerApiKey.provider())) - .findFirst() - .orElseThrow(() -> new BadRequestException("API key not configured for LLM provider '%s'".formatted( - llmProvider.getValue()))) - .apiKey(); + log.info("Created and streaming chat completions, workspaceId '{}', model '{}'", workspaceId, + request.model()); } - /** - * Initially, only OPEN AI is supported, so no need for a more sophisticated client resolution to start with. - * At the moment, openai4j client and also langchain4j wrappers, don't support dynamic API keys. That can imply - * an important performance penalty for next phases. The following options should be evaluated: - * - Cache clients, but can be unsafe. - * - Find and evaluate other clients. - * - Implement our own client. - * TODO as part of : OPIK-522 - */ - private OpenAiClient newOpenAiClient(String encryptedApiKey) { - var openAiClientBuilder = OpenAiClient.builder(); - Optional.ofNullable(llmProviderClientConfig.getOpenAiClient()) - .map(LlmProviderClientConfig.OpenAiClientConfig::url) - .ifPresent(baseUrl -> { - if (StringUtils.isNotBlank(baseUrl)) { - openAiClientBuilder.baseUrl(baseUrl); - } - }); - Optional.ofNullable(llmProviderClientConfig.getCallTimeout()) - .ifPresent(callTimeout -> openAiClientBuilder.callTimeout(callTimeout.toJavaDuration())); - Optional.ofNullable(llmProviderClientConfig.getConnectTimeout()) - .ifPresent(connectTimeout -> openAiClientBuilder.connectTimeout(connectTimeout.toJavaDuration())); - Optional.ofNullable(llmProviderClientConfig.getReadTimeout()) - .ifPresent(readTimeout -> openAiClientBuilder.readTimeout(readTimeout.toJavaDuration())); - Optional.ofNullable(llmProviderClientConfig.getWriteTimeout()) - .ifPresent(writeTimeout -> openAiClientBuilder.writeTimeout(writeTimeout.toJavaDuration())); - return openAiClientBuilder - .openAiApiKey(EncryptionUtils.decrypt(encryptedApiKey)) + private RetryUtils.RetryPolicy newRetryPolicy() { + var retryPolicyBuilder = RetryUtils.retryPolicyBuilder(); + Optional.ofNullable(llmProviderClientConfig.getMaxAttempts()).ifPresent(retryPolicyBuilder::maxAttempts); + Optional.ofNullable(llmProviderClientConfig.getJitterScale()).ifPresent(retryPolicyBuilder::jitterScale); + Optional.ofNullable(llmProviderClientConfig.getBackoffExp()).ifPresent(retryPolicyBuilder::backoffExp); + return retryPolicyBuilder + .delayMillis(llmProviderClientConfig.getDelayMillis()) .build(); } - private void send(Object item, ChunkedOutput chunkedOutput) { - if (chunkedOutput.isClosed()) { - log.warn("Output stream is already closed"); - return; - } - try { - chunkedOutput.write(JsonUtils.writeValueAsString(item)); - } catch (IOException ioException) { - throw new UncheckedIOException(ioException); - } - } + private Consumer getErrorHandler( + ChunkedOutputHandlers handlers, LlmProviderService llmProviderClient) { + return throwable -> { + log.error(UNEXPECTED_ERROR_CALLING_LLM_PROVIDER, throwable); - private void handle(Throwable throwable, ChunkedOutput chunkedOutput) { - log.error(UNEXPECTED_ERROR_CALLING_LLM_PROVIDER, throwable); - var errorMessage = new ErrorMessage(UNEXPECTED_ERROR_CALLING_LLM_PROVIDER); - if (throwable instanceof OpenAiHttpException openAiHttpException) { - errorMessage = new ErrorMessage(openAiHttpException.code(), openAiHttpException.getMessage()); - } - try { - send(errorMessage, chunkedOutput); - } catch 
(UncheckedIOException uncheckedIOException) { - log.error("Failed to stream error message to client", uncheckedIOException); - } - close(chunkedOutput); - } + var errorMessage = llmProviderClient.getLlmProviderError(throwable) + .orElse(new ErrorMessage(ChatCompletionService.UNEXPECTED_ERROR_CALLING_LLM_PROVIDER)); - private void close(ChunkedOutput chunkedOutput) { - try { - chunkedOutput.close(); - } catch (IOException ioException) { - log.error("Failed to close output stream", ioException); - } + handlers.handleError(errorMessage); + }; } } diff --git a/apps/opik-backend/src/main/java/com/comet/opik/domain/DatasetItemDAO.java b/apps/opik-backend/src/main/java/com/comet/opik/domain/DatasetItemDAO.java index 482999bd41..07f3d3bd81 100644 --- a/apps/opik-backend/src/main/java/com/comet/opik/domain/DatasetItemDAO.java +++ b/apps/opik-backend/src/main/java/com/comet/opik/domain/DatasetItemDAO.java @@ -366,7 +366,11 @@ WITH dataset_items_final AS ( category_name, value, reason, - source + source, + created_at, + last_updated_at, + created_by, + last_updated_by FROM feedback_scores WHERE workspace_id = :workspace_id AND entity_type = :entityType diff --git a/apps/opik-backend/src/main/java/com/comet/opik/domain/DatasetItemResultMapper.java b/apps/opik-backend/src/main/java/com/comet/opik/domain/DatasetItemResultMapper.java index 4d8607710c..abce03684f 100644 --- a/apps/opik-backend/src/main/java/com/comet/opik/domain/DatasetItemResultMapper.java +++ b/apps/opik-backend/src/main/java/com/comet/opik/domain/DatasetItemResultMapper.java @@ -4,8 +4,6 @@ import com.comet.opik.api.DatasetItem; import com.comet.opik.api.DatasetItemSource; import com.comet.opik.api.ExperimentItem; -import com.comet.opik.api.FeedbackScore; -import com.comet.opik.api.ScoreSource; import com.comet.opik.utils.JsonUtils; import com.fasterxml.jackson.databind.JsonNode; import com.google.common.collect.Sets; @@ -17,7 +15,6 @@ import org.reactivestreams.Publisher; import reactor.core.publisher.Mono; -import java.math.BigDecimal; import java.time.Instant; import java.util.Arrays; import java.util.Collection; @@ -29,6 +26,7 @@ import java.util.stream.Collectors; import static com.comet.opik.api.Column.ColumnType; +import static com.comet.opik.domain.FeedbackScoreMapper.getFeedbackScores; import static com.comet.opik.utils.ValidationUtils.CLICKHOUSE_FIXED_STRING_UUID_FIELD_NULL_VALUE; import static java.util.function.Predicate.not; import static java.util.stream.Collectors.toMap; @@ -71,26 +69,6 @@ static JsonNode getJsonNodeOrNull(Object field) { return JsonUtils.getJsonNodeFromString(field.toString()); } - private static List getFeedbackScores(Object feedbackScoresRaw) { - if (feedbackScoresRaw instanceof List[] feedbackScoresArray) { - var feedbackScores = Arrays.stream(feedbackScoresArray) - .filter(feedbackScore -> CollectionUtils.isNotEmpty(feedbackScore) && - !CLICKHOUSE_FIXED_STRING_UUID_FIELD_NULL_VALUE.equals(feedbackScore.getFirst().toString())) - .map(feedbackScore -> FeedbackScore.builder() - .name(feedbackScore.get(1).toString()) - .categoryName(Optional.ofNullable(feedbackScore.get(2)).map(Object::toString) - .filter(StringUtils::isNotEmpty).orElse(null)) - .value(new BigDecimal(feedbackScore.get(3).toString())) - .reason(Optional.ofNullable(feedbackScore.get(4)).map(Object::toString) - .filter(StringUtils::isNotEmpty).orElse(null)) - .source(ScoreSource.fromString(feedbackScore.get(5).toString())) - .build()) - .toList(); - return feedbackScores.isEmpty() ? 
null : feedbackScores; - } - return null; - } - static Map.Entry> groupResults(Map.Entry> result1, Map.Entry> result2) { diff --git a/apps/opik-backend/src/main/java/com/comet/opik/domain/ExperimentItemDAO.java b/apps/opik-backend/src/main/java/com/comet/opik/domain/ExperimentItemDAO.java index aee8f26ad9..78cb1505d4 100644 --- a/apps/opik-backend/src/main/java/com/comet/opik/domain/ExperimentItemDAO.java +++ b/apps/opik-backend/src/main/java/com/comet/opik/domain/ExperimentItemDAO.java @@ -1,6 +1,7 @@ package com.comet.opik.domain; import com.comet.opik.api.ExperimentItem; +import com.comet.opik.api.ExperimentItemSearchCriteria; import com.google.common.base.Preconditions; import io.opentelemetry.instrumentation.annotations.WithSpan; import io.r2dbc.spi.Connection; @@ -87,15 +88,74 @@ INSERT INTO experiment_items ( """; private static final String STREAM = """ + WITH experiment_items_scope as ( + SELECT * FROM + ( + SELECT + * + FROM experiment_items + WHERE workspace_id = :workspace_id + AND experiment_id IN :experiment_ids + AND id \\< :lastRetrievedId + ORDER BY id DESC, last_updated_at DESC + LIMIT 1 BY id + LIMIT :limit + ) + ORDER BY experiment_id DESC + ), feedback_scores_final AS ( + SELECT + entity_id, + name, + category_name, + value, + reason, + source, + created_at, + last_updated_at, + created_by, + last_updated_by + FROM feedback_scores + WHERE workspace_id = :workspace_id + AND entity_id IN (SELECT trace_id FROM experiment_items_scope) + ORDER BY entity_id DESC, last_updated_at DESC + LIMIT 1 BY entity_id, name + ) SELECT - * - FROM experiment_items - WHERE workspace_id = :workspace_id - AND experiment_id IN :experiment_ids - AND id \\< :lastRetrievedId - ORDER BY experiment_id DESC, id DESC, last_updated_at DESC - LIMIT 1 BY id - LIMIT :limit + ei.id, + ei.experiment_id, + ei.dataset_item_id, + ei.trace_id, + tfs.input, + tfs.output, + tfs.feedback_scores_array, + ei.created_at, + ei.last_updated_at, + ei.created_by, + ei.last_updated_by + FROM experiment_items_scope AS ei + LEFT JOIN ( + SELECT + t.id, + t.input, + t.output, + groupArray(tuple(fs.*)) AS feedback_scores_array + FROM ( + SELECT + id, + replaceRegexpAll(input, '', '"[image]"') as input input , + replaceRegexpAll(output, '', '"[image]"') as output output + FROM traces + WHERE workspace_id = :workspace_id + AND id IN (SELECT trace_id FROM experiment_items_scope) + ORDER BY id DESC, last_updated_at DESC + LIMIT 1 BY id + ) AS t + LEFT JOIN feedback_scores_final AS fs ON t.id = fs.entity_id + GROUP BY + t.id, + t.input, + t.output + ) AS tfs ON ei.trace_id = tfs.id ; """; @@ -197,24 +257,11 @@ private Mono insert(Collection experimentItems, Connection }); } - private Publisher mapToExperimentItem(Result result) { - return result.map((row, rowMetadata) -> ExperimentItem.builder() - .id(row.get("id", UUID.class)) - .experimentId(row.get("experiment_id", UUID.class)) - .datasetItemId(row.get("dataset_item_id", UUID.class)) - .traceId(row.get("trace_id", UUID.class)) - .lastUpdatedAt(row.get("last_updated_at", Instant.class)) - .createdAt(row.get("created_at", Instant.class)) - .createdBy(row.get("created_by", String.class)) - .lastUpdatedBy(row.get("last_updated_by", String.class)) - .build()); - } - @WithSpan public Mono get(@NonNull UUID id) { return Mono.from(connectionFactory.create()) .flatMapMany(connection -> get(id, connection)) - .flatMap(this::mapToExperimentItem) + .flatMap(ExperimentItemMapper::mapToExperimentItem) .singleOrEmpty(); } @@ -227,25 +274,32 @@ private Publisher extends Result> get(UUID id, 
Connection connection) { return makeFluxContextAware(bindWorkspaceIdToFlux(statement)); } - public Flux getItems(@NonNull Set experimentIds, int limit, UUID lastRetrievedId) { + public Flux getItems(@NonNull Set experimentIds, + @NonNull ExperimentItemSearchCriteria criteria) { if (experimentIds.isEmpty()) { log.info("Getting experiment items by empty experimentIds, limit '{}', lastRetrievedId '{}'", - limit, lastRetrievedId); + criteria.limit(), criteria.lastRetrievedId()); return Flux.empty(); } return Mono.from(connectionFactory.create()) - .flatMapMany(connection -> getItems(experimentIds, limit, lastRetrievedId, connection)) - .flatMap(this::mapToExperimentItem); + .flatMapMany(connection -> getItems(experimentIds, criteria, connection)) + .flatMap(ExperimentItemMapper::mapToExperimentItemFullContent); } private Publisher extends Result> getItems( - Set experimentIds, int limit, UUID lastRetrievedId, Connection connection) { + Set experimentIds, ExperimentItemSearchCriteria criteria, Connection connection) { + + int limit = criteria.limit(); + UUID lastRetrievedId = criteria.lastRetrievedId(); + log.info("Getting experiment items by experimentIds count '{}', limit '{}', lastRetrievedId '{}'", experimentIds.size(), limit, lastRetrievedId); + var template = new ST(STREAM); if (lastRetrievedId != null) { template.add("lastRetrievedId", lastRetrievedId); } + template = ImageUtils.addTruncateToTemplate(template, criteria.truncate()); var statement = connection.createStatement(template.render()) .bind("experiment_ids", experimentIds.toArray(UUID[]::new)) .bind("limit", limit); diff --git a/apps/opik-backend/src/main/java/com/comet/opik/domain/ExperimentItemMapper.java b/apps/opik-backend/src/main/java/com/comet/opik/domain/ExperimentItemMapper.java new file mode 100644 index 0000000000..768d9ff751 --- /dev/null +++ b/apps/opik-backend/src/main/java/com/comet/opik/domain/ExperimentItemMapper.java @@ -0,0 +1,52 @@ +package com.comet.opik.domain; + +import com.comet.opik.api.ExperimentItem; +import com.comet.opik.utils.JsonUtils; +import io.r2dbc.spi.Result; +import lombok.experimental.UtilityClass; +import org.reactivestreams.Publisher; + +import java.time.Instant; +import java.util.List; +import java.util.Optional; +import java.util.UUID; + +import static com.comet.opik.domain.FeedbackScoreMapper.getFeedbackScores; + +@UtilityClass +class ExperimentItemMapper { + public static Publisher mapToExperimentItem(Result result) { + return result.map((row, rowMetadata) -> ExperimentItem.builder() + .id(row.get("id", UUID.class)) + .experimentId(row.get("experiment_id", UUID.class)) + .datasetItemId(row.get("dataset_item_id", UUID.class)) + .traceId(row.get("trace_id", UUID.class)) + .lastUpdatedAt(row.get("last_updated_at", Instant.class)) + .createdAt(row.get("created_at", Instant.class)) + .createdBy(row.get("created_by", String.class)) + .lastUpdatedBy(row.get("last_updated_by", String.class)) + .build()); + } + + public static Publisher mapToExperimentItemFullContent(Result result) { + return result.map((row, rowMetadata) -> ExperimentItem.builder() + .id(row.get("id", UUID.class)) + .experimentId(row.get("experiment_id", UUID.class)) + .datasetItemId(row.get("dataset_item_id", UUID.class)) + .traceId(row.get("trace_id", UUID.class)) + .input(Optional.ofNullable(row.get("input", String.class)) + .filter(str -> !str.isBlank()) + .map(JsonUtils::getJsonNodeFromString) + .orElse(null)) + .output(Optional.ofNullable(row.get("output", String.class)) + .filter(str -> !str.isBlank()) + 
.map(JsonUtils::getJsonNodeFromString) + .orElse(null)) + .feedbackScores(getFeedbackScores(row.get("feedback_scores_array", List[].class))) + .lastUpdatedAt(row.get("last_updated_at", Instant.class)) + .createdAt(row.get("created_at", Instant.class)) + .createdBy(row.get("created_by", String.class)) + .lastUpdatedBy(row.get("last_updated_by", String.class)) + .build()); + } +} diff --git a/apps/opik-backend/src/main/java/com/comet/opik/domain/ExperimentItemService.java b/apps/opik-backend/src/main/java/com/comet/opik/domain/ExperimentItemService.java index 6bffae7f83..7424672510 100644 --- a/apps/opik-backend/src/main/java/com/comet/opik/domain/ExperimentItemService.java +++ b/apps/opik-backend/src/main/java/com/comet/opik/domain/ExperimentItemService.java @@ -2,7 +2,7 @@ import com.comet.opik.api.Experiment; import com.comet.opik.api.ExperimentItem; -import com.comet.opik.api.ExperimentItemStreamRequest; +import com.comet.opik.api.ExperimentItemSearchCriteria; import com.comet.opik.infrastructure.auth.RequestContext; import com.google.common.base.Preconditions; import jakarta.inject.Inject; @@ -120,13 +120,13 @@ private NotFoundException newNotFoundException(UUID id) { return new NotFoundException(message); } - public Flux getExperimentItems(@NonNull ExperimentItemStreamRequest request) { - log.info("Getting experiment items by '{}'", request); - return experimentService.findByName(request.experimentName()) + public Flux getExperimentItems(@NonNull ExperimentItemSearchCriteria criteria) { + log.info("Getting experiment items by '{}'", criteria); + return experimentService.findByName(criteria.experimentName()) .subscribeOn(Schedulers.boundedElastic()) .collect(Collectors.mapping(Experiment::id, Collectors.toUnmodifiableSet())) .flatMapMany(experimentIds -> experimentItemDAO.getItems( - experimentIds, request.limit(), request.lastRetrievedId())); + experimentIds, criteria)); } public Mono delete(@NonNull Set ids) { diff --git a/apps/opik-backend/src/main/java/com/comet/opik/domain/FeedbackScoreMapper.java b/apps/opik-backend/src/main/java/com/comet/opik/domain/FeedbackScoreMapper.java index 8cf60569a3..812dd9563a 100644 --- a/apps/opik-backend/src/main/java/com/comet/opik/domain/FeedbackScoreMapper.java +++ b/apps/opik-backend/src/main/java/com/comet/opik/domain/FeedbackScoreMapper.java @@ -2,13 +2,22 @@ import com.comet.opik.api.FeedbackScore; import com.comet.opik.api.FeedbackScoreBatchItem; +import com.comet.opik.api.ScoreSource; +import org.apache.commons.collections4.CollectionUtils; +import org.apache.commons.lang3.StringUtils; import org.mapstruct.Mapper; import org.mapstruct.Mapping; import org.mapstruct.factory.Mappers; +import java.math.BigDecimal; +import java.time.Instant; +import java.util.Arrays; import java.util.List; +import java.util.Optional; import java.util.UUID; +import static com.comet.opik.utils.ValidationUtils.CLICKHOUSE_FIXED_STRING_UUID_FIELD_NULL_VALUE; + @Mapper public interface FeedbackScoreMapper { @@ -23,4 +32,28 @@ public interface FeedbackScoreMapper { @Mapping(target = "id", source = "entityId") FeedbackScoreBatchItem toFeedbackScore(UUID entityId, UUID projectId, FeedbackScore score); + + static List getFeedbackScores(Object feedbackScoresRaw) { + if (feedbackScoresRaw instanceof List[] feedbackScoresArray) { + var feedbackScores = Arrays.stream(feedbackScoresArray) + .filter(feedbackScore -> CollectionUtils.isNotEmpty(feedbackScore) && + !CLICKHOUSE_FIXED_STRING_UUID_FIELD_NULL_VALUE.equals(feedbackScore.getFirst().toString())) + .map(feedbackScore -> 
FeedbackScore.builder() + .name(feedbackScore.get(1).toString()) + .categoryName(Optional.ofNullable(feedbackScore.get(2)).map(Object::toString) + .filter(StringUtils::isNotEmpty).orElse(null)) + .value(new BigDecimal(feedbackScore.get(3).toString())) + .reason(Optional.ofNullable(feedbackScore.get(4)).map(Object::toString) + .filter(StringUtils::isNotEmpty).orElse(null)) + .source(ScoreSource.fromString(feedbackScore.get(5).toString())) + .createdAt(Instant.parse(feedbackScore.get(6).toString())) + .lastUpdatedAt(Instant.parse(feedbackScore.get(7).toString())) + .createdBy(feedbackScore.get(8).toString()) + .lastUpdatedBy(feedbackScore.get(9).toString()) + .build()) + .toList(); + return feedbackScores.isEmpty() ? null : feedbackScores; + } + return null; + } } diff --git a/apps/opik-backend/src/main/java/com/comet/opik/domain/LlmAsJudgeAutomationRuleEvaluatorModel.java b/apps/opik-backend/src/main/java/com/comet/opik/domain/LlmAsJudgeAutomationRuleEvaluatorModel.java index d8f51460c1..be1110ee6e 100644 --- a/apps/opik-backend/src/main/java/com/comet/opik/domain/LlmAsJudgeAutomationRuleEvaluatorModel.java +++ b/apps/opik-backend/src/main/java/com/comet/opik/domain/LlmAsJudgeAutomationRuleEvaluatorModel.java @@ -1,30 +1,43 @@ package com.comet.opik.domain; import com.comet.opik.api.AutomationRuleEvaluatorType; -import com.fasterxml.jackson.databind.JsonNode; +import com.comet.opik.api.LlmAsJudgeOutputSchemaType; import lombok.Builder; import org.jdbi.v3.json.Json; import java.time.Instant; +import java.util.List; +import java.util.Map; import java.util.UUID; - @Builder(toBuilder = true) -public record LlmAsJudgeAutomationRuleEvaluatorModel ( - UUID id, - UUID projectId, - String name, - Float samplingRate, - @Json JsonNode code, - Instant createdAt, - String createdBy, - Instant lastUpdatedAt, - String lastUpdatedBy -) implements AutomationRuleEvaluatorModel { +public record LlmAsJudgeAutomationRuleEvaluatorModel( + UUID id, + UUID projectId, + String name, + Float samplingRate, + @Json LlmAsJudgeAutomationRuleEvaluatorModel.LlmAsJudgeCode code, + Instant createdAt, + String createdBy, + Instant lastUpdatedAt, + String lastUpdatedBy) + implements + AutomationRuleEvaluatorModel { @Override public AutomationRuleEvaluatorType type() { return AutomationRuleEvaluatorType.LLM_AS_JUDGE; } + record LlmAsJudgeCode(LlmAsJudgeCodeParameters model, + List messages, + Map variables, + List schema) { + } + record LlmAsJudgeCodeParameters(String name, Double temperature) { + } + record LlmAsJudgeCodeMessage(String role, String content) { + } + record LlmAsJudgeCodeSchema(String name, LlmAsJudgeOutputSchemaType type, String description) { + } } diff --git a/apps/opik-backend/src/main/java/com/comet/opik/domain/ProjectService.java b/apps/opik-backend/src/main/java/com/comet/opik/domain/ProjectService.java index 45ab2d6108..fee3c1e1ab 100644 --- a/apps/opik-backend/src/main/java/com/comet/opik/domain/ProjectService.java +++ b/apps/opik-backend/src/main/java/com/comet/opik/domain/ProjectService.java @@ -27,6 +27,7 @@ import lombok.NonNull; import lombok.RequiredArgsConstructor; import lombok.extern.slf4j.Slf4j; +import org.apache.commons.collections4.CollectionUtils; import org.jdbi.v3.core.statement.UnableToExecuteStatementException; import ru.vyarus.guicey.jdbi3.tx.TransactionTemplate; @@ -354,6 +355,11 @@ private Page findWithLastTraceSorting(int page, int size, @NonNull Proj List sorted = sortByLastTrace(allProjectIdsLastUpdated, projectLastUpdatedTraceAtMap, sortingField); List finalIds = 
PaginationUtils.paginate(page, size, sorted); + if (CollectionUtils.isEmpty(finalIds)) { + // pagination might return an empty list + return ProjectPage.empty(page); + } + // get all project properties for the final list of ids Map projectsById = template.inTransaction(READ_ONLY, handle -> { ProjectDAO repository = handle.attach(ProjectDAO.class); diff --git a/apps/opik-backend/src/main/java/com/comet/opik/domain/TraceService.java b/apps/opik-backend/src/main/java/com/comet/opik/domain/TraceService.java index 2e224e6b39..5c2b56cf34 100644 --- a/apps/opik-backend/src/main/java/com/comet/opik/domain/TraceService.java +++ b/apps/opik-backend/src/main/java/com/comet/opik/domain/TraceService.java @@ -106,10 +106,14 @@ public Mono create(@NonNull Trace trace) { .flatMap(project -> lockService.executeWithLock( new LockService.Lock(id, TRACE_KEY), Mono.defer(() -> insertTrace(trace, project, id))) - .doOnSuccess(__ -> eventBus.post(new TracesCreated( - Set.of(project.id()), - ctx.get(RequestContext.WORKSPACE_ID), - ctx.get(RequestContext.USER_NAME)))))); + .doOnSuccess(__ -> { + // forwards the trace with its actual projectId + var savedTrace = trace.toBuilder().projectId(project.id()).build(); + String workspaceId = ctx.get(RequestContext.WORKSPACE_ID); + String userName = ctx.get(RequestContext.USER_NAME); + + eventBus.post(new TracesCreated(List.of(savedTrace), workspaceId, userName)); + }))); } @WithSpan @@ -131,12 +135,12 @@ public Mono create(TraceBatch batch) { .map(projects -> bindTraceToProjectAndId(batch, projects)) .subscribeOn(Schedulers.boundedElastic()); + String workspaceId = ctx.get(RequestContext.WORKSPACE_ID); + String userName = ctx.get(RequestContext.USER_NAME); + return resolveProjects .flatMap(traces -> template.nonTransaction(connection -> dao.batchInsert(traces, connection)) - .doOnSuccess(__ -> eventBus.post(new TracesCreated( - traces.stream().map(Trace::projectId).collect(Collectors.toUnmodifiableSet()), - ctx.get(RequestContext.WORKSPACE_ID), - ctx.get(RequestContext.USER_NAME))))); + .doOnSuccess(__ -> eventBus.post(new TracesCreated(traces, workspaceId, userName)))); }); } diff --git a/apps/opik-backend/src/main/java/com/comet/opik/domain/llmproviders/LlmProviderAnthropic.java b/apps/opik-backend/src/main/java/com/comet/opik/domain/llmproviders/LlmProviderAnthropic.java new file mode 100644 index 0000000000..6781b7fb9a --- /dev/null +++ b/apps/opik-backend/src/main/java/com/comet/opik/domain/llmproviders/LlmProviderAnthropic.java @@ -0,0 +1,238 @@ +package com.comet.opik.domain.llmproviders; + +import com.comet.opik.infrastructure.LlmProviderClientConfig; +import dev.ai4j.openai4j.chat.AssistantMessage; +import dev.ai4j.openai4j.chat.ChatCompletionChoice; +import dev.ai4j.openai4j.chat.ChatCompletionRequest; +import dev.ai4j.openai4j.chat.ChatCompletionResponse; +import dev.ai4j.openai4j.chat.Delta; +import dev.ai4j.openai4j.chat.Message; +import dev.ai4j.openai4j.chat.Role; +import dev.ai4j.openai4j.chat.SystemMessage; +import dev.ai4j.openai4j.chat.UserMessage; +import dev.ai4j.openai4j.shared.Usage; +import dev.langchain4j.data.message.AiMessage; +import dev.langchain4j.model.StreamingResponseHandler; +import dev.langchain4j.model.anthropic.internal.api.AnthropicContent; +import dev.langchain4j.model.anthropic.internal.api.AnthropicCreateMessageRequest; +import dev.langchain4j.model.anthropic.internal.api.AnthropicCreateMessageResponse; +import dev.langchain4j.model.anthropic.internal.api.AnthropicMessage; +import 
dev.langchain4j.model.anthropic.internal.api.AnthropicMessageContent; +import dev.langchain4j.model.anthropic.internal.api.AnthropicRole; +import dev.langchain4j.model.anthropic.internal.api.AnthropicTextContent; +import dev.langchain4j.model.anthropic.internal.api.AnthropicToolChoice; +import dev.langchain4j.model.anthropic.internal.client.AnthropicClient; +import dev.langchain4j.model.anthropic.internal.client.AnthropicHttpException; +import dev.langchain4j.model.output.Response; +import io.dropwizard.jersey.errors.ErrorMessage; +import jakarta.ws.rs.BadRequestException; +import lombok.NonNull; +import lombok.extern.slf4j.Slf4j; +import org.apache.commons.collections4.CollectionUtils; +import org.apache.commons.lang3.StringUtils; + +import java.util.List; +import java.util.Optional; +import java.util.function.Consumer; + +import static com.comet.opik.domain.ChatCompletionService.ERROR_EMPTY_MESSAGES; +import static com.comet.opik.domain.ChatCompletionService.ERROR_NO_COMPLETION_TOKENS; + +@Slf4j +class LlmProviderAnthropic implements LlmProviderService { + private final @NonNull LlmProviderClientConfig llmProviderClientConfig; + private final @NonNull AnthropicClient anthropicClient; + + public LlmProviderAnthropic(@NonNull LlmProviderClientConfig llmProviderClientConfig, @NonNull String apiKey) { + this.llmProviderClientConfig = llmProviderClientConfig; + this.anthropicClient = newClient(apiKey); + } + + @Override + public ChatCompletionResponse generate(@NonNull ChatCompletionRequest request, @NonNull String workspaceId) { + var response = anthropicClient.createMessage(toAnthropicCreateMessageRequest(request)); + + return ChatCompletionResponse.builder() + .id(response.id) + .model(response.model) + .choices(response.content.stream().map(content -> toChatCompletionChoice(response, content)) + .toList()) + .usage(Usage.builder() + .promptTokens(response.usage.inputTokens) + .completionTokens(response.usage.outputTokens) + .totalTokens(response.usage.inputTokens + response.usage.outputTokens) + .build()) + .build(); + } + + @Override + public void generateStream( + @NonNull ChatCompletionRequest request, + @NonNull String workspaceId, + @NonNull Consumer handleMessage, + @NonNull Runnable handleClose, @NonNull Consumer handleError) { + validateRequest(request); + anthropicClient.createMessage(toAnthropicCreateMessageRequest(request), + new ChunkedResponseHandler(handleMessage, handleClose, handleError, request.model())); + } + + @Override + public void validateRequest(@NonNull ChatCompletionRequest request) { + // see https://github.com/anthropics/courses/blob/master/anthropic_api_fundamentals/04_parameters.ipynb + if (CollectionUtils.isEmpty(request.messages())) { + throw new BadRequestException(ERROR_EMPTY_MESSAGES); + } + if (request.maxCompletionTokens() == null) { + throw new BadRequestException(ERROR_NO_COMPLETION_TOKENS); + } + } + + @Override + public @NonNull Optional getLlmProviderError(Throwable runtimeException) { + if (runtimeException instanceof AnthropicHttpException anthropicHttpException) { + return Optional.of(new ErrorMessage(anthropicHttpException.statusCode(), + anthropicHttpException.getMessage())); + } + + return Optional.empty(); + } + + private AnthropicCreateMessageRequest toAnthropicCreateMessageRequest(ChatCompletionRequest request) { + var builder = AnthropicCreateMessageRequest.builder(); + Optional.ofNullable(request.toolChoice()) + .ifPresent(toolChoice -> builder.toolChoice(AnthropicToolChoice.from( + request.toolChoice().toString()))); + return builder + 
.stream(request.stream()) + .model(request.model()) + .messages(request.messages().stream() + .filter(message -> List.of(Role.ASSISTANT, Role.USER).contains(message.role())) + .map(this::toMessage).toList()) + .system(request.messages().stream() + .filter(message -> message.role() == Role.SYSTEM) + .map(this::toSystemMessage).toList()) + .temperature(request.temperature()) + .topP(request.topP()) + .stopSequences(request.stop()) + .maxTokens(request.maxCompletionTokens()) + .build(); + } + + private AnthropicMessage toMessage(Message message) { + if (message.role() == Role.ASSISTANT) { + return AnthropicMessage.builder() + .role(AnthropicRole.ASSISTANT) + .content(List.of(new AnthropicTextContent(((AssistantMessage) message).content()))) + .build(); + } + + if (message.role() == Role.USER) { + return AnthropicMessage.builder() + .role(AnthropicRole.USER) + .content(List.of(toAnthropicMessageContent(((UserMessage) message).content()))) + .build(); + } + + throw new BadRequestException("unexpected message role: " + message.role()); + } + + private AnthropicTextContent toSystemMessage(Message message) { + if (message.role() != Role.SYSTEM) { + throw new BadRequestException("expecting only system role, got: " + message.role()); + } + + return new AnthropicTextContent(((SystemMessage) message).content()); + } + + private AnthropicMessageContent toAnthropicMessageContent(Object rawContent) { + if (rawContent instanceof String content) { + return new AnthropicTextContent(content); + } + + throw new BadRequestException("only text content is supported"); + } + + private ChatCompletionChoice toChatCompletionChoice( + AnthropicCreateMessageResponse response, AnthropicContent content) { + return ChatCompletionChoice.builder() + .message(AssistantMessage.builder() + .name(content.name) + .content(content.text) + .build()) + .finishReason(response.stopReason) + .build(); + } + + private AnthropicClient newClient(String apiKey) { + var anthropicClientBuilder = AnthropicClient.builder(); + Optional.ofNullable(llmProviderClientConfig.getAnthropicClient()) + .map(LlmProviderClientConfig.AnthropicClientConfig::url) + .ifPresent(url -> { + if (StringUtils.isNotEmpty(url)) { + anthropicClientBuilder.baseUrl(url); + } + }); + Optional.ofNullable(llmProviderClientConfig.getAnthropicClient()) + .map(LlmProviderClientConfig.AnthropicClientConfig::version) + .ifPresent(version -> { + if (StringUtils.isNotBlank(version)) { + anthropicClientBuilder.version(version); + } + }); + Optional.ofNullable(llmProviderClientConfig.getLogRequests()) + .ifPresent(anthropicClientBuilder::logRequests); + Optional.ofNullable(llmProviderClientConfig.getLogResponses()) + .ifPresent(anthropicClientBuilder::logResponses); + // anthropic client builder only receives one timeout variant + Optional.ofNullable(llmProviderClientConfig.getCallTimeout()) + .ifPresent(callTimeout -> anthropicClientBuilder.timeout(callTimeout.toJavaDuration())); + return anthropicClientBuilder + .apiKey(apiKey) + .build(); + } + + private record ChunkedResponseHandler( + Consumer handleMessage, + Runnable handleClose, + Consumer handleError, + String model) implements StreamingResponseHandler { + + @Override + public void onNext(String s) { + handleMessage.accept(ChatCompletionResponse.builder() + .model(model) + .choices(List.of(ChatCompletionChoice.builder() + .delta(Delta.builder() + .content(s) + .role(Role.ASSISTANT) + .build()) + .build())) + .build()); + } + + @Override + public void onComplete(Response response) { + 
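+                    // Terminal chunk: mirrors the OpenAI streaming shape with an empty assistant delta, and + // carries the token usage and message id that Anthropic only reports once the stream completes.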
handleMessage.accept(ChatCompletionResponse.builder() + .model(model) + .choices(List.of(ChatCompletionChoice.builder() + .delta(Delta.builder() + .content("") + .role(Role.ASSISTANT) + .build()) + .build())) + .usage(Usage.builder() + .promptTokens(response.tokenUsage().inputTokenCount()) + .completionTokens(response.tokenUsage().outputTokenCount()) + .totalTokens(response.tokenUsage().totalTokenCount()) + .build()) + .id((String) response.metadata().get("id")) + .build()); + handleClose.run(); + } + + @Override + public void onError(Throwable throwable) { + handleError.accept(throwable); + } + } +} diff --git a/apps/opik-backend/src/main/java/com/comet/opik/domain/llmproviders/LlmProviderFactory.java b/apps/opik-backend/src/main/java/com/comet/opik/domain/llmproviders/LlmProviderFactory.java new file mode 100644 index 0000000000..033d3d6a49 --- /dev/null +++ b/apps/opik-backend/src/main/java/com/comet/opik/domain/llmproviders/LlmProviderFactory.java @@ -0,0 +1,70 @@ +package com.comet.opik.domain.llmproviders; + +import com.comet.opik.api.LlmProvider; +import com.comet.opik.domain.LlmProviderApiKeyService; +import com.comet.opik.infrastructure.EncryptionUtils; +import com.comet.opik.infrastructure.LlmProviderClientConfig; +import dev.ai4j.openai4j.chat.ChatCompletionModel; +import dev.langchain4j.model.anthropic.AnthropicChatModelName; +import jakarta.inject.Inject; +import jakarta.inject.Singleton; +import jakarta.ws.rs.BadRequestException; +import lombok.NonNull; +import lombok.RequiredArgsConstructor; +import org.apache.commons.lang3.EnumUtils; +import ru.vyarus.dropwizard.guice.module.yaml.bind.Config; + +import java.util.function.Function; + +@Singleton +@RequiredArgsConstructor(onConstructor_ = @Inject) +public class LlmProviderFactory { + public static final String ERROR_MODEL_NOT_SUPPORTED = "model not supported %s"; + + private final @NonNull @Config LlmProviderClientConfig llmProviderClientConfig; + private final @NonNull LlmProviderApiKeyService llmProviderApiKeyService; + + public LlmProviderService getService(@NonNull String workspaceId, @NonNull String model) { + var llmProvider = getLlmProvider(model); + var apiKey = EncryptionUtils.decrypt(getEncryptedApiKey(workspaceId, llmProvider)); + + return switch (llmProvider) { + case LlmProvider.OPEN_AI -> new LlmProviderOpenAi(llmProviderClientConfig, apiKey); + case LlmProvider.ANTHROPIC -> new LlmProviderAnthropic(llmProviderClientConfig, apiKey); + }; + } + + /** + * The agreed requirement is to resolve the LLM provider and its API key based on the model. + */ + private LlmProvider getLlmProvider(String model) { + if (isModelBelongToProvider(model, ChatCompletionModel.class, ChatCompletionModel::toString)) { + return LlmProvider.OPEN_AI; + } + if (isModelBelongToProvider(model, AnthropicChatModelName.class, AnthropicChatModelName::toString)) { + return LlmProvider.ANTHROPIC; + } + + throw new BadRequestException(ERROR_MODEL_NOT_SUPPORTED.formatted(model)); + } + + /** + * Finding API keys isn't paginated at the moment. + * Even in the future, the number of supported LLM providers per workspace is going to be very low. 
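A hedged usage sketch of the resolution rule above: membership of the model id in each provider's enum picks the provider. The workspace id is invented, and the model ids are enum values from openai4j/langchain4j at the time of writing; exact enum contents depend on the library versions in use:

```java
import com.comet.opik.domain.llmproviders.LlmProviderFactory;
import com.comet.opik.domain.llmproviders.LlmProviderService;

class ProviderResolutionSketch { // hypothetical, for illustration only

    void demo(LlmProviderFactory factory) {
        // "gpt-4o" matches a ChatCompletionModel value -> OpenAI-backed service.
        LlmProviderService openAi = factory.getService("demo-workspace", "gpt-4o");

        // "claude-3-5-sonnet-20240620" matches an AnthropicChatModelName value -> Anthropic.
        LlmProviderService anthropic = factory.getService("demo-workspace",
                "claude-3-5-sonnet-20240620");

        // A model id found in neither enum fails fast with 400 "model not supported ...".
        factory.getService("demo-workspace", "unknown-model"); // throws BadRequestException
    }
}
```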
+ */ + private String getEncryptedApiKey(String workspaceId, LlmProvider llmProvider) { + return llmProviderApiKeyService.find(workspaceId).content().stream() + .filter(providerApiKey -> llmProvider.equals(providerApiKey.provider())) + .findFirst() + .orElseThrow(() -> new BadRequestException("API key not configured for LLM provider '%s'".formatted( + llmProvider.getValue()))) + .apiKey(); + } + + private static > boolean isModelBelongToProvider( + String model, Class enumClass, Function valueGetter) { + return EnumUtils.getEnumList(enumClass).stream() + .map(valueGetter) + .anyMatch(model::equals); + } +} diff --git a/apps/opik-backend/src/main/java/com/comet/opik/domain/llmproviders/LlmProviderOpenAi.java b/apps/opik-backend/src/main/java/com/comet/opik/domain/llmproviders/LlmProviderOpenAi.java new file mode 100644 index 0000000000..f8b85c4103 --- /dev/null +++ b/apps/opik-backend/src/main/java/com/comet/opik/domain/llmproviders/LlmProviderOpenAi.java @@ -0,0 +1,90 @@ +package com.comet.opik.domain.llmproviders; + +import com.comet.opik.infrastructure.LlmProviderClientConfig; +import dev.ai4j.openai4j.OpenAiClient; +import dev.ai4j.openai4j.OpenAiHttpException; +import dev.ai4j.openai4j.chat.ChatCompletionRequest; +import dev.ai4j.openai4j.chat.ChatCompletionResponse; +import io.dropwizard.jersey.errors.ErrorMessage; +import jakarta.inject.Inject; +import lombok.NonNull; +import lombok.extern.slf4j.Slf4j; +import org.apache.commons.lang3.StringUtils; + +import java.util.Optional; +import java.util.function.Consumer; + +@Slf4j +class LlmProviderOpenAi implements LlmProviderService { + private final LlmProviderClientConfig llmProviderClientConfig; + private final OpenAiClient openAiClient; + + @Inject + public LlmProviderOpenAi(LlmProviderClientConfig llmProviderClientConfig, String apiKey) { + this.llmProviderClientConfig = llmProviderClientConfig; + this.openAiClient = newOpenAiClient(apiKey); + } + + @Override + public ChatCompletionResponse generate(@NonNull ChatCompletionRequest request, @NonNull String workspaceId) { + return openAiClient.chatCompletion(request).execute(); + } + + @Override + public void generateStream( + @NonNull ChatCompletionRequest request, + @NonNull String workspaceId, + @NonNull Consumer handleMessage, + @NonNull Runnable handleClose, + @NonNull Consumer handleError) { + openAiClient.chatCompletion(request) + .onPartialResponse(handleMessage) + .onComplete(handleClose) + .onError(handleError) + .execute(); + } + + @Override + public void validateRequest(@NonNull ChatCompletionRequest request) { + + } + + @Override + public @NonNull Optional getLlmProviderError(Throwable runtimeException) { + if (runtimeException instanceof OpenAiHttpException openAiHttpException) { + return Optional.of(new ErrorMessage(openAiHttpException.code(), openAiHttpException.getMessage())); + } + + return Optional.empty(); + } + + /** + * At the moment, openai4j client and also langchain4j wrappers, don't support dynamic API keys. That can imply + * an important performance penalty for next phases. The following options should be evaluated: + * - Cache clients, but can be unsafe. + * - Find and evaluate other clients. + * - Implement our own client. 
+ * TODO as part of : OPIK-522 + */ + private OpenAiClient newOpenAiClient(String apiKey) { + var openAiClientBuilder = OpenAiClient.builder(); + Optional.ofNullable(llmProviderClientConfig.getOpenAiClient()) + .map(LlmProviderClientConfig.OpenAiClientConfig::url) + .ifPresent(baseUrl -> { + if (StringUtils.isNotBlank(baseUrl)) { + openAiClientBuilder.baseUrl(baseUrl); + } + }); + Optional.ofNullable(llmProviderClientConfig.getCallTimeout()) + .ifPresent(callTimeout -> openAiClientBuilder.callTimeout(callTimeout.toJavaDuration())); + Optional.ofNullable(llmProviderClientConfig.getConnectTimeout()) + .ifPresent(connectTimeout -> openAiClientBuilder.connectTimeout(connectTimeout.toJavaDuration())); + Optional.ofNullable(llmProviderClientConfig.getReadTimeout()) + .ifPresent(readTimeout -> openAiClientBuilder.readTimeout(readTimeout.toJavaDuration())); + Optional.ofNullable(llmProviderClientConfig.getWriteTimeout()) + .ifPresent(writeTimeout -> openAiClientBuilder.writeTimeout(writeTimeout.toJavaDuration())); + return openAiClientBuilder + .openAiApiKey(apiKey) + .build(); + } +} diff --git a/apps/opik-backend/src/main/java/com/comet/opik/domain/llmproviders/LlmProviderService.java b/apps/opik-backend/src/main/java/com/comet/opik/domain/llmproviders/LlmProviderService.java new file mode 100644 index 0000000000..851acf2034 --- /dev/null +++ b/apps/opik-backend/src/main/java/com/comet/opik/domain/llmproviders/LlmProviderService.java @@ -0,0 +1,26 @@ +package com.comet.opik.domain.llmproviders; + +import dev.ai4j.openai4j.chat.ChatCompletionRequest; +import dev.ai4j.openai4j.chat.ChatCompletionResponse; +import io.dropwizard.jersey.errors.ErrorMessage; +import lombok.NonNull; + +import java.util.Optional; +import java.util.function.Consumer; + +public interface LlmProviderService { + ChatCompletionResponse generate( + @NonNull ChatCompletionRequest request, + @NonNull String workspaceId); + + void generateStream( + @NonNull ChatCompletionRequest request, + @NonNull String workspaceId, + @NonNull Consumer handleMessage, + @NonNull Runnable handleClose, + @NonNull Consumer handleError); + + void validateRequest(@NonNull ChatCompletionRequest request); + + @NonNull Optional getLlmProviderError(Throwable runtimeException); +} diff --git a/apps/opik-backend/src/main/java/com/comet/opik/infrastructure/EncryptionUtils.java b/apps/opik-backend/src/main/java/com/comet/opik/infrastructure/EncryptionUtils.java index 243323f31d..1beb757994 100644 --- a/apps/opik-backend/src/main/java/com/comet/opik/infrastructure/EncryptionUtils.java +++ b/apps/opik-backend/src/main/java/com/comet/opik/infrastructure/EncryptionUtils.java @@ -1,6 +1,7 @@ package com.comet.opik.infrastructure; import lombok.NonNull; +import lombok.experimental.UtilityClass; import org.apache.commons.lang3.StringUtils; import javax.crypto.BadPaddingException; @@ -15,6 +16,7 @@ import java.security.NoSuchAlgorithmException; import java.util.Base64; +@UtilityClass public class EncryptionUtils { private static final String ALGO = "AES"; diff --git a/apps/opik-backend/src/main/java/com/comet/opik/infrastructure/LlmProviderClientConfig.java b/apps/opik-backend/src/main/java/com/comet/opik/infrastructure/LlmProviderClientConfig.java index 584fce52b2..ef2d204c7f 100644 --- a/apps/opik-backend/src/main/java/com/comet/opik/infrastructure/LlmProviderClientConfig.java +++ b/apps/opik-backend/src/main/java/com/comet/opik/infrastructure/LlmProviderClientConfig.java @@ -15,6 +15,9 @@ public class LlmProviderClientConfig { public record 
OpenAiClientConfig(String url) { } + public record AnthropicClientConfig(String url, String version) { + } + @Min(1) private Integer maxAttempts; @@ -37,6 +40,13 @@ public record OpenAiClientConfig(String url) { @MinDuration(value = 1, unit = TimeUnit.MILLISECONDS) private Duration writeTimeout; + private Boolean logRequests; + + private Boolean logResponses; + @Valid private LlmProviderClientConfig.OpenAiClientConfig openAiClient; + + @Valid + private LlmProviderClientConfig.AnthropicClientConfig anthropicClient; } diff --git a/apps/opik-backend/src/main/java/com/comet/opik/infrastructure/redis/RedissonLockService.java b/apps/opik-backend/src/main/java/com/comet/opik/infrastructure/redis/RedissonLockService.java index 1486674c6b..616bc9b6cc 100644 --- a/apps/opik-backend/src/main/java/com/comet/opik/infrastructure/redis/RedissonLockService.java +++ b/apps/opik-backend/src/main/java/com/comet/opik/infrastructure/redis/RedissonLockService.java @@ -34,7 +34,7 @@ public void release() { semaphore.release(locked) .subscribe( __ -> log.debug("Lock '{}' released successfully", locked), - __ -> log.warn("Lock '{}' already released", locked)); + __ -> log.warn("Lock already released or doesn't exist")); } } diff --git a/apps/opik-backend/src/main/java/com/comet/opik/utils/ChunkedOutputHandlers.java b/apps/opik-backend/src/main/java/com/comet/opik/utils/ChunkedOutputHandlers.java new file mode 100644 index 0000000000..e246719b60 --- /dev/null +++ b/apps/opik-backend/src/main/java/com/comet/opik/utils/ChunkedOutputHandlers.java @@ -0,0 +1,45 @@ +package com.comet.opik.utils; + +import io.dropwizard.jersey.errors.ErrorMessage; +import lombok.NonNull; +import lombok.RequiredArgsConstructor; +import lombok.extern.slf4j.Slf4j; +import org.glassfish.jersey.server.ChunkedOutput; + +import java.io.IOException; +import java.io.UncheckedIOException; + +@Slf4j +@RequiredArgsConstructor +public class ChunkedOutputHandlers { + private final @NonNull ChunkedOutput chunkedOutput; + + public void handleMessage(@NonNull Object item) { + if (chunkedOutput.isClosed()) { + log.warn("Output stream is already closed"); + return; + } + try { + chunkedOutput.write(JsonUtils.writeValueAsString(item)); + } catch (IOException ioException) { + throw new UncheckedIOException(ioException); + } + } + + public void handleClose() { + try { + chunkedOutput.close(); + } catch (IOException ioException) { + log.error("Failed to close output stream", ioException); + } + } + + public void handleError(@NonNull ErrorMessage errorMessage) { + try { + handleMessage(errorMessage); + } catch (UncheckedIOException uncheckedIOException) { + log.error("Failed to stream error message to client", uncheckedIOException); + } + handleClose(); + } +} diff --git a/apps/opik-backend/src/test/java/com/comet/opik/api/resources/utils/AssertionUtils.java b/apps/opik-backend/src/test/java/com/comet/opik/api/resources/utils/AssertionUtils.java index 36335401c1..888e47ea1d 100644 --- a/apps/opik-backend/src/test/java/com/comet/opik/api/resources/utils/AssertionUtils.java +++ b/apps/opik-backend/src/test/java/com/comet/opik/api/resources/utils/AssertionUtils.java @@ -1,5 +1,6 @@ package com.comet.opik.api.resources.utils; +import com.comet.opik.api.ExperimentItem; import com.comet.opik.api.FeedbackScoreNames; import lombok.experimental.UtilityClass; @@ -18,4 +19,28 @@ public static void assertFeedbackScoreNames(FeedbackScoreNames actual, List { + assertThat(feedbackScore.createdBy()).isEqualTo(user); + assertThat(feedbackScore.lastUpdatedBy()).isEqualTo(user); 
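+ // These audit fields are populated because the feedback-score queries in this diff now also + // select created_at, last_updated_at, created_by and last_updated_by; assert they are set, + // then null them below so value-based equality checks stay stable across runs.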
diff --git a/apps/opik-backend/src/test/java/com/comet/opik/api/resources/utils/AssertionUtils.java b/apps/opik-backend/src/test/java/com/comet/opik/api/resources/utils/AssertionUtils.java index 36335401c1..888e47ea1d 100644 --- a/apps/opik-backend/src/test/java/com/comet/opik/api/resources/utils/AssertionUtils.java +++ b/apps/opik-backend/src/test/java/com/comet/opik/api/resources/utils/AssertionUtils.java @@ -1,5 +1,6 @@ package com.comet.opik.api.resources.utils; +import com.comet.opik.api.ExperimentItem; import com.comet.opik.api.FeedbackScoreNames; import lombok.experimental.UtilityClass; @@ -18,4 +19,28 @@ public static void assertFeedbackScoreNames(FeedbackScoreNames actual, List<String> expectedNames) { + public static ExperimentItem assertFeedbackScoresIgnoredFieldsAndSetThemToNull( + ExperimentItem actualExperimentItem, String user) { + assertThat(actualExperimentItem.feedbackScores()).allSatisfy(feedbackScore -> { + assertThat(feedbackScore.createdBy()).isEqualTo(user); + assertThat(feedbackScore.lastUpdatedBy()).isEqualTo(user); + assertThat(feedbackScore.createdAt()).isNotNull(); + assertThat(feedbackScore.lastUpdatedAt()).isNotNull(); + }); + + return actualExperimentItem.toBuilder() + .feedbackScores(actualExperimentItem.feedbackScores().stream().map( + feedbackScore -> feedbackScore.toBuilder() + .createdBy(null) + .lastUpdatedBy(null) + .createdAt(null) + .lastUpdatedAt(null) + .build()) + .toList()) + .build(); + } } diff --git a/apps/opik-backend/src/test/java/com/comet/opik/api/resources/utils/TestDropwizardAppExtensionUtils.java b/apps/opik-backend/src/test/java/com/comet/opik/api/resources/utils/TestDropwizardAppExtensionUtils.java index cc2008a5ef..85112e762b 100644 --- a/apps/opik-backend/src/test/java/com/comet/opik/api/resources/utils/TestDropwizardAppExtensionUtils.java +++ b/apps/opik-backend/src/test/java/com/comet/opik/api/resources/utils/TestDropwizardAppExtensionUtils.java @@ -2,7 +2,6 @@ import com.comet.opik.OpikApplication; import com.comet.opik.infrastructure.DatabaseAnalyticsFactory; -import com.comet.opik.infrastructure.auth.TestHttpClientUtils; import com.comet.opik.infrastructure.events.EventModule; import com.github.tomakehurst.wiremock.junit5.WireMockRuntimeInfo; import com.google.common.eventbus.EventBus; diff --git a/apps/opik-backend/src/test/java/com/comet/opik/infrastructure/auth/TestHttpClientUtils.java b/apps/opik-backend/src/test/java/com/comet/opik/api/resources/utils/TestHttpClientUtils.java similarity index 92% rename from apps/opik-backend/src/test/java/com/comet/opik/infrastructure/auth/TestHttpClientUtils.java rename to apps/opik-backend/src/test/java/com/comet/opik/api/resources/utils/TestHttpClientUtils.java index 8bb9a5238d..76c57c1fc3 100644 --- a/apps/opik-backend/src/test/java/com/comet/opik/infrastructure/auth/TestHttpClientUtils.java +++ b/apps/opik-backend/src/test/java/com/comet/opik/api/resources/utils/TestHttpClientUtils.java @@ -1,5 +1,6 @@ -package com.comet.opik.infrastructure.auth; +package com.comet.opik.api.resources.utils; +import com.comet.opik.infrastructure.auth.AuthModule; import jakarta.ws.rs.client.Client; import jakarta.ws.rs.client.ClientBuilder; import lombok.experimental.UtilityClass; diff --git a/apps/opik-backend/src/test/java/com/comet/opik/api/resources/utils/resources/AutomationRuleEvaluatorResourceClient.java b/apps/opik-backend/src/test/java/com/comet/opik/api/resources/utils/resources/AutomationRuleEvaluatorResourceClient.java new file mode 100644 index 0000000000..185dc2399f --- /dev/null +++ b/apps/opik-backend/src/test/java/com/comet/opik/api/resources/utils/resources/AutomationRuleEvaluatorResourceClient.java @@ -0,0 +1,71 @@ +package com.comet.opik.api.resources.utils.resources; + +import com.comet.opik.api.AutomationRuleEvaluator; +import com.comet.opik.api.AutomationRuleEvaluatorUpdate; +import com.comet.opik.api.resources.utils.TestHttpClientUtils; +import com.comet.opik.api.resources.utils.TestUtils; +import jakarta.ws.rs.HttpMethod; +import jakarta.ws.rs.client.Entity; +import jakarta.ws.rs.core.HttpHeaders; +import jakarta.ws.rs.core.MediaType; +import jakarta.ws.rs.core.Response; +import lombok.RequiredArgsConstructor; +import org.apache.http.HttpStatus; +import ru.vyarus.dropwizard.guice.test.ClientSupport; + +import java.util.UUID; + +import static com.comet.opik.infrastructure.auth.RequestContext.WORKSPACE_HEADER; +import static org.assertj.core.api.Assertions.assertThat; + +@RequiredArgsConstructor +public class AutomationRuleEvaluatorResourceClient { + + private static final String RESOURCE_PATH =
"%s/v1/private/automations/projects/%s/evaluators/"; + + private final ClientSupport client; + private final String baseURI; + + public UUID createEvaluator(AutomationRuleEvaluator> evaluator, String workspaceName, String apiKey) { + try (var actualResponse = createEvaluator(evaluator, workspaceName, apiKey, HttpStatus.SC_CREATED)) { + assertThat(actualResponse.getStatusInfo().getStatusCode()).isEqualTo(201); + + return TestUtils.getIdFromLocation(actualResponse.getLocation()); + } + } + + public Response createEvaluator(AutomationRuleEvaluator> evaluator, String workspaceName, String apiKey, + int expectedStatus) { + var actualResponse = client.target(RESOURCE_PATH.formatted(baseURI, evaluator.getProjectId())) + .request() + .accept(MediaType.APPLICATION_JSON_TYPE) + .header(HttpHeaders.AUTHORIZATION, apiKey) + .header(WORKSPACE_HEADER, workspaceName) + .post(Entity.json(evaluator)); + + assertThat(actualResponse.getStatusInfo().getStatusCode()).isEqualTo(expectedStatus); + + return actualResponse; + } + + public void updateEvaluator(UUID evaluatorId, UUID projectId, String workspaceName, + AutomationRuleEvaluatorUpdate updatedEvaluator, String apiKey, boolean isAuthorized) { + try (var actualResponse = client.target(RESOURCE_PATH.formatted(baseURI, projectId)) + .path(evaluatorId.toString()) + .request() + .header(HttpHeaders.AUTHORIZATION, apiKey) + .accept(MediaType.APPLICATION_JSON_TYPE) + .header(WORKSPACE_HEADER, workspaceName) + .method(HttpMethod.PATCH, Entity.json(updatedEvaluator))) { + + if (isAuthorized) { + assertThat(actualResponse.getStatusInfo().getStatusCode()).isEqualTo(204); + assertThat(actualResponse.hasEntity()).isFalse(); + } else { + assertThat(actualResponse.getStatusInfo().getStatusCode()).isEqualTo(401); + assertThat(actualResponse.readEntity(io.dropwizard.jersey.errors.ErrorMessage.class)) + .isEqualTo(TestHttpClientUtils.UNAUTHORIZED_RESPONSE); + } + } + } +} diff --git a/apps/opik-backend/src/test/java/com/comet/opik/api/resources/utils/resources/ChatCompletionsClient.java b/apps/opik-backend/src/test/java/com/comet/opik/api/resources/utils/resources/ChatCompletionsClient.java index c1f435ea64..01a4c4cb79 100644 --- a/apps/opik-backend/src/test/java/com/comet/opik/api/resources/utils/resources/ChatCompletionsClient.java +++ b/apps/opik-backend/src/test/java/com/comet/opik/api/resources/utils/resources/ChatCompletionsClient.java @@ -91,8 +91,8 @@ public List createAndStream( } } - public List createAndStreamError( - String apiKey, String workspaceName, ChatCompletionRequest request) { + public ErrorMessage createAndStreamError( + String apiKey, String workspaceName, ChatCompletionRequest request, int expectedStatusCode) { assertThat(request.stream()).isTrue(); try (var response = clientSupport.target(getCreateUrl()) @@ -102,9 +102,9 @@ public List createAndStreamError( .header(RequestContext.WORKSPACE_HEADER, workspaceName) .post(Entity.json(request))) { - assertThat(response.getStatusInfo().getStatusCode()).isEqualTo(HttpStatus.SC_OK); + assertThat(response.getStatusInfo().getStatusCode()).isEqualTo(expectedStatusCode); - return getStreamedError(response); + return response.readEntity(ErrorMessage.class); } } diff --git a/apps/opik-backend/src/test/java/com/comet/opik/api/resources/v1/events/LlmAsJudgeMessageRenderTest.java b/apps/opik-backend/src/test/java/com/comet/opik/api/resources/v1/events/LlmAsJudgeMessageRenderTest.java new file mode 100644 index 0000000000..6afcd44fe7 --- /dev/null +++ 
b/apps/opik-backend/src/test/java/com/comet/opik/api/resources/v1/events/LlmAsJudgeMessageRenderTest.java @@ -0,0 +1,142 @@ +package com.comet.opik.api.resources.v1.events; + +import com.comet.opik.api.AutomationRuleEvaluatorLlmAsJudge; +import com.comet.opik.api.Trace; +import com.comet.opik.domain.AutomationRuleEvaluatorService; +import com.comet.opik.domain.ChatCompletionService; +import com.comet.opik.domain.FeedbackScoreService; +import com.fasterxml.jackson.core.JsonProcessingException; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.google.common.eventbus.EventBus; +import dev.ai4j.openai4j.chat.Role; +import dev.ai4j.openai4j.chat.UserMessage; +import lombok.extern.slf4j.Slf4j; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.DisplayName; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.TestInstance; +import org.mockito.Mock; +import org.mockito.Mockito; +import org.mockito.MockitoAnnotations; + +import static org.assertj.core.api.Assertions.assertThat; + +@Slf4j +@TestInstance(TestInstance.Lifecycle.PER_CLASS) +@DisplayName("LlmAsJudge Message Render") +public class LlmAsJudgeMessageRenderTest { + @Mock + AutomationRuleEvaluatorService ruleEvaluatorService; + @Mock + ChatCompletionService aiProxyService; + @Mock + FeedbackScoreService feedbackScoreService; + @Mock + EventBus eventBus; + OnlineScoringEventListener onlineScoringEventListener; + + AutomationRuleEvaluatorLlmAsJudge.LlmAsJudgeCode evaluatorCode; + Trace trace; + + String messageToTest = "Summary: {{summary}}\\nInstruction: {{instruction}}\\n\\n"; + String testEvaluator = """ + { + "model": { + "name": "gpt-4o", + "temperature": 0.3 + }, + "messages": [ + { + "role": "USER", + "content": "%s" + }, + { + "role": "SYSTEM", + "content": "You're a helpful AI, be cordial." 
+ } + ], + "variables": { + "summary": "input.questions.question1", + "instruction": "output.output", + "nonUsed": "input.questions.question2", + "toFail1": "metadata.nonexistent.path" + }, + "schema": [ + { "name": "Relevance", "type": "INTEGER", "description": "Relevance of the summary" }, + { "name": "Conciseness", "type": "DOUBLE", "description": "Conciseness of the summary" }, + { "name": "Technical Accuracy", "type": "BOOLEAN", "description": "Technical accuracy of the summary" } + ] + } + """ + .formatted(messageToTest).trim(); + String summaryStr = "What was the approach to experimenting with different data mixtures?"; + String outputStr = "The study employed a systematic approach to experiment with varying data mixtures by manipulating the proportions and sources of datasets used for model training."; + String input = """ + { + "questions": { + "question1": "%s", + "question2": "Whatever, we wont use it anyway" + }, + "pdf_url": "https://arxiv.org/pdf/2406.04744", + "title": "CRAG -- Comprehensive RAG Benchmark" + } + """.formatted(summaryStr).trim(); + String output = """ + { + "output": "%s" + } + """.formatted(outputStr).trim(); + + @BeforeEach + void setUp() throws JsonProcessingException { + MockitoAnnotations.openMocks(this); + Mockito.doNothing().when(eventBus).register(Mockito.any()); + onlineScoringEventListener = new OnlineScoringEventListener(eventBus, ruleEvaluatorService, + aiProxyService, feedbackScoreService); + + var mapper = new ObjectMapper(); + evaluatorCode = mapper.readValue(testEvaluator, AutomationRuleEvaluatorLlmAsJudge.LlmAsJudgeCode.class); + trace = Trace.builder().input(mapper.readTree(input)).output(mapper.readTree(output)).build(); + } + + @Test + @DisplayName("parse variable mapping into a usable one") + void when__parseRuleVariables() { + var variableMappings = LlmAsJudgeMessageRender.variableMapping(evaluatorCode.variables()); + + assertThat(variableMappings).hasSize(4); + + var varSummary = variableMappings.get(0); + assertThat(varSummary.traceSection()).isEqualTo(LlmAsJudgeMessageRender.TraceSection.INPUT); + assertThat(varSummary.jsonPath()).isEqualTo("$.questions.question1"); + + var varInstruction = variableMappings.get(1); + assertThat(varInstruction.traceSection()).isEqualTo(LlmAsJudgeMessageRender.TraceSection.OUTPUT); + assertThat(varInstruction.jsonPath()).isEqualTo("$.output"); + + var varNonUsed = variableMappings.get(2); + assertThat(varNonUsed.traceSection()).isEqualTo(LlmAsJudgeMessageRender.TraceSection.INPUT); + assertThat(varNonUsed.jsonPath()).isEqualTo("$.questions.question2"); + + var varToFail = variableMappings.get(3); + assertThat(varToFail.traceSection()).isEqualTo(LlmAsJudgeMessageRender.TraceSection.METADATA); + assertThat(varToFail.jsonPath()).isEqualTo("$.nonexistent.path"); + } + + @Test + @DisplayName("render message templates with a trace") + void when__renderTemplate() { + var renderedMessages = LlmAsJudgeMessageRender.renderMessages(trace, evaluatorCode); + + assertThat(renderedMessages).hasSize(2); + + var userMessage = (UserMessage) renderedMessages.get(0); + assertThat(userMessage.role()).isEqualTo(Role.USER); + assertThat(userMessage.content().toString()).contains(summaryStr); + assertThat(userMessage.content().toString()).contains(outputStr); + + var systemMessage = renderedMessages.get(1); + assertThat(systemMessage.role()).isEqualTo(Role.SYSTEM); + } + +} diff --git a/apps/opik-backend/src/test/java/com/comet/opik/api/resources/v1/events/OnlineScoringEventListenerTest.java 
b/apps/opik-backend/src/test/java/com/comet/opik/api/resources/v1/events/OnlineScoringEventListenerTest.java new file mode 100644 index 0000000000..ad416cc812 --- /dev/null +++ b/apps/opik-backend/src/test/java/com/comet/opik/api/resources/v1/events/OnlineScoringEventListenerTest.java @@ -0,0 +1,139 @@ +package com.comet.opik.api.resources.v1.events; + +import com.comet.opik.api.AutomationRuleEvaluatorLlmAsJudge; +import com.comet.opik.api.Trace; +import com.comet.opik.api.resources.utils.AuthTestUtils; +import com.comet.opik.api.resources.utils.ClickHouseContainerUtils; +import com.comet.opik.api.resources.utils.ClientSupportUtils; +import com.comet.opik.api.resources.utils.MigrationUtils; +import com.comet.opik.api.resources.utils.MySQLContainerUtils; +import com.comet.opik.api.resources.utils.RedisContainerUtils; +import com.comet.opik.api.resources.utils.TestDropwizardAppExtensionUtils; +import com.comet.opik.api.resources.utils.WireMockUtils; +import com.comet.opik.api.resources.utils.resources.AutomationRuleEvaluatorResourceClient; +import com.comet.opik.api.resources.utils.resources.ProjectResourceClient; +import com.comet.opik.api.resources.utils.resources.TraceResourceClient; +import com.comet.opik.infrastructure.DatabaseAnalyticsFactory; +import com.comet.opik.podam.PodamFactoryUtils; +import com.redis.testcontainers.RedisContainer; +import lombok.extern.slf4j.Slf4j; +import org.jdbi.v3.core.Jdbi; +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.DisplayName; +import org.junit.jupiter.api.Nested; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.TestInstance; +import org.junit.jupiter.api.extension.RegisterExtension; +import org.testcontainers.clickhouse.ClickHouseContainer; +import org.testcontainers.containers.MySQLContainer; +import org.testcontainers.lifecycle.Startables; +import ru.vyarus.dropwizard.guice.test.ClientSupport; +import ru.vyarus.dropwizard.guice.test.jupiter.ext.TestDropwizardAppExtension; +import uk.co.jemos.podam.api.PodamFactory; + +import java.util.UUID; + +import static com.comet.opik.api.resources.utils.ClickHouseContainerUtils.DATABASE_NAME; +import static com.comet.opik.api.resources.utils.MigrationUtils.CLICKHOUSE_CHANGELOG_FILE; + +@Slf4j +@TestInstance(TestInstance.Lifecycle.PER_CLASS) +@DisplayName("Online Scoring Event Listener") +public class OnlineScoringEventListenerTest { + + private static final String API_KEY = UUID.randomUUID().toString(); + private static final String USER = UUID.randomUUID().toString(); + private static final String WORKSPACE_ID = UUID.randomUUID().toString(); + private static final String TEST_WORKSPACE = UUID.randomUUID().toString(); + + private static final RedisContainer REDIS = RedisContainerUtils.newRedisContainer(); + + private static final MySQLContainer<?> MYSQL = MySQLContainerUtils.newMySQLContainer(); + + private static final ClickHouseContainer CLICKHOUSE = ClickHouseContainerUtils.newClickHouseContainer(); + + @RegisterExtension + private static final TestDropwizardAppExtension app; + + private static final WireMockUtils.WireMockRuntime wireMock; + + static { + Startables.deepStart(MYSQL, CLICKHOUSE, REDIS).join(); + + wireMock = WireMockUtils.startWireMock(); + + DatabaseAnalyticsFactory databaseAnalyticsFactory = ClickHouseContainerUtils + .newDatabaseAnalyticsFactory(CLICKHOUSE, DATABASE_NAME); + + app = TestDropwizardAppExtensionUtils.newTestDropwizardAppExtension( + MYSQL.getJdbcUrl(), databaseAnalyticsFactory,
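+                // the containers and the WireMock auth stub started above are wired into the application under test here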
wireMock.runtimeInfo(), REDIS.getRedisURI()); + } + + private final PodamFactory factory = PodamFactoryUtils.newPodamFactory(); + + private String baseURI; + private ClientSupport client; + private TraceResourceClient traceResourceClient; + private AutomationRuleEvaluatorResourceClient evaluatorResourceClient; + private ProjectResourceClient projectResourceClient; + + @BeforeAll + void setUpAll(ClientSupport client, Jdbi jdbi) throws Exception { + + MigrationUtils.runDbMigration(jdbi, MySQLContainerUtils.migrationParameters()); + + try (var connection = CLICKHOUSE.createConnection("")) { + MigrationUtils.runDbMigration(connection, CLICKHOUSE_CHANGELOG_FILE, + ClickHouseContainerUtils.migrationParameters()); + } + + this.baseURI = "http://localhost:%d".formatted(client.getPort()); + this.client = client; + + ClientSupportUtils.config(client); + + mockTargetWorkspace(API_KEY, TEST_WORKSPACE, WORKSPACE_ID); + + this.traceResourceClient = new TraceResourceClient(this.client, baseURI); + this.evaluatorResourceClient = new AutomationRuleEvaluatorResourceClient(this.client, baseURI); + this.projectResourceClient = new ProjectResourceClient(this.client, baseURI, factory); + } + + @AfterAll + void tearDownAll() { + wireMock.server().stop(); + } + + private static void mockTargetWorkspace(String apiKey, String workspaceName, String workspaceId) { + AuthTestUtils.mockTargetWorkspace(wireMock.server(), apiKey, workspaceName, workspaceId, USER); + } + + @Nested + @TestInstance(TestInstance.Lifecycle.PER_CLASS) + class TracesCreatedEvent { + + @Test + @DisplayName("when a new trace is created, OnlineScoring should see it within an event") + void when__newTracesIsCreated__onlineScoringShouldKnow() { + var projectName = factory.manufacturePojo(String.class); + var projectId = projectResourceClient.createProject(projectName, API_KEY, TEST_WORKSPACE); + + var evaluator = factory.manufacturePojo(AutomationRuleEvaluatorLlmAsJudge.class) + .toBuilder().projectId(projectId).build(); + + evaluatorResourceClient.createEvaluator(evaluator, TEST_WORKSPACE, API_KEY); + + var trace = factory.manufacturePojo(Trace.class).toBuilder() + .projectName(projectName) + .build(); + + UUID traceId = traceResourceClient.createTrace(trace, API_KEY, TEST_WORKSPACE); + + Trace returnTrace = traceResourceClient.getById(traceId, TEST_WORKSPACE, API_KEY); + + // TODO: run the actual test checking whether we have a FeedbackScore by the end. Probably requires mocking the AI Proxy.
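+            // A possible shape for that pending assertion - a sketch only, assuming the LLM provider
+            // call is mocked and scoring happens asynchronously (Awaitility is an assumed test dependency):
+            //     Awaitility.await().untilAsserted(() -> {
+            //         var scored = traceResourceClient.getById(traceId, TEST_WORKSPACE, API_KEY);
+            //         assertThat(scored.feedbackScores()).isNotEmpty();
+            //     });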
+ } + } + +} diff --git a/apps/opik-backend/src/test/java/com/comet/opik/api/resources/v1/internal/UsageResourceTest.java b/apps/opik-backend/src/test/java/com/comet/opik/api/resources/v1/internal/UsageResourceTest.java index 77405234e2..7ecdf97bf8 100644 --- a/apps/opik-backend/src/test/java/com/comet/opik/api/resources/v1/internal/UsageResourceTest.java +++ b/apps/opik-backend/src/test/java/com/comet/opik/api/resources/v1/internal/UsageResourceTest.java @@ -55,7 +55,7 @@ @DisplayName("Usage Resource Test") @TestInstance(TestInstance.Lifecycle.PER_CLASS) @Slf4j -public class UsageResourceTest { +class UsageResourceTest { public static final String USAGE_RESOURCE_URL_TEMPLATE = "%s/v1/internal/usage"; public static final String TRACE_RESOURCE_URL_TEMPLATE = "%s/v1/private/traces"; private static final String EXPERIMENT_RESOURCE_URL_TEMPLATE = "%s/v1/private/experiments"; diff --git a/apps/opik-backend/src/test/java/com/comet/opik/api/resources/v1/priv/AutomationRuleEvaluatorsResourceTest.java b/apps/opik-backend/src/test/java/com/comet/opik/api/resources/v1/priv/AutomationRuleEvaluatorsResourceTest.java index 66399bb4f6..be4d87cd8c 100644 --- a/apps/opik-backend/src/test/java/com/comet/opik/api/resources/v1/priv/AutomationRuleEvaluatorsResourceTest.java +++ b/apps/opik-backend/src/test/java/com/comet/opik/api/resources/v1/priv/AutomationRuleEvaluatorsResourceTest.java @@ -1,17 +1,17 @@ package com.comet.opik.api.resources.v1.priv; import com.comet.opik.api.AutomationRuleEvaluator; +import com.comet.opik.api.AutomationRuleEvaluatorLlmAsJudge; import com.comet.opik.api.AutomationRuleEvaluatorUpdate; import com.comet.opik.api.BatchDelete; -import com.comet.opik.api.FeedbackDefinition; import com.comet.opik.api.resources.utils.AuthTestUtils; import com.comet.opik.api.resources.utils.ClientSupportUtils; import com.comet.opik.api.resources.utils.MigrationUtils; import com.comet.opik.api.resources.utils.MySQLContainerUtils; import com.comet.opik.api.resources.utils.RedisContainerUtils; import com.comet.opik.api.resources.utils.TestDropwizardAppExtensionUtils; -import com.comet.opik.api.resources.utils.TestUtils; import com.comet.opik.api.resources.utils.WireMockUtils; +import com.comet.opik.api.resources.utils.resources.AutomationRuleEvaluatorResourceClient; import com.comet.opik.podam.PodamFactoryUtils; import com.github.tomakehurst.wiremock.client.WireMock; import com.redis.testcontainers.RedisContainer; @@ -44,9 +44,9 @@ import java.util.stream.IntStream; import java.util.stream.Stream; +import static com.comet.opik.api.resources.utils.TestHttpClientUtils.UNAUTHORIZED_RESPONSE; import static com.comet.opik.infrastructure.auth.RequestContext.SESSION_COOKIE; import static com.comet.opik.infrastructure.auth.RequestContext.WORKSPACE_HEADER; -import static com.comet.opik.infrastructure.auth.TestHttpClientUtils.UNAUTHORIZED_RESPONSE; import static com.github.tomakehurst.wiremock.client.WireMock.equalTo; import static com.github.tomakehurst.wiremock.client.WireMock.matching; import static com.github.tomakehurst.wiremock.client.WireMock.matchingJsonPath; @@ -89,6 +89,7 @@ class AutomationRuleEvaluatorsResourceTest { private String baseURI; private ClientSupport client; + private AutomationRuleEvaluatorResourceClient evaluatorsResourceClient; @BeforeAll void setUpAll(ClientSupport client, Jdbi jdbi) { @@ -101,6 +102,8 @@ void setUpAll(ClientSupport client, Jdbi jdbi) { ClientSupportUtils.config(client); mockTargetWorkspace(API_KEY, TEST_WORKSPACE, WORKSPACE_ID); + + this.evaluatorsResourceClient = new 
AutomationRuleEvaluatorResourceClient(this.client, baseURI); } private static void mockTargetWorkspace(String apiKey, String workspaceName, String workspaceId) { @@ -112,20 +115,6 @@ void tearDownAll() { wireMock.server().stop(); } - private UUID create(AutomationRuleEvaluator<?> evaluator, String apiKey, String workspaceName) { - try (var actualResponse = client.target(URL_TEMPLATE.formatted(baseURI, evaluator.getProjectId())) - .request() - .accept(MediaType.APPLICATION_JSON_TYPE) - .header(HttpHeaders.AUTHORIZATION, apiKey) - .header(WORKSPACE_HEADER, workspaceName) - .post(Entity.json(evaluator))) { - - assertThat(actualResponse.getStatusInfo().getStatusCode()).isEqualTo(201); - - return TestUtils.getIdFromLocation(actualResponse.getLocation()); - } - } - @Nested @DisplayName("Api Key Authentication:") @TestInstance(TestInstance.Lifecycle.PER_CLASS) @@ -161,9 +150,10 @@ void setUp() { @MethodSource("credentials") @DisplayName("create evaluator definition: when api key is present, then return proper response") void createAutomationRuleEvaluator__whenApiKeyIsPresent__thenReturnProperResponse(String apiKey, - boolean isAuthorized) { + boolean isAuthorized) { - var ruleEvaluator = factory.manufacturePojo(AutomationRuleEvaluator.AutomationRuleEvaluatorLlmAsJudge.class).toBuilder().id(null).build(); + var ruleEvaluator = factory.manufacturePojo(AutomationRuleEvaluatorLlmAsJudge.class).toBuilder().id(null) + .build(); mockTargetWorkspace(okApikey, TEST_WORKSPACE, WORKSPACE_ID); @@ -189,7 +179,7 @@ void createAutomationRuleEvaluator__whenApiKeyIsPresent__thenReturnProperRespons @MethodSource("credentials") @DisplayName("get evaluators by project id: when api key is present, then return proper response") void getProjectAutomationRuleEvaluators__whenApiKeyIsPresent__thenReturnProperResponse(String apiKey, - boolean isAuthorized) { + boolean isAuthorized) { final String workspaceName = UUID.randomUUID().toString(); final String workspaceId = UUID.randomUUID().toString(); @@ -200,9 +190,10 @@ void getProjectAutomationRuleEvaluators__whenApiKeyIsPresent__thenReturnProperRe int samplesToCreate = 15; IntStream.range(0, samplesToCreate).forEach(i -> { - var evaluator = factory.manufacturePojo(AutomationRuleEvaluator.AutomationRuleEvaluatorLlmAsJudge.class) + var evaluator = factory.manufacturePojo(AutomationRuleEvaluatorLlmAsJudge.class) .toBuilder().id(null).projectId(projectId).build(); - create(evaluator, okApikey, workspaceName); + + evaluatorsResourceClient.createEvaluator(evaluator, workspaceName, okApikey); }); try (var actualResponse = client.target(URL_TEMPLATE.formatted(baseURI, projectId)) @@ -242,13 +233,13 @@ void find__whenSearchingByName__thenReturnEvaluators() { mockTargetWorkspace(apiKey, workspaceName, workspaceId); var name = "Evaluator Name: " + UUID.randomUUID(); - var evaluator = factory.manufacturePojo(AutomationRuleEvaluator.AutomationRuleEvaluatorLlmAsJudge.class) + var evaluator = factory.manufacturePojo(AutomationRuleEvaluatorLlmAsJudge.class) .toBuilder().id(null) .projectId(projectId) .name(name) .build(); - create(evaluator, apiKey, workspaceName); + evaluatorsResourceClient.createEvaluator(evaluator, workspaceName, apiKey); var actualResponse = client.target(URL_TEMPLATE.formatted(baseURI, projectId)) .queryParam("name", "aluator") @@ -264,7 +255,7 @@ void find__whenSearchingByName__thenReturnEvaluators() { assertThat(actualEntity.size()).isEqualTo(1); assertThat(actualEntity.total()).isEqualTo(1); - List<AutomationRuleEvaluator> content = actualEntity.content(); + List<AutomationRuleEvaluator<?>> content =
actualEntity.content(); assertThat(content.stream().map(AutomationRuleEvaluator::getName).toList()).contains(name); } @@ -272,16 +263,17 @@ void find__whenSearchingByName__thenReturnEvaluators() { @MethodSource("credentials") @DisplayName("get evaluator by id: when api key is present, then return proper response") void getAutomationRuleEvaluatorById__whenApiKeyIsPresent__thenReturnProperResponse(String apiKey, - boolean isAuthorized) { + boolean isAuthorized) { - var evaluator = factory.manufacturePojo(AutomationRuleEvaluator.AutomationRuleEvaluatorLlmAsJudge.class).toBuilder().id(null).build(); + var evaluator = factory.manufacturePojo(AutomationRuleEvaluatorLlmAsJudge.class) + .toBuilder().id(null).build(); String workspaceName = UUID.randomUUID().toString(); String workspaceId = UUID.randomUUID().toString(); mockTargetWorkspace(okApikey, workspaceName, workspaceId); - UUID id = create(evaluator, okApikey, workspaceName); + UUID id = evaluatorsResourceClient.createEvaluator(evaluator, workspaceName, okApikey); try (var actualResponse = client.target(URL_TEMPLATE.formatted(baseURI, evaluator.getProjectId())) .path(id.toString()) @@ -309,52 +301,39 @@ void getAutomationRuleEvaluatorById__whenApiKeyIsPresent__thenReturnProperRespon @MethodSource("credentials") @DisplayName("update evaluator: when api key is present, then return proper response") void updateAutomationRuleEvaluator__whenApiKeyIsPresent__thenReturnProperResponse(String apiKey, - boolean isAuthorized) { + boolean isAuthorized) { - var evaluator = factory.manufacturePojo(AutomationRuleEvaluator.AutomationRuleEvaluatorLlmAsJudge.class).toBuilder().id(null).build(); + var evaluator = factory.manufacturePojo(AutomationRuleEvaluatorLlmAsJudge.class).toBuilder().id(null) + .build(); String workspaceName = UUID.randomUUID().toString(); String workspaceId = UUID.randomUUID().toString(); mockTargetWorkspace(okApikey, workspaceName, workspaceId); - UUID id = create(evaluator, okApikey, workspaceName); + UUID id = evaluatorsResourceClient.createEvaluator(evaluator, workspaceName, okApikey); var updatedEvaluator = factory.manufacturePojo(AutomationRuleEvaluatorUpdate.class); - try (var actualResponse = client.target(URL_TEMPLATE.formatted(baseURI, evaluator.getProjectId())) - .path(id.toString()) - .request() - .header(HttpHeaders.AUTHORIZATION, apiKey) - .accept(MediaType.APPLICATION_JSON_TYPE) - .header(WORKSPACE_HEADER, workspaceName) - .method(HttpMethod.PATCH, Entity.json(updatedEvaluator))) { - - if (isAuthorized) { - assertThat(actualResponse.getStatusInfo().getStatusCode()).isEqualTo(204); - assertThat(actualResponse.hasEntity()).isFalse(); - } else { - assertThat(actualResponse.getStatusInfo().getStatusCode()).isEqualTo(401); - assertThat(actualResponse.readEntity(io.dropwizard.jersey.errors.ErrorMessage.class)) - .isEqualTo(UNAUTHORIZED_RESPONSE); - } - } + evaluatorsResourceClient.updateEvaluator(id, evaluator.getProjectId(), workspaceName, updatedEvaluator, + apiKey, isAuthorized); } @ParameterizedTest @MethodSource("credentials") @DisplayName("delete evaluator by id: when api key is present, then return proper response") void deleteAutomationRuleEvaluator__whenApiKeyIsPresent__thenReturnProperResponse(String apiKey, - boolean isAuthorized) { + boolean isAuthorized) { - var evaluator = factory.manufacturePojo(AutomationRuleEvaluator.AutomationRuleEvaluatorLlmAsJudge.class).toBuilder().id(null).build();; + var evaluator = factory.manufacturePojo(AutomationRuleEvaluatorLlmAsJudge.class).toBuilder().id(null) + .build();; String 
workspaceName = UUID.randomUUID().toString(); String workspaceId = UUID.randomUUID().toString(); mockTargetWorkspace(okApikey, workspaceName, workspaceId); - UUID id = create(evaluator, okApikey, workspaceName); + UUID id = evaluatorsResourceClient.createEvaluator(evaluator, workspaceName, okApikey); var deleteMethod = BatchDelete.builder().ids(Collections.singleton(id)).build(); @@ -381,21 +360,24 @@ void deleteAutomationRuleEvaluator__whenApiKeyIsPresent__thenReturnProperRespons @MethodSource("credentials") @DisplayName("batch delete evaluators by id: when api key is present, then return proper response") void deleteProjectAutomationRuleEvaluators__whenApiKeyIsPresent__thenReturnProperResponse(String apiKey, - boolean isAuthorized) { + boolean isAuthorized) { var projectId = UUID.randomUUID(); var workspaceName = UUID.randomUUID().toString(); var workspaceId = UUID.randomUUID().toString(); mockTargetWorkspace(okApikey, workspaceName, workspaceId); - var evaluator1 = factory.manufacturePojo(AutomationRuleEvaluator.AutomationRuleEvaluatorLlmAsJudge.class).toBuilder().projectId(projectId).build(); - var evalId1 = create(evaluator1, okApikey, workspaceName); + var evaluator1 = factory.manufacturePojo(AutomationRuleEvaluatorLlmAsJudge.class) + .toBuilder().id(null).projectId(projectId).build(); + var evalId1 = evaluatorsResourceClient.createEvaluator(evaluator1, workspaceName, okApikey); - var evaluator2 = factory.manufacturePojo(AutomationRuleEvaluator.AutomationRuleEvaluatorLlmAsJudge.class).toBuilder().projectId(projectId).build(); - var evalId2 = create(evaluator2, okApikey, workspaceName); + var evaluator2 = factory.manufacturePojo(AutomationRuleEvaluatorLlmAsJudge.class) + .toBuilder().id(null).projectId(projectId).build(); + var evalId2 = evaluatorsResourceClient.createEvaluator(evaluator2, workspaceName, okApikey); - var evaluator3 = factory.manufacturePojo(AutomationRuleEvaluator.AutomationRuleEvaluatorLlmAsJudge.class).toBuilder().projectId(projectId).build(); - create(evaluator3, okApikey, workspaceName); + var evaluator3 = factory.manufacturePojo(AutomationRuleEvaluatorLlmAsJudge.class) + .toBuilder().id(null).projectId(projectId).build(); + evaluatorsResourceClient.createEvaluator(evaluator3, workspaceName, okApikey); var evalIds1and2 = Set.of(evalId1, evalId2); var deleteMethod = BatchDelete.builder().ids(evalIds1and2).build(); @@ -479,10 +461,11 @@ void setUp() { @MethodSource("credentials") @DisplayName("create evaluator definition: when api key is present, then return proper response") void createAutomationRuleEvaluator__whenSessionTokenIsPresent__thenReturnProperResponse(String sessionToken, - boolean isAuthorized, - String workspaceName) { + boolean isAuthorized, + String workspaceName) { - var ruleEvaluator = factory.manufacturePojo(AutomationRuleEvaluator.AutomationRuleEvaluatorLlmAsJudge.class).toBuilder().id(null).build(); + var ruleEvaluator = factory.manufacturePojo(AutomationRuleEvaluatorLlmAsJudge.class).toBuilder().id(null) + .build(); try (var actualResponse = client.target(URL_TEMPLATE.formatted(baseURI, ruleEvaluator.getProjectId())) .request() @@ -505,9 +488,10 @@ void createAutomationRuleEvaluator__whenSessionTokenIsPresent__thenReturnProperR @ParameterizedTest @MethodSource("credentials") @DisplayName("get evaluators by project id: when api key is present, then return proper response") - void getProjectAutomationRuleEvaluators__whenSessionTokenIsPresent__thenReturnProperResponse(String sessionToken, - boolean isAuthorized, - String workspaceName) { + void 
getProjectAutomationRuleEvaluators__whenSessionTokenIsPresent__thenReturnProperResponse( + String sessionToken, + boolean isAuthorized, + String workspaceName) { var projectId = UUID.randomUUID(); @@ -521,11 +505,10 @@ void getProjectAutomationRuleEvaluators__whenSessionTokenIsPresent__thenReturnPr .withRequestBody(matchingJsonPath("$.workspaceName", equalTo(newWorkspaceName))) .willReturn(okJson(AuthTestUtils.newWorkspaceAuthResponse(USER, newWorkspaceId)))); - IntStream.range(0, samplesToCreate).forEach(i -> { - var evaluator = factory.manufacturePojo(AutomationRuleEvaluator.AutomationRuleEvaluatorLlmAsJudge.class) + var evaluator = factory.manufacturePojo(AutomationRuleEvaluatorLlmAsJudge.class) .toBuilder().id(null).projectId(projectId).build(); - create(evaluator, API_KEY, TEST_WORKSPACE); + evaluatorsResourceClient.createEvaluator(evaluator, TEST_WORKSPACE, API_KEY); }); try (var actualResponse = client.target(URL_TEMPLATE.formatted(baseURI, projectId)) @@ -557,12 +540,13 @@ void getProjectAutomationRuleEvaluators__whenSessionTokenIsPresent__thenReturnPr @MethodSource("credentials") @DisplayName("get evaluator by id: when api key is present, then return proper response") void getAutomationRuleEvaluatorById__whenSessionTokenIsPresent__thenReturnProperResponse(String sessionToken, - boolean isAuthorized, - String workspaceName) { + boolean isAuthorized, + String workspaceName) { - var evaluator = factory.manufacturePojo(AutomationRuleEvaluator.AutomationRuleEvaluatorLlmAsJudge.class).toBuilder().id(null).build(); + var evaluator = factory.manufacturePojo(AutomationRuleEvaluatorLlmAsJudge.class).toBuilder().id(null) + .build(); - UUID id = create(evaluator, API_KEY, TEST_WORKSPACE); + UUID id = evaluatorsResourceClient.createEvaluator(evaluator, TEST_WORKSPACE, API_KEY); try (var actualResponse = client.target(URL_TEMPLATE.formatted(baseURI, evaluator.getProjectId())) .path(id.toString()) @@ -590,12 +574,13 @@ void getAutomationRuleEvaluatorById__whenSessionTokenIsPresent__thenReturnProper @MethodSource("credentials") @DisplayName("update evaluator: when api key is present, then return proper response") void updateAutomationRuleEvaluator__whenSessionTokenIsPresent__thenReturnProperResponse(String sessionToken, - boolean isAuthorized, - String workspaceName) { + boolean isAuthorized, + String workspaceName) { - var evaluator = factory.manufacturePojo(AutomationRuleEvaluator.AutomationRuleEvaluatorLlmAsJudge.class).toBuilder().id(null).build(); + var evaluator = factory.manufacturePojo(AutomationRuleEvaluatorLlmAsJudge.class).toBuilder().id(null) + .build(); - UUID id = create(evaluator, API_KEY, TEST_WORKSPACE); + UUID id = evaluatorsResourceClient.createEvaluator(evaluator, TEST_WORKSPACE, API_KEY); var updatedEvaluator = factory.manufacturePojo(AutomationRuleEvaluatorUpdate.class); @@ -622,12 +607,13 @@ void updateAutomationRuleEvaluator__whenSessionTokenIsPresent__thenReturnProperR @MethodSource("credentials") @DisplayName("delete evaluator by id: when api key is present, then return proper response") void deleteAutomationRuleEvaluator__whenSessionTokenIsPresent__thenReturnProperResponse(String sessionToken, - boolean isAuthorized, - String workspaceName) { + boolean isAuthorized, + String workspaceName) { - var evaluator = factory.manufacturePojo(AutomationRuleEvaluator.AutomationRuleEvaluatorLlmAsJudge.class).toBuilder().id(null).build();; + var evaluator = factory.manufacturePojo(AutomationRuleEvaluatorLlmAsJudge.class).toBuilder().id(null) + .build();; - var id = create(evaluator, 
API_KEY, TEST_WORKSPACE); + var id = evaluatorsResourceClient.createEvaluator(evaluator, TEST_WORKSPACE, API_KEY); var deleteMethod = BatchDelete.builder().ids(Collections.singleton(id)).build(); try (var actualResponse = client.target(URL_TEMPLATE.formatted(baseURI, evaluator.getProjectId())) @@ -652,20 +638,24 @@ void deleteAutomationRuleEvaluator__whenSessionTokenIsPresent__thenReturnProperR @ParameterizedTest @MethodSource("credentials") @DisplayName("batch delete evaluators by id: when api key is present, then return proper response") - void deleteProjectAutomationRuleEvaluators__whenSessionTokenIsPresent__thenReturnProperResponse(String sessionToken, - boolean isAuthorized, - String workspaceName) { + void deleteProjectAutomationRuleEvaluators__whenSessionTokenIsPresent__thenReturnProperResponse( + String sessionToken, + boolean isAuthorized, + String workspaceName) { var projectId = UUID.randomUUID(); - var evaluator1 = factory.manufacturePojo(AutomationRuleEvaluator.AutomationRuleEvaluatorLlmAsJudge.class).toBuilder().projectId(projectId).build(); - var evalId1 = create(evaluator1, API_KEY, TEST_WORKSPACE); + var evaluator1 = factory.manufacturePojo(AutomationRuleEvaluatorLlmAsJudge.class).toBuilder() + .projectId(projectId).build(); + var evalId1 = evaluatorsResourceClient.createEvaluator(evaluator1, TEST_WORKSPACE, API_KEY); - var evaluator2 = factory.manufacturePojo(AutomationRuleEvaluator.AutomationRuleEvaluatorLlmAsJudge.class).toBuilder().projectId(projectId).build(); - var evalId2 = create(evaluator2, API_KEY, TEST_WORKSPACE); + var evaluator2 = factory.manufacturePojo(AutomationRuleEvaluatorLlmAsJudge.class).toBuilder() + .projectId(projectId).build(); + var evalId2 = evaluatorsResourceClient.createEvaluator(evaluator2, TEST_WORKSPACE, API_KEY); - var evaluator3 = factory.manufacturePojo(AutomationRuleEvaluator.AutomationRuleEvaluatorLlmAsJudge.class).toBuilder().projectId(projectId).build(); - create(evaluator3, API_KEY, TEST_WORKSPACE); + var evaluator3 = factory.manufacturePojo(AutomationRuleEvaluatorLlmAsJudge.class).toBuilder() + .projectId(projectId).build(); + evaluatorsResourceClient.createEvaluator(evaluator3, TEST_WORKSPACE, API_KEY); var evalIds1and2 = Set.of(evalId1, evalId2); var deleteMethod = BatchDelete.builder().ids(evalIds1and2).build(); diff --git a/apps/opik-backend/src/test/java/com/comet/opik/api/resources/v1/priv/ChatCompletionsResourceTest.java b/apps/opik-backend/src/test/java/com/comet/opik/api/resources/v1/priv/ChatCompletionsResourceTest.java index 17cfdf6cc8..10b64ac5a3 100644 --- a/apps/opik-backend/src/test/java/com/comet/opik/api/resources/v1/priv/ChatCompletionsResourceTest.java +++ b/apps/opik-backend/src/test/java/com/comet/opik/api/resources/v1/priv/ChatCompletionsResourceTest.java @@ -16,13 +16,17 @@ import dev.ai4j.openai4j.chat.ChatCompletionModel; import dev.ai4j.openai4j.chat.ChatCompletionRequest; import dev.ai4j.openai4j.chat.Role; +import dev.langchain4j.model.anthropic.AnthropicChatModelName; import org.apache.http.HttpStatus; import org.jdbi.v3.core.Jdbi; import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.Nested; -import org.junit.jupiter.api.Test; import org.junit.jupiter.api.TestInstance; import org.junit.jupiter.api.extension.RegisterExtension; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.MethodSource; +import org.junit.jupiter.params.provider.ValueSource; import 
org.testcontainers.clickhouse.ClickHouseContainer; import org.testcontainers.containers.MySQLContainer; import org.testcontainers.lifecycle.Startables; @@ -33,11 +37,23 @@ import java.sql.SQLException; import java.util.UUID; +import java.util.stream.Stream; +import static com.comet.opik.domain.ChatCompletionService.ERROR_EMPTY_MESSAGES; +import static com.comet.opik.domain.ChatCompletionService.ERROR_NO_COMPLETION_TOKENS; +import static com.comet.opik.domain.llmproviders.LlmProviderFactory.ERROR_MODEL_NOT_SUPPORTED; import static org.assertj.core.api.Assertions.assertThat; - +import static org.assertj.core.api.Assumptions.assumeThat; +import static org.junit.jupiter.api.Named.named; +import static org.junit.jupiter.params.provider.Arguments.arguments; + +/// For some providers, the tests need to make actual LLM calls. For that to work, the relevant API keys must be set in +/// the environment prior to running the tests. If an environment variable for a specific provider is not set, the +/// relevant test will be skipped for that provider. +/// - **OpenAI**: runs against a demo server and doesn't require an API key +/// - **Anthropic**: set `ANTHROPIC_API_KEY` to your Anthropic API key @TestInstance(TestInstance.Lifecycle.PER_CLASS) -public class ChatCompletionsResourceTest { +class ChatCompletionsResourceTest { private static final String API_KEY = RandomStringUtils.randomAlphanumeric(25); private static final String WORKSPACE_ID = UUID.randomUUID().toString(); @@ -97,18 +113,20 @@ private static void mockTargetWorkspace(String workspaceName, String workspaceId @Nested @TestInstance(TestInstance.Lifecycle.PER_CLASS) class Create { + @ParameterizedTest + @MethodSource("testModelsProvider") + void create(String expectedModel, LlmProvider llmProvider, String llmProviderApiKey) { + assumeThat(llmProviderApiKey).isNotEmpty(); - @Test - void create() { var workspaceName = RandomStringUtils.randomAlphanumeric(20); var workspaceId = UUID.randomUUID().toString(); mockTargetWorkspace(workspaceName, workspaceId); - createLlmProviderApiKey(workspaceName); - var expectedModel = ChatCompletionModel.GPT_4O_MINI.toString(); + createLlmProviderApiKey(workspaceName, llmProvider, llmProviderApiKey); var request = podamFactory.manufacturePojo(ChatCompletionRequest.Builder.class) .stream(false) .model(expectedModel) + .maxCompletionTokens(100) .addUserMessage("Say 'Hello World'") .build(); @@ -121,12 +139,12 @@ void create() { }); } - @Test - void createReturnsBadRequestWhenNoLlmProviderApiKey() { + @ParameterizedTest + @MethodSource("testModelsProvider") + void createReturnsBadRequestWhenNoLlmProviderApiKey(String expectedModel, LlmProvider llmProvider) { var workspaceName = RandomStringUtils.randomAlphanumeric(20); var workspaceId = UUID.randomUUID().toString(); mockTargetWorkspace(workspaceName, workspaceId); - var expectedModel = ChatCompletionModel.GPT_4O_MINI.toString(); var request = podamFactory.manufacturePojo(ChatCompletionRequest.Builder.class) .stream(false) @@ -139,11 +157,12 @@ void createReturnsBadRequestWhenNoLlmProviderApiKey() { assertThat(errorMessage.getCode()).isEqualTo(HttpStatus.SC_BAD_REQUEST); assertThat(errorMessage.getMessage()) .containsIgnoringCase("API key not configured for LLM provider '%s'" - .formatted(LlmProvider.OPEN_AI.getValue())); + .formatted(llmProvider.getValue())); } - @Test - void createReturnsBadRequestWhenNoModel() { + @ParameterizedTest + @ValueSource(strings = {"", "non-existing-model"}) + void createReturnsBadRequestWhenModelIsInvalid(String model) { var
workspaceName = RandomStringUtils.randomAlphanumeric(20); var workspaceId = UUID.randomUUID().toString(); mockTargetWorkspace(workspaceName, workspaceId); @@ -151,6 +170,7 @@ void createReturnsBadRequestWhenNoModel() { var request = podamFactory.manufacturePojo(ChatCompletionRequest.Builder.class) .stream(false) + .model(model) .addUserMessage("Say 'Hello World'") .build(); @@ -158,20 +178,23 @@ void createReturnsBadRequestWhenNoModel() { assertThat(errorMessage.getCode()).isEqualTo(HttpStatus.SC_BAD_REQUEST); assertThat(errorMessage.getMessage()) - .containsIgnoringCase("Only %s model is available".formatted(ChatCompletionModel.GPT_4O_MINI)); + .containsIgnoringCase(ERROR_MODEL_NOT_SUPPORTED.formatted(model)); } - @Test - void createAndStreamResponse() { + @ParameterizedTest + @MethodSource("testModelsProvider") + void createAndStreamResponse(String expectedModel, LlmProvider llmProvider, String llmProviderApiKey) { + assumeThat(llmProviderApiKey).isNotEmpty(); + var workspaceName = RandomStringUtils.randomAlphanumeric(20); var workspaceId = UUID.randomUUID().toString(); mockTargetWorkspace(workspaceName, workspaceId); - createLlmProviderApiKey(workspaceName); - var expectedModel = ChatCompletionModel.GPT_4O_MINI.toString(); + createLlmProviderApiKey(workspaceName, llmProvider, llmProviderApiKey); var request = podamFactory.manufacturePojo(ChatCompletionRequest.Builder.class) .stream(true) .model(expectedModel) + .maxCompletionTokens(100) .addUserMessage("Say 'Hello World'") .build(); @@ -192,8 +215,17 @@ void createAndStreamResponse() { .isEqualTo(Role.ASSISTANT)); } - @Test - void createAndStreamResponseReturnsBadRequestWhenNoModel() { + private static Stream<Arguments> testModelsProvider() { + return Stream.of( + arguments(ChatCompletionModel.GPT_4O_MINI.toString(), LlmProvider.OPEN_AI, + UUID.randomUUID().toString()), + arguments(AnthropicChatModelName.CLAUDE_3_5_SONNET_20240620.toString(), LlmProvider.ANTHROPIC, + System.getenv("ANTHROPIC_API_KEY"))); + } + + @ParameterizedTest + @ValueSource(strings = {"", "non-existing-model"}) + void createAndStreamResponseReturnsBadRequestWhenNoModel(String model) { var workspaceName = RandomStringUtils.randomAlphanumeric(20); var workspaceId = UUID.randomUUID().toString(); mockTargetWorkspace(workspaceName, workspaceId); @@ -201,22 +233,56 @@ void createAndStreamResponseReturnsBadRequestWhenNoModel() { var request = podamFactory.manufacturePojo(ChatCompletionRequest.Builder.class) .stream(true) + .model(model) .addUserMessage("Say 'Hello World'") .build(); - var errorMessages = chatCompletionsClient.createAndStreamError(API_KEY, workspaceName, request); + var errorMessage = chatCompletionsClient.createAndStreamError(API_KEY, workspaceName, request, + HttpStatus.SC_BAD_REQUEST); - assertThat(errorMessages).hasSize(1); - assertThat(errorMessages.getFirst().getCode()).isEqualTo(HttpStatus.SC_BAD_REQUEST); - assertThat(errorMessages.getFirst().getMessage()) - .containsIgnoringCase("Only %s model is available".formatted(ChatCompletionModel.GPT_4O_MINI)); + assertThat(errorMessage.getCode()).isEqualTo(HttpStatus.SC_BAD_REQUEST); + assertThat(errorMessage.getMessage()) + .containsIgnoringCase(ERROR_MODEL_NOT_SUPPORTED.formatted(model)); } + } + + @ParameterizedTest + @MethodSource + void createAnthropicValidateMandatoryFields(ChatCompletionRequest request, String expectedErrorMessage) { + String llmProviderApiKey = UUID.randomUUID().toString(); + + var workspaceName = RandomStringUtils.randomAlphanumeric(20); + var workspaceId = UUID.randomUUID().toString(); +
mockTargetWorkspace(workspaceName, workspaceId); + createLlmProviderApiKey(workspaceName, LlmProvider.ANTHROPIC, llmProviderApiKey); + + var errorMessage = chatCompletionsClient.create(API_KEY, workspaceName, request, HttpStatus.SC_BAD_REQUEST); + assertThat(errorMessage.getCode()).isEqualTo(HttpStatus.SC_BAD_REQUEST); + assertThat(errorMessage.getMessage()) + .containsIgnoringCase(expectedErrorMessage); + } + + private Stream<Arguments> createAnthropicValidateMandatoryFields() { + return Stream.of( + arguments(named("no messages", podamFactory.manufacturePojo(ChatCompletionRequest.Builder.class) + .stream(false) + .model(AnthropicChatModelName.CLAUDE_3_5_SONNET_20240620.toString()) + .maxCompletionTokens(100).build()), + ERROR_EMPTY_MESSAGES), + arguments(named("no max tokens", podamFactory.manufacturePojo(ChatCompletionRequest.Builder.class) + .stream(false) + .model(AnthropicChatModelName.CLAUDE_3_5_SONNET_20240620.toString()) + .addUserMessage("Say 'Hello World'").build()), + ERROR_NO_COMPLETION_TOKENS)); } private void createLlmProviderApiKey(String workspaceName) { - var llmProviderApiKey = UUID.randomUUID().toString(); + createLlmProviderApiKey(workspaceName, LlmProvider.OPEN_AI, UUID.randomUUID().toString()); + } + + private void createLlmProviderApiKey(String workspaceName, LlmProvider llmProvider, String llmProviderApiKey) { llmProviderApiKeyResourceClient.createProviderApiKey( - llmProviderApiKey, LlmProvider.OPEN_AI, API_KEY, workspaceName, 201); + llmProviderApiKey, llmProvider, API_KEY, workspaceName, 201); } } diff --git a/apps/opik-backend/src/test/java/com/comet/opik/api/resources/v1/priv/DatasetsResourceTest.java b/apps/opik-backend/src/test/java/com/comet/opik/api/resources/v1/priv/DatasetsResourceTest.java index 702c1af0c6..84746b490a 100644 --- a/apps/opik-backend/src/test/java/com/comet/opik/api/resources/v1/priv/DatasetsResourceTest.java +++ b/apps/opik-backend/src/test/java/com/comet/opik/api/resources/v1/priv/DatasetsResourceTest.java @@ -117,12 +117,13 @@ import static com.comet.opik.api.Column.ColumnType; import static com.comet.opik.api.DatasetItem.DatasetItemPage; +import static com.comet.opik.api.resources.utils.AssertionUtils.assertFeedbackScoresIgnoredFieldsAndSetThemToNull; import static com.comet.opik.api.resources.utils.ClickHouseContainerUtils.DATABASE_NAME; import static com.comet.opik.api.resources.utils.MigrationUtils.CLICKHOUSE_CHANGELOG_FILE; +import static com.comet.opik.api.resources.utils.TestHttpClientUtils.UNAUTHORIZED_RESPONSE; import static com.comet.opik.api.resources.utils.WireMockUtils.WireMockRuntime; import static com.comet.opik.infrastructure.auth.RequestContext.SESSION_COOKIE; import static com.comet.opik.infrastructure.auth.RequestContext.WORKSPACE_HEADER; -import static com.comet.opik.infrastructure.auth.TestHttpClientUtils.UNAUTHORIZED_RESPONSE; import static com.comet.opik.infrastructure.db.TransactionTemplateAsync.WRITE; import static com.github.tomakehurst.wiremock.client.WireMock.equalTo; import static com.github.tomakehurst.wiremock.client.WireMock.matching; @@ -3831,7 +3832,8 @@ void find() { .containsExactlyElementsOf(expectedExperimentItems); for (var j = 0; j < actualDatasetItem.experimentItems().size(); j++) { - var actualExperimentItem = actualDatasetItem.experimentItems().get(j); + var actualExperimentItem = assertFeedbackScoresIgnoredFieldsAndSetThemToNull( + actualDatasetItem.experimentItems().get(j), USER); var expectedExperimentItem = expectedExperimentItems.get(j); assertThat(actualExperimentItem.feedbackScores()) @@ -3901,7
+3903,7 @@ void findWithImageTruncation(JsonNode original, JsonNode expected, boolean trunc .traceId(traces.get(i).id()) .datasetItemId(datasetItemBatchWithImage.items().get(i).id()).build()) .toList(); - PodamFactoryUtils.manufacturePojoList(factory, ExperimentItem.class); + var experimentItemsBatch = ExperimentItemsBatch.builder() .experimentItems(Set.copyOf(experimentItems)).build(); @@ -4840,7 +4842,8 @@ private void assertDatasetItemExperiments(DatasetItemPage actualPage, List getStreamedItems(Response response) { return items; } - } diff --git a/apps/opik-backend/src/test/java/com/comet/opik/api/resources/v1/priv/ExperimentsResourceTest.java b/apps/opik-backend/src/test/java/com/comet/opik/api/resources/v1/priv/ExperimentsResourceTest.java index ec5e4275b6..f80211fb0b 100644 --- a/apps/opik-backend/src/test/java/com/comet/opik/api/resources/v1/priv/ExperimentsResourceTest.java +++ b/apps/opik-backend/src/test/java/com/comet/opik/api/resources/v1/priv/ExperimentsResourceTest.java @@ -75,6 +75,7 @@ import org.testcontainers.clickhouse.ClickHouseContainer; import org.testcontainers.containers.MySQLContainer; import org.testcontainers.lifecycle.Startables; +import org.testcontainers.shaded.org.apache.commons.lang3.tuple.Pair; import ru.vyarus.dropwizard.guice.test.ClientSupport; import ru.vyarus.dropwizard.guice.test.jupiter.ext.TestDropwizardAppExtension; import uk.co.jemos.podam.api.PodamFactory; @@ -97,13 +98,14 @@ import java.util.stream.Stream; import static com.comet.opik.api.resources.utils.AssertionUtils.assertFeedbackScoreNames; +import static com.comet.opik.api.resources.utils.AssertionUtils.assertFeedbackScoresIgnoredFieldsAndSetThemToNull; import static com.comet.opik.api.resources.utils.ClickHouseContainerUtils.DATABASE_NAME; import static com.comet.opik.api.resources.utils.MigrationUtils.CLICKHOUSE_CHANGELOG_FILE; import static com.comet.opik.api.resources.utils.TestDropwizardAppExtensionUtils.AppContextConfig; import static com.comet.opik.api.resources.utils.TestDropwizardAppExtensionUtils.newTestDropwizardAppExtension; +import static com.comet.opik.api.resources.utils.TestHttpClientUtils.UNAUTHORIZED_RESPONSE; import static com.comet.opik.infrastructure.auth.RequestContext.SESSION_COOKIE; import static com.comet.opik.infrastructure.auth.RequestContext.WORKSPACE_HEADER; -import static com.comet.opik.infrastructure.auth.TestHttpClientUtils.UNAUTHORIZED_RESPONSE; import static com.comet.opik.utils.ValidationUtils.SCALE; import static com.github.tomakehurst.wiremock.client.WireMock.equalTo; import static com.github.tomakehurst.wiremock.client.WireMock.matching; @@ -2372,6 +2374,23 @@ void streamByExperimentName() { var workspaceId = UUID.randomUUID().toString(); mockTargetWorkspace(apiKey, workspaceName, workspaceId); + // Creating two traces with input, output and scores + var traceWithScores1 = createTraceWithScores(apiKey, workspaceName); + var traceWithScores2 = createTraceWithScores(apiKey, workspaceName); + + var traceIdToScoresMap = Stream + .concat(traceWithScores1.getRight().stream(), traceWithScores2.getRight().stream()) + .collect(groupingBy(FeedbackScoreBatchItem::id)); + + // When storing the scores in batch, adding some more unrelated random ones + var feedbackScoreBatch = podamFactory.manufacturePojo(FeedbackScoreBatch.class); + feedbackScoreBatch = feedbackScoreBatch.toBuilder() + .scores(Stream.concat(feedbackScoreBatch.scores().stream(), + traceIdToScoresMap.values().stream().flatMap(List::stream)).toList()) + .build(); + + 
createScoreAndAssert(feedbackScoreBatch, apiKey, workspaceName); + var experiment1 = generateExperiment(); createAndAssert(experiment1, apiKey, workspaceName); @@ -2387,13 +2406,15 @@ void streamByExperimentName() { createAndAssert(experiment3, apiKey, workspaceName); var experimentItems1 = PodamFactoryUtils.manufacturePojoList(podamFactory, ExperimentItem.class).stream() - .map(experimentItem -> experimentItem.toBuilder().experimentId(experiment1.id()).build()) + .map(experimentItem -> experimentItem.toBuilder().experimentId(experiment1.id()) + .traceId(traceWithScores1.getLeft().id()).build()) .collect(toUnmodifiableSet()); var createRequest1 = ExperimentItemsBatch.builder().experimentItems(experimentItems1).build(); createAndAssert(createRequest1, apiKey, workspaceName); var experimentItems2 = PodamFactoryUtils.manufacturePojoList(podamFactory, ExperimentItem.class).stream() - .map(experimentItem -> experimentItem.toBuilder().experimentId(experiment2.id()).build()) + .map(experimentItem -> experimentItem.toBuilder().experimentId(experiment2.id()) + .traceId(traceWithScores2.getLeft().id()).build()) .collect(toUnmodifiableSet()); var createRequest2 = ExperimentItemsBatch.builder().experimentItems(experimentItems2).build(); createAndAssert(createRequest2, apiKey, workspaceName); @@ -2412,8 +2433,22 @@ void streamByExperimentName() { .toList() .reversed(); - var expectedExperimentItems1 = expectedExperimentItems.subList(0, limit); - var expectedExperimentItems2 = expectedExperimentItems.subList(limit, size); + var expectedExperimentItems1 = expectedExperimentItems.subList(0, limit).stream() + .map(experimentItem -> experimentItem.toBuilder() + .input(traceWithScores2.getLeft().input()) + .output(traceWithScores2.getLeft().output()) + .feedbackScores(traceWithScores2.getRight().stream() + .map(FeedbackScoreMapper.INSTANCE::toFeedbackScore).toList()) + .build()) + .toList(); + var expectedExperimentItems2 = expectedExperimentItems.subList(limit, size).stream() + .map(experimentItem -> experimentItem.toBuilder() + .input(traceWithScores1.getLeft().input()) + .output(traceWithScores1.getLeft().output()) + .feedbackScores(traceWithScores1.getRight().stream() + .map(FeedbackScoreMapper.INSTANCE::toFeedbackScore).toList()) + .build()) + .toList(); var streamRequest1 = ExperimentItemStreamRequest.builder() .experimentName(experiment2.name()) @@ -2465,6 +2500,21 @@ void streamByExperimentNameWithoutExperiments() { var unexpectedExperimentItems1 = List.of(); streamAndAssert(streamRequest, expectedExperimentItems, unexpectedExperimentItems1, apiKey, workspaceName); } + + private Pair<Trace, List<FeedbackScoreBatchItem>> createTraceWithScores(String apiKey, String workspaceName) { + var trace = podamFactory.manufacturePojo(Trace.class); + traceResourceClient.createTrace(trace, apiKey, workspaceName); + + // Creating 5 scores for each of the two traces above + return Pair.of(trace, PodamFactoryUtils.manufacturePojoList(podamFactory, FeedbackScoreBatchItem.class) + .stream() + .map(feedbackScoreBatchItem -> feedbackScoreBatchItem.toBuilder() + .id(trace.id()) + .projectName(trace.projectName()) + .value(podamFactory.manufacturePojo(BigDecimal.class)) + .build()) + .toList()); + } } @Nested @@ -2750,7 +2800,7 @@ private void getAndAssert(ExperimentItem expectedExperimentItem, String workspac .ignoringFields(ITEM_IGNORED_FIELDS) .isEqualTo(expectedExperimentItem); - assertIgnoredFields(actualExperimentItem, expectedExperimentItem); + assertIgnoredFieldsWithoutFeedbacks(actualExperimentItem, expectedExperimentItem); } } @@ -2758,18
+2808,41 @@ private void assertIgnoredFields( List<ExperimentItem> actualExperimentItems, List<ExperimentItem> expectedExperimentItems) { assertThat(actualExperimentItems).hasSameSizeAs(expectedExperimentItems); for (int i = 0; i < actualExperimentItems.size(); i++) { - assertIgnoredFields(actualExperimentItems.get(i), expectedExperimentItems.get(i)); + assertIgnoredFieldsFullContent(actualExperimentItems.get(i), expectedExperimentItems.get(i)); } } - private void assertIgnoredFields(ExperimentItem actualExperimentItem, ExperimentItem expectedExperimentItem) { - assertThat(actualExperimentItem.input()).isNull(); - assertThat(actualExperimentItem.output()).isNull(); - assertThat(actualExperimentItem.feedbackScores()).isNull(); + private void assertIgnoredFieldsFullContent(ExperimentItem actualExperimentItem, + ExperimentItem expectedExperimentItem) { + assertIgnoredFields(actualExperimentItem, expectedExperimentItem, true); + } + + private void assertIgnoredFieldsWithoutFeedbacks(ExperimentItem actualExperimentItem, + ExperimentItem expectedExperimentItem) { + assertIgnoredFields(actualExperimentItem, expectedExperimentItem, false); + } + + private void assertIgnoredFields(ExperimentItem actualExperimentItem, ExperimentItem expectedExperimentItem, + boolean isFullContent) { assertThat(actualExperimentItem.createdAt()).isAfter(expectedExperimentItem.createdAt()); assertThat(actualExperimentItem.lastUpdatedAt()).isAfter(expectedExperimentItem.lastUpdatedAt()); assertThat(actualExperimentItem.createdBy()).isEqualTo(USER); assertThat(actualExperimentItem.lastUpdatedBy()).isEqualTo(USER); + if (isFullContent) { + actualExperimentItem = assertFeedbackScoresIgnoredFieldsAndSetThemToNull(actualExperimentItem, USER); + + assertThat(actualExperimentItem.feedbackScores()) + .usingRecursiveComparison() + .withComparatorForType(BigDecimal::compareTo, BigDecimal.class) + .ignoringCollectionOrder() + .isEqualTo(expectedExperimentItem.feedbackScores()); + assertThat(actualExperimentItem.input()).isEqualTo(expectedExperimentItem.input()); + assertThat(actualExperimentItem.output()).isEqualTo(expectedExperimentItem.output()); + } else { + assertThat(actualExperimentItem.input()).isNull(); + assertThat(actualExperimentItem.output()).isNull(); + assertThat(actualExperimentItem.feedbackScores()).isNull(); + } } private void getAndAssertNotFound(UUID id, String apiKey, String workspaceName) { diff --git a/apps/opik-backend/src/test/java/com/comet/opik/api/resources/v1/priv/FeedbackDefinitionResourceTest.java b/apps/opik-backend/src/test/java/com/comet/opik/api/resources/v1/priv/FeedbackDefinitionResourceTest.java index 213355ad25..03d1947428 100644 --- a/apps/opik-backend/src/test/java/com/comet/opik/api/resources/v1/priv/FeedbackDefinitionResourceTest.java +++ b/apps/opik-backend/src/test/java/com/comet/opik/api/resources/v1/priv/FeedbackDefinitionResourceTest.java @@ -53,10 +53,10 @@ import static com.comet.opik.api.FeedbackDefinition.CategoricalFeedbackDefinition.CategoricalFeedbackDetail; import static com.comet.opik.api.FeedbackDefinition.FeedbackDefinitionPage; import static com.comet.opik.api.FeedbackDefinition.NumericalFeedbackDefinition; +import static com.comet.opik.api.resources.utils.TestHttpClientUtils.UNAUTHORIZED_RESPONSE; import static com.comet.opik.domain.FeedbackDefinitionModel.FeedbackType; import static com.comet.opik.infrastructure.auth.RequestContext.SESSION_COOKIE; import static com.comet.opik.infrastructure.auth.RequestContext.WORKSPACE_HEADER; -import static
com.comet.opik.infrastructure.auth.TestHttpClientUtils.UNAUTHORIZED_RESPONSE; import static com.github.tomakehurst.wiremock.client.WireMock.equalTo; import static com.github.tomakehurst.wiremock.client.WireMock.matching; import static com.github.tomakehurst.wiremock.client.WireMock.matchingJsonPath; diff --git a/apps/opik-backend/src/test/java/com/comet/opik/api/resources/v1/priv/ProjectMetricsResourceTest.java b/apps/opik-backend/src/test/java/com/comet/opik/api/resources/v1/priv/ProjectMetricsResourceTest.java index 97a1de319b..9ccb370369 100644 --- a/apps/opik-backend/src/test/java/com/comet/opik/api/resources/v1/priv/ProjectMetricsResourceTest.java +++ b/apps/opik-backend/src/test/java/com/comet/opik/api/resources/v1/priv/ProjectMetricsResourceTest.java @@ -81,9 +81,9 @@ import static com.comet.opik.api.resources.utils.ClickHouseContainerUtils.DATABASE_NAME; import static com.comet.opik.api.resources.utils.MigrationUtils.CLICKHOUSE_CHANGELOG_FILE; +import static com.comet.opik.api.resources.utils.TestHttpClientUtils.UNAUTHORIZED_RESPONSE; import static com.comet.opik.infrastructure.auth.RequestContext.SESSION_COOKIE; import static com.comet.opik.infrastructure.auth.RequestContext.WORKSPACE_HEADER; -import static com.comet.opik.infrastructure.auth.TestHttpClientUtils.UNAUTHORIZED_RESPONSE; import static com.github.tomakehurst.wiremock.client.WireMock.equalTo; import static com.github.tomakehurst.wiremock.client.WireMock.matching; import static com.github.tomakehurst.wiremock.client.WireMock.matchingJsonPath; diff --git a/apps/opik-backend/src/test/java/com/comet/opik/api/resources/v1/priv/ProjectsResourceTest.java b/apps/opik-backend/src/test/java/com/comet/opik/api/resources/v1/priv/ProjectsResourceTest.java index 77bee5fd04..6317b2b556 100644 --- a/apps/opik-backend/src/test/java/com/comet/opik/api/resources/v1/priv/ProjectsResourceTest.java +++ b/apps/opik-backend/src/test/java/com/comet/opik/api/resources/v1/priv/ProjectsResourceTest.java @@ -88,10 +88,10 @@ import static com.comet.opik.api.resources.utils.AssertionUtils.assertFeedbackScoreNames; import static com.comet.opik.api.resources.utils.ClickHouseContainerUtils.DATABASE_NAME; import static com.comet.opik.api.resources.utils.MigrationUtils.CLICKHOUSE_CHANGELOG_FILE; +import static com.comet.opik.api.resources.utils.TestHttpClientUtils.UNAUTHORIZED_RESPONSE; import static com.comet.opik.domain.ProjectService.DEFAULT_PROJECT; import static com.comet.opik.infrastructure.auth.RequestContext.SESSION_COOKIE; import static com.comet.opik.infrastructure.auth.RequestContext.WORKSPACE_HEADER; -import static com.comet.opik.infrastructure.auth.TestHttpClientUtils.UNAUTHORIZED_RESPONSE; import static com.github.tomakehurst.wiremock.client.WireMock.equalTo; import static com.github.tomakehurst.wiremock.client.WireMock.matching; import static com.github.tomakehurst.wiremock.client.WireMock.matchingJsonPath; @@ -857,21 +857,25 @@ void getProjects__whenSortingProjectsByLastTrace__thenReturnProjectSorted(Direct mockTargetWorkspace(apiKey, workspaceName, workspaceId); - List<Project> projects = PodamFactoryUtils.manufacturePojoList(factory, Project.class); + List<Project> projects = createProjectsWithLastTrace(apiKey, workspaceName); - projects = projects.stream().map(project -> { - UUID projectId = createProject(project, apiKey, workspaceName); - List<UUID> traceIds = IntStream.range(0, 5) - .mapToObj(i -> createCreateTrace(project.name(), apiKey, workspaceName)) - .toList(); + requestAndAssertLastTraceSorting(workspaceName, apiKey, projects, request, expected, 1,
projects.size()); + } - Trace trace = getTrace(traceIds.getLast(), apiKey, workspaceName); - return project.toBuilder() - .id(projectId) - .lastUpdatedTraceAt(trace.lastUpdatedAt()).build(); - }).toList(); + @Test + @DisplayName("when fetching all projects with last trace sorting and out of range pagination, then return empty list") + void getProjects__whenSortingProjectsByLastTraceWithPagination__thenReturnEmptyList() { + final int OUT_OF_RANGE_PAGE = 3; + String workspaceName = UUID.randomUUID().toString(); + String apiKey = UUID.randomUUID().toString(); + String workspaceId = UUID.randomUUID().toString(); + + mockTargetWorkspace(apiKey, workspaceName, workspaceId); - requestAndAssertLastTraceSorting(workspaceName, apiKey, projects, request, expected); + List<Project> projects = createProjectsWithLastTrace(apiKey, workspaceName); + + requestAndAssertLastTraceSorting(workspaceName, apiKey, List.of(), Direction.DESC, Direction.DESC, + OUT_OF_RANGE_PAGE, projects.size()); } @ParameterizedTest @@ -906,9 +910,10 @@ void getProjects__whenSortingProjectsByLastTraceAndNoTraceExists__thenReturnProj return project.toBuilder().id(projectId).build(); }).toList(); + List<Project> allProjects = Stream.concat(withTraceProjects.stream(), noTraceProjects.stream()).toList(); + requestAndAssertLastTraceSorting( - workspaceName, apiKey, Stream.concat(withTraceProjects.stream(), noTraceProjects.stream()).toList(), - request, expected); + workspaceName, apiKey, allProjects, request, expected, 1, allProjects.size()); } public static Stream<Arguments> sortDirectionProvider() { @@ -918,6 +923,22 @@ public static Stream<Arguments> sortDirectionProvider() { Arguments.of(Named.of("descending", Direction.DESC), Direction.DESC)); } + private List<Project> createProjectsWithLastTrace(String apiKey, String workspaceName) { + List<Project> projects = PodamFactoryUtils.manufacturePojoList(factory, Project.class); + + return projects.stream().map(project -> { + UUID projectId = createProject(project, apiKey, workspaceName); + List<UUID> traceIds = IntStream.range(0, 5) + .mapToObj(i -> createCreateTrace(project.name(), apiKey, workspaceName)) + .toList(); + + Trace trace = getTrace(traceIds.getLast(), apiKey, workspaceName); + return project.toBuilder() + .id(projectId) + .lastUpdatedTraceAt(trace.lastUpdatedAt()).build(); + }).toList(); + } + @ParameterizedTest @MethodSource @DisplayName("sort by non-sortable field should return an error") @@ -1594,14 +1615,15 @@ private void assertProject(Project project, String apiKey, String workspaceName) } private void requestAndAssertLastTraceSorting(String workspaceName, String apiKey, List<Project> allProjects, - Direction request, Direction expected) { + Direction request, Direction expected, int page, int size) { var sorting = List.of(SortingField.builder() .field(SortableFields.LAST_UPDATED_TRACE_AT) .direction(request) .build()); var actualResponse = client.target(URL_TEMPLATE.formatted(baseURI)) - .queryParam("size", allProjects.size()) + .queryParam("size", size) + .queryParam("page", page) .queryParam("sorting", URLEncoder.encode(JsonUtils.writeValueAsString(sorting), StandardCharsets.UTF_8)) .request() @@ -1614,7 +1636,7 @@ private void requestAndAssertLastTraceSorting(String workspaceName, String apiKe assertThat(actualResponse.getStatusInfo().getStatusCode()).isEqualTo(200); assertThat(actualEntity.size()).isEqualTo(allProjects.size()); assertThat(actualEntity.total()).isEqualTo(allProjects.size()); - assertThat(actualEntity.page()).isEqualTo(1); + assertThat(actualEntity.page()).isEqualTo(page); if (expected == Direction.DESC) { allProjects
= allProjects.reversed(); @@ -2087,7 +2109,8 @@ void findFeedbackScoreNames(boolean userProjectId) { // Create unexpected feedback scores String unexpectedProjectName = UUID.randomUUID().toString(); - UUID unexpectedProjectId = projectResourceClient.createProject(unexpectedProjectName, apiKey, workspaceName); + UUID unexpectedProjectId = projectResourceClient.createProject(unexpectedProjectName, apiKey, + workspaceName); Project unexpectedProject = projectResourceClient.getProject(unexpectedProjectId, apiKey, workspaceName); traceResourceClient.createMultiValueScores(otherNames, unexpectedProject, diff --git a/apps/opik-backend/src/test/java/com/comet/opik/api/resources/v1/priv/PromptResourceTest.java b/apps/opik-backend/src/test/java/com/comet/opik/api/resources/v1/priv/PromptResourceTest.java index 517b4518f6..ed72672cdb 100644 --- a/apps/opik-backend/src/test/java/com/comet/opik/api/resources/v1/priv/PromptResourceTest.java +++ b/apps/opik-backend/src/test/java/com/comet/opik/api/resources/v1/priv/PromptResourceTest.java @@ -62,9 +62,9 @@ import static com.comet.opik.api.resources.utils.ClickHouseContainerUtils.DATABASE_NAME; import static com.comet.opik.api.resources.utils.MigrationUtils.CLICKHOUSE_CHANGELOG_FILE; +import static com.comet.opik.api.resources.utils.TestHttpClientUtils.UNAUTHORIZED_RESPONSE; import static com.comet.opik.infrastructure.auth.RequestContext.SESSION_COOKIE; import static com.comet.opik.infrastructure.auth.RequestContext.WORKSPACE_HEADER; -import static com.comet.opik.infrastructure.auth.TestHttpClientUtils.UNAUTHORIZED_RESPONSE; import static com.github.tomakehurst.wiremock.client.WireMock.equalTo; import static com.github.tomakehurst.wiremock.client.WireMock.matching; import static com.github.tomakehurst.wiremock.client.WireMock.matchingJsonPath; diff --git a/apps/opik-backend/src/test/java/com/comet/opik/api/resources/v1/priv/SpansResourceTest.java b/apps/opik-backend/src/test/java/com/comet/opik/api/resources/v1/priv/SpansResourceTest.java index c8b201fdb0..6363fd3aaa 100644 --- a/apps/opik-backend/src/test/java/com/comet/opik/api/resources/v1/priv/SpansResourceTest.java +++ b/apps/opik-backend/src/test/java/com/comet/opik/api/resources/v1/priv/SpansResourceTest.java @@ -102,11 +102,11 @@ import static com.comet.opik.api.resources.utils.ClickHouseContainerUtils.DATABASE_NAME; import static com.comet.opik.api.resources.utils.MigrationUtils.CLICKHOUSE_CHANGELOG_FILE; import static com.comet.opik.api.resources.utils.StatsUtils.getProjectSpanStatItems; +import static com.comet.opik.api.resources.utils.TestHttpClientUtils.UNAUTHORIZED_RESPONSE; import static com.comet.opik.domain.ProjectService.DEFAULT_PROJECT; import static com.comet.opik.domain.SpanService.PROJECT_AND_WORKSPACE_NAME_MISMATCH; import static com.comet.opik.infrastructure.auth.RequestContext.SESSION_COOKIE; import static com.comet.opik.infrastructure.auth.RequestContext.WORKSPACE_HEADER; -import static com.comet.opik.infrastructure.auth.TestHttpClientUtils.UNAUTHORIZED_RESPONSE; import static com.comet.opik.utils.ValidationUtils.MAX_FEEDBACK_SCORE_VALUE; import static com.comet.opik.utils.ValidationUtils.MIN_FEEDBACK_SCORE_VALUE; import static com.github.tomakehurst.wiremock.client.WireMock.equalTo; @@ -3426,7 +3426,8 @@ private void createAndAssert(UUID entityId, FeedbackScore score, String workspac } } - private void createAndAssertErrorMessage(Span span, String apiKey, String workspaceName, int status, String errorMessage) { + private void createAndAssertErrorMessage(Span span, String apiKey, 
String workspaceName, int status, + String errorMessage) { try (var response = spanResourceClient.createSpan(span, apiKey, workspaceName, status)) { assertThat(response.readEntity(ErrorMessage.class).errors().getFirst()).isEqualTo(errorMessage); } @@ -3572,7 +3573,8 @@ void createSpansWithSameIdForDifferentWorkspacesReturnsConflict() { .build(); mockTargetWorkspace(apiKey, workspaceName, workspaceId); - createAndAssertErrorMessage(span2, apiKey, workspaceName, HttpStatus.SC_CONFLICT, PROJECT_AND_WORKSPACE_NAME_MISMATCH); + createAndAssertErrorMessage(span2, apiKey, workspaceName, HttpStatus.SC_CONFLICT, + PROJECT_AND_WORKSPACE_NAME_MISMATCH); } @Test diff --git a/apps/opik-backend/src/test/java/com/comet/opik/api/resources/v1/priv/TracesResourceTest.java b/apps/opik-backend/src/test/java/com/comet/opik/api/resources/v1/priv/TracesResourceTest.java index 95daebeef6..7f26f22b6a 100644 --- a/apps/opik-backend/src/test/java/com/comet/opik/api/resources/v1/priv/TracesResourceTest.java +++ b/apps/opik-backend/src/test/java/com/comet/opik/api/resources/v1/priv/TracesResourceTest.java @@ -100,11 +100,11 @@ import static com.comet.opik.api.resources.utils.ClickHouseContainerUtils.DATABASE_NAME; import static com.comet.opik.api.resources.utils.MigrationUtils.CLICKHOUSE_CHANGELOG_FILE; import static com.comet.opik.api.resources.utils.StatsUtils.getProjectTraceStatItems; +import static com.comet.opik.api.resources.utils.TestHttpClientUtils.UNAUTHORIZED_RESPONSE; import static com.comet.opik.domain.ProjectService.DEFAULT_PROJECT; import static com.comet.opik.domain.TraceService.PROJECT_NAME_AND_WORKSPACE_NAME_MISMATCH; import static com.comet.opik.infrastructure.auth.RequestContext.SESSION_COOKIE; import static com.comet.opik.infrastructure.auth.RequestContext.WORKSPACE_HEADER; -import static com.comet.opik.infrastructure.auth.TestHttpClientUtils.UNAUTHORIZED_RESPONSE; import static com.github.tomakehurst.wiremock.client.WireMock.equalTo; import static com.github.tomakehurst.wiremock.client.WireMock.matching; import static com.github.tomakehurst.wiremock.client.WireMock.matchingJsonPath; @@ -3569,7 +3569,8 @@ private UUID create(Trace trace, String apiKey, String workspaceName) { return traceResourceClient.createTrace(trace, apiKey, workspaceName); } - private void createAndAssertErrorMessage(Trace trace, String apiKey, String workspaceName, int status, String errorMessage) { + private void createAndAssertErrorMessage(Trace trace, String apiKey, String workspaceName, int status, + String errorMessage) { try (var response = traceResourceClient.createTrace(trace, apiKey, workspaceName, status)) { assertThat(response.readEntity(ErrorMessage.class).errors().getFirst()).isEqualTo(errorMessage); } @@ -3680,7 +3681,8 @@ void create__whenCreatingTracesWithSameIdForDifferentWorkspaces__thenReturnConfl .usage(null) .feedbackScores(null) .build(); - createAndAssertErrorMessage(trace2, apiKey, workspaceName, HttpStatus.SC_CONFLICT, PROJECT_NAME_AND_WORKSPACE_NAME_MISMATCH); + createAndAssertErrorMessage(trace2, apiKey, workspaceName, HttpStatus.SC_CONFLICT, + PROJECT_NAME_AND_WORKSPACE_NAME_MISMATCH); } @Test diff --git a/apps/opik-backend/src/test/java/com/comet/opik/domain/TraceServiceImplTest.java b/apps/opik-backend/src/test/java/com/comet/opik/domain/TraceServiceImplTest.java index dcc7d63b75..3cc8d7a375 100644 --- a/apps/opik-backend/src/test/java/com/comet/opik/domain/TraceServiceImplTest.java +++ b/apps/opik-backend/src/test/java/com/comet/opik/domain/TraceServiceImplTest.java @@ -120,6 +120,7 @@ void 
create__whenConcurrentTraceCreationsWithSameProjectNameConflict__thenHandle .thenReturn(Mono.just(traceId)); var actualResult = traceService.create(Trace.builder() + .projectId(projectId) .projectName(projectName) .startTime(Instant.now()) .build()) diff --git a/apps/opik-backend/src/test/java/com/comet/opik/domain/llmproviders/LlmProviderFactoryTest.java b/apps/opik-backend/src/test/java/com/comet/opik/domain/llmproviders/LlmProviderFactoryTest.java new file mode 100644 index 0000000000..9250183902 --- /dev/null +++ b/apps/opik-backend/src/test/java/com/comet/opik/domain/llmproviders/LlmProviderFactoryTest.java @@ -0,0 +1,87 @@ +package com.comet.opik.domain.llmproviders; + +import com.comet.opik.api.LlmProvider; +import com.comet.opik.api.ProviderApiKey; +import com.comet.opik.domain.LlmProviderApiKeyService; +import com.comet.opik.infrastructure.EncryptionUtils; +import com.comet.opik.infrastructure.LlmProviderClientConfig; +import com.comet.opik.infrastructure.OpikConfiguration; +import com.fasterxml.jackson.databind.ObjectMapper; +import dev.ai4j.openai4j.chat.ChatCompletionModel; +import dev.langchain4j.model.anthropic.AnthropicChatModelName; +import io.dropwizard.configuration.ConfigurationException; +import io.dropwizard.configuration.FileConfigurationSourceProvider; +import io.dropwizard.configuration.YamlConfigurationFactory; +import io.dropwizard.jackson.Jackson; +import io.dropwizard.jersey.validation.Validators; +import jakarta.validation.Validator; +import org.apache.commons.lang3.EnumUtils; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.TestInstance; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.MethodSource; + +import java.io.IOException; +import java.util.List; +import java.util.UUID; +import java.util.stream.Stream; + +import static org.assertj.core.api.AssertionsForClassTypes.assertThat; +import static org.junit.jupiter.params.provider.Arguments.arguments; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; + +@TestInstance(TestInstance.Lifecycle.PER_CLASS) +class LlmProviderFactoryTest { + private LlmProviderClientConfig llmProviderClientConfig; + + private static final ObjectMapper objectMapper = Jackson.newObjectMapper(); + private static final Validator validator = Validators.newValidator(); + private static final YamlConfigurationFactory<OpikConfiguration> factory = new YamlConfigurationFactory<>( + OpikConfiguration.class, validator, objectMapper, "dw"); + + @BeforeAll + void setUpAll() throws ConfigurationException, IOException { + final OpikConfiguration config = factory.build(new FileConfigurationSourceProvider(), + "src/test/resources/config-test.yml"); + EncryptionUtils.setConfig(config); + llmProviderClientConfig = config.getLlmProviderClient(); + } + + @ParameterizedTest + @MethodSource + void testGetService(String model, LlmProvider llmProvider, Class<? extends LlmProviderService> providerClass) { + // setup + LlmProviderApiKeyService llmProviderApiKeyService = mock(LlmProviderApiKeyService.class); + String workspaceId = UUID.randomUUID().toString(); + String apiKey = UUID.randomUUID().toString(); + + when(llmProviderApiKeyService.find(workspaceId)).thenReturn(ProviderApiKey.ProviderApiKeyPage.builder() + .content(List.of(ProviderApiKey.builder() + .provider(llmProvider) + .apiKey(EncryptionUtils.encrypt(apiKey)) + .build())) + .total(1) + .page(1) + .size(1) + .build()); + + // SUT + var llmProviderFactory = new
LlmProviderFactory(llmProviderClientConfig, llmProviderApiKeyService); + + LlmProviderService actual = llmProviderFactory.getService(workspaceId, model); + + // assertions + assertThat(actual).isInstanceOf(providerClass); + } + + private static Stream<Arguments> testGetService() { + var openAiModels = EnumUtils.getEnumList(ChatCompletionModel.class).stream() + .map(model -> arguments(model.toString(), LlmProvider.OPEN_AI, LlmProviderOpenAi.class)); + var anthropicModels = EnumUtils.getEnumList(AnthropicChatModelName.class).stream() + .map(model -> arguments(model.toString(), LlmProvider.ANTHROPIC, LlmProviderAnthropic.class)); + + return Stream.concat(openAiModels, anthropicModels); + } +} diff --git a/apps/opik-backend/src/test/java/com/comet/opik/infrastructure/auth/RemoveAuthServiceTest.java b/apps/opik-backend/src/test/java/com/comet/opik/infrastructure/auth/RemoveAuthServiceTest.java index e02d66c813..1d233a17f4 100644 --- a/apps/opik-backend/src/test/java/com/comet/opik/infrastructure/auth/RemoveAuthServiceTest.java +++ b/apps/opik-backend/src/test/java/com/comet/opik/infrastructure/auth/RemoveAuthServiceTest.java @@ -1,5 +1,6 @@ package com.comet.opik.infrastructure.auth; +import com.comet.opik.api.resources.utils.TestHttpClientUtils; import com.comet.opik.api.resources.utils.WireMockUtils; import com.comet.opik.domain.DummyLockService; import com.comet.opik.infrastructure.AuthenticationConfig; @@ -34,7 +35,7 @@ import static org.junit.jupiter.params.provider.Arguments.arguments; @TestInstance(TestInstance.Lifecycle.PER_CLASS) -public class RemoveAuthServiceTest { +class RemoveAuthServiceTest { private Client client; private static final WireMockUtils.WireMockRuntime wireMock; diff --git a/apps/opik-backend/src/test/java/com/comet/opik/infrastructure/aws/rds/MysqlRdsIamE2eTest.java b/apps/opik-backend/src/test/java/com/comet/opik/infrastructure/aws/rds/MysqlRdsIamE2eTest.java index 41342f966d..7196c91ca4 100644 --- a/apps/opik-backend/src/test/java/com/comet/opik/infrastructure/aws/rds/MysqlRdsIamE2eTest.java +++ b/apps/opik-backend/src/test/java/com/comet/opik/infrastructure/aws/rds/MysqlRdsIamE2eTest.java @@ -30,7 +30,7 @@ @Disabled @TestInstance(TestInstance.Lifecycle.PER_CLASS) -public class MysqlRdsIamE2eTest { +class MysqlRdsIamE2eTest { private static final String URL_TEMPLATE = "%s/v1/private/projects"; diff --git a/apps/opik-backend/src/test/java/com/comet/opik/utils/PaginationUtilsTest.java b/apps/opik-backend/src/test/java/com/comet/opik/utils/PaginationUtilsTest.java index 0cfb7295ce..99b2257781 100644 --- a/apps/opik-backend/src/test/java/com/comet/opik/utils/PaginationUtilsTest.java +++ b/apps/opik-backend/src/test/java/com/comet/opik/utils/PaginationUtilsTest.java @@ -13,8 +13,10 @@ import static org.junit.jupiter.api.Named.named; import static org.junit.jupiter.params.provider.Arguments.arguments; -public class PaginationUtilsTest { +class PaginationUtilsTest { + private static final List<Integer> LIST = IntStream.range(0, 50).boxed().toList(); + @ParameterizedTest @MethodSource void testPagination(int page, int size, List<Integer> expected) { diff --git a/apps/opik-backend/src/test/resources/config-test.yml b/apps/opik-backend/src/test/resources/config-test.yml index 5eb53bfb21..a80d0e6206 100644 --- a/apps/opik-backend/src/test/resources/config-test.yml +++ b/apps/opik-backend/src/test/resources/config-test.yml @@ -98,7 +98,7 @@ health: distributedLock: # Default: 500 # Description: Lease time in milliseconds - lockTimeout: 500 + lockTimeoutMS: 500 # Default: 1 # Description: This value has to
be considerably higher than the lockTimeoutMS value, as it has to guarantee that the # last thread to join the queue to acquire the lock will have enough time to execute the action. Then, the lock will @@ -117,18 +117,18 @@ redis: authentication: # Default: false # Description: Whether or not to enable authentication - enabled: ${AUTH_ENABLED:-false} + enabled: false # Default: 0 # Description: API key resolution cache TTL (seconds). Setting this value to 0 means no caching. - apiKeyResolutionCacheTTLInSec: ${AUTH_API_KEY_RESOLUTION_CACHE_TTL_IN_SEC:-0} + apiKeyResolutionCacheTTLInSec: 0 # Default: # Description: Configures how to authenticate requests which originate from the sdk sdk: - url: ${AUTH_SDK_URL:-''} + url: '' # Default: # Description: Configures how to authenticate requests which originate from the ui ui: - url: ${AUTH_UI_URL:-''} + url: '' # https://www.dropwizard.io/en/stable/manual/configuration.html#servers server: @@ -168,23 +168,40 @@ usageReport: metadata: # Default: latest # Description: The application version - version: ${OPIK_VERSION:-latest} + version: latest # CORS related configuration cors: # Default: false # Description: Whether or not to allow cross-origin requests (CORS) - enabled: ${CORS:-false} + enabled: false # Encryption related configuration encryption: # Default: GiTHubiLoVeYouAA # Description: Encryption key to use when storing sensitive information - key: ${OPIK_ENCRYPTION_KEY:-'GiTHubiLoVeYouAA'} + key: 'GiTHubiLoVeYouAA' # LLM providers client configuration llmProviderClient: + # Default: 60s + # Description: Call timeout for LLM providers + callTimeout: 60s + # Default: false + # Description: Whether or not to log requests + logRequests: false + # Default: false + # Description: Whether or not to log responses + logResponses: false openAiClient: # See demo endpoint Langchain4j documentation: https://docs.langchain4j.dev/get-started # Not https but only used for testing purposes. It's fine as long as no sensitive data is sent.
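# (Test-only demo endpoint; a real deployment would typically point at the provider's official HTTPS API instead.)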
url: http://langchain4j.dev/demo/openai/v1 + # Configuration for Anthropic client + anthropicClient: + # Default: https://api.anthropic.com/v1/ + # Description: Anthropic API base URL + url: https://api.anthropic.com/v1/ + # Default: 2023-06-01 + # Description: Anthropic API version https://docs.anthropic.com/en/api/versioning + version: '2023-06-01' diff --git a/apps/opik-documentation/documentation/.gitignore b/apps/opik-documentation/documentation/.gitignore index b2d6de3062..c8a58ad640 100644 --- a/apps/opik-documentation/documentation/.gitignore +++ b/apps/opik-documentation/documentation/.gitignore @@ -4,6 +4,8 @@ # Production /build +/data + # Generated files .docusaurus .cache-loader diff --git a/apps/opik-documentation/documentation/conftest.py b/apps/opik-documentation/documentation/conftest.py new file mode 100644 index 0000000000..3d963298b6 --- /dev/null +++ b/apps/opik-documentation/documentation/conftest.py @@ -0,0 +1,4 @@ +from pytest_codeblocks.pytest_integration import pytest_collect_file + +# Export the necessary components +__all__ = ["pytest_collect_file"] diff --git a/apps/opik-documentation/documentation/docs/changelog.md b/apps/opik-documentation/documentation/docs/changelog.md index c3cd372b11..e8b4675ae0 100644 --- a/apps/opik-documentation/documentation/docs/changelog.md +++ b/apps/opik-documentation/documentation/docs/changelog.md @@ -1,6 +1,7 @@ --- sidebar_label: Changelog description: Weekly changelog for Opik +pytest_codeblocks_skip: true --- # Weekly Changelog diff --git a/apps/opik-documentation/documentation/docs/cookbook/dspy.ipynb b/apps/opik-documentation/documentation/docs/cookbook/dspy.ipynb new file mode 100644 index 0000000000..99faa15e45 --- /dev/null +++ b/apps/opik-documentation/documentation/docs/cookbook/dspy.ipynb @@ -0,0 +1,159 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "---\n", + "description: Cookbook that showcases Opik's integration with DSPy\n", + "---\n", + "\n", + "# Using Opik with DSPy\n", + "\n", + "[DSPy](https://dspy.ai/) is the framework for programming—rather than prompting—language models.\n", + "\n", + "In this guide, we will showcase how to integrate Opik with DSPy so that all the DSPy calls are logged as traces in Opik." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Creating an account on Comet.com\n", + "\n", + "[Comet](https://www.comet.com/site?from=llm&utm_source=opik&utm_medium=colab&utm_content=dspy&utm_campaign=opik) provides a hosted version of the Opik platform, [simply create an account](https://www.comet.com/signup?from=llm&utm_source=opik&utm_medium=colab&utm_content=dspy&utm_campaign=opik) and grab your API Key.\n", + "\n", + "> You can also run the Opik platform locally, see the [installation guide](https://www.comet.com/docs/opik/self-host/overview/?from=llm&utm_source=opik&utm_medium=colab&utm_content=dspy&utm_campaign=opik) for more information." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%pip install --upgrade opik dspy" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "OPIK: Opik is already configured.
You can check the settings by viewing the config file at /Users/jacquesverre/.opik.config\n" + ] + } + ], + "source": [ + "import opik\n", + "\n", + "opik.configure(use_local=False)" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import getpass\n", + "\n", + "if \"OPENAI_API_KEY\" not in os.environ:\n", + " os.environ[\"OPENAI_API_KEY\"] = getpass.getpass(\"Enter your OpenAI API key: \")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Logging traces\n", + "\n", + "In order to log traces to Opik, you will need to set the `opik` callback:" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "import dspy\n", + "from opik.integrations.dspy.callback import OpikCallback\n", + "\n", + "lm = dspy.LM(\"openai/gpt-4o-mini\")\n", + "\n", + "project_name = \"DSPY\"\n", + "opik_callback = OpikCallback(project_name=project_name)\n", + "\n", + "dspy.configure(lm=lm, callbacks=[opik_callback])" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "WARNING:langfuse:Langfuse client is disabled since no public_key was provided as a parameter or environment variable 'LANGFUSE_PUBLIC_KEY'. See our docs: https://langfuse.com/docs/sdk/python/low-level-sdk#initialize-client\n", + "OPIK: Started logging traces to the \"DSPY\" project at https://www.comet.com/opik/jacques-comet/redirect/projects?name=DSPY.\n" + ] + }, + { + "data": { + "text/plain": [ + "Prediction(\n", + " reasoning='The meaning of life is a philosophical question that has been contemplated by humans for centuries. Different cultures, religions, and individuals have proposed various interpretations. Some suggest that the meaning of life is to seek happiness, fulfillment, and personal growth, while others believe it is about serving a higher purpose or contributing to the well-being of others. Ultimately, the meaning of life may vary from person to person, shaped by personal experiences, beliefs, and values.',\n", + " answer=\"The meaning of life is subjective and can vary greatly among individuals. 
It may involve seeking happiness, personal growth, and contributing to the well-being of others, or fulfilling a higher purpose, depending on one's beliefs and experiences.\"\n", + ")" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "cot = dspy.ChainOfThought(\"question -> answer\")\n", + "cot(question=\"What is the meaning of life?\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The trace is now logged to the Opik platform:\n", + "\n", + "![DSPy trace](https://raw.githubusercontent.com/comet-ml/opik/main/apps/opik-documentation/documentation/static/img/cookbook/dspy_trace_cookbook.png)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "py312_llm_eval", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.4" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/apps/opik-documentation/documentation/docs/evaluation/evaluate_your_llm.md b/apps/opik-documentation/documentation/docs/evaluation/evaluate_your_llm.md index 7504a700df..f669dcb5f5 100644 --- a/apps/opik-documentation/documentation/docs/evaluation/evaluate_your_llm.md +++ b/apps/opik-documentation/documentation/docs/evaluation/evaluate_your_llm.md @@ -1,6 +1,7 @@ --- sidebar_label: Evaluate your LLM Application description: Step by step guide on how to evaluate your LLM application +pytest_codeblocks_execute_previous: true --- # Evaluate your LLM Application @@ -130,7 +131,7 @@ def your_llm_application(input: str) -> str: # Define the evaluation task def evaluation_task(x): return { - "output": your_llm_application(x['user_question']) + "output": your_llm_application(x['input']) } # Create a simple dataset @@ -145,7 +146,6 @@ dataset.insert([ hallucination_metric = Hallucination() evaluation = evaluate( - experiment_name="My experiment", dataset=dataset, task=evaluation_task, scoring_metrics=[hallucination_metric], @@ -193,7 +193,6 @@ prompt = opik.Prompt( # Run the evaluation evaluation = evaluate( - experiment_name="My experiment", dataset=dataset, task=evaluation_task, scoring_metrics=[hallucination_metric], diff --git a/apps/opik-documentation/documentation/docs/production/gateway.mdx b/apps/opik-documentation/documentation/docs/production/gateway.mdx index 8b7eddb58f..1998fbb77c 100644 --- a/apps/opik-documentation/documentation/docs/production/gateway.mdx +++ b/apps/opik-documentation/documentation/docs/production/gateway.mdx @@ -78,7 +78,7 @@ The plugin is open source and available at [comet-ml/opik-kong-plugin](https://g Once the plugin is installed, you can enable it by running: -```bash +```bash pytest_codeblocks_skip="true" curl -is -X POST http://localhost:8001/services/{serviceName|Id}/plugins \ --header "accept: application/json" \ --header "Content-Type: application/json" \ diff --git a/apps/opik-documentation/documentation/docs/tracing/production_monitoring.md b/apps/opik-documentation/documentation/docs/production/production_monitoring.md similarity index 91% rename from apps/opik-documentation/documentation/docs/tracing/production_monitoring.md rename to apps/opik-documentation/documentation/docs/production/production_monitoring.md index 3afbdd5d0d..721cb64351 100644 --- a/apps/opik-documentation/documentation/docs/tracing/production_monitoring.md +++ 
b/apps/opik-documentation/documentation/docs/production/production_monitoring.md @@ -51,9 +51,7 @@ import opik opik_client = opik.Opik() traces = opik_client.search_traces( - project_name="Default Project", - start_time="2024-01-01", - end_time="2025-01-01", + project_name="Default Project" ) ``` @@ -67,12 +65,11 @@ The `search_traces` method allows you to fetch traces based on any of trace attr Once you have fetched the traces you want to annotate, you can update the feedback scores using the [`Opik.log_traces_feedback_scores`](https://www.comet.com/docs/opik/python-sdk-reference/Opik.html#opik.Opik.log_traces_feedback_scores) method. -```python +```python pytest_codeblocks_skip="true" for trace in traces: opik_client.log_traces_feedback_scores( project_name="Default Project", - trace_ids=[i.id], - feedback_scores=[{"name": "user_feedback", "value": 1.0, "reason": "The response was helpful and accurate."}], + feedback_scores=[{"id": trace.id, "name": "user_feedback", "value": 1.0, "reason": "The response was helpful and accurate."}], ) ``` diff --git a/apps/opik-documentation/documentation/docs/quickstart.mdx b/apps/opik-documentation/documentation/docs/quickstart.mdx index 0aa78cb791..e485f5d360 100644 --- a/apps/opik-documentation/documentation/docs/quickstart.mdx +++ b/apps/opik-documentation/documentation/docs/quickstart.mdx @@ -76,7 +76,7 @@ All OpenAI calls made using the `openai_client` will now be logged to Opik. -```python +```python pytest_codeblocks_skip="true" from litellm.integrations.opik.opik import OpikLogger import litellm @@ -93,7 +93,7 @@ All LiteLLM calls made using the `litellm` client will now be logged to Opik. If you are using an LLM provider that Opik does not have an integration for, you can still log the LLM calls by using the `@track` decorator: -```python +```python pytest_codeblocks_skip="true" from opik import track import anthropic @@ -182,7 +182,7 @@ While this code sample assumes that you are using OpenAI, the same principle app If you are using LangChain to build your chains, you can use the `OpikTracer` to log your chains. 
The `OpikTracer` is a LangChain callback that will log every step of the chain to Opik: -```python +```python pytest_codeblocks_skip="true" from langchain_openai import OpenAI from langchain.prompts import PromptTemplate from opik.integrations.langchain import OpikTracer @@ -211,7 +211,7 @@ llm_chain.invoke({"input": "Hello, how are you?"}, callbacks=[opik_tracer]) If you are using LlamaIndex you can set `opik` as a global callback to log all LLM calls: -```python +```python pytest_codeblocks_skip="true" from llama_index.core import global_handler, set_global_handler set_global_handler("opik") diff --git a/apps/opik-documentation/documentation/docs/self-host/kubernetes.md b/apps/opik-documentation/documentation/docs/self-host/kubernetes.md index e91b6e76f6..fe08dad1f9 100644 --- a/apps/opik-documentation/documentation/docs/self-host/kubernetes.md +++ b/apps/opik-documentation/documentation/docs/self-host/kubernetes.md @@ -1,6 +1,7 @@ --- sidebar_label: Production (Kubernetes) description: Describes how to run Opik on a Kubernetes cluster +test_code_snippets: false --- # Production ready Kubernetes deployment diff --git a/apps/opik-documentation/documentation/docs/self-host/local_deployment.md b/apps/opik-documentation/documentation/docs/self-host/local_deployment.md index 7225ab2de5..467d9b9fc5 100644 --- a/apps/opik-documentation/documentation/docs/self-host/local_deployment.md +++ b/apps/opik-documentation/documentation/docs/self-host/local_deployment.md @@ -1,6 +1,7 @@ --- sidebar_label: Local (Docker Compose) description: Describes how to run Opik locally using Docker Compose +test_code_snippets: false --- # Local Deployments using Docker Compose diff --git a/apps/opik-documentation/documentation/docs/self-host/overview.md b/apps/opik-documentation/documentation/docs/self-host/overview.md index bd868d74f7..5334c9470c 100644 --- a/apps/opik-documentation/documentation/docs/self-host/overview.md +++ b/apps/opik-documentation/documentation/docs/self-host/overview.md @@ -1,6 +1,7 @@ --- sidebar_label: Overview description: High-level overview on how to self-host Opik +test_code_snippets: false --- # Self-hosting Opik @@ -16,7 +17,7 @@ If you choose to self-host Opik, you can choose between two deployment options: If you would like to try out Opik locally, we recommend using our Local installation based on `docker compose`. Assuming you have `git` and `docker` installed, you can get started in a couple of minutes: -```bash +```bash pytest_codeblocks_skip="true" # Clone the Opik repository git clone https://github.com/comet-ml/opik.git @@ -28,14 +29,14 @@ docker compose up --detach Opik will now be available at http://localhost:5173 and all traces logged from your local machine will be logged to this local Opik instance. In order for traces and other data to be logged to your Opik instance, you need to make sure that the Opik Python SDK is configured to point to the Opik server you just started.
You can do this by running the following command: -```bash +```bash pytest_codeblocks_skip="true" # Configure the Python SDK to point to the local Opik platform export OPIK_BASE_URL=http://localhost:5173/api ``` or in Python: -```python +```python pytest_codeblocks_skip="true" import os os.environ["OPIK_BASE_URL"] = "http://localhost:5173/api" diff --git a/apps/opik-documentation/documentation/docs/tracing/annotate_traces.md b/apps/opik-documentation/documentation/docs/tracing/annotate_traces.md index 97bed1e518..31deb427b8 100644 --- a/apps/opik-documentation/documentation/docs/tracing/annotate_traces.md +++ b/apps/opik-documentation/documentation/docs/tracing/annotate_traces.md @@ -111,7 +111,7 @@ from opik.evaluation.metrics import Contains metric = Contains() score = metric.score( output="The quick brown fox jumps over the lazy dog.", - expected_output="The quick brown fox jumps over the lazy dog." + reference="The quick brown fox jumps over the lazy dog." ) ``` diff --git a/apps/opik-documentation/documentation/docs/tracing/cost_tracking.md b/apps/opik-documentation/documentation/docs/tracing/cost_tracking.md index 031a4a92cd..8524108abf 100644 --- a/apps/opik-documentation/documentation/docs/tracing/cost_tracking.md +++ b/apps/opik-documentation/documentation/docs/tracing/cost_tracking.md @@ -1,6 +1,7 @@ --- sidebar_label: Cost Tracking description: Describes how to track and monitor costs for your LLM applications using Opik +test_code_snippets: false --- # Cost Tracking @@ -42,9 +43,9 @@ You can retrieve the estimated cost programmatically for both spans and traces. ```python import opik -client = opik.Client() +client = opik.Opik() -span = client.get_span_content(SPAN_ID) +span = client.get_span_content("<SPAN_ID>") # Returns estimated cost in USD, or None for unsupported models print(span.total_estimated_cost) ``` @@ -54,9 +55,9 @@ print(span.total_estimated_cost) ```python import opik -client = opik.Client() +client = opik.Opik() -trace = client.get_trace_content(TRACE_ID) +trace = client.get_trace_content("<TRACE_ID>") # Returns estimated cost in USD, or None for unsupported models print(trace.total_estimated_cost) ``` diff --git a/apps/opik-documentation/documentation/docs/tracing/integrations/anthropic.md b/apps/opik-documentation/documentation/docs/tracing/integrations/anthropic.md index eae608dffa..383c86e831 100644 --- a/apps/opik-documentation/documentation/docs/tracing/integrations/anthropic.md +++ b/apps/opik-documentation/documentation/docs/tracing/integrations/anthropic.md @@ -36,12 +36,10 @@ opik configure In order to configure Anthropic, you will need to have your Anthropic API Key set, see this [section how to pass your Anthropic API Key](https://github.com/anthropics/anthropic-sdk-python?tab=readme-ov-file#usage). -Once you have it, you can set create your Anthropic client: +Once you have it, you can set it as an environment variable: -```python -import anthropic - -anthropic_client = anthropic.Anthropic() +```bash pytest_codeblocks_skip="true" +export ANTHROPIC_API_KEY="YOUR_API_KEY" ``` ## Logging LLM calls @@ -49,8 +47,10 @@ anthropic_client = track_anthropic(anthropic_client) In order to log the LLM calls to Opik, you will need to wrap the anthropic client with `track_anthropic`.
When making calls with that wrapped client, all calls will be logged to Opik: ```python +import anthropic from opik.integrations.anthropic import track_anthropic +anthropic_client = anthropic.Anthropic() anthropic_client = track_anthropic(anthropic_client, project_name="anthropic-integration-demo") PROMPT = "Why is it important to use a LLM Monitoring like CometML Opik tool that allows you to log traces and spans when working with Anthropic LLM Models?" diff --git a/apps/opik-documentation/documentation/docs/tracing/integrations/bedrock.md b/apps/opik-documentation/documentation/docs/tracing/integrations/bedrock.md index 264166cc7c..5d9f8b4c5b 100644 --- a/apps/opik-documentation/documentation/docs/tracing/integrations/bedrock.md +++ b/apps/opik-documentation/documentation/docs/tracing/integrations/bedrock.md @@ -1,6 +1,7 @@ --- sidebar_label: Bedrock description: Describes how to track Bedrock LLM calls using Opik +test_code_snippets: false --- # AWS Bedrock diff --git a/apps/opik-documentation/documentation/docs/tracing/integrations/dify.mdx b/apps/opik-documentation/documentation/docs/tracing/integrations/dify.mdx index 4467d355f8..1fadc101f0 100644 --- a/apps/opik-documentation/documentation/docs/tracing/integrations/dify.mdx +++ b/apps/opik-documentation/documentation/docs/tracing/integrations/dify.mdx @@ -1,6 +1,7 @@ --- sidebar_label: Dify description: Describes how to use Opik with Dify +test_code_snippets: false --- import Tabs from "@theme/Tabs"; diff --git a/apps/opik-documentation/documentation/docs/tracing/integrations/dspy.md b/apps/opik-documentation/documentation/docs/tracing/integrations/dspy.md new file mode 100644 index 0000000000..385c116927 --- /dev/null +++ b/apps/opik-documentation/documentation/docs/tracing/integrations/dspy.md @@ -0,0 +1,60 @@ +--- +sidebar_label: DSPy +description: Describes how to track DSPy calls using Opik +--- + +# DSPy + +[DSPy](https://dspy.ai/) is the framework for programming—rather than prompting—language models. + +Opik integrates with DSPy to log traces for all DSPy calls. + + + You can check out the Colab Notebook if you'd like to jump straight to the code: + + + + + +## Getting started + +First, ensure you have both `opik` and `dspy` installed: + +```bash +pip install opik dspy +``` + +In addition, you can configure Opik using the `opik configure` command, which will prompt you for the correct local server address or, if you are using the Cloud platform, your API key: + +```bash +opik configure +``` + +## Logging DSPy calls + +To log a DSPy pipeline run, you can use the [`OpikCallback`](https://www.comet.com/docs/opik/python-sdk-reference/integrations/dspy/OpikCallback.html).
This callback will log each DSPy run to Opik: + +```python +import dspy +from opik.integrations.dspy.callback import OpikCallback + +project_name = "DSPY" + +lm = dspy.LM( + model="openai/gpt-4o-mini", +) +dspy.configure(lm=lm) + + +opik_callback = OpikCallback(project_name=project_name) +dspy.settings.configure( + callbacks=[opik_callback], +) + +cot = dspy.ChainOfThought("question -> answer") +cot(question="What is the meaning of life?") +``` + +Each run will now be logged to the Opik platform: + +![DSPy](/img/cookbook/dspy_trace_cookbook.png) diff --git a/apps/opik-documentation/documentation/docs/tracing/integrations/gemini.md b/apps/opik-documentation/documentation/docs/tracing/integrations/gemini.md index e4392bb54d..f661246157 100644 --- a/apps/opik-documentation/documentation/docs/tracing/integrations/gemini.md +++ b/apps/opik-documentation/documentation/docs/tracing/integrations/gemini.md @@ -44,7 +44,7 @@ In order to configure Gemini, you will need to have: Once you have these, you can set them as environment variables: -```python +```python pytest_codeblocks_skip="true" import os os.environ["GEMINI_API_KEY"] = "" # Your Google AI Studio Gemini API Key @@ -76,6 +76,9 @@ response = litellm.completion( If you are using LiteLLM within a function tracked with the [`@track`](/tracing/log_traces#using-function-decorators) decorator, you will need to pass the `current_span_data` as metadata to the `litellm.completion` call: ```python +from opik import track, opik_context +import litellm + @track def generate_story(prompt): response = litellm.completion( @@ -83,7 +86,7 @@ def generate_story(prompt): messages=[{"role": "user", "content": prompt}], metadata={ "opik": { - "current_span_data": get_current_span_data(), + "current_span_data": opik_context.get_current_span_data(), }, }, ) @@ -98,7 +101,7 @@ def generate_topic(): messages=[{"role": "user", "content": prompt}], metadata={ "opik": { - "current_span_data": get_current_span_data(), + "current_span_data": opik_context.get_current_span_data(), }, }, ) diff --git a/apps/opik-documentation/documentation/docs/tracing/integrations/groq.md b/apps/opik-documentation/documentation/docs/tracing/integrations/groq.md index 984631bc4f..98fdd02e4a 100644 --- a/apps/opik-documentation/documentation/docs/tracing/integrations/groq.md +++ b/apps/opik-documentation/documentation/docs/tracing/integrations/groq.md @@ -76,6 +76,9 @@ response = litellm.completion( If you are using LiteLLM within a function tracked with the [`@track`](/tracing/log_traces#using-function-decorators) decorator, you will need to pass the `current_span_data` as metadata to the `litellm.completion` call: ```python +from opik import track, opik_context +import litellm + @track def generate_story(prompt): response = litellm.completion( @@ -83,7 +86,7 @@ def generate_story(prompt): messages=[{"role": "user", "content": prompt}], metadata={ "opik": { - "current_span_data": get_current_span_data(), + "current_span_data": opik_context.get_current_span_data(), }, }, ) @@ -94,11 +97,11 @@ def generate_story(prompt): def generate_topic(): prompt = "Generate a topic for a story about Opik." 
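    # Passing "current_span_data" in the metadata below ties this nested completion to the span created by the @track decorator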
response = litellm.completion( - model="Groq/Groq-pro", + model="groq/llama-3.3-70b-versatile", messages=[{"role": "user", "content": prompt}], metadata={ "opik": { - "current_span_data": get_current_span_data(), + "current_span_data": opik_context.get_current_span_data(), }, }, ) diff --git a/apps/opik-documentation/documentation/docs/tracing/integrations/haystack.md b/apps/opik-documentation/documentation/docs/tracing/integrations/haystack.md index 6ebfbc7b8d..2f8575f650 100644 --- a/apps/opik-documentation/documentation/docs/tracing/integrations/haystack.md +++ b/apps/opik-documentation/documentation/docs/tracing/integrations/haystack.md @@ -19,7 +19,7 @@ pip install opik haystack-ai In addition, you can configure Opik using the `opik configure` command, which will prompt you for the correct local server address or, if you are using the Cloud platform, your API key: -```bash +```bash pytest_codeblocks_skip="true" opik configure ``` @@ -39,7 +39,6 @@ from haystack.dataclasses import ChatMessage from opik.integrations.haystack import OpikConnector - pipe = Pipeline() # Add the OpikConnector component to the pipeline @@ -91,10 +90,18 @@ By default the `OpikConnector` will flush the trace to the Opik platform after e In order to make sure that all traces are logged to the Opik platform before you exit a script, you can use the `flush` method: ```python +from opik.integrations.haystack import OpikConnector from haystack.tracing import tracer +from haystack import Pipeline -# Pipeline definition +pipe = Pipeline() +# Add the OpikConnector component to the pipeline +pipe.add_component( + "tracer", OpikConnector("Chat example") +) + +# Pipeline definition tracer.actual_tracer.flush() ``` @@ -108,7 +115,7 @@ Disabling this feature may result in data loss if the program crashes before the The `OpikConnector` returns the logged trace ID in the pipeline run response. You can use this ID to update the trace with feedback scores or other metadata: -```python +```python pytest_codeblocks_skip="true" import opik response = pipe.run( diff --git a/apps/opik-documentation/documentation/docs/tracing/integrations/langgraph.md b/apps/opik-documentation/documentation/docs/tracing/integrations/langgraph.md index fde8fa5fa9..edd7d4cbd4 100644 --- a/apps/opik-documentation/documentation/docs/tracing/integrations/langgraph.md +++ b/apps/opik-documentation/documentation/docs/tracing/integrations/langgraph.md @@ -19,7 +19,7 @@ Opik provides a seamless integration with LangGraph, allowing you to easily log To use the [`OpikTracer`](https://www.comet.com/docs/opik/python-sdk-reference/integrations/langchain/OpikTracer.html) with LangGraph, you'll need to have both the `opik` and `langgraph` packages installed.
You can install them using pip: ```bash -pip install opik langgraph +pip install opik langgraph langchain ``` In addition, you can configure Opik using the `opik configure` command, which will prompt you for the correct local server address or, if you are using the Cloud platform, your API key: @@ -33,21 +33,37 @@ opik configure ``` You can use the [`OpikTracer`](https://www.comet.com/docs/opik/python-sdk-reference/integrations/langchain/OpikTracer.html) callback with any LangGraph graph by passing it in as an argument to the `stream` or `invoke` functions: ```python +from typing import List, Annotated +from pydantic import BaseModel from opik.integrations.langchain import OpikTracer +from langchain_core.messages import HumanMessage +from langgraph.graph import StateGraph, START, END +from langgraph.graph.message import add_messages # create your LangGraph graph -graph = ... -app = graph.compile(...) +class State(BaseModel): + messages: Annotated[list, add_messages] +def chatbot(state): + # Typically your LLM calls would be done here + return {"messages": "Hello, how can I help you today?"} + +graph = StateGraph(State) +graph.add_node("chatbot", chatbot) +graph.add_edge(START, "chatbot") +graph.add_edge("chatbot", END) +app = graph.compile() + +# Create the OpikTracer opik_tracer = OpikTracer(graph=app.get_graph(xray=True)) # Pass the OpikTracer callback to the Graph.stream function -for s in app.stream({"messages": [HumanMessage(content = QUESTION)]}, +for s in app.stream({"messages": [HumanMessage(content = "How to use LangGraph ?")]}, config={"callbacks": [opik_tracer]}): print(s) # Pass the OpikTracer callback to the Graph.invoke function -result = app.invoke({"messages": [HumanMessage(content = QUESTION)]}, +result = app.invoke({"messages": [HumanMessage(content = "How to use LangGraph ?")]}, config={"callbacks": [opik_tracer]}) ``` diff --git a/apps/opik-documentation/documentation/docs/tracing/integrations/llama_index.md b/apps/opik-documentation/documentation/docs/tracing/integrations/llama_index.md index 64ec4ce819..0a561e20b5 100644 --- a/apps/opik-documentation/documentation/docs/tracing/integrations/llama_index.md +++ b/apps/opik-documentation/documentation/docs/tracing/integrations/llama_index.md @@ -101,11 +101,7 @@ from llama_index.core import VectorStoreIndex, SimpleDirectoryReader documents = SimpleDirectoryReader("./data/paul_graham").load_data() index = VectorStoreIndex.from_documents(documents) query_engine = index.as_query_engine() -``` - -Now that the query engine is set up, we can use it to query the data: -```python response = query_engine.query("What did the author do growing up?") print(response) ``` diff --git a/apps/opik-documentation/documentation/docs/tracing/integrations/ollama.md b/apps/opik-documentation/documentation/docs/tracing/integrations/ollama.md index c4b9574be8..af773935fc 100644 --- a/apps/opik-documentation/documentation/docs/tracing/integrations/ollama.md +++ b/apps/opik-documentation/documentation/docs/tracing/integrations/ollama.md @@ -1,7 +123,7 @@ The local LLM call is now traced and logged to Opik.
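For orientation, a minimal sketch of the direct-call pattern referenced above (tracing an Ollama model through its OpenAI-compatible endpoint) might look as follows; the base URL, model name, and use of `track_openai` here are illustrative assumptions, not part of this change:

```python
# Illustrative sketch only: trace a direct Ollama call via its
# OpenAI-compatible endpoint (values below are assumptions).
from openai import OpenAI
from opik.integrations.openai import track_openai

client = OpenAI(
    base_url="http://localhost:11434/v1",  # Ollama's OpenAI-compatible API (assumed)
    api_key="ollama",  # the client requires a key, but Ollama ignores it
)
client = track_openai(client)  # every call made with this client is logged to Opik

response = client.chat.completions.create(
    model="llama3",  # assumed: any model already pulled with `ollama pull`
    messages=[{"role": "user", "content": "Say hello from Ollama"}],
)
print(response.choices[0].message.content)
```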
In order to trace Ollama calls made with LangChain, you will need to first install the `langchain-ollama` package: ```bash -pip install --quiet --upgrade langchain-ollama +pip install --quiet --upgrade langchain-ollama langchain ``` You will now be able to use the `OpikTracer` class to log all your Ollama calls made with LangChain to Opik: diff --git a/apps/opik-documentation/documentation/docs/tracing/integrations/overview.md b/apps/opik-documentation/documentation/docs/tracing/integrations/overview.md index 8b97a59037..8e8c30944c 100644 --- a/apps/opik-documentation/documentation/docs/tracing/integrations/overview.md +++ b/apps/opik-documentation/documentation/docs/tracing/integrations/overview.md @@ -16,12 +16,13 @@ Opik aims to make it as easy as possible to log, view and evaluate your LLM trac | aisuite | Log traces for all aisuite LLM calls | [Documentation](/tracing/integrations/aisuite.md) | [![Open Quickstart In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/comet-ml/opik/blob/master/apps/opik-documentation/documentation/docs/cookbook/aisuite.ipynb) | | Anthropic | Log traces for all Anthropic LLM calls | [Documentation](/tracing/integrations/anthropic.md) | [![Open Quickstart In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/comet-ml/opik/blob/master/apps/opik-documentation/documentation/docs/cookbook/anthropic.ipynb) | | Bedrock | Log traces for all AWS Bedrock LLM calls | [Documentation](/tracing/integrations/bedrock.md) | [![Open Quickstart In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/comet-ml/opik/blob/master/apps/opik-documentation/documentation/docs/cookbook/bedrock.ipynb) | +| Dify | Log traces and LLM calls for your Dify Apps | [Documentation](/tracing/integrations/dify.mdx) | | +| DSPy | Log traces for all DSPy runs | [Documentation](/tracing/integrations/dspy.md) | [![Open Quickstart In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/comet-ml/opik/blob/master/apps/opik-documentation/documentation/docs/cookbook/dspy.ipynb) | | LangGraph | Log traces for all LangGraph executions | [Documentation](/tracing/integrations/langgraph.md) | [![Open Quickstart In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/comet-ml/opik/blob/master/apps/opik-documentation/documentation/docs/cookbook/langgraph.ipynb) | | LlamaIndex | Log traces for all LlamaIndex LLM calls | [Documentation](/tracing/integrations/llama_index.md) | [![Open Quickstart In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/comet-ml/opik/blob/master/apps/opik-documentation/documentation/docs/cookbook/llama-index.ipynb) | | Ollama | Log traces for all Ollama LLM calls | [Documentation](/tracing/integrations/ollama.md) | [![Open Quickstart In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/comet-ml/opik/blob/master/apps/opik-documentation/documentation/docs/cookbook/ollama.ipynb) | | Predibase | Fine-tune and serve open-source LLMs | [Documentation](/tracing/integrations/predibase.md) | [![Open Quickstart In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/comet-ml/opik/blob/master/apps/opik-documentation/documentation/docs/cookbook/predibase.ipynb) | 
| Ragas | Evaluation framework for your Retrieval Augmented Generation (RAG) pipelines | [Documentation](/tracing/integrations/ragas.md) | [![Open Quickstart In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/comet-ml/opik/blob/master/apps/opik-documentation/documentation/docs/cookbook/ragas.ipynb) | | watsonx | Log traces for all watsonx LLM calls | [Documentation](/tracing/integrations/watsonx.md) | [![Open Quickstart In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/comet-ml/opik/blob/master/apps/opik-documentation/documentation/docs/cookbook/watsonx.ipynb) | -| Dify | Log traces and LLM calls for your Dify Apps | [Documentation](/tracing/integrations/dify.mdx) | | If you would like to see more integrations, please open an issue on our [GitHub repository](https://github.com/comet-ml/opik/issues/new/choose). diff --git a/apps/opik-documentation/documentation/docs/tracing/integrations/predibase.md b/apps/opik-documentation/documentation/docs/tracing/integrations/predibase.md index bd97df52ae..9575225035 100644 --- a/apps/opik-documentation/documentation/docs/tracing/integrations/predibase.md +++ b/apps/opik-documentation/documentation/docs/tracing/integrations/predibase.md @@ -1,6 +1,7 @@ --- sidebar_label: Predibase description: Describes how to track Predibase LLM calls using Opik +test_code_snippets: false --- # Using Opik with Predibase diff --git a/apps/opik-documentation/documentation/docs/tracing/integrations/ragas.md b/apps/opik-documentation/documentation/docs/tracing/integrations/ragas.md index 48fd243a44..4003e01abf 100644 --- a/apps/opik-documentation/documentation/docs/tracing/integrations/ragas.md +++ b/apps/opik-documentation/documentation/docs/tracing/integrations/ragas.md @@ -1,6 +1,7 @@ --- sidebar_label: Ragas description: Describes how to log Ragas scores to the Opik platform +test_code_snippets: false --- # Ragas diff --git a/apps/opik-documentation/documentation/docs/tracing/integrations/watsonx.md b/apps/opik-documentation/documentation/docs/tracing/integrations/watsonx.md index 2a632cbd1b..8f61507968 100644 --- a/apps/opik-documentation/documentation/docs/tracing/integrations/watsonx.md +++ b/apps/opik-documentation/documentation/docs/tracing/integrations/watsonx.md @@ -1,6 +1,7 @@ --- sidebar_label: watsonx description: Describes how to track watsonx LLM calls using Opik +test_code_snippets: false --- # watsonx diff --git a/apps/opik-documentation/documentation/docs/tracing/log_distributed_traces.md b/apps/opik-documentation/documentation/docs/tracing/log_distributed_traces.md index 5003e3cb12..7adda8ddf0 100644 --- a/apps/opik-documentation/documentation/docs/tracing/log_distributed_traces.md +++ b/apps/opik-documentation/documentation/docs/tracing/log_distributed_traces.md @@ -1,6 +1,7 @@ --- sidebar_label: Log Distributed Traces description: Describes how to log distributed traces to the Opik platform +test_code_snippets: false --- # Log Distributed Traces diff --git a/apps/opik-documentation/documentation/docs/tracing/sdk_configuration.mdx b/apps/opik-documentation/documentation/docs/tracing/sdk_configuration.mdx index 312268f4ef..8609cfe786 100644 --- a/apps/opik-documentation/documentation/docs/tracing/sdk_configuration.mdx +++ b/apps/opik-documentation/documentation/docs/tracing/sdk_configuration.mdx @@ -32,7 +32,7 @@ opik configure If you are self-hosting the platform, you can configure the SDK by running: -```python +```python 
pytest_codeblocks_skip="true" import opik opik.configure(use_local=True) @@ -40,7 +40,7 @@ opik.configure(use_local=True) or from the Command line: -```bash +```bash pytest_codeblocks_skip="true" opik configure --use_local ``` diff --git a/apps/opik-documentation/documentation/pytest.ini b/apps/opik-documentation/documentation/pytest.ini new file mode 100644 index 0000000000..5dd43de1b8 --- /dev/null +++ b/apps/opik-documentation/documentation/pytest.ini @@ -0,0 +1,7 @@ +[pytest] +testpaths = docs +python_files = *.md *.mdx +log_cli = true +log_cli_level = INFO +asyncio_default_fixture_loop_scope = module +norecursedirs = docs/cookbook diff --git a/apps/opik-documentation/documentation/pytest_codeblocks/README.md b/apps/opik-documentation/documentation/pytest_codeblocks/README.md new file mode 100644 index 0000000000..b86b4f449d --- /dev/null +++ b/apps/opik-documentation/documentation/pytest_codeblocks/README.md @@ -0,0 +1,48 @@ +# Pytest Codeblocks + +The pytest-codeblocks extension allows you to run code blocks from a markdown file using pytest. + +This extension was created to ensure that: + +1. Each markdown file is tested in isolation using a fresh Python environment - this allows us to detect missing package install statements. +2. It is possible to skip testing certain code blocks. + +## How it works + +The pytest-codeblocks extension uses pytest to run the code blocks in a markdown file. + +The extension works by: + +1. Collecting all the code blocks in a markdown file +2. Creating a new venv for each markdown file. Currently only Python and Bash code blocks are supported +3. Running each code block in this new env + +**Note: This extension only tests that the code successfully runs without raising an error; it does not test the output of the code.** + +## Using pytest-codeblocks + +To use the pytest-codeblocks extension, simply run: + +```bash +cd apps/opik-documentation/documentation/ + +pytest +``` + +## Advanced usage + +The `pytest-codeblocks` extension supports the following features: + +- Skipping all code blocks in a file: By setting `pytest_codeblocks_skip: true` (or `test_code_snippets: false`) in the frontmatter of the markdown file, all code blocks in the file will be skipped. + +- Skipping specific code blocks: Setting `pytest_codeblocks_skip="true"` in the header of the code block will skip it: + + ```` + ```python pytest_codeblocks_skip="true" + + print("test") + + ``` + ```` + +- Executing all previous code blocks: By setting `pytest_codeblocks_execute_previous: true` in the frontmatter of the markdown file, all previous code blocks in the file will be executed before the current code block. This is useful, for example, if you have multiple code blocks that depend on variables defined in a previous code block. diff --git a/apps/opik-documentation/documentation/pytest_codeblocks/__init__.py b/apps/opik-documentation/documentation/pytest_codeblocks/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/apps/opik-documentation/documentation/pytest_codeblocks/environment.py b/apps/opik-documentation/documentation/pytest_codeblocks/environment.py new file mode 100644 index 0000000000..53c1404f16 --- /dev/null +++ b/apps/opik-documentation/documentation/pytest_codeblocks/environment.py @@ -0,0 +1,36 @@ +from typing import Optional, List +import os +import tempfile +import subprocess +import venv +from datetime import datetime + + +def setup_env(packages: Optional[List[str]] = None): + """ + Create a virtual environment and install required packages.
+ + Args: + packages: Optional list of packages to install in the environment + + Returns: + A tuple of (env_path, python_path, pip_path) for the new environment + """ + # Create a virtual environment + env_path = os.path.join( + tempfile.gettempdir(), f"venv_{datetime.now().strftime('%Y%m%d%H%M%S')}" + ) + venv.create(env_path, with_pip=True, clear=True) + + # Get paths to executables + if os.name == "nt": # Windows + python_path = os.path.join(env_path, "Scripts", "python.exe") + pip_path = os.path.join(env_path, "Scripts", "pip.exe") + else: # Unix-like + python_path = os.path.join(env_path, "bin", "python") + pip_path = os.path.join(env_path, "bin", "pip") + + # Install required packages + if packages: + subprocess.run( + [pip_path, "install"] + packages, capture_output=True, check=True + ) + + return env_path, python_path, pip_path diff --git a/apps/opik-documentation/documentation/pytest_codeblocks/evaluators/__init__.py b/apps/opik-documentation/documentation/pytest_codeblocks/evaluators/__init__.py new file mode 100644 index 0000000000..dcd9188288 --- /dev/null +++ b/apps/opik-documentation/documentation/pytest_codeblocks/evaluators/__init__.py @@ -0,0 +1,7 @@ +from .bash_evaluator import BashEvaluator +from .python_evaluator import PythonEvaluator + +__all__ = [ + "BashEvaluator", + "PythonEvaluator", +] diff --git a/apps/opik-documentation/documentation/pytest_codeblocks/evaluators/bash_evaluator.py b/apps/opik-documentation/documentation/pytest_codeblocks/evaluators/bash_evaluator.py new file mode 100644 index 0000000000..d935d9053e --- /dev/null +++ b/apps/opik-documentation/documentation/pytest_codeblocks/evaluators/bash_evaluator.py @@ -0,0 +1,27 @@ +import os +import subprocess + + +class BashEvaluator: + def __init__(self, code, file=None, start_line=None, test=True): + self.code = code + self.file = file + self.start_line = start_line + self.test = test + + def set_env(self, env_path: str, python_path: str, pip_path: str): + self.env_path = env_path + self.python_path = python_path + self.pip_path = pip_path + + def evaluate(self): + env = os.environ.copy() + env.update( + { + "PATH": f"{os.path.dirname(self.pip_path)}:{env.get('PATH', '')}", + "VIRTUAL_ENV": self.env_path, + "PYTHONPATH": os.path.dirname(self.python_path), + } + ) + + # check=True makes a failing command raise CalledProcessError so the test fails + subprocess.run( + self.code, shell=True, env=env, capture_output=True, text=True, check=True + ) diff --git a/apps/opik-documentation/documentation/pytest_codeblocks/evaluators/python_evaluator.py b/apps/opik-documentation/documentation/pytest_codeblocks/evaluators/python_evaluator.py new file mode 100644 index 0000000000..0f2c69e800 --- /dev/null +++ b/apps/opik-documentation/documentation/pytest_codeblocks/evaluators/python_evaluator.py @@ -0,0 +1,39 @@ +import os +import subprocess +import tempfile + + +class PythonEvaluator: + def __init__(self, code, start_line=None, history=None): + self.code = code + self.start_line = start_line + self.history = history or [] + + def set_env(self, env_path: str, python_path: str, pip_path: str): + self.env_path = env_path + self.python_path = python_path + self.pip_path = pip_path + + def evaluate(self): + # Run the code in a subprocess + with tempfile.TemporaryDirectory() as temp_dir: + script_path = os.path.join(temp_dir, "script.py") + with open(script_path, "w") as f: + f.write("\n".join([*self.history, self.code])) + + env = os.environ.copy() + env.update( + { + "PATH": f"{os.path.dirname(self.pip_path)}:{env.get('PATH', '')}", + "VIRTUAL_ENV": self.env_path, + "PYTHONPATH": os.path.dirname(self.python_path), + } + ) + + subprocess.run( + [self.python_path, script_path], + capture_output=True, + text=True, + env=env, +
check=True, + ) diff --git a/apps/opik-documentation/documentation/pytest_codeblocks/parsing_utils.py b/apps/opik-documentation/documentation/pytest_codeblocks/parsing_utils.py new file mode 100644 index 0000000000..48effdc887 --- /dev/null +++ b/apps/opik-documentation/documentation/pytest_codeblocks/parsing_utils.py @@ -0,0 +1,114 @@ +from mrkdwn_analysis import MarkdownAnalyzer +import logging +from . import evaluators +from typing import List, Union + +LOGGER = logging.getLogger(__name__) + + +def _get_code_block_language(language: str): + """ + Extract the language of a code block from the string that follows + the opening ``` of its fence. + """ + params = language.split(" ") + if (len(params) == 1) and params[0] == "": + return None + else: + return params[0] + + +def _reindent_code_block(code_block): + first_line = code_block.split("\n")[0] + leading_spaces = len(first_line) - len(first_line.lstrip()) + + return "\n".join([x[leading_spaces:] for x in code_block.split("\n")]) + + +def get_page_frontmatter(path): + headers = {} + with open(path, "r") as f: + lines = f.read().split("\n") + if lines[0] != "---": + return headers + else: + for line in lines[1:]: + if line.startswith("---"): + break + if ":" in line: + # Split on the first colon only, so values may themselves contain ":" + k, v = line.split(":", 1) + v = v.strip() + k = k.strip() + if v == "true" or v == "True": + v = True + if v == "false" or v == "False": + v = False + headers[k] = v + return headers + + +def check_skip_code_block(mk_language): + language_params = mk_language.split(" ") + for params in language_params: + if "=" in params: + title, value = params.split("=") + if title == "pytest_codeblocks_skip" and ( + value.strip() == "true" or value.strip() == "True" + ): + return True + + return False + + +def check_skip_frontmatter(path): + frontmatter = get_page_frontmatter(path) + # Both `pytest_codeblocks_skip: true` and `test_code_snippets: false` disable testing + if frontmatter.get("test_code_snippets", True) is False: + return True + return frontmatter.get("pytest_codeblocks_skip", False) + + +def get_code_blocks( + path: str, +) -> List[Union[evaluators.PythonEvaluator, evaluators.BashEvaluator]]: + LOGGER.debug(f"Finding code blocks in {path}") + + if check_skip_frontmatter(path): + LOGGER.debug(f"Skipping {path} because code block testing is disabled in the frontmatter") + return [] + + page_frontmatter = get_page_frontmatter(path) + code_blocks = [] + markdown = MarkdownAnalyzer(path) + mrkdwn_analysis_code_blocks = markdown.identify_code_blocks().get("Code block", []) + for i, mk_code_block in enumerate(mrkdwn_analysis_code_blocks): + language = _get_code_block_language(mk_code_block["language"]) + start_line = mk_code_block["start_line"] + + if language not in ["bash", "python"]: + LOGGER.debug( + f"Skipping code block in {path}:{start_line} because language '{language}' is not supported." + ) + continue + + if check_skip_code_block(mk_code_block["language"]): + LOGGER.debug( + f"Skipping code block in {path}:{start_line} because pytest_codeblocks_skip is set to true."
+ ) + continue + + code_str = _reindent_code_block(mk_code_block["content"]) + if language == "python": + if page_frontmatter.get("pytest_codeblocks_execute_previous", False): + history = [x["content"] for x in mrkdwn_analysis_code_blocks[:i]] + else: + history = [] + + code_blocks.append( + evaluators.PythonEvaluator(code_str, start_line, history=history) + ) + elif language == "bash": + code_blocks.append( + evaluators.BashEvaluator(code=code_str, start_line=start_line) + ) + + LOGGER.debug(f"Found {len(code_blocks)} code blocks to test in {path}") + + return code_blocks diff --git a/apps/opik-documentation/documentation/pytest_codeblocks/pytest_integration.py b/apps/opik-documentation/documentation/pytest_codeblocks/pytest_integration.py new file mode 100644 index 0000000000..3de19c177f --- /dev/null +++ b/apps/opik-documentation/documentation/pytest_codeblocks/pytest_integration.py @@ -0,0 +1,94 @@ +import pytest +import logging +from pytest import Collector, ExceptionInfo, Session +from _pytest._code.code import TerminalRepr +from typing import Union, Optional, Iterator, Any +from pathlib import Path +import os +from . import evaluators, reporting, parsing_utils, environment + +LOGGER = logging.getLogger(__name__) + + +class OpikDocsTestFile(pytest.File): + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self.venv_path = None + self.venv_python = None + self.venv_pip = None + + @classmethod + def from_parent( + cls, parent: Session, path: Path, **kwargs: Any + ) -> "OpikDocsTestFile": + item = super().from_parent(parent=parent, path=path, **kwargs) + + return item + + def collect(self) -> Iterator["OpikDocsTestItem"]: + code_blocks = parsing_utils.get_code_blocks(self.path) + + for code_block in code_blocks: + if not self.venv_path: + LOGGER.info("Setting up venv for code snippets in: %s", self.path) + self.venv_path, self.venv_python, self.venv_pip = environment.setup_env( + ["opik"] + ) + + code_block.set_env( + env_path=self.venv_path, + python_path=self.venv_python, + pip_path=self.venv_pip, + ) + + yield OpikDocsTestItem.from_parent( + parent=self, + name=f"code_block_starting_line_{code_block.start_line}", + test_case=code_block, + ) + + +class OpikDocsTestItem(pytest.Item): + def __init__( + self, + name, + parent, + test_case: Optional[ + Union[evaluators.PythonEvaluator, evaluators.BashEvaluator] + ] = None, + ): + super().__init__(name, parent) + self.test_case = test_case + + @classmethod + def from_parent(cls, parent, name, test_case, **kwargs): + item = super().from_parent(parent=parent, name=name, **kwargs) + item.test_case = test_case + return item + + def runtest(self): + if self.test_case is not None: + self.test_case.evaluate() + + def repr_failure( + self, excinfo: ExceptionInfo[BaseException] + ) -> Union[str, TerminalRepr]: + return reporting.format_error(self.fspath, self.test_case, excinfo) + + +def pytest_collect_file(parent: Collector, file_path: Path) -> Optional[pytest.File]: + """Hook to collect markdown (.md and .mdx) files for testing""" + test_path = parent.config.args[0] if parent.config.args else None + + if test_path: + # Convert both paths to absolute and normalized form for comparison + test_path = os.path.abspath(os.path.normpath(test_path)) + current_path = os.path.abspath(os.path.normpath(str(file_path))) + + # Only collect if this is the specific file being tested + if current_path.startswith(test_path) and file_path.suffix in (".mdx", ".md"): + return OpikDocsTestFile.from_parent(parent=parent, path=file_path) + elif file_path.suffix in
(".mdx", ".md"): + # Fallback to old behavior if no specific path provided + return OpikDocsTestFile.from_parent(parent=parent, path=file_path) + return None diff --git a/apps/opik-documentation/documentation/pytest_codeblocks/reporting.py b/apps/opik-documentation/documentation/pytest_codeblocks/reporting.py new file mode 100644 index 0000000000..15fc6862bb --- /dev/null +++ b/apps/opik-documentation/documentation/pytest_codeblocks/reporting.py @@ -0,0 +1,36 @@ +from pytest import ExceptionInfo +from typing import Union +from . import evaluators + + +def format_error( + path: str, + test_case: Union[evaluators.PythonEvaluator, evaluators.BashEvaluator], + excinfo: ExceptionInfo[BaseException], +) -> str: + # Get the error type and message + error_type = excinfo.type.__name__ + + # Get the line number from the traceback + traceback = excinfo.traceback + last_entry = traceback[-1] # Get the last frame in the traceback + error_type = excinfo.type.__name__ + if error_type == "CalledProcessError": + error = excinfo.value + actual_line = 0 + + error_msg = "" + skip = True + for line in error.stderr.split("\n"): + if 'script.py", line' in line: + skip = False + line_part = line.split("line")[1].split(",")[0].strip() + actual_line = test_case.start_line + int(line_part) - 1 + if not skip: + error_msg += "\n" + line + else: + actual_line = test_case.start_line + last_entry.lineno + error_msg = str(excinfo.value) + + # Format like a standard Python error + return f"{path}:{actual_line}: {error_msg}" diff --git a/apps/opik-documentation/documentation/requirements.txt b/apps/opik-documentation/documentation/requirements.txt index 87906ccb40..b920daf862 100644 --- a/apps/opik-documentation/documentation/requirements.txt +++ b/apps/opik-documentation/documentation/requirements.txt @@ -1 +1,4 @@ -jupyter \ No newline at end of file +jupyter +markdown-analysis +pytest-asyncio +pytest-custom_exit_code diff --git a/apps/opik-documentation/documentation/sidebars.ts b/apps/opik-documentation/documentation/sidebars.ts index 4c42861003..7aa40cbcf1 100644 --- a/apps/opik-documentation/documentation/sidebars.ts +++ b/apps/opik-documentation/documentation/sidebars.ts @@ -42,7 +42,6 @@ const sidebars: SidebarsConfig = { "tracing/log_distributed_traces", "tracing/annotate_traces", "tracing/cost_tracking", - "tracing/production_monitoring", "tracing/sdk_configuration", "tracing/export_data", { @@ -58,6 +57,7 @@ const sidebars: SidebarsConfig = { "tracing/integrations/anthropic", "tracing/integrations/bedrock", "tracing/integrations/dify", + "tracing/integrations/dspy", "tracing/integrations/gemini", "tracing/integrations/groq", "tracing/integrations/haystack", @@ -116,7 +116,7 @@ const sidebars: SidebarsConfig = { type: "category", label: "Production", collapsed: true, - items: ["production/gateway"], + items: ["production/gateway", "production/production_monitoring"], }, { type: "category", @@ -130,6 +130,7 @@ const sidebars: SidebarsConfig = { "cookbook/aisuite", "cookbook/anthropic", "cookbook/bedrock", + "cookbook/dspy", "cookbook/gemini", "cookbook/groq", "cookbook/haystack", diff --git a/apps/opik-documentation/documentation/static/img/cookbook/dspy_trace_cookbook.png b/apps/opik-documentation/documentation/static/img/cookbook/dspy_trace_cookbook.png new file mode 100644 index 0000000000..97c966f34e Binary files /dev/null and b/apps/opik-documentation/documentation/static/img/cookbook/dspy_trace_cookbook.png differ diff --git a/apps/opik-documentation/python-sdk-docs/requirements.txt 
b/apps/opik-documentation/python-sdk-docs/requirements.txt index 525ad61969..33b9883f2d 100644 --- a/apps/opik-documentation/python-sdk-docs/requirements.txt +++ b/apps/opik-documentation/python-sdk-docs/requirements.txt @@ -10,3 +10,5 @@ langchain_core botocore anthropic haystack-ai +dspy-ai +cloudpickle diff --git a/apps/opik-documentation/python-sdk-docs/source/index.rst b/apps/opik-documentation/python-sdk-docs/source/index.rst index 9c07d8d9ad..c9dbcc4a7d 100644 --- a/apps/opik-documentation/python-sdk-docs/source/index.rst +++ b/apps/opik-documentation/python-sdk-docs/source/index.rst @@ -165,12 +165,13 @@ You can learn more about the `opik` python SDK in the following sections: :caption: Integrations :maxdepth: 1 - integrations/openai/index integrations/anthropic/index - integrations/langchain/index - integrations/haystack/index integrations/bedrock/index + integrations/dspy/index + integrations/haystack/index + integrations/langchain/index integrations/llama_index/index + integrations/openai/index .. toctree:: :caption: Evaluation diff --git a/apps/opik-documentation/python-sdk-docs/source/integrations/dspy/OpikCallback.rst b/apps/opik-documentation/python-sdk-docs/source/integrations/dspy/OpikCallback.rst new file mode 100644 index 0000000000..f758737690 --- /dev/null +++ b/apps/opik-documentation/python-sdk-docs/source/integrations/dspy/OpikCallback.rst @@ -0,0 +1,5 @@ +OpikCallback +============ + +.. autoclass:: opik.integrations.dspy.OpikCallback + :members: diff --git a/apps/opik-documentation/python-sdk-docs/source/integrations/dspy/index.rst b/apps/opik-documentation/python-sdk-docs/source/integrations/dspy/index.rst new file mode 100644 index 0000000000..ece283efbd --- /dev/null +++ b/apps/opik-documentation/python-sdk-docs/source/integrations/dspy/index.rst @@ -0,0 +1,32 @@ +DSPy +==== + +Opik integrates with DSPy to allow you to log your DSPy runs to the Opik platform:: + + import dspy + from opik.integrations.dspy.callback import OpikCallback + + project_name = "DSPY" + + lm = dspy.LM( + model="openai/gpt-4o-mini", + ) + dspy.configure(lm=lm) + + + opik_callback = OpikCallback(project_name=project_name) + dspy.settings.configure( + callbacks=[opik_callback], + ) + + cot = dspy.ChainOfThought("question -> answer") + cot(question="What is the meaning of life?") + + +You can learn more about the `OpikCallback` in the following section: + +.. toctree:: + :maxdepth: 4 + :titlesonly: + + OpikCallback diff --git a/apps/opik-frontend/src/api/datasets/useDatasetCreateMutation.ts b/apps/opik-frontend/src/api/datasets/useDatasetCreateMutation.ts index 165d1399a7..74c4649cf1 100644 --- a/apps/opik-frontend/src/api/datasets/useDatasetCreateMutation.ts +++ b/apps/opik-frontend/src/api/datasets/useDatasetCreateMutation.ts @@ -1,11 +1,10 @@ import { useMutation, useQueryClient } from "@tanstack/react-query"; import { AxiosError } from "axios"; import get from "lodash/get"; -import last from "lodash/last"; - import api, { DATASETS_REST_ENDPOINT } from "@/api/api"; import { Dataset } from "@/types/datasets"; import { useToast } from "@/components/ui/use-toast"; +import { extractIdFromLocation } from "@/lib/utils"; type UseDatasetCreateMutationParams = { dataset: Partial; @@ -31,7 +30,7 @@ const useDatasetCreateMutation = () => { ? 
data : { ...dataset, - id: last(headers?.location?.split("/")), + id: extractIdFromLocation(headers?.location), }; }, onMutate: async (params: UseDatasetCreateMutationParams) => { diff --git a/apps/opik-frontend/src/api/projects/useProjectCreateMutation.ts b/apps/opik-frontend/src/api/projects/useProjectCreateMutation.ts index b44bf6982b..a3d85c8892 100644 --- a/apps/opik-frontend/src/api/projects/useProjectCreateMutation.ts +++ b/apps/opik-frontend/src/api/projects/useProjectCreateMutation.ts @@ -4,6 +4,7 @@ import api, { PROJECTS_REST_ENDPOINT } from "@/api/api"; import { Project } from "@/types/projects"; import { AxiosError } from "axios"; import { useToast } from "@/components/ui/use-toast"; +import { extractIdFromLocation } from "@/lib/utils"; type UseProjectCreateMutationParams = { project: Partial; @@ -15,10 +16,14 @@ const useProjectCreateMutation = () => { return useMutation({ mutationFn: async ({ project }: UseProjectCreateMutationParams) => { - const { data } = await api.post(PROJECTS_REST_ENDPOINT, { + const { headers } = await api.post(PROJECTS_REST_ENDPOINT, { ...project, }); - return data; + + // TODO: workaround to return the newly created resource until the BE implementation is done + const id = extractIdFromLocation(headers?.location); + + return { id }; }, onError: (error: AxiosError) => { const message = get( diff --git a/apps/opik-frontend/src/components/pages/ProjectsPage/AddEditProjectDialog.tsx b/apps/opik-frontend/src/components/pages/ProjectsPage/AddEditProjectDialog.tsx index bd62d26720..44da48d9fd 100644 --- a/apps/opik-frontend/src/components/pages/ProjectsPage/AddEditProjectDialog.tsx +++ b/apps/opik-frontend/src/components/pages/ProjectsPage/AddEditProjectDialog.tsx @@ -14,6 +14,8 @@ import useProjectCreateMutation from "@/api/projects/useProjectCreateMutation"; import { Project } from "@/types/projects"; import { Textarea } from "@/components/ui/textarea"; import useProjectUpdateMutation from "@/api/projects/useProjectUpdateMutation"; +import { useNavigate } from "@tanstack/react-router"; +import useAppStore from "@/store/AppStore"; type AddEditProjectDialogProps = { project?: Project; @@ -26,6 +28,9 @@ const AddEditProjectDialog: React.FC = ({ open, setOpen, }) => { + const navigate = useNavigate(); + const workspaceName = useAppStore((state) => state.activeWorkspaceName); + const { mutate: createMutate } = useProjectCreateMutation(); const { mutate: updateMutate } = useProjectUpdateMutation(); const [name, setName] = useState(project ?
project.name : ""); @@ -47,14 +52,38 @@ const AddEditProjectDialog: React.FC = ({ }, }); } else { - createMutate({ - project: { - name, - ...(description && { description }), + createMutate( + { + project: { + name, + ...(description && { description }), + }, }, - }); + { + onSuccess(projectData) { + if (!projectData.id) return; + + navigate({ + to: "/$workspaceName/projects/$projectId/traces", + params: { + projectId: projectData.id, + workspaceName, + }, + }); + }, + }, + ); } - }, [createMutate, description, isEdit, name, project, updateMutate]); + }, [ + createMutate, + description, + isEdit, + name, + navigate, + project, + updateMutate, + workspaceName, + ]); return ( diff --git a/apps/opik-frontend/src/components/shared/TraceDetailsPanel/TraceTreeViewer/treeRenderers.tsx b/apps/opik-frontend/src/components/shared/TraceDetailsPanel/TraceTreeViewer/treeRenderers.tsx index 762f56853d..09aa217a54 100644 --- a/apps/opik-frontend/src/components/shared/TraceDetailsPanel/TraceTreeViewer/treeRenderers.tsx +++ b/apps/opik-frontend/src/components/shared/TraceDetailsPanel/TraceTreeViewer/treeRenderers.tsx @@ -79,6 +79,11 @@ export const treeRenderers: TreeRenderProps = { props.context.isFocused && styles.focused, )} {...(props.context.itemContainerWithoutChildrenProps as object)} + onClick={() => { + if (props.context.interactiveElementProps.onFocus) { + props.context.focusItem(); + } + }} onFocus={props.context.interactiveElementProps.onFocus} > diff --git a/apps/opik-frontend/src/lib/utils.ts b/apps/opik-frontend/src/lib/utils.ts index 154fa9ea9f..faa4e5bea5 100644 --- a/apps/opik-frontend/src/lib/utils.ts +++ b/apps/opik-frontend/src/lib/utils.ts @@ -7,6 +7,7 @@ import sample from "lodash/sample"; import mapKeys from "lodash/mapKeys"; import snakeCase from "lodash/snakeCase"; import { DEFAULT_WORKSPACE_NAME } from "@/constants/user"; +import last from "lodash/last"; const BASE_DOCUMENTATION_URL = "https://www.comet.com/docs/opik"; @@ -81,3 +82,6 @@ export const calculateWorkspaceName = ( workspaceName: string, defaultName = "Personal", ) => (workspaceName === DEFAULT_WORKSPACE_NAME ? defaultName : workspaceName); + +export const extractIdFromLocation = (location?: string) => + last(location?.split("/")); diff --git a/deployment/docker-compose/README.md b/deployment/docker-compose/README.md index 4409e7bd3f..42a5d09ee4 100644 --- a/deployment/docker-compose/README.md +++ b/deployment/docker-compose/README.md @@ -43,14 +43,13 @@ Run the following command to start the services and expose the ports: docker compose -f docker-compose.yaml -f docker-compose.override.yaml up -d ``` -This will expose the following services to the host machine - -- Redis: Available on port 6379 -- ClickHouse: Available on ports 8123 (HTTP) and 9000 (Native Protocol) -- MySQL: Available on port 3306 -- Backend: Available on ports 8080 and 3003 - +This will expose the following services to the host machine: +- Redis: Available on port 6379. +- ClickHouse: Available on ports 8123 (HTTP) and 9000 (Native Protocol). +- MySQL: Available on port 3306. +- Backend: Available on ports 8080 (HTTP) and 3003 (OpenAPI specification). +- Frontend: Available on port 5173.
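+
+For example, assuming the default ports above and that `curl` and `nc` are available on your machine, you can quickly check that the exposed services are reachable:
+
+```bash
+# ClickHouse serves a health check endpoint on its HTTP port
+curl http://localhost:8123/ping
+
+# Probe the Redis, MySQL and backend TCP ports
+nc -z localhost 6379
+nc -z localhost 3306
+nc -z localhost 8080
+```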
## Stop opik diff --git a/deployment/docker-compose/docker-compose.override.yaml b/deployment/docker-compose/docker-compose.override.yaml index 656ee239ea..b8f6ecbc5d 100644 --- a/deployment/docker-compose/docker-compose.override.yaml +++ b/deployment/docker-compose/docker-compose.override.yaml @@ -15,8 +15,8 @@ services: backend: ports: - "8080:8080" # Exposing backend HTTP port to host - - "3003:3003" # Exposing additional backend port to host + - "3003:3003" # Exposing backend OpenAPI specification port to host frontend: ports: - - "5173:5173" # Exposing frontend dev server port to host + - "5173:5173" # Exposing frontend server port to host diff --git a/deployment/docker-compose/docker-compose.yaml b/deployment/docker-compose/docker-compose.yaml index f58d6acba2..e66f4252f3 100644 --- a/deployment/docker-compose/docker-compose.yaml +++ b/deployment/docker-compose/docker-compose.yaml @@ -14,8 +14,6 @@ services: timeout: 1s interval: 1s retries: 300 - ports: - - "3306" volumes: - mysql:/var/lib/mysql/:type=volume,source=~/opik/mysql @@ -23,8 +21,6 @@ services: image: redis:7.2.4-alpine3.19 hostname: redis command: redis-server --requirepass opik - ports: - - '6379' healthcheck: test: [ "CMD", "nc", "-z", "localhost", "6379" ] interval: 2s @@ -42,9 +38,6 @@ services: # Enables SQL-driven Access Control and Account Management: # https://clickhouse.com/docs/en/operations/access-rights#enabling-access-control CLICKHOUSE_DEFAULT_ACCESS_MANAGEMENT: 1 - ports: - - "8123" # HTTP default port - - "9000" # Native Protocol port volumes: - clickhouse:/var/lib/clickhouse/:type=volume,source=~/opik/clickhouse/data - clickhouse-server:/var/log/clickhouse-server/:type=volume,source=~/opik/clickhouse/logs @@ -90,8 +83,7 @@ services: OTEL_EXPORTER_OTLP_METRICS_TEMPORALITY_PREFERENCE: delta OPIK_USAGE_REPORT_ENABLED: ${OPIK_USAGE_REPORT_ENABLED:-true} ports: - - "8080" - - "3003" + - "3003" # OpenAPI specification port depends_on: mysql: condition: service_healthy @@ -105,7 +97,7 @@ services: dockerfile: Dockerfile hostname: frontend ports: - - "5173:5173" + - "5173:5173" # Frontend server port extra_hosts: - "apihost:host-gateway" volumes: diff --git a/scripts/README.md b/scripts/README.md index dc2ee38031..ad79192699 100644 --- a/scripts/README.md +++ b/scripts/README.md @@ -7,3 +7,28 @@ Scripts in this folder are meant to be run from the repository base folder. Exam ```bash ./scripts/generate_openapi.sh ``` + +## Scripts list + +### `generate_openapi.sh` + +Use this script to generate an updated OpenAPI specification file for the documentation application and the SDKs, +and to build the autogenerated SDK code for any language supported by Fern. + +You need to install Fern to run this script. + +See: + +- https://buildwithfern.com/ + +### `start_openapi_server.sh` + +Use this script to start a local server that serves an updated OpenAPI specification file, so you can test the +specification quickly.
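+
+For example, from the repository base folder:
+
+```bash
+./scripts/start_openapi_server.sh
+```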
+ +Open http://localhost:3003/ in your browser to view the specification. + +See: + +- https://github.com/Redocly/redoc +- https://docs.oracle.com/en/java/javase/23/docs/specs/man/jwebserver.html diff --git a/scripts/start_openapi_server.sh b/scripts/start_openapi_server.sh new file mode 100755 index 0000000000..54829c4392 --- /dev/null +++ b/scripts/start_openapi_server.sh @@ -0,0 +1,20 @@ +#!/bin/bash +set -e + +REDOC_RELATIVE_PATH="apps/opik-backend/redoc" + +# Generate openapi.yaml +cd apps/opik-backend +mvn compile swagger:resolve +cd - + +# Copy openapi.yaml for Redoc +cp apps/opik-backend/target/openapi.yaml "$REDOC_RELATIVE_PATH" + +# Resolve the absolute path of the Redoc directory, as jwebserver doesn't work with relative paths +cd "$REDOC_RELATIVE_PATH" +REDOC_ABSOLUTE_PATH=$(pwd) +cd - + +# Start the Redoc server +jwebserver -d "$REDOC_ABSOLUTE_PATH" -b 0.0.0.0 -p 3003 diff --git a/sdks/python/setup.py b/sdks/python/setup.py index 918cce3fdd..fecdf2d7f7 100644 --- a/sdks/python/setup.py +++ b/sdks/python/setup.py @@ -56,9 +56,10 @@ ], "console_scripts": ["opik = opik.cli:cli"], }, - include_package_data=True, keywords="opik", name="opik", + include_package_data=True, + package_data={"opik": ["py.typed"]}, packages=find_packages("src"), package_dir={"": "src"}, url="https://www.comet.com", diff --git a/sdks/python/src/opik/py.typed b/sdks/python/src/opik/py.typed new file mode 100644 index 0000000000..e69de29bb2 diff --git a/tests_end_to_end/page_objects/PromptPage.py b/tests_end_to_end/page_objects/PromptPage.py index a2f0d208ff..7a3e277a06 100644 --- a/tests_end_to_end/page_objects/PromptPage.py +++ b/tests_end_to_end/page_objects/PromptPage.py @@ -12,7 +12,7 @@ def edit_prompt(self, new_prompt: str): self.page.get_by_role("button", name="Edit prompt").click() self.page.get_by_role("textbox", name="Prompt").click() self.page.get_by_role("textbox", name="Prompt").fill(new_prompt) - self.page.get_by_role("button", name="Edit prompt").click() + self.page.get_by_role("button", name="Create new commit").click() def click_most_recent_commit(self): self.page.get_by_role("tab", name="Commits").click() diff --git a/version.txt b/version.txt index d0149fef74..80e78df683 100644 --- a/version.txt +++ b/version.txt @@ -1 +1 @@ -1.3.4 +1.3.5