diff --git a/.github/workflows/documentation_codeblock_tests.yml b/.github/workflows/documentation_codeblock_tests.yml
index 679f240291..acb4fa23ea 100644
--- a/.github/workflows/documentation_codeblock_tests.yml
+++ b/.github/workflows/documentation_codeblock_tests.yml
@@ -24,20 +24,20 @@ jobs:
# Get list of changed files in docs directory
if [[ "${{ github.event_name }}" == "pull_request" ]]; then
# For pull requests, compare with base branch
- echo "paths=$(
- git diff --name-only origin/${{ github.base_ref }} |
- grep -E '^apps/opik-documentation/documentation/docs/.*\.(md|mdx)$' |
- sed 's|apps/opik-documentation/documentation/||' |
- jq -R -s -c 'split("\n")[:-1]'
- )" >> $GITHUB_OUTPUT
+ CHANGED_FILES=$(git diff --name-only origin/${{ github.base_ref }} | grep -E '^apps/opik-documentation/documentation/fern/docs/.*\.(md|mdx)$' || true)
+ if [ -n "$CHANGED_FILES" ]; then
+ echo "paths=$(echo "$CHANGED_FILES" | sed 's|apps/opik-documentation/documentation/||' | jq -R -s 'split("\n")[:-1]' -c)" >> $GITHUB_OUTPUT
+ else
+ echo "paths=[]" >> $GITHUB_OUTPUT
+ fi
else
- # For manual runs and scheduled runs, check all files
- echo "paths=$(
- (
- ls -d docs/*/ 2>/dev/null;
- find docs -maxdepth 1 -type f -name "*.md" -o -name "*.mdx"
- ) | jq -R -s -c 'split("\n")[:-1]'
- )" >> $GITHUB_OUTPUT
+ # For manual and scheduled runs, get all md/mdx files
+ FILES=$(find fern/docs -type f \( -name "*.md" -o -name "*.mdx" \))
+ if [ -n "$FILES" ]; then
+ echo "paths=$(echo "$FILES" | jq -R -s 'split("\n")[:-1]' -c)" >> $GITHUB_OUTPUT
+ else
+ echo "paths=[]" >> $GITHUB_OUTPUT
+ fi
fi
test:
diff --git a/apps/opik-documentation/documentation/docs/cookbook/instructor.ipynb b/apps/opik-documentation/documentation/docs/cookbook/instructor.ipynb
index 7512964544..fc50a66c27 100644
--- a/apps/opik-documentation/documentation/docs/cookbook/instructor.ipynb
+++ b/apps/opik-documentation/documentation/docs/cookbook/instructor.ipynb
@@ -26,7 +26,7 @@
"metadata": {},
"outputs": [],
"source": [
- "%pip install --upgrade --quiet opik instructor"
+ "%pip install --upgrade --quiet opik instructor anthropic google-generativeai google-genai"
]
},
{
diff --git a/apps/opik-documentation/documentation/fern/docs.yml b/apps/opik-documentation/documentation/fern/docs.yml
index 12dc4694c9..aad4200362 100644
--- a/apps/opik-documentation/documentation/fern/docs.yml
+++ b/apps/opik-documentation/documentation/fern/docs.yml
@@ -78,6 +78,9 @@ navigation:
- page: Log traces
path: docs/tracing/log_traces.mdx
slug: log_traces
+ - page: Log conversations
+ path: docs/tracing/log_chat_conversations.mdx
+ slug: log_chat_conversations
- page: Log agents
path: docs/tracing/log_agents.mdx
slug: log_agents
diff --git a/apps/opik-documentation/documentation/fern/docs/changelog/2025-03-03.mdx b/apps/opik-documentation/documentation/fern/docs/changelog/2025-03-03.mdx
new file mode 100644
index 0000000000..6a988bc99e
--- /dev/null
+++ b/apps/opik-documentation/documentation/fern/docs/changelog/2025-03-03.mdx
@@ -0,0 +1,18 @@
+**Opik Dashboard**:
+
+- Chat conversations can now be reviewed in the platform
+
+
+
+
+
+- Added the ability to leave comments on experiments
+- You can now leave reasons on feedback scores; see [Annotating Traces](/tracing/annotate_traces)
+- Added support for Gemini in the playground
+- A thumbs up / down feedback score definition is now added to all projects by default to make it easier
+ to annotate traces.
+
+**JS / TS SDK**:
+
+- The AnswerRelevanceMetric can now be run without providing a context field
+- Updated how metrics are uploaded to optimize data ingestion
diff --git a/apps/opik-documentation/documentation/fern/docs/cookbook/anthropic.mdx b/apps/opik-documentation/documentation/fern/docs/cookbook/anthropic.mdx
index d9bdfd2446..a58446090a 100644
--- a/apps/opik-documentation/documentation/fern/docs/cookbook/anthropic.mdx
+++ b/apps/opik-documentation/documentation/fern/docs/cookbook/anthropic.mdx
@@ -46,7 +46,9 @@ import os
from opik.integrations.anthropic import track_anthropic
anthropic_client = anthropic.Anthropic()
-anthropic_client = track_anthropic(anthropic_client, project_name="anthropic-integration-demo")
+anthropic_client = track_anthropic(
+ anthropic_client, project_name="anthropic-integration-demo"
+)
```
diff --git a/apps/opik-documentation/documentation/fern/docs/cookbook/gemini.mdx b/apps/opik-documentation/documentation/fern/docs/cookbook/gemini.mdx
index 6761faf80e..9391522dcd 100644
--- a/apps/opik-documentation/documentation/fern/docs/cookbook/gemini.mdx
+++ b/apps/opik-documentation/documentation/fern/docs/cookbook/gemini.mdx
@@ -10,7 +10,7 @@ Opik integrates with Gemini to provide a simple way to log traces for all Gemini
```python
-%pip install --upgrade opik google-generativeai litellm
+%pip install --upgrade opik google-genai litellm
```
@@ -28,45 +28,34 @@ First, we will set up our OpenAI API keys.
```python
import os
import getpass
-import google.generativeai as genai
-if "GEMINI_API_KEY" not in os.environ:
- genai.configure(api_key=getpass.getpass("Enter your Gemini API key: "))
+if "GOOGLE_API_KEY" not in os.environ:
+ os.environ["GOOGLE_API_KEY"] = getpass.getpass("Enter your Gemini API key: ")
```
-## Configure LiteLLM
+## Logging traces
-Add the LiteLLM OpikTracker to log traces and steps to Opik:
+Now each completion will log a separate trace to Opik:
```python
-import litellm
-import os
-from litellm.integrations.opik.opik import OpikLogger
+from google import genai
from opik import track
-from opik.opik_context import get_current_span_data
+from opik.integrations.genai import track_genai
os.environ["OPIK_PROJECT_NAME"] = "gemini-integration-demo"
-opik_logger = OpikLogger()
-litellm.callbacks = [opik_logger]
-```
-
-## Logging traces
-
-Now each completion will logs a separate trace to LiteLLM:
+client = genai.Client()
+gemini_client = track_genai(client)
-```python
prompt = """
Write a short two sentence story about Opik.
"""
-response = litellm.completion(
- model="gemini/gemini-pro",
- messages=[{"role": "user", "content": prompt}],
+response = gemini_client.models.generate_content(
+ model="gemini-2.0-flash-001", contents=prompt
)
-
-print(response.choices[0].message.content)
+print(response.text)
```
The prompt and response messages are automatically logged to Opik and can be viewed in the UI.
@@ -81,31 +70,19 @@ If you have multiple steps in your LLM pipeline, you can use the `track` decorat
```python
@track
def generate_story(prompt):
- response = litellm.completion(
- model="gemini/gemini-pro",
- messages=[{"role": "user", "content": prompt}],
- metadata={
- "opik": {
- "current_span_data": get_current_span_data(),
- },
- },
+ response = gemini_client.models.generate_content(
+ model="gemini-2.0-flash-001", contents=prompt
)
- return response.choices[0].message.content
+ return response.text
@track
def generate_topic():
prompt = "Generate a topic for a story about Opik."
- response = litellm.completion(
- model="gemini/gemini-pro",
- messages=[{"role": "user", "content": prompt}],
- metadata={
- "opik": {
- "current_span_data": get_current_span_data(),
- },
- },
+ response = gemini_client.models.generate_content(
+ model="gemini-2.0-flash-001", contents=prompt
)
- return response.choices[0].message.content
+ return response.text
@track
@@ -121,5 +98,3 @@ generate_opik_story()
The trace can now be viewed in the UI:

-
-
diff --git a/apps/opik-documentation/documentation/fern/docs/cookbook/instructor.mdx b/apps/opik-documentation/documentation/fern/docs/cookbook/instructor.mdx
index 372468ac99..75f3d3fb87 100644
--- a/apps/opik-documentation/documentation/fern/docs/cookbook/instructor.mdx
+++ b/apps/opik-documentation/documentation/fern/docs/cookbook/instructor.mdx
@@ -10,7 +10,7 @@
```python
-%pip install --upgrade --quiet opik instructor
+%pip install --upgrade --quiet opik instructor anthropic google-generativeai google-genai
```
diff --git a/apps/opik-documentation/documentation/fern/docs/cookbook/quickstart_notebook.mdx b/apps/opik-documentation/documentation/fern/docs/cookbook/quickstart_notebook.mdx
index 3725159027..b164510cc8 100644
--- a/apps/opik-documentation/documentation/fern/docs/cookbook/quickstart_notebook.mdx
+++ b/apps/opik-documentation/documentation/fern/docs/cookbook/quickstart_notebook.mdx
@@ -433,8 +433,6 @@ We can now use the `evaluate` method to evaluate the summaries in our dataset:
```python
from opik.evaluation import evaluate
-os.environ["OPIK_PROJECT_NAME"] = "summary-evaluation-prompts"
-
MODEL = "gpt-4o-mini"
DENSITY_ITERATIONS = 2
@@ -490,8 +488,6 @@ Guidelines:
```python
from opik.evaluation import evaluate
-os.environ["OPIK_PROJECT_NAME"] = "summary-evaluation-prompts"
-
MODEL = "gpt-4o-mini"
DENSITY_ITERATIONS = 2
diff --git a/apps/opik-documentation/documentation/fern/docs/tracing/annotate_traces.mdx b/apps/opik-documentation/documentation/fern/docs/tracing/annotate_traces.mdx
index 6c966b191b..085d02fcb0 100644
--- a/apps/opik-documentation/documentation/fern/docs/tracing/annotate_traces.mdx
+++ b/apps/opik-documentation/documentation/fern/docs/tracing/annotate_traces.mdx
@@ -1,4 +1,5 @@
-Annotating traces is a crucial aspect of evaluating and improving your LLM-based applications. By systematically recording qualitative or quantitative feedback on specific interactions or entire conversation flows, you can:
+Annotating traces is a crucial aspect of evaluating and improving your LLM-based applications. By systematically recording qualitative or quantitative
+feedback on specific interactions or entire conversation flows, you can:
1. Track performance over time
2. Identify areas for improvement
@@ -10,7 +11,8 @@ Opik allows you to annotate traces through the SDK or the UI.
## Annotating Traces through the UI
-To annotate traces through the UI, you can navigate to the trace you want to annotate in the traces page and click on the `Annotate` button. This will open a sidebar where you can add annotations to the trace.
+To annotate traces through the UI, you can navigate to the trace you want to annotate in the traces page and click on the `Annotate` button.
+This will open a sidebar where you can add annotations to the trace.
You can annotate both traces and spans through the UI; make sure you have selected the correct span in the sidebar.
@@ -19,10 +21,12 @@ You can annotate both traces and spans through the UI, make sure you have select
- In order to ensure a consistent set of feedback, you will need to define feedback definitions in the `Feedback
- Definitions` page which supports both numerical and categorical annotations.
+ Once a feedback score has been provided, you can also add a reason explaining why that particular score was given.
+ This is useful for adding additional context to the score.
+You can also add comments to traces and experiments to share insights with other team members.
+
## Online evaluation
You don't need to manually annotate each trace to measure the performance of your LLM applications! By using Opik's [online evaluation feature](/production/rules), you can define LLM as a Judge metrics that will automatically score all, or a subset, of your production traces.
@@ -77,9 +81,10 @@ client.log_spans_feedback_scores(
)
```
-:::note
-The `FeedbackScoreDict` class supports an optional `reason` field that can be used to provide a human-readable explanation for the feedback score.
-:::
+
+ The `FeedbackScoreDict` class supports an optional `reason` field that can be used to provide a human-readable
+ explanation for the feedback score.
+
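+
+As a minimal sketch (assuming a `trace_id` captured from a previously logged trace; the `accuracy` score name is illustrative), a reason can be attached like this:
+
+```python
+client.log_traces_feedback_scores(
+    scores=[
+        {
+            "id": trace_id,  # placeholder: ID of a trace you logged earlier
+            "name": "accuracy",
+            "value": 0.9,
+            "reason": "The response matched the reference answer.",
+        }
+    ],
+)
+```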
### Using Opik's built-in evaluation metrics
@@ -90,7 +95,7 @@ Opik's built-in evaluation metrics are broken down into two main categories:
1. Heuristic metrics
2. LLM as a judge metrics
-### Heuristic Metrics
+#### Heuristic Metrics
Heuristic metrics use rule-based or statistical methods to evaluate the output of LLM models.
@@ -118,7 +123,7 @@ score = metric.score(
)
```
-### LLM as a Judge Metrics
+#### LLM as a Judge Metrics
For LLM outputs that cannot be evaluated using heuristic metrics, you can use LLM as a judge metrics. These metrics are based on the idea of using an LLM to evaluate the output of another LLM.
diff --git a/apps/opik-documentation/documentation/fern/docs/tracing/log_chat_conversations.mdx b/apps/opik-documentation/documentation/fern/docs/tracing/log_chat_conversations.mdx
new file mode 100644
index 0000000000..e683fc5417
--- /dev/null
+++ b/apps/opik-documentation/documentation/fern/docs/tracing/log_chat_conversations.mdx
@@ -0,0 +1,78 @@
+You can log chat conversations to the Opik platform and track the full conversations
+your users are having with your chatbot.
+
+
+
+
+
+## Logging conversations
+
+You can log chat conversations by specifying the `thread_id` parameter when using either the low-level SDK or
+Python decorators:
+
+
+
+ ```python
+ import opik
+ from opik import opik_context
+
+ @opik.track
+ def chat_message(input, thread_id):
+ opik_context.update_current_trace(
+ thread_id=thread_id
+ )
+ return "Opik is an Open Source GenAI platform"
+
+ thread_id = "f174a"
+ chat_message("What is Opik ?", thread_id)
+ chat_message("Repeat the previous message", thread_id)
+ ```
+
+
+ ```python
+ import opik
+
+ opik_client = opik.Opik()
+
+ thread_id = "55d84"
+
+ # Log a first message
+ trace = opik_client.trace(
+ name="chat_conversation",
+ input="What is Opik?",
+ output="Opik is an Open Source GenAI platform",
+ thread_id=thread_id
+ )
+
+ # Log a second message
+ trace = opik_client.trace(
+ name="chat_conversation",
+ input="Can you track chat conversations in Opik",
+ output="Yes, of course !",
+ thread_id=thread_id
+ )
+ ```
+
+
+
+
+
+ The input to each trace will be displayed as the user message, while the output will be displayed as the AI assistant
+ response.
+
+
+## Reviewing conversations
+
+Conversations can be viewed at the project level in the `threads` tab. All conversations are tracked, and by clicking on a thread ID you can
+view the full conversation.
+
+The thread view supports markdown, making it easier for you to review the content that was returned to the user. If you would like to dig deeper, you
+can click on the `View trace` button to see exactly how the AI assistant response was generated.
+
+By clicking on the thumbs up or thumbs down icons, you can quickly rate the AI assistant response. This feedback score will be logged and associated with
+the relevant trace. By switching to the trace view, you can review the full trace and add additional feedback scores through the annotation
+functionality.
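+
+As a hedged sketch of logging the same rating programmatically (reusing `opik_client` and `trace` from the low-level SDK example above; the score name is illustrative):
+
+```python
+# 1.0 for a thumbs up, 0.0 for a thumbs down
+opik_client.log_traces_feedback_scores(
+    scores=[{"id": trace.id, "name": "user_feedback", "value": 1.0}]
+)
+```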
+
+
+
+
diff --git a/apps/opik-documentation/documentation/fern/img/changelog/2025-03-03/chat_conversations.png b/apps/opik-documentation/documentation/fern/img/changelog/2025-03-03/chat_conversations.png
new file mode 100644
index 0000000000..cfa2057123
Binary files /dev/null and b/apps/opik-documentation/documentation/fern/img/changelog/2025-03-03/chat_conversations.png differ
diff --git a/apps/opik-documentation/documentation/fern/img/tracing/chat_conversations.png b/apps/opik-documentation/documentation/fern/img/tracing/chat_conversations.png
new file mode 100644
index 0000000000..cfa2057123
Binary files /dev/null and b/apps/opik-documentation/documentation/fern/img/tracing/chat_conversations.png differ
diff --git a/apps/opik-documentation/documentation/fern/img/tracing/chat_conversations_actions.png b/apps/opik-documentation/documentation/fern/img/tracing/chat_conversations_actions.png
new file mode 100644
index 0000000000..3f5c053d91
Binary files /dev/null and b/apps/opik-documentation/documentation/fern/img/tracing/chat_conversations_actions.png differ
diff --git a/apps/opik-documentation/documentation/pytest_codeblocks/evaluators/python_evaluator.py b/apps/opik-documentation/documentation/pytest_codeblocks/evaluators/python_evaluator.py
index fcdab829e4..0088b02636 100644
--- a/apps/opik-documentation/documentation/pytest_codeblocks/evaluators/python_evaluator.py
+++ b/apps/opik-documentation/documentation/pytest_codeblocks/evaluators/python_evaluator.py
@@ -1,6 +1,9 @@
import os
import subprocess
import tempfile
+import logging
+
+LOGGER = logging.getLogger(__name__)
class PythonEvaluator:
@@ -19,8 +22,16 @@ def evaluate(self):
# Run the code in a subprocess
with tempfile.TemporaryDirectory() as temp_dir:
script_path = os.path.join(temp_dir, "script.py")
+
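+ # Split the accumulated history by language: python blocks are prepended to the script, bash blocks are run separately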
+ python_history = [
+ x["content"] for x in self.history if x["language"] == "python"
+ ]
+ bash_history = [
+ x["content"] for x in self.history if x["language"] == "bash"
+ ]
+
with open(script_path, "w") as f:
- f.write("\n".join([*self.history, self.code]))
+ f.write("\n".join([*python_history, self.code]))
env = os.environ.copy()
env.update(
@@ -32,6 +43,9 @@ def evaluate(self):
)
try:
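+ # Replay earlier bash blocks (typically pip installs) before executing the Python script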
+ for bash_command in bash_history:
+ subprocess.run(bash_command, shell=True, env=env)
+
subprocess.run(
[self.python_path, script_path],
capture_output=True,
diff --git a/apps/opik-documentation/documentation/pytest_codeblocks/parsing_utils.py b/apps/opik-documentation/documentation/pytest_codeblocks/parsing_utils.py
index 68c278b0d3..89245a14c7 100644
--- a/apps/opik-documentation/documentation/pytest_codeblocks/parsing_utils.py
+++ b/apps/opik-documentation/documentation/pytest_codeblocks/parsing_utils.py
@@ -66,6 +66,14 @@ def check_skip_frontmatter(path):
return frontmatter.get("pytest_codeblocks_skip", False)
+def convert_jupyter_pip_install_to_bash(code_block):
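+ """Reclassify Jupyter '%pip install' magics as bash blocks so they are run via pip."""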
+ if "%pip install" in code_block["content"]:
+ code_block["language"] = "bash"
+ code_block["content"] = code_block["content"].replace("%pip", "pip")
+
+ return code_block
+
+
def get_code_blocs(
path: str,
) -> List[Union[evaluators.PythonEvaluator, evaluators.BashEvaluator]]:
@@ -76,9 +84,16 @@ def get_code_blocs(
return []
page_frontmatter = get_page_frontmatter(path)
+ is_cookbook = "/cookbook/" in str(path)
+
code_blocks = []
markdown = MarkdownAnalyzer(path)
mrkdwn_analysis_code_blocks = markdown.identify_code_blocks().get("Code block", [])
+ mrkdwn_analysis_code_blocks = [
+ convert_jupyter_pip_install_to_bash(code_block)
+ for code_block in mrkdwn_analysis_code_blocks
+ ]
+
for i, mk_code_block in enumerate(mrkdwn_analysis_code_blocks):
language = _get_code_block_language(mk_code_block["language"])
start_line = mk_code_block["start_line"]
@@ -97,8 +112,11 @@ def get_code_blocs(
code_str = _reindent_code_block(mk_code_block["content"])
if language == "python":
- if page_frontmatter.get("pytest_codeblocks_execute_previous", False):
- history = [x["content"] for x in mrkdwn_analysis_code_blocks[:i]]
+ if (
+ page_frontmatter.get("pytest_codeblocks_execute_previous", False)
+ or is_cookbook
+ ):
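+ # Cookbook pages always execute previous blocks; keep the full dicts so the evaluator can split python vs bash history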
+ history = [x for x in mrkdwn_analysis_code_blocks[:i]]
else:
history = []