From e53d35d2b66a1d8ee0874d7074d74a16376503a0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jacques=20Verr=C3=A9?= Date: Mon, 4 Nov 2024 15:39:42 +0100 Subject: [PATCH] Introduce Opik.search_spans (#545) * Added Opik.search_spans method --- .../documentation/docs/tracing/export_data.md | 168 ++++++++++++++++++ .../docs/tracing/export_traces.md | 111 ------------ .../documentation/sidebars.ts | 2 +- .../examples/search_traces_and_spans.py | 10 ++ .../src/opik/api_objects/opik_client.py | 42 ++++- sdks/python/tests/e2e/test_tracing.py | 55 ++++++ 6 files changed, 275 insertions(+), 113 deletions(-) create mode 100644 apps/opik-documentation/documentation/docs/tracing/export_data.md delete mode 100644 apps/opik-documentation/documentation/docs/tracing/export_traces.md create mode 100644 sdks/python/examples/search_traces_and_spans.py diff --git a/apps/opik-documentation/documentation/docs/tracing/export_data.md b/apps/opik-documentation/documentation/docs/tracing/export_data.md new file mode 100644 index 0000000000..665c4b9a5d --- /dev/null +++ b/apps/opik-documentation/documentation/docs/tracing/export_data.md @@ -0,0 +1,168 @@ +--- +sidebar_label: Export Traces and Spans +toc_max_heading_level: 4 +--- + +# Exporting Traces and Spans + +When working with Opik, it is important to be able to export traces and spans so that you can use them to fine-tune your models or run deeper analysis. + +You can export the traces you have logged to the Opik platform using: + +1. Using the Opik SDK: You can use the [`Opik.search_traces`](https://www.comet.com/docs/opik/python-sdk-reference/Opik.html#opik.Opik.search_traces) and [`Opik.search_spans`](https://www.comet.com/docs/opik/python-sdk-reference/Opik.html#opik.Opik.search_spans) methods to export traces and spans. +2. Using the Opik REST API: You can use the [`/traces`](/reference/rest_api/get-traces-by-project.api.mdx) and [`/spans`](/reference/rest_api/get-spans-by-project.api.mdx) endpoints to export traces and spans. +3. 
Using the UI: Once you have selected the traces or spans you want to export, you can click on the `Export CSV` button in the `Actions` dropdown. + +:::tip +The recommended way to export traces and spans is to use the [`Opik.search_traces`](https://www.comet.com/docs/opik/python-sdk-reference/Opik.html#opik.Opik.search_traces) and [`Opik.search_spans`](https://www.comet.com/docs/opik/python-sdk-reference/Opik.html#opik.Opik.search_spans) methods in the Opik SDK. +::: + +## Using the Opik SDK + +### Exporting traces + +The [`Opik.search_traces`](https://www.comet.com/docs/opik/python-sdk-reference/Opik.html#opik.Opik.search_traces) method allows you to either export all the traces in a project or search for specific traces and export them. + +#### Exporting all traces + +To export all traces, you will need to specify a `max_results` value that is higher than the total number of traces in your project: + +```python +import opik + +client = opik.Opik() + +traces = client.search_traces(project_name="Default project", max_results=1000000) +``` + +#### Search for specific traces + +You can use the `filter_string` parameter to search for specific traces: + +```python +import opik + +client = opik.Opik() + +traces = client.search_traces( + project_name="Default project", + filter_string='input contains "Opik"' +) + +# Convert to Dict if required +traces = [trace.dict() for trace in traces] +``` + +The `filter_string` parameter should follow the format `<column> <operator> <value>` with: + +1. `<column>`: The column to filter on, these can be: + - `name` + - `input` + - `output` + - `start_time` + - `end_time` + - `metadata` + - `feedback_score` + - `tags` + - `usage.total_tokens` + - `usage.prompt_tokens` + - `usage.completion_tokens`. +2. `<operator>`: The operator to use for the filter, this can be `=`, `!=`, `>`, `>=`, `<`, `<=`, `contains`, `not_contains`. Note that not all operators are supported for all columns. +3. `<value>`: The value to filter on. If you are filtering on a string, you will need to wrap it in double quotes.
+ +Here are some additional examples of valid `filter_string` values: + +```python +import opik + +client = opik.Opik( + project_name="Default project" +) + +# Search for traces where the input contains text +traces = client.search_traces( + filter_string='input contains "Opik"' +) + +# Search for traces that were logged after a specific date +traces = client.search_traces(filter_string='start_time >= "2024-01-01T00:00:00Z"') + +# Search for traces that have a specific tag +traces = client.search_traces(filter_string='tags contains "production"') + +# Search for traces based on the number of tokens used +traces = client.search_traces(filter_string='usage.total_tokens > 1000') + +# Search for traces based on the model used +traces = client.search_traces(filter_string='metadata.model = "gpt-4o"') +``` + +### Exporting spans + +You can export spans using the [`Opik.search_spans`](https://www.comet.com/docs/opik/python-sdk-reference/Opik.html#opik.Opik.search_spans) method. This method allows you to search for spans based on `trace_id` or based on a filter string. + +#### Exporting spans based on `trace_id` + +To export all the spans associated with a specific trace, you can use the `trace_id` parameter: + +```python +import opik + +client = opik.Opik() + +spans = client.search_spans( + project_name="Default project", + trace_id="067092dc-e639-73ff-8000-e1c40172450f" +) +``` + +#### Search for specific spans + +You can use the `filter_string` parameter to search for specific spans: + +```python +import opik + +client = opik.Opik() + +spans = client.search_spans( + project_name="Default project", + filter_string='input contains "Opik"' +) +``` + +:::tip +The `filter_string` parameter should follow the same format as the `filter_string` parameter in the `Opik.search_traces` method as [defined above](#search-for-specific-traces).
+::: + +## Using the Opik REST API + +To export traces using the Opik REST API, you can use the [`/traces`](/reference/rest_api/get-traces-by-project.api.mdx) endpoint and the [`/spans`](/reference/rest_api/get-spans-by-project.api.mdx) endpoint. These endpoints are paginated so you will need to make multiple requests to retrieve all the traces or spans you want. + +To search for specific traces or spans, you can use the `filter` parameter. While this is a string parameter, it does not follow the same format as the `filter_string` parameter in the Opik SDK. Instead it is a list of json objects with the following format: + +```json +[ + { + "field": "name", + "type": "string", + "operator": "=", + "value": "Opik" + } +] +``` + +:::warning +The `filter` parameter was designed to be used with the Opik UI and has therefore limited flexibility. If you need more flexibility, +please raise an issue on [GitHub](https://github.com/comet-ml/opik/issues) so we can help. +::: + +## Using the UI + +To export traces as a CSV file from the UI, you can simply select the traces or spans you wish to export and click on `Export CSV` in the `Actions` dropdown: + +![Export CSV](/img/tracing/download_traces.png) + +:::tip +The UI only allows you to export up to 100 traces or spans at a time as it is linked to the page size of the traces table. If you need to export more traces or spans, we recommend using the Opik SDK. +::: diff --git a/apps/opik-documentation/documentation/docs/tracing/export_traces.md b/apps/opik-documentation/documentation/docs/tracing/export_traces.md deleted file mode 100644 index 3fb5259b8f..0000000000 --- a/apps/opik-documentation/documentation/docs/tracing/export_traces.md +++ /dev/null @@ -1,111 +0,0 @@ ---- -sidebar_label: Export Traces ---- - -# Exporting Traces - -You can export the traces you have logged to the Opik platform using: - -1. 
Using the Opik SDK: You can use the [`Opik.search_traces`](https://www.comet.com/docs/opik/python-sdk-reference/Opik.html#opik.Opik.search_traces) method to export traces. -2. Using the Opik REST API: You can use the [`/traces`](/reference/rest_api/get-traces-by-project.api.mdx) endpoint to export traces. -3. Using the UI: Once you have selected the traces you want to export, you can click on the `Export CSV` button in the `Actions` dropdown. - -:::tip -The recommended way to export traces is to use the [`Opik.search_traces`](https://www.comet.com/docs/opik/python-sdk-reference/Opik.html#opik.Opik.search_traces) method in the Opik SDK. -::: - -## Using the Opik SDK - -The [`Opik.search_traces`](https://www.comet.com/docs/opik/python-sdk-reference/Opik.html#opik.Opik.search_traces) method allows you to both export all the traces in a project or search for specific traces and export them. - -### Exporting all traces - -To export all traces, you will need to specify a `max_results` value that is higher than the total number of traces in your project: - -```python -import opik - -client = opik.Opik() - -traces = client.search_traces(project_name="Default project", max_results=1000000) -``` - -### Search for specific traces - -You can use the `filter_string` parameter to search for specific traces: - -```python -import opik - -client = opik.Opik() - -traces = client.search_traces(project_name="Default project", filter_string='input contains "Opik"') - -# Convert to Dict if required -traces = [trace.dict() for trace in traces] -``` - -The `filter_string` parameter should follow the format ` ` with: - -1. ``: The column to filter on, these can be: - - `name` - - `input` - - `output` - - `start_time` - - `end_time` - - `metadata` - - `feedback_score` - - `tags` - - `usage.total_tokens` - - `usage.prompt_tokens` - - `usage.completion_tokens`. -2. ``: The operator to use for the filter, this can be `=`, `!=`, `>`, `>=`, `<`, `<=`, `contains`, `not_contains`. 
Not that not all operators are supported for all columns. -3. ``: The value to filter on. If you are filtering on a string, you will need to wrap it in double quotes. - -Here are some additional examples of valid `filter_string` values: - -```python -import opik - -client = opik.Opik( - project_name="Default project" -) - -traces = client.search_traces(filter_string='input contains "Opik"') -traces = client.search_traces(filter_string='start_time >= "2024-01-01T00:00:00Z"') -traces = client.search_traces(filter_string='tags contains "production"') -traces = client.search_traces(filter_string='usage.total_tokens > 1000') -traces = client.search_traces(filter_string='metadata.model = "gpt-4o"') -``` - -## Using the Opik REST API - -To export traces using the Opik REST API, you can use the [`/traces`](/reference/rest_api/get-traces-by-project.api.mdx) endpoint. This endpoint is paginated so you will need to make multiple requests to retrieve all the traces you want. - -To search for specific traces, you can use the `filter` parameter. While this is a string parameter, it does not follow the same format as the `filter_string` parameter in the Opik SDK. Instead it is a list of json objects with the following format: - -```json -[ - { - "field": "name", - "type": "string", - "operator": "=", - "value": "Opik" - } -] -``` - -:::warning -The `filter` parameter was designed to be used with the Opik UI and is therefore not very flexible. If you need more flexibility, -please raise an issue on [GitHub](https://github.com/comet-ml/opik/issues) so we can help. -::: - -## Using the UI - -To export traces as a CSV file from the UI, you can simply select the traces you wish to export and click on `Export CSV` in the `Actions` dropdown: - -![Export CSV](/img/tracing/download_traces.png) - -:::tip -The UI only allows you to export up to 100 traces at a time as it is linked to the page size of the traces table. If you need to export more traces, we recommend using the Opik SDK. 
-::: diff --git a/apps/opik-documentation/documentation/sidebars.ts b/apps/opik-documentation/documentation/sidebars.ts index 30e2a77b41..95715c5169 100644 --- a/apps/opik-documentation/documentation/sidebars.ts +++ b/apps/opik-documentation/documentation/sidebars.ts @@ -31,7 +31,7 @@ const sidebars: SidebarsConfig = { "tracing/log_distributed_traces", "tracing/annotate_traces", "tracing/sdk_configuration", - "tracing/export_traces", + "tracing/export_data", { type: "category", label: "Integrations", diff --git a/sdks/python/examples/search_traces_and_spans.py b/sdks/python/examples/search_traces_and_spans.py new file mode 100644 index 0000000000..c98896825e --- /dev/null +++ b/sdks/python/examples/search_traces_and_spans.py @@ -0,0 +1,10 @@ +import opik + +opik_client = opik.Opik() + +spans = opik_client.search_spans( + project_name="Demo Project", + filter_string='input contains "How many unique albums"', +) + +print(spans) diff --git a/sdks/python/src/opik/api_objects/opik_client.py b/sdks/python/src/opik/api_objects/opik_client.py index 941ab49299..7a65ed3551 100644 --- a/sdks/python/src/opik/api_objects/opik_client.py +++ b/sdks/python/src/opik/api_objects/opik_client.py @@ -505,7 +505,7 @@ def search_traces( Search for traces in the given project. Args: - project_name: The name of the project to search traces in. If not provided the project name configured when the Client was created will be used. + project_name: The name of the project to search traces in. If not provided, will search across the project name configured when the Client was created which defaults to the `Default Project`. filter_string: A filter string to narrow down the search. If not provided, all traces in the project will be returned up to the limit. max_results: The maximum number of traces to return. 
""" @@ -532,6 +532,46 @@ def search_traces( return traces[:max_results] + def search_spans( + self, + project_name: Optional[str] = None, + trace_id: Optional[str] = None, + filter_string: Optional[str] = None, + max_results: int = 1000, + ) -> List[span_public.SpanPublic]: + """ + Search for spans in the given trace. This allows you to search spans based on the span input, output, + metadata, tags, etc or based on the trace ID. + + Args: + project_name: The name of the project to search spans in. If not provided, will search across the project name configured when the Client was created which defaults to the `Default Project`. + trace_id: The ID of the trace to search spans in. If provided, the search will be limited to the spans in the given trace. + filter_string: A filter string to narrow down the search. + max_results: The maximum number of spans to return. + """ + page_size = 200 + spans: List[span_public.SpanPublic] = [] + + filters = opik_query_language.OpikQueryLanguage(filter_string).parsed_filters + + page = 1 + while len(spans) < max_results: + page_spans = self._rest_client.spans.get_spans_by_project( + project_name=project_name or self._project_name, + trace_id=trace_id, + filters=filters, + page=page, + size=page_size, + ) + + if len(page_spans.content) == 0: + break + + spans.extend(page_spans.content) + page += 1 + + return spans[:max_results] + def get_trace_content(self, id: str) -> trace_public.TracePublic: """ Args: diff --git a/sdks/python/tests/e2e/test_tracing.py b/sdks/python/tests/e2e/test_tracing.py index a2e9687cda..d258f98cc6 100644 --- a/sdks/python/tests/e2e/test_tracing.py +++ b/sdks/python/tests/e2e/test_tracing.py @@ -3,6 +3,7 @@ import opik from opik import opik_context +from opik.api_objects import helpers from . 
import verifiers from .conftest import OPIK_E2E_TESTS_PROJECT_NAME @@ -259,3 +260,57 @@ def test_search_traces__happyflow(opik_client): output={"output": "trace-output"}, project_name=OPIK_E2E_TESTS_PROJECT_NAME, ) + + +def test_search_spans__happyflow(opik_client): + # In order to define a unique search query, we will create a unique identifier that will be part of the input of the matching span + trace_id = helpers.generate_id() + unique_identifier = str(uuid.uuid4())[-6:] + + filter_string = f'input contains "{unique_identifier}"' + + # Send a trace with one span that matches the input filter and one span that does not + trace = opik_client.trace( + id=trace_id, + name="trace-name", + input={"input": "Some random input"}, + output={"output": "trace-output"}, + project_name=OPIK_E2E_TESTS_PROJECT_NAME, + ) + matching_span = trace.span( + name="span-name", + input={"input": f"Some random input - {unique_identifier}"}, + output={"output": "span-output"}, + ) + trace.span( + name="span-name", + input={"input": "Some random input"}, + output={"output": "span-output"}, + ) + + # Send a trace whose span does not match the input filter + trace = opik_client.trace( + id=trace_id, + name="trace-name", + input={"input": "Some random input"}, + output={"output": "trace-output"}, + project_name=OPIK_E2E_TESTS_PROJECT_NAME, + ) + trace.span( + name="span-name", + input={"input": "Some random input"}, + output={"output": "span-output"}, + ) + + opik_client.flush() + + # Search for the spans of the trace that match the filter string + spans = opik_client.search_spans( + project_name=OPIK_E2E_TESTS_PROJECT_NAME, + trace_id=trace_id, + filter_string=filter_string, + ) + + # Verify that only the matching span is returned + assert len(spans) == 1, "Expected to find 1 matching span" + assert spans[0].id == matching_span.id, "Expected to find the matching span"