feat: Experiment and dataset improvements #6163

Merged (15 commits) on Feb 3, 2025
Changes from 10 commits
16 changes: 16 additions & 0 deletions src/phoenix/server/api/helpers/dataset_helpers.py
@@ -7,6 +7,7 @@
OpenInferenceMimeTypeValues,
OpenInferenceSpanKindValues,
SpanAttributes,
ToolAttributes,
ToolCallAttributes,
)

@@ -27,12 +28,18 @@ def get_dataset_example_input(span: Span) -> dict[str, Any]:
input_mime_type = get_attribute_value(attributes, INPUT_MIME_TYPE)
prompt_template_variables = get_attribute_value(attributes, LLM_PROMPT_TEMPLATE_VARIABLES)
input_messages = get_attribute_value(attributes, LLM_INPUT_MESSAGES)
tool_definitions = []
if tools := get_attribute_value(attributes, LLM_TOOLS):
for tool in tools:
if definition := get_attribute_value(tool, TOOL_DEFINITION):
tool_definitions.append(definition)
if span_kind == LLM:
return _get_llm_span_input(
input_messages=input_messages,
input_value=input_value,
input_mime_type=input_mime_type,
prompt_template_variables=prompt_template_variables,
tool_definitions=tool_definitions,
Contributor: Can we double-check that this works with the OpenAI fine-tuning format? Also, it might just be simpler if the key were tools.

)
return _get_generic_io_value(io_value=input_value, mime_type=input_mime_type, kind="input")
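For context, here is a minimal runnable sketch of what the tool-definition extraction above does. The nested attribute layout (llm.tools as a list of objects each carrying tool.json_schema) and the simplified get_attribute_value helper are assumptions for illustration, not the exact Phoenix internals:

```python
# Hypothetical span attributes for an LLM span traced with two tools. The nested
# layout ("llm.tools" -> list of {"tool": {"json_schema": ...}}) is an assumption
# based on the OpenInference attribute names used in this diff.
attributes = {
    "llm": {
        "tools": [
            {"tool": {"json_schema": '{"type": "function", "function": {"name": "get_weather"}}'}},
            {"tool": {"json_schema": '{"type": "function", "function": {"name": "get_time"}}'}},
        ],
    },
}


def get_attribute_value(mapping, dotted_key):
    # Simplified stand-in for Phoenix's get_attribute_value: walk a dotted key
    # through nested dictionaries, returning None if any segment is missing.
    for part in dotted_key.split("."):
        if not isinstance(mapping, dict) or part not in mapping:
            return None
        mapping = mapping[part]
    return mapping


tool_definitions = []
if tools := get_attribute_value(attributes, "llm.tools"):  # LLM_TOOLS
    for tool in tools:
        if definition := get_attribute_value(tool, "tool.json_schema"):  # TOOL_DEFINITION
            tool_definitions.append(definition)

print(tool_definitions)  # two JSON strings, later decoded by _safely_json_decode
```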

@@ -71,6 +78,7 @@ def _get_llm_span_input(
input_value: Any,
input_mime_type: Optional[str],
prompt_template_variables: Any,
tool_definitions: Any,
) -> dict[str, Any]:
"""
Extracts the input value from an LLM span and returns it as a dictionary.
@@ -84,6 +92,10 @@
input = _get_generic_io_value(io_value=input_value, mime_type=input_mime_type, kind="input")
if prompt_template_variables_data := _safely_json_decode(prompt_template_variables):
input["prompt_template_variables"] = prompt_template_variables_data
if tool_definitions_data := [
_safely_json_decode(tool_definition) for tool_definition in tool_definitions
]:
input["tool_definitions"] = tool_definitions_data
Contributor:
Suggested change
input["tool_definitions"] = tool_definitions_data
input["tools"] = tool_definitions_data

Kinda leaning in this direction. Does this work with the OpenAI fine-tuning / evals format?

Contributor Author: Looks like the tool definitions are a key alongside the input:

https://platform.openai.com/docs/guides/fine-tuning#preparing-your-dataset-for-dpo
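For reference, the shape being discussed looks roughly like the following, with tools sitting next to messages inside the input object. The field names below are paraphrased from the linked OpenAI DPO fine-tuning docs and are an assumption, not something verified against this PR:

```python
# Approximate shape of a single DPO fine-tuning example (assumption, paraphrased
# from the linked OpenAI documentation).
example = {
    "input": {
        "messages": [{"role": "user", "content": "What's the weather in SF?"}],
        "tools": [
            {
                "type": "function",
                "function": {
                    "name": "get_weather",
                    "parameters": {
                        "type": "object",
                        "properties": {"city": {"type": "string"}},
                    },
                },
            }
        ],
    },
    "preferred_output": [{"role": "assistant", "content": "Checking the weather now."}],
    "non_preferred_output": [{"role": "assistant", "content": "I can't help with that."}],
}
```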

return input


@@ -215,3 +227,7 @@ def _safely_json_decode(value: Any) -> Any:
# ToolCallAttributes
TOOL_CALL_FUNCTION_ARGUMENTS_JSON = ToolCallAttributes.TOOL_CALL_FUNCTION_ARGUMENTS_JSON
TOOL_CALL_FUNCTION_NAME = ToolCallAttributes.TOOL_CALL_FUNCTION_NAME

# ToolAttributes
LLM_TOOLS = SpanAttributes.LLM_TOOLS
TOOL_DEFINITION = ToolAttributes.TOOL_JSON_SCHEMA
Contributor: Maybe we can rename this to TOOL_JSON_SCHEMA for consistency with the name of the convention.

2 changes: 1 addition & 1 deletion src/phoenix/server/api/helpers/playground_clients.py
@@ -795,7 +795,7 @@ async def chat_completion_create(
elif isinstance(event, anthropic_streaming.InputJsonEvent):
raise NotImplementedError
else:
assert_never(event)
assert_never(event) # type: ignore
Contributor: There's a fix for this that @RogerHYang put out; it's the Anthropic citations event. Can you cherry-pick that and remove this type ignore?


def _build_anthropic_messages(
self,
8 changes: 1 addition & 7 deletions src/phoenix/server/api/mutations/chat_mutations.py
@@ -49,7 +49,6 @@
from phoenix.server.api.input_types.TemplateOptions import TemplateOptions
from phoenix.server.api.subscriptions import (
_default_playground_experiment_description,
_default_playground_experiment_metadata,
_default_playground_experiment_name,
)
from phoenix.server.api.types.ChatCompletionMessageRole import ChatCompletionMessageRole
@@ -183,12 +182,7 @@ async def chat_completion_over_dataset(
description=input.experiment_description
or _default_playground_experiment_description(dataset_name=dataset.name),
repetitions=1,
metadata_=input.experiment_metadata
or _default_playground_experiment_metadata(
dataset_name=dataset.name,
dataset_id=input.dataset_id,
version_id=GlobalID(DatasetVersion.__name__, str(resolved_version_id)),
),
metadata_=input.experiment_metadata or dict(),
project_name=PLAYGROUND_PROJECT_NAME,
)
session.add(experiment)
21 changes: 21 additions & 0 deletions src/phoenix/server/api/mutations/dataset_mutations.py
@@ -4,7 +4,11 @@

import strawberry
from openinference.semconv.trace import (
MessageAttributes,
MessageContentAttributes,
SpanAttributes,
ToolAttributes,
ToolCallAttributes,
)
from sqlalchemy import and_, delete, distinct, func, insert, select, update
from strawberry import UNSET
@@ -181,6 +185,17 @@ async def add_spans_to_dataset(
assert all(map(lambda id: isinstance(id, int), dataset_example_rowids))
DatasetExampleRevision = models.DatasetExampleRevision

all_span_attributes = {
**SpanAttributes.__dict__,
**MessageAttributes.__dict__,
**MessageContentAttributes.__dict__,
**ToolCallAttributes.__dict__,
**ToolAttributes.__dict__,
}
nonprivate_span_attributes = {
k: v for k, v in all_span_attributes.items() if not k.startswith("_")
}

await session.execute(
insert(DatasetExampleRevision),
[
@@ -190,6 +205,12 @@
DatasetExampleRevision.input.key: get_dataset_example_input(span),
DatasetExampleRevision.output.key: get_dataset_example_output(span),
DatasetExampleRevision.metadata_.key: {
**(span.attributes.get(SpanAttributes.METADATA) or dict()),
**{
k: v
for k, v in span.attributes.items()
if k in nonprivate_span_attributes
},
"span_kind": span.span_kind,
**(
{"annotations": annotations}
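Putting the two additions in this file together, here is a minimal sketch of the filtering step; it runs on its own if openinference-semconv is installed, and the trailing comment about how the allow-list is applied is paraphrased from the diff above rather than verified behavior:

```python
# Build an allow-list of public OpenInference attribute names by merging the
# semantic-convention classes and dropping private/dunder entries.
from openinference.semconv.trace import (
    MessageAttributes,
    MessageContentAttributes,
    SpanAttributes,
    ToolAttributes,
    ToolCallAttributes,
)

all_span_attributes = {
    **SpanAttributes.__dict__,
    **MessageAttributes.__dict__,
    **MessageContentAttributes.__dict__,
    **ToolCallAttributes.__dict__,
    **ToolAttributes.__dict__,
}
nonprivate_span_attributes = {
    k: v for k, v in all_span_attributes.items() if not k.startswith("_")
}

# Entries map constant names to attribute keys, e.g. "LLM_MODEL_NAME" -> "llm.model_name".
print(sorted(nonprivate_span_attributes)[:5])

# In the mutation above, span attributes that appear in this allow-list are merged
# into the dataset example revision's metadata, together with the span's own
# "metadata" attribute and a "span_kind" entry.
```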
17 changes: 1 addition & 16 deletions src/phoenix/server/api/subscriptions.py
@@ -278,12 +278,7 @@ async def chat_completion_over_dataset(
description=input.experiment_description
or _default_playground_experiment_description(dataset_name=dataset.name),
repetitions=1,
metadata_=input.experiment_metadata
or _default_playground_experiment_metadata(
dataset_name=dataset.name,
dataset_id=input.dataset_id,
version_id=GlobalID(DatasetVersion.__name__, str(resolved_version_id)),
),
metadata_=input.experiment_metadata or dict(),
project_name=PLAYGROUND_PROJECT_NAME,
)
session.add(experiment)
@@ -581,16 +576,6 @@ def _default_playground_experiment_description(dataset_name: str) -> str:
return f'Playground experiment for dataset "{dataset_name}"'


def _default_playground_experiment_metadata(
dataset_name: str, dataset_id: GlobalID, version_id: GlobalID
) -> dict[str, Any]:
return {
"dataset_name": dataset_name,
"dataset_id": str(dataset_id),
"dataset_version_id": str(version_id),
}


LLM_OUTPUT_MESSAGES = SpanAttributes.LLM_OUTPUT_MESSAGES
LLM_TOKEN_COUNT_COMPLETION = SpanAttributes.LLM_TOKEN_COUNT_COMPLETION
LLM_TOKEN_COUNT_PROMPT = SpanAttributes.LLM_TOKEN_COUNT_PROMPT
6 changes: 1 addition & 5 deletions tests/unit/server/api/test_subscriptions.py
@@ -1206,12 +1206,8 @@ async def test_emits_expected_payloads_and_records_expected_spans_and_experiment
assert experiment.pop("name") == "playground-experiment"
assert isinstance(experiment_description := experiment.pop("description"), str)
assert "dataset-name" in experiment_description
assert experiment.pop("metadata") == {
"dataset_name": "dataset-name",
"dataset_id": str(dataset_id),
"dataset_version_id": str(version_id),
}
assert experiment.pop("projectName") == "playground"
assert experiment.pop("metadata") == {}
assert isinstance(created_at := experiment.pop("createdAt"), str)
assert isinstance(updated_at := experiment.pop("updatedAt"), str)
assert created_at == updated_at