From 5dc6d384b6b769cda58b2da85a52ccf44769e3b2 Mon Sep 17 00:00:00 2001 From: Stefan Krawczyk Date: Thu, 4 Jul 2024 15:25:55 -0700 Subject: [PATCH 1/6] Sketches Schema UI view This is hacky UI code. It does not handle "run ids". 1. Adds to pandas & pyspark schema capture using `h_schema.py`. 2. Adds SchemaView and related types to try to show a table... So it's either we've set something up with too many things, or this is just a factor of using typescript... Basically I dont like how many things I had to add to do this.. Otherwise it's non-obvious to me what the pattern should be to handle comparing runs -- this UI change for the schema table doesn't take that into account -- it should probably use Generic Table but I couldn't wrap my head around it. Otherwise open question whether we reuse the h_schema pyarrow stuff, or just roll our own again... --- .../result-summaries/DataObservability.tsx | 19 ++++- .../Runs/Task/result-summaries/SchemaView.tsx | 82 +++++++++++++++++++ ui/frontend/src/state/api/backendApiRaw.ts | 11 +++ ui/frontend/src/state/api/friendlyApi.ts | 3 + .../src/hamilton_sdk/tracking/pandas_stats.py | 37 +++++++-- .../hamilton_sdk/tracking/pyspark_stats.py | 58 +++++++++++-- 6 files changed, 193 insertions(+), 17 deletions(-) create mode 100644 ui/frontend/src/components/dashboard/Runs/Task/result-summaries/SchemaView.tsx diff --git a/ui/frontend/src/components/dashboard/Runs/Task/result-summaries/DataObservability.tsx b/ui/frontend/src/components/dashboard/Runs/Task/result-summaries/DataObservability.tsx index 24fdda202..7682cfe60 100644 --- a/ui/frontend/src/components/dashboard/Runs/Task/result-summaries/DataObservability.tsx +++ b/ui/frontend/src/components/dashboard/Runs/Task/result-summaries/DataObservability.tsx @@ -5,7 +5,7 @@ import { AttributeDict2, AttributeHTML1, AttributePandasDescribe1, - AttributePrimitive1, + AttributePrimitive1, AttributeSchema1, AttributeUnsupported1, } from "../../../../../state/api/backendApiRaw"; import { @@ -20,6 +20,7 @@ import { PandasDescribe1View } from "./PandasDescribe"; import { Dict1View, Dict2View } from "./DictView"; import { DAGWorksDescribe3View } from "./DAGWorksDescribe"; import { HTML1View } from "./HTMLView"; +import {Schema1View} from "./SchemaView"; const Primitive1View = (props: { taskName: string; @@ -303,6 +304,22 @@ export const ResultsSummaryView = (props: { ); } + const schema1View = getNodeRunAttributes( + props.runAttributes, + props.runIds, + "AttributeSchema1" + ); + + if (schema1View.length > 0) { + allViews.push( + i.value)} + taskName={props.taskName || ""} + runIds={schema1View.map((i) => i.runId)} + /> + ); + } + const unsupportedViews = getNodeRunAttributes( props.runAttributes, props.runIds, diff --git a/ui/frontend/src/components/dashboard/Runs/Task/result-summaries/SchemaView.tsx b/ui/frontend/src/components/dashboard/Runs/Task/result-summaries/SchemaView.tsx new file mode 100644 index 000000000..92a3c05d9 --- /dev/null +++ b/ui/frontend/src/components/dashboard/Runs/Task/result-summaries/SchemaView.tsx @@ -0,0 +1,82 @@ +import { AttributeSchema1 } from "../../../../../state/api/friendlyApi"; +import { GenericTable } from "../../../../common/GenericTable"; +import {ColSchema} from "../../../../../state/api/backendApiRaw"; + + +// export const Schema1View = (props: { +// taskName: string; +// runIds: number[]; +// values: AttributeSchema1[]; +// }) => { +// const columns = [ +// { +// displayName: "ColName", +// Render: (item: AttributeSchema1) => {item.name}, +// }, +// { +// displayName: "Name", 
+// Render: (item: AttributeSchema1) => {item.name}, +// }, +// { +// displayName: "Value", +// Render: (item: AttributeSchema1) => {item.value}, +// }, +// ]; +// const data = props.values.map(value => [value.id, value.name, value.value]); +// return ( +//
hi
+// ); +// }; + +interface Schema1ViewProps { + attributeSchemas: AttributeSchema1[]; + taskName: string; + runIds: number[]; +} +// export type RunIdToSchema = { +// [key: number]: AttributeSchema1; +// }; + +// const buildRunIdToAttributeSchemaMapping = (props: Schema1ViewProps): { [key: number]: AttributeSchema1 } => { +// const mapping: RunIdToSchema = {}; +// +// props.runIds.forEach((runId, index) => { +// mapping[runId] = props.attributeSchemas[index]; +// }); +// +// return mapping; +// }; + +export const Schema1View: React.FC = ({ attributeSchemas, taskName, runIds }) => { + // Convert the AttributeSchema1 object into an array of its values + // const runIdtoAttributeSchema = buildRunIdToAttributeSchemaMapping({ attributeSchemas, taskName, runIds }); + const schemaArray = Object.values(attributeSchemas[0]); + console.log(attributeSchemas); + console.log(runIds); + + // return ( + // + // ) + return ( + + + + + + + + + + + {schemaArray.map((col, index) => ( + + + + + + + ))} + +
Column NameTypeNullableMetadata
{col.name}{col.type}{col.nullable}{JSON.stringify(col.metadata)}
+ ); +} diff --git a/ui/frontend/src/state/api/backendApiRaw.ts b/ui/frontend/src/state/api/backendApiRaw.ts index b378f4b06..cb2b64b52 100644 --- a/ui/frontend/src/state/api/backendApiRaw.ts +++ b/ui/frontend/src/state/api/backendApiRaw.ts @@ -524,6 +524,16 @@ export type AttributeDagworksDescribe3 = { export type AttributeHTML1 = { html: string; }; + +export type ColSchema = { + name: string; + type: string; + nullable: boolean; + metadata: object; +}; +export type AttributeSchema1 = { + [key: string]: ColSchema; +}; export type AllAttributeTypes = { documentation_loom__1: AttributeDocumentationLoom1; primitive__1: AttributePrimitive1; @@ -534,6 +544,7 @@ export type AllAttributeTypes = { dict__2: AttributeDict2; dagworks_describe__3: AttributeDagworksDescribe3; html__1: AttributeHTML1; + schema__1: AttributeSchema1; }; export type CodeVersionGit1 = { git_hash: string; diff --git a/ui/frontend/src/state/api/friendlyApi.ts b/ui/frontend/src/state/api/friendlyApi.ts index 0890a1260..c8d4b3297 100644 --- a/ui/frontend/src/state/api/friendlyApi.ts +++ b/ui/frontend/src/state/api/friendlyApi.ts @@ -204,6 +204,8 @@ export type AttributeDagworksDescribe3 = export type AttributeHTML1 = AllAttributeTypes["html__1"]; +export type AttributeSchema1 = AllAttributeTypes["schema__1"]; + export const nodeAttributeTypeMap = { AttributePrimitive1: { version: 1, type: "primitive" }, AttributeUnsupported1: { version: 1, type: "unsupported" }, @@ -213,6 +215,7 @@ export const nodeAttributeTypeMap = { AttributeDict2: { version: 2, type: "dict" }, AttributeDagworksDescribe3: { version: 3, type: "dagworks_describe" }, AttributeHTML1: {version: 1, type: "html"}, + AttributeSchema1: {version: 1, type: "schema"}, }; export type DAGWorksDescribeColumn = diff --git a/ui/sdk/src/hamilton_sdk/tracking/pandas_stats.py b/ui/sdk/src/hamilton_sdk/tracking/pandas_stats.py index f190b8b08..4feade7fe 100644 --- a/ui/sdk/src/hamilton_sdk/tracking/pandas_stats.py +++ b/ui/sdk/src/hamilton_sdk/tracking/pandas_stats.py @@ -1,4 +1,4 @@ -from typing import Any, Dict, Union +from typing import Any, Dict, List, Union import pandas as pd from hamilton_sdk.tracking import pandas_col_stats as pcs @@ -6,6 +6,11 @@ from hamilton import driver +try: + from hamilton.plugins import h_schema +except (ImportError, ModuleNotFoundError): + h_schema = None + """Module that houses functions to compute statistics on pandas series/dataframes. 
Notes: - we should assume pandas v1.0+ so that we have a string type @@ -80,12 +85,30 @@ def execute_col( @stats.compute_stats.register -def compute_stats_df(result: pd.DataFrame, node_name: str, node_tags: dict) -> Dict[str, Any]: - return { - "observability_type": "dagworks_describe", - "observability_value": _compute_stats(result), - "observability_schema_version": "0.0.3", - } +def compute_stats_df( + result: pd.DataFrame, node_name: str, node_tags: dict +) -> List[stats.StatsType]: + summary_stats = _compute_stats(result) + + results = [ + { + "observability_type": "dagworks_describe", + "observability_value": summary_stats, + "observability_schema_version": "0.0.3", + }, + ] + if h_schema is not None: + schema = h_schema._get_arrow_schema(result) + schema.with_metadata(dict(name=node_name)) + results.append( + { + "observability_type": "schema", + "observability_value": h_schema.pyarrow_schema_to_json(schema), + "observability_schema_version": "0.0.1", + "name": "Schema", + } + ) + return results @stats.compute_stats.register diff --git a/ui/sdk/src/hamilton_sdk/tracking/pyspark_stats.py b/ui/sdk/src/hamilton_sdk/tracking/pyspark_stats.py index b88f79803..c813ce676 100644 --- a/ui/sdk/src/hamilton_sdk/tracking/pyspark_stats.py +++ b/ui/sdk/src/hamilton_sdk/tracking/pyspark_stats.py @@ -1,8 +1,13 @@ -from typing import Any, Dict +from typing import Any, Dict, List import pyspark.sql as ps from hamilton_sdk.tracking import stats +try: + from hamilton.plugins import h_schema +except (ImportError, ModuleNotFoundError): + h_schema = None + """Module that houses functions to introspect a PySpark dataframe. """ # this is a mapping used in the Backend/UI. @@ -69,17 +74,52 @@ def _introspect(df: ps.DataFrame) -> Dict[str, Any]: @stats.compute_stats.register -def compute_stats_psdf(result: ps.DataFrame, node_name: str, node_tags: dict) -> Dict[str, Any]: +def compute_stats_psdf( + result: ps.DataFrame, node_name: str, node_tags: dict +) -> List[Dict[str, Any]]: # TODO: create custom type instead of dict for UI o_value = _introspect(result) - return { - "observability_type": "dict", - "observability_value": { - "type": str(type(result)), - "value": o_value, + + results = [ + { + "observability_type": "dict", + "observability_value": { + "type": str(type(result)), + "value": o_value["columns"], + }, + "observability_schema_version": "0.0.2", }, - "observability_schema_version": "0.0.2", - } + { + "observability_type": "primitive", + "observability_value": { + "type": str(str), + "value": o_value["cost_explain"], + }, + "observability_schema_version": "0.0.1", + "name": "Cost Explain", + }, + { + "observability_type": "primitive", + "observability_value": { + "type": str(str), + "value": o_value["extended_explain"], + }, + "observability_schema_version": "0.0.1", + "name": "Extended Explain", + }, + ] + if h_schema is not None: + schema = h_schema._get_arrow_schema(result) + schema.with_metadata(dict(name=node_name)) + results.append( + { + "observability_type": "schema", + "observability_value": h_schema.pyarrow_schema_to_json(schema), + "observability_schema_version": "0.0.1", + "name": "Schema", + } + ) + return results if __name__ == "__main__": From ecc93376068e73b26e89e016f736367d747d02e1 Mon Sep 17 00:00:00 2001 From: elijahbenizzy Date: Thu, 11 Jul 2024 17:01:30 -0700 Subject: [PATCH 2/6] Gets the schema view to use GenericGroupedTable This will make it look like the others, make it expandable, etc... 
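
For reference, the grouping this enables can be sketched roughly as follows, assuming the schema attribute reaches the frontend as the `AttributeSchema1` map of column name to `ColSchema` added to `backendApiRaw.ts` in the previous commit. The `SchemaRow` type and `flattenSchemas` helper below are illustrative only and not part of this patch.

// Illustrative sketch only; mirrors the ColSchema/AttributeSchema1 types from backendApiRaw.ts.
type ColSchema = {
  name: string;
  type: string;
  nullable: boolean;
  metadata: object;
};

// One schema attribute per run: column name -> column schema.
type AttributeSchema1 = { [key: string]: ColSchema };

// Hypothetical row shape for a table grouped by column name, so the same
// column observed across several runs collapses into a single expandable group.
type SchemaRow = ColSchema & { runId: number };

const flattenSchemas = (
  schemas: AttributeSchema1[],
  runIds: number[]
): SchemaRow[] =>
  schemas.flatMap((schema, i) =>
    Object.values(schema).map((col) => ({ ...col, runId: runIds[i] }))
  );

// Example: the same column observed by two runs becomes two rows in one group.
const rows = flattenSchemas(
  [
    { a: { name: "a", type: "int64", nullable: false, metadata: {} } },
    { a: { name: "a", type: "float64", nullable: true, metadata: {} } },
  ],
  [1, 2]
);
console.log(rows.map((r) => `run ${r.runId}: ${r.name} (${r.type})`).join(", "));
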
--- .../result-summaries/DataObservability.tsx | 9 +- .../Runs/Task/result-summaries/SchemaView.tsx | 204 +++++++++++------- 2 files changed, 137 insertions(+), 76 deletions(-) diff --git a/ui/frontend/src/components/dashboard/Runs/Task/result-summaries/DataObservability.tsx b/ui/frontend/src/components/dashboard/Runs/Task/result-summaries/DataObservability.tsx index 7682cfe60..82962faac 100644 --- a/ui/frontend/src/components/dashboard/Runs/Task/result-summaries/DataObservability.tsx +++ b/ui/frontend/src/components/dashboard/Runs/Task/result-summaries/DataObservability.tsx @@ -5,7 +5,8 @@ import { AttributeDict2, AttributeHTML1, AttributePandasDescribe1, - AttributePrimitive1, AttributeSchema1, + AttributePrimitive1, + AttributeSchema1, AttributeUnsupported1, } from "../../../../../state/api/backendApiRaw"; import { @@ -20,7 +21,7 @@ import { PandasDescribe1View } from "./PandasDescribe"; import { Dict1View, Dict2View } from "./DictView"; import { DAGWorksDescribe3View } from "./DAGWorksDescribe"; import { HTML1View } from "./HTMLView"; -import {Schema1View} from "./SchemaView"; +import { Schema1View } from "./SchemaView"; const Primitive1View = (props: { taskName: string; @@ -313,9 +314,9 @@ export const ResultsSummaryView = (props: { if (schema1View.length > 0) { allViews.push( i.value)} - taskName={props.taskName || ""} + attributeSchemas={schema1View.map((i) => i.value)} runIds={schema1View.map((i) => i.runId)} + projectId={props.projectId} /> ); } diff --git a/ui/frontend/src/components/dashboard/Runs/Task/result-summaries/SchemaView.tsx b/ui/frontend/src/components/dashboard/Runs/Task/result-summaries/SchemaView.tsx index 92a3c05d9..7e5ee6bf6 100644 --- a/ui/frontend/src/components/dashboard/Runs/Task/result-summaries/SchemaView.tsx +++ b/ui/frontend/src/components/dashboard/Runs/Task/result-summaries/SchemaView.tsx @@ -1,82 +1,142 @@ import { AttributeSchema1 } from "../../../../../state/api/friendlyApi"; -import { GenericTable } from "../../../../common/GenericTable"; -import {ColSchema} from "../../../../../state/api/backendApiRaw"; - - -// export const Schema1View = (props: { -// taskName: string; -// runIds: number[]; -// values: AttributeSchema1[]; -// }) => { -// const columns = [ -// { -// displayName: "ColName", -// Render: (item: AttributeSchema1) => {item.name}, -// }, -// { -// displayName: "Name", -// Render: (item: AttributeSchema1) => {item.name}, -// }, -// { -// displayName: "Value", -// Render: (item: AttributeSchema1) => {item.value}, -// }, -// ]; -// const data = props.values.map(value => [value.id, value.name, value.value]); -// return ( -//
hi
-// ); -// }; +import { GenericGroupedTable } from "../../../../common/GenericTable"; +import { ColSchema } from "../../../../../state/api/backendApiRaw"; +import JsonView from "@uiw/react-json-view"; +import { RunLink } from "../../../../common/CommonLinks"; interface Schema1ViewProps { attributeSchemas: AttributeSchema1[]; - taskName: string; runIds: number[]; + projectId: number; } -// export type RunIdToSchema = { -// [key: number]: AttributeSchema1; -// }; -// const buildRunIdToAttributeSchemaMapping = (props: Schema1ViewProps): { [key: number]: AttributeSchema1 } => { -// const mapping: RunIdToSchema = {}; -// -// props.runIds.forEach((runId, index) => { -// mapping[runId] = props.attributeSchemas[index]; -// }); -// -// return mapping; -// }; +type AttributeColumn = ColSchema & { + runId: number; + projectId: number; +}; -export const Schema1View: React.FC = ({ attributeSchemas, taskName, runIds }) => { - // Convert the AttributeSchema1 object into an array of its values - // const runIdtoAttributeSchema = buildRunIdToAttributeSchemaMapping({ attributeSchemas, taskName, runIds }); - const schemaArray = Object.values(attributeSchemas[0]); - console.log(attributeSchemas); - console.log(runIds); +export const Schema1View: React.FC = ({ + attributeSchemas, + runIds, + projectId, +}) => { + const attributeColumns: AttributeColumn[] = []; - // return ( - // - // ) - return ( - - - - - - - - - - - {schemaArray.map((col, index) => ( - - - - - - + attributeSchemas.forEach((schema, i) => { + const runId = runIds[i]; + Object.keys(schema).forEach((key) => { + attributeColumns.push({ + runId: runId, + projectId: projectId, + ...schema[key], + }); + }); + }); + + const columns = [ + { + displayName: "runs", + Render: ( + value: { runId: number }[], + isSummaryRow: boolean, + isExpanded: boolean + ) => { + const runIds = value.map((item) => item.runId); + if (isSummaryRow && isExpanded) { + return <>; + } + return ( +
+ {runIds.map((taskRun, i) => ( + void 0} + /> ))} -
-
Column NameTypeNullableMetadata
{col.name}{col.type}{col.nullable}{JSON.stringify(col.metadata)}
- ); -} + + ); + }, + }, + + { + displayName: "type", + Render: ( + value: AttributeColumn[], + isSummaryRow: boolean, + isExpanded: boolean + ) => { + const toDisplay = Array.from(new Set(value.map((item) => item.type))); + console.log(value); + return !isSummaryRow || !isExpanded ? ( +
+ {toDisplay.map((item, i) => ( + {item} + ))} +
+ ) : ( + <> + ); + }, + }, + { + displayName: "nullable", + Render: ( + value: AttributeColumn[], + isSummaryRow: boolean, + isExpanded: boolean + ) => { + console.log(value); + const allExpanded = value.map((item) => item.nullable); + console.log(allExpanded); + + return !isSummaryRow || !isExpanded ? ( +
+ {allExpanded.map((item) => (item ? "✓" : "✗"))} +
+ ) : ( + <> + ); + }, + }, + { + displayName: "metadata", + Render: ( + value: AttributeColumn[], + isSummaryRow: boolean, + isExpanded: boolean + ) => { + const toDisplay = Array.from( + new Set(value.map((item) => item.metadata)) + ); + console.log(value); + return !isSummaryRow || !isExpanded ? ( +
+ {toDisplay.map((item, i) => ( + + ))} +
+ ) : ( + <> + ); + }, + }, + ]; + return ( +
+ [col.name, col])} + // description="Numeric columns in the dataset" + columns={columns} + /> +
+ ); +}; From 1c7190cace961e442e719f30aab8d8a2633c441e Mon Sep 17 00:00:00 2001 From: elijahbenizzy Date: Thu, 11 Jul 2024 17:09:43 -0700 Subject: [PATCH 3/6] Adds expand/contract of attributes This allows us to hide attributes now that we have multiple --- .../result-summaries/DataObservability.tsx | 222 +++--------------- 1 file changed, 38 insertions(+), 184 deletions(-) diff --git a/ui/frontend/src/components/dashboard/Runs/Task/result-summaries/DataObservability.tsx b/ui/frontend/src/components/dashboard/Runs/Task/result-summaries/DataObservability.tsx index 82962faac..5fd29a026 100644 --- a/ui/frontend/src/components/dashboard/Runs/Task/result-summaries/DataObservability.tsx +++ b/ui/frontend/src/components/dashboard/Runs/Task/result-summaries/DataObservability.tsx @@ -22,6 +22,7 @@ import { Dict1View, Dict2View } from "./DictView"; import { DAGWorksDescribe3View } from "./DAGWorksDescribe"; import { HTML1View } from "./HTMLView"; import { Schema1View } from "./SchemaView"; +import { HiChevronDown, HiChevronUp } from "react-icons/hi"; const Primitive1View = (props: { taskName: string; @@ -149,6 +150,16 @@ export const MultiResultSummaryView = (props: { projectId: number; runIds: number[]; }) => { + const [minimizedAttributes, setMinimizedAttributes] = useState([]); + const toggleExpanded = (attributeName: string) => { + if (minimizedAttributes.includes(attributeName)) { + setMinimizedAttributes( + minimizedAttributes.filter((i) => i !== attributeName) + ); + } else { + setMinimizedAttributes([...minimizedAttributes, attributeName]); + } + }; const attributes = props.nodeRunData.flatMap((i) => i?.attributes || []); const attributesGroupedByName = attributes.reduce((acc, item) => { if (acc[item.name]) { @@ -160,19 +171,33 @@ export const MultiResultSummaryView = (props: { }, {} as { [key: string]: NodeRunAttribute[] }); return (
- {Object.entries(attributesGroupedByName).map(([key, value]) => ( -
-

- {snakeToTitle(key)} -

- -
- ))} + {Object.entries(attributesGroupedByName).map(([key, value]) => { + const isExpanded = !minimizedAttributes.includes(key); + const Icon = isExpanded ? HiChevronUp : HiChevronDown; + return ( +
+
+

+ {snakeToTitle(key)} +

+ { + toggleExpanded(key); + }} + /> +
+ {isExpanded && ( + + )} +
+ ); + })}
); }; @@ -344,175 +369,4 @@ export const ResultsSummaryView = (props: { })} ); - - // const task_name: string = props.taskName ? props.taskName : "?"; - // // so yeah this code is messy - we should really massage things into a dict of run_id -> specific type... - // // else we're assuming that the order of run ids and results matches. - // const { - // observability_value, - // observability_type, - // observability_schema_version, - // } = resultSummariesFiltered[0] as ResultsSummary; - // if ( - // observability_type === "pandas_describe" && - // observability_schema_version === "0.0.1" - // ) { - // return ( - // item as PandasDescribeV0_0_1)} - // projectId={props.projectId} - // runIds={props.runs.map((run) => run.id as number)} - // results={resultSummariesFiltered.map( - // (item) => item.observability_value as PandasDescribeV0_0_1 - // )} - // /> - // ); - // } else if ( - // observability_type === "primitive" && - // observability_schema_version === "0.0.1" - // ) { - // type PrimitiveType = { - // type: string; - // value: string; - // }; - // // for each observability_value in resultSummariesFiltered, cast it to PrimitiveType - // // and put it into values - // const values = resultSummariesFiltered.map((item) => { - // return { runId: item.runId, ...item.observability_value }; - // }) as ({ runId: number } & PrimitiveType)[]; - // return ( - //
- // { - // return [item.runId?.toString() || "", item]; - // })} - // columns={[ - // { - // displayName: "type", - // Render: (item: PrimitiveType) => { - // return ( - //
- // - // {parsePythonType({ typeName: item.type })} - // - //
- // ); - // }, - // }, - // { - // displayName: "value", - // Render: (item: PrimitiveType) => { - // const [expanded, setExpanded] = useState(false); - // return ( - //
- //
 {
-  //                       setExpanded(!expanded);
-  //                       e.stopPropagation();
-  //                     }}
-  //                     className={`${
-  //                       expanded ? "break-word whitespace-pre-wrap" : "truncate"
-  //                     }  text-gray-500 cursor-cell`}
-  //                   >
-  //                     {item.value.toString()}
-  //                   
- // {/* - // {item.value.toString()} - // */} - //
- // ); - // }, - // }, - // ]} - // dataTypeName={"Run"} - // dataTypeDisplay={(item: string) => { - // return ( - // void 0} - // highlightedRun={null} - // > - // ); - // }} - // /> - //
- // ); - // } else if ( - // observability_type === "dict" && - // observability_schema_version === "0.0.2" - // ) { - // return ( - // run.id as number)} - // results={resultSummariesFiltered.map( - // (item) => (item.observability_value as any).value as object - // )} - // > - // ); - // } else if ( - // observability_type === "dagworks_describe" && - // observability_schema_version === "0.0.3" - // ) { - // return ( - // run.id as number)} - // results={resultSummariesFiltered.map( - // (item) => item.observability_value as DAGWorksDescribeV0_0_3 - // )} - // /> - // ); - // } - // type UnknownType = { - // unsupported_type: string; - // action: string; - // }; - // const values = resultSummariesFiltered.map((item) => { - // return { - // value: item.observability_value as UnknownType, - // runId: item.runId, - // }; - // }); - // // TODO: this does not show all the outputs -- just the first one. This code is messy so not going to fix it now. - // return ( - //
- // - // We currently do not capture data summaries for run(s){" "} - // [{runIds.join(", ")}] - // for {task_name}. We are working on adding support for - // everything -- reach out if you need it and we can prioritize! - // - //
- // { - // return [item.runId.toString() || "", item.value]; - // })} - // columns={[ - // { - // displayName: "type", - // Render: (item: UnknownType) => { - // return ( - //
- // {item.unsupported_type} - //
- // ); - // }, - // }, - // { - // displayName: "action", - // Render: (item: UnknownType) => { - // return ( - //
- // {item.action} - //
- // ); - // }, - // }, - // ]} - // dataTypeName={""} - // /> - //
- //
- // ); }; From 577e6af583651bde7fb432eb33798151fc1ab78a Mon Sep 17 00:00:00 2001 From: elijahbenizzy Date: Fri, 12 Jul 2024 12:00:19 -0700 Subject: [PATCH 4/6] Refactors capturing of data to be more flexible We now have three categories: 1. Result summaries -- we can output one and if not it will be unsupported in the UI 2. Schema -- we can output one or none 3. Additional -- as many as we want We use a single dispatch function for each, and it makes the code a lot cleaner. We no longer put stuff in lists unless its an additional result. Note they can also supply their own name, if not, we will generate a unique attribute name. In the long term we'll likely want to stop using single dispatch as we need to register multiple with roles (and single dispatch requires one function per role). For now this is clean enough and easy to work with, however. --- ui/sdk/src/hamilton_sdk/adapters.py | 34 ++-------- .../{stats.py => data_observation.py} | 41 ++++++++++-- .../src/hamilton_sdk/tracking/ibis_stats.py | 6 +- .../hamilton_sdk/tracking/langchain_stats.py | 12 ++-- .../src/hamilton_sdk/tracking/numpy_stats.py | 4 +- .../src/hamilton_sdk/tracking/pandas_stats.py | 44 ++++++------ .../src/hamilton_sdk/tracking/polars_stats.py | 6 +- .../hamilton_sdk/tracking/pydantic_stats.py | 4 +- .../hamilton_sdk/tracking/pyspark_stats.py | 67 +++++++------------ ui/sdk/src/hamilton_sdk/tracking/runs.py | 52 ++++++++------ .../tracking/scikit_learn_stats.py | 30 +++++---- ui/sdk/tests/tracking/test_pandas_stats.py | 4 +- ui/sdk/tests/tracking/test_runs.py | 16 +++-- ui/sdk/tests/tracking/test_stats.py | 10 +-- 14 files changed, 168 insertions(+), 162 deletions(-) rename ui/sdk/src/hamilton_sdk/tracking/{stats.py => data_observation.py} (87%) diff --git a/ui/sdk/src/hamilton_sdk/adapters.py b/ui/sdk/src/hamilton_sdk/adapters.py index abfda447d..b74703a70 100644 --- a/ui/sdk/src/hamilton_sdk/adapters.py +++ b/ui/sdk/src/hamilton_sdk/adapters.py @@ -260,7 +260,7 @@ def post_node_execute( if success: task_run.status = Status.SUCCESS task_run.result_type = type(result) - result_summary = runs.process_result(result, node_) + result_summary, schema, additional_attributes = runs.process_result(result, node_) if result_summary is None: result_summary = { "observability_type": "observability_failure", @@ -270,18 +270,7 @@ def post_node_execute( "value": "Failed to process result.", }, } - # NOTE This is a temporary hack to make process_result() able to return - # more than one object that will be used as UI "task attributes". 
- # There's a conflict between `TaskRun.result_summary` that expect a single - # dict from process_result() and the `HamiltonTracker.post_node_execute()` - # that can more freely handle "stats" to create multiple "task attributes" - elif isinstance(result_summary, dict): - result_summary = result_summary - elif isinstance(result_summary, list): - other_results = [obj for obj in result_summary[1:]] - result_summary = result_summary[0] - else: - raise TypeError("`process_result()` needs to return a dict or sequence of dict") + other_results = ([schema] if schema is not None else []) + additional_attributes task_run.result_summary = result_summary task_attr = dict( @@ -546,12 +535,13 @@ async def post_node_execute( task_run = self.task_runs[run_id][node_.name] tracking_state = self.tracking_states[run_id] task_run.end_time = datetime.datetime.now(timezone.utc) - other_results = [] + if success: task_run.status = Status.SUCCESS task_run.result_type = type(result) - result_summary = runs.process_result(result, node_) # add node + result_summary, schema, additional = runs.process_result(result, node_) # add node + other_results = ([schema] if schema is not None else []) + additional if result_summary is None: result_summary = { "observability_type": "observability_failure", @@ -561,19 +551,6 @@ async def post_node_execute( "value": "Failed to process result.", }, } - # NOTE This is a temporary hack to make process_result() able to return - # more than one object that will be used as UI "task attributes". - # There's a conflict between `TaskRun.result_summary` that expect a single - # dict from process_result() and the `HamiltonTracker.post_node_execute()` - # that can more freely handle "stats" to create multiple "task attributes" - elif isinstance(result_summary, dict): - result_summary = result_summary - elif isinstance(result_summary, list): - other_results = [obj for obj in result_summary[1:]] - result_summary = result_summary[0] - else: - raise TypeError("`process_result()` needs to return a dict or sequence of dict") - task_run.result_summary = result_summary task_attr = dict( node_name=get_node_name(node_, task_id), @@ -603,7 +580,6 @@ async def post_node_execute( attribute_role="error", ) - # `result_summary` or "error" is first because the order influences UI display order attributes = [task_attr] for i, other_result in enumerate(other_results): other_attr = dict( diff --git a/ui/sdk/src/hamilton_sdk/tracking/stats.py b/ui/sdk/src/hamilton_sdk/tracking/data_observation.py similarity index 87% rename from ui/sdk/src/hamilton_sdk/tracking/stats.py rename to ui/sdk/src/hamilton_sdk/tracking/data_observation.py index 0b968e27f..688468657 100644 --- a/ui/sdk/src/hamilton_sdk/tracking/stats.py +++ b/ui/sdk/src/hamilton_sdk/tracking/data_observation.py @@ -1,15 +1,29 @@ import json from functools import singledispatch -from typing import Any, Dict, List, Union +from typing import Any, Dict, List, Optional import pandas as pd from hamilton_sdk.tracking import sql_utils -StatsType = Dict[str, Any] +# Multiple observations per are allowed +ObservationType = Dict[str, Any] @singledispatch -def compute_stats(result, node_name: str, node_tags: dict) -> Union[StatsType, List[StatsType]]: +def compute_schema(result, node_name: str, node_tags: dict) -> Optional[ObservationType]: + """The default schema will be None, and filtered out. + We can polymoorphically implement this for different types of results. 
+ + :param result: + :param node_name: + :param node_tags: + :return: + """ + return None + + +@singledispatch +def compute_stats(result, node_name: str, node_tags: dict) -> Optional[ObservationType]: """This is the default implementation for computing stats on a result. All other implementations should be registered with the `@compute_stats.register` decorator. @@ -29,11 +43,24 @@ def compute_stats(result, node_name: str, node_tags: dict) -> Union[StatsType, L } +@singledispatch +def compute_additional_results(result, node_name: str, node_tags: dict) -> List[ObservationType]: + """The default schema will be None, and filtered out. + We can polymoorphically implement this for different types of results. + + :param result: + :param node_name: + :param node_tags: + :return: + """ + return [] + + @compute_stats.register(str) @compute_stats.register(int) @compute_stats.register(float) @compute_stats.register(bool) -def compute_stats_primitives(result, node_name: str, node_tags: dict) -> StatsType: +def compute_stats_primitives(result, node_name: str, node_tags: dict) -> ObservationType: return { "observability_type": "primitive", "observability_value": { @@ -45,7 +72,7 @@ def compute_stats_primitives(result, node_name: str, node_tags: dict) -> StatsTy @compute_stats.register(dict) -def compute_stats_dict(result: dict, node_name: str, node_tags: dict) -> StatsType: +def compute_stats_dict(result: dict, node_name: str, node_tags: dict) -> ObservationType: """call summary stats on the values in the dict""" try: # if it's JSON serializable, take it. @@ -94,7 +121,7 @@ def compute_stats_dict(result: dict, node_name: str, node_tags: dict) -> StatsTy @compute_stats.register(tuple) -def compute_stats_tuple(result: tuple, node_name: str, node_tags: dict) -> StatsType: +def compute_stats_tuple(result: tuple, node_name: str, node_tags: dict) -> ObservationType: if "hamilton.data_loader" in node_tags and node_tags["hamilton.data_loader"] is True: # assumption it's a tuple if isinstance(result[1], dict): @@ -141,7 +168,7 @@ def compute_stats_tuple(result: tuple, node_name: str, node_tags: dict) -> Stats @compute_stats.register(list) -def compute_stats_list(result: list, node_name: str, node_tags: dict) -> StatsType: +def compute_stats_list(result: list, node_name: str, node_tags: dict) -> ObservationType: """call summary stats on the values in the list""" try: # if it's JSON serializable, take it. 
diff --git a/ui/sdk/src/hamilton_sdk/tracking/ibis_stats.py b/ui/sdk/src/hamilton_sdk/tracking/ibis_stats.py index 15ef99458..677b6f272 100644 --- a/ui/sdk/src/hamilton_sdk/tracking/ibis_stats.py +++ b/ui/sdk/src/hamilton_sdk/tracking/ibis_stats.py @@ -1,6 +1,6 @@ from typing import Any, Dict -from hamilton_sdk.tracking import stats +from hamilton_sdk.tracking import data_observation from ibis.expr.datatypes import core # import ibis.expr.types as ir @@ -73,11 +73,11 @@ def _introspect(table: relations.Table) -> Dict[str, Any]: } -@stats.compute_stats.register +@data_observation.compute_schema.register def compute_stats_ibis_table( result: relations.Table, node_name: str, node_tags: dict ) -> Dict[str, Any]: - # TODO: create custom type instead of dict for UI + # TODO: use the schema type o_value = _introspect(result) return { "observability_type": "dict", diff --git a/ui/sdk/src/hamilton_sdk/tracking/langchain_stats.py b/ui/sdk/src/hamilton_sdk/tracking/langchain_stats.py index 9d59f3018..e2fb432ec 100644 --- a/ui/sdk/src/hamilton_sdk/tracking/langchain_stats.py +++ b/ui/sdk/src/hamilton_sdk/tracking/langchain_stats.py @@ -4,12 +4,12 @@ from typing import Any, Dict -from hamilton_sdk.tracking import stats +from hamilton_sdk.tracking import data_observation from langchain_core import documents as lc_documents from langchain_core import messages as lc_messages -@stats.compute_stats.register(lc_messages.BaseMessage) +@data_observation.compute_stats.register(lc_messages.BaseMessage) def compute_stats_lc_messages( result: lc_messages.BaseMessage, node_name: str, node_tags: dict ) -> Dict[str, Any]: @@ -22,12 +22,12 @@ def compute_stats_lc_messages( } -@stats.compute_stats.register(lc_documents.Document) +@data_observation.compute_stats.register(lc_documents.Document) def compute_stats_lc_docs( result: lc_documents.Document, node_name: str, node_tags: dict ) -> Dict[str, Any]: if hasattr(result, "to_document"): - return stats.compute_stats(result.to_document(), node_name, node_tags) + return data_observation.compute_stats(result.to_document(), node_name, node_tags) else: # d.page_content # hack because not all documents are serializable result = {"content": result.page_content, "metadata": result.metadata} @@ -43,7 +43,7 @@ def compute_stats_lc_docs( from langchain_core import messages msg = messages.BaseMessage(content="Hello, World!", type="greeting") - print(stats.compute_stats(msg, "greeting", {})) + print(data_observation.compute_stats(msg, "greeting", {})) doc = lc_documents.Document(page_content="Hello, World!", metadata={"source": "local_dir"}) - print(stats.compute_stats(doc, "document", {})) + print(data_observation.compute_stats(doc, "document", {})) diff --git a/ui/sdk/src/hamilton_sdk/tracking/numpy_stats.py b/ui/sdk/src/hamilton_sdk/tracking/numpy_stats.py index 24d4abf88..47edd64ac 100644 --- a/ui/sdk/src/hamilton_sdk/tracking/numpy_stats.py +++ b/ui/sdk/src/hamilton_sdk/tracking/numpy_stats.py @@ -2,7 +2,7 @@ import numpy as np import pandas as pd -from hamilton_sdk.tracking import pandas_stats, stats +from hamilton_sdk.tracking import data_observation, pandas_stats """Module that houses functions to compute statistics on numpy objects Notes: @@ -10,7 +10,7 @@ """ -@stats.compute_stats.register +@data_observation.compute_stats.register def compute_stats_numpy(result: np.ndarray, node_name: str, node_tags: dict) -> Dict[str, Any]: try: df = pd.DataFrame(result) # hack - reuse pandas stuff diff --git a/ui/sdk/src/hamilton_sdk/tracking/pandas_stats.py 
b/ui/sdk/src/hamilton_sdk/tracking/pandas_stats.py index 4feade7fe..e1ac4ecbd 100644 --- a/ui/sdk/src/hamilton_sdk/tracking/pandas_stats.py +++ b/ui/sdk/src/hamilton_sdk/tracking/pandas_stats.py @@ -1,8 +1,8 @@ -from typing import Any, Dict, List, Union +from typing import Any, Dict, Optional, Union import pandas as pd +from hamilton_sdk.tracking import data_observation from hamilton_sdk.tracking import pandas_col_stats as pcs -from hamilton_sdk.tracking import stats from hamilton import driver @@ -84,34 +84,36 @@ def execute_col( return stats -@stats.compute_stats.register +@data_observation.compute_stats.register def compute_stats_df( result: pd.DataFrame, node_name: str, node_tags: dict -) -> List[stats.StatsType]: +) -> data_observation.ObservationType: summary_stats = _compute_stats(result) - results = [ - { - "observability_type": "dagworks_describe", - "observability_value": summary_stats, - "observability_schema_version": "0.0.3", - }, - ] + return { + "observability_type": "dagworks_describe", + "observability_value": summary_stats, + "observability_schema_version": "0.0.3", + } + + +@data_observation.compute_schema.register +def compute_schema( + result: pd.DataFrame, node_name: str, node_tags: dict +) -> Optional[data_observation.ObservationType]: if h_schema is not None: schema = h_schema._get_arrow_schema(result) schema.with_metadata(dict(name=node_name)) - results.append( - { - "observability_type": "schema", - "observability_value": h_schema.pyarrow_schema_to_json(schema), - "observability_schema_version": "0.0.1", - "name": "Schema", - } - ) - return results + return { + "observability_type": "schema", + "observability_value": h_schema.pyarrow_schema_to_json(schema), + "observability_schema_version": "0.0.1", + "name": "Schema", + } + return None -@stats.compute_stats.register +@data_observation.compute_stats.register def compute_stats_series(result: pd.Series, node_name: str, node_tags: dict) -> Dict[str, Any]: col_name = result.name if result.name else node_name return { diff --git a/ui/sdk/src/hamilton_sdk/tracking/polars_stats.py b/ui/sdk/src/hamilton_sdk/tracking/polars_stats.py index 6fb6da825..b5169c10d 100644 --- a/ui/sdk/src/hamilton_sdk/tracking/polars_stats.py +++ b/ui/sdk/src/hamilton_sdk/tracking/polars_stats.py @@ -4,8 +4,8 @@ if not hasattr(pl, "Series"): raise ImportError("Polars is not installed") +from hamilton_sdk.tracking import data_observation from hamilton_sdk.tracking import polars_col_stats as pls -from hamilton_sdk.tracking import stats from hamilton import driver @@ -83,7 +83,7 @@ def execute_col(target_output: str, col: pl.Series, name: str, position: int) -> return stats -@stats.compute_stats.register +@data_observation.compute_stats.register def compute_stats_df(result: pl.DataFrame, node_name: str, node_tags: dict) -> Dict[str, Any]: return { "observability_type": "dagworks_describe", @@ -92,7 +92,7 @@ def compute_stats_df(result: pl.DataFrame, node_name: str, node_tags: dict) -> D } -@stats.compute_stats.register +@data_observation.compute_stats.register def compute_stats_series(result: pl.Series, node_name: str, node_tags: dict) -> Dict[str, Any]: return { "observability_type": "dagworks_describe", diff --git a/ui/sdk/src/hamilton_sdk/tracking/pydantic_stats.py b/ui/sdk/src/hamilton_sdk/tracking/pydantic_stats.py index eb342b887..4effe9d8b 100644 --- a/ui/sdk/src/hamilton_sdk/tracking/pydantic_stats.py +++ b/ui/sdk/src/hamilton_sdk/tracking/pydantic_stats.py @@ -1,10 +1,10 @@ from typing import Any, Dict import pydantic -from 
hamilton_sdk.tracking import stats +from hamilton_sdk.tracking import data_observation -@stats.compute_stats.register +@data_observation.compute_stats.register def compute_stats_pydantic( result: pydantic.BaseModel, node_name: str, node_tags: dict ) -> Dict[str, Any]: diff --git a/ui/sdk/src/hamilton_sdk/tracking/pyspark_stats.py b/ui/sdk/src/hamilton_sdk/tracking/pyspark_stats.py index c813ce676..947b42525 100644 --- a/ui/sdk/src/hamilton_sdk/tracking/pyspark_stats.py +++ b/ui/sdk/src/hamilton_sdk/tracking/pyspark_stats.py @@ -1,7 +1,8 @@ -from typing import Any, Dict, List +from typing import Any, Dict, Optional import pyspark.sql as ps -from hamilton_sdk.tracking import stats +from hamilton_sdk.tracking import data_observation +from hamilton_sdk.tracking.data_observation import ObservationType try: from hamilton.plugins import h_schema @@ -73,53 +74,35 @@ def _introspect(df: ps.DataFrame) -> Dict[str, Any]: } -@stats.compute_stats.register -def compute_stats_psdf( - result: ps.DataFrame, node_name: str, node_tags: dict -) -> List[Dict[str, Any]]: +@data_observation.compute_stats.register +def compute_stats_psdf(result: ps.DataFrame, node_name: str, node_tags: dict) -> ObservationType: # TODO: create custom type instead of dict for UI o_value = _introspect(result) - results = [ - { - "observability_type": "dict", - "observability_value": { - "type": str(type(result)), - "value": o_value["columns"], - }, - "observability_schema_version": "0.0.2", - }, - { - "observability_type": "primitive", - "observability_value": { - "type": str(str), - "value": o_value["cost_explain"], - }, - "observability_schema_version": "0.0.1", - "name": "Cost Explain", - }, - { - "observability_type": "primitive", - "observability_value": { - "type": str(str), - "value": o_value["extended_explain"], - }, - "observability_schema_version": "0.0.1", - "name": "Extended Explain", + return { + "observability_type": "dict", + "observability_value": { + "type": str(type(result)), + "value": o_value, }, - ] + "observability_schema_version": "0.0.2", + } + + +@data_observation.compute_schema.register +def compute_schema_psdf( + result: ps.DataFrame, node_name: str, node_tags: dict +) -> Optional[ObservationType]: if h_schema is not None: schema = h_schema._get_arrow_schema(result) schema.with_metadata(dict(name=node_name)) - results.append( - { - "observability_type": "schema", - "observability_value": h_schema.pyarrow_schema_to_json(schema), - "observability_schema_version": "0.0.1", - "name": "Schema", - } - ) - return results + return { + "observability_type": "schema", + "observability_value": h_schema.pyarrow_schema_to_json(schema), + "observability_schema_version": "0.0.1", + "name": "Schema", + } + return None if __name__ == "__main__": diff --git a/ui/sdk/src/hamilton_sdk/tracking/runs.py b/ui/sdk/src/hamilton_sdk/tracking/runs.py index ff3d7ffa1..6e329c511 100644 --- a/ui/sdk/src/hamilton_sdk/tracking/runs.py +++ b/ui/sdk/src/hamilton_sdk/tracking/runs.py @@ -6,9 +6,10 @@ import traceback from contextlib import contextmanager from datetime import datetime, timezone -from typing import Any, Callable, Dict, List, Optional +from typing import Any, Callable, Dict, List, Optional, Tuple -from hamilton_sdk.tracking import stats +from hamilton_sdk.tracking import data_observation +from hamilton_sdk.tracking.data_observation import ObservationType from hamilton_sdk.tracking.trackingtypes import DAGRun, Status, TaskRun from hamilton import node as h_node @@ -36,7 +37,9 @@ pass -def process_result(result: Any, node: 
h_node.Node) -> Any: +def process_result( + result: Any, node: h_node.Node +) -> Tuple[Optional[ObservationType], Optional[ObservationType], List[ObservationType]]: """Processes result -- this is purely a by-type mapping. Note that this doesn't actually do anything yet -- the idea is that we can return DQ results, and do other stuff with other results -- E.G. summary stats on dataframes, @@ -53,16 +56,31 @@ def process_result(result: Any, node: h_node.Node) -> Any: :param node: The node that produced the result :return: The processed result - it has to be JSON serializable! """ + statistics = None + schema = None + additional = [] try: start = py_time.time() - statistics = stats.compute_stats(result, node.name, node.tags) + statistics = data_observation.compute_stats(result, node.name, node.tags) end = py_time.time() logger.debug(f"Took {end - start} seconds to describe {node.name}") - return statistics - # TODO: introspect other nodes - # if it's a check_output node, then we want to process the pandera result/the result from it. except Exception as e: - logger.warning(f"Failed to introspect result for {node.name}. Error:\n{e}") + logger.warning(f"Failed to introspect statistics for {node.name}. Error:\n{e}") + try: + start = py_time.time() + schema = data_observation.compute_schema(result, node.name, node.tags) + end = py_time.time() + logger.debug(f"Took {end - start} seconds to introspect schema for {node.name}") + except Exception as e: + logger.warning(f"Failed to introspect schema for {node.name}. Error:\n{e}") + try: + start = py_time.time() + additional.extend(data_observation.compute_additional_results(result, node.name, node.tags)) + end = py_time.time() + logger.debug(f"Took {end - start} seconds to introspect additional results for {node.name}") + except Exception as e: + logger.warning(f"Failed to introspect additional results for {node.name}. Error:\n{e}") + return statistics, schema, additional class TrackingState: @@ -184,19 +202,11 @@ def execute_node( try: result = original_do_node_execute(run_id, node_, kwargs, task_id) - # NOTE This is a temporary hack to make process_result() able to return - # more than one object that will be used as UI "task attributes". 
- # There's a conflict between `TaskRun.result_summary` that expect a single - # dict from process_result() and the `HamiltonTracker.post_node_execute()` - # that can more freely handle "stats" to create multiple "task attributes" - result_summary = process_result(result, node_) # add node - if isinstance(result_summary, dict): - result_summary = result_summary - elif isinstance(result_summary, list): - result_summary = result_summary[0] - else: - raise TypeError("`process_result()` needs to return a dict or list of dict") - + # In subsequent versions we will stop supporting the class-based approach + # If you find yourself relying on this, switch to use adapters + result_summary, *ignored_because_this_is_a_defunct_code_path = process_result( + result, node_ + ) # add node task_run.status = Status.SUCCESS task_run.result_type = type(result) task_run.result_summary = result_summary diff --git a/ui/sdk/src/hamilton_sdk/tracking/scikit_learn_stats.py b/ui/sdk/src/hamilton_sdk/tracking/scikit_learn_stats.py index fe7087ec4..b8e230a4e 100644 --- a/ui/sdk/src/hamilton_sdk/tracking/scikit_learn_stats.py +++ b/ui/sdk/src/hamilton_sdk/tracking/scikit_learn_stats.py @@ -1,30 +1,34 @@ from typing import List -from hamilton_sdk.tracking import stats +from hamilton_sdk.tracking import data_observation from sklearn.base import BaseEstimator """Module that houses functions to compute statistics on numpy objects""" -@stats.compute_stats.register(BaseEstimator) -def get_estimator_html_representation(result, *args, **kwargs) -> List[stats.StatsType]: +@data_observation.compute_stats.register(BaseEstimator) +def get_estimator_params(result, *args, **kwargs) -> data_observation.ObservationType: """ ref: https://scikit-learn.org/stable/auto_examples/miscellaneous/plot_display_object_visualization.html """ - return [ - { - "name": "Parameters", - "observability_type": "dict", - "observability_value": { - "type": str(type(result)), - "value": result.get_params(deep=True), - }, - "observability_schema_version": "0.0.2", + return { + "name": "Parameters", + "observability_type": "dict", + "observability_value": { + "type": str(type(result)), + "value": result.get_params(deep=True), }, + "observability_schema_version": "0.0.2", + } + + +@data_observation.compute_additional_results.register(BaseEstimator) +def get_estimator_html(result, *args, **kwargs) -> List[data_observation.ObservationType]: + return [ { "name": "Components", "observability_type": "html", "observability_value": {"html": result._repr_html_inner()}, # get_params(deep=True), "observability_schema_version": "0.0.1", - }, + } ] diff --git a/ui/sdk/tests/tracking/test_pandas_stats.py b/ui/sdk/tests/tracking/test_pandas_stats.py index 421add51e..da4903015 100644 --- a/ui/sdk/tests/tracking/test_pandas_stats.py +++ b/ui/sdk/tests/tracking/test_pandas_stats.py @@ -20,7 +20,7 @@ def test_compute_stats_df(): } ) actual = ps.compute_stats_df(df, "test", {}) - expected = { + expected_stats = { "observability_schema_version": "0.0.3", "observability_type": "dagworks_describe", "observability_value": { @@ -206,4 +206,4 @@ def test_compute_stats_df(): }, }, } - assert actual == expected + assert actual == expected_stats diff --git a/ui/sdk/tests/tracking/test_runs.py b/ui/sdk/tests/tracking/test_runs.py index 922addffb..3ea627a9d 100644 --- a/ui/sdk/tests/tracking/test_runs.py +++ b/ui/sdk/tests/tracking/test_runs.py @@ -26,7 +26,7 @@ def create_node(name: str, type_: type) -> node.Node: @pytest.mark.parametrize( - 
"test_result,test_node,observability_type,observability_value", + "test_result,test_node,observability_type,stats", [ ( pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}), @@ -517,16 +517,20 @@ def create_node(name: str, type_: type) -> node.Node: "pl_datetime_series", ], ) -def test_process_result_happy(test_result, test_node, observability_type, observability_value): +def test_process_result_happy(test_result, test_node, observability_type, stats): """Tests a happy path for the process result function.""" - actual_result = runs.process_result(test_result, test_node) + stats, schema, additional = runs.process_result(test_result, test_node) expected_result = result_base.copy() if observability_type in ["dict"]: expected_result["observability_schema_version"] = "0.0.2" if observability_type in ["primitive", "unsupported"]: expected_result["observability_schema_version"] = "0.0.1" expected_result["observability_type"] = observability_type - expected_result["observability_value"] = observability_value - assert actual_result == expected_result + expected_result["observability_value"] = stats["observability_value"] + assert stats == expected_result # Allows us to double-check that everything can be json-dumped - json.dumps(actual_result) + json.dumps(stats) + # TODO -- test schema values, but probably not here + if schema is not None: + json.dumps(schema) + [json.dumps(add) for add in additional] diff --git a/ui/sdk/tests/tracking/test_stats.py b/ui/sdk/tests/tracking/test_stats.py index 10aa9a095..af6b2377d 100644 --- a/ui/sdk/tests/tracking/test_stats.py +++ b/ui/sdk/tests/tracking/test_stats.py @@ -3,13 +3,13 @@ from typing import NamedTuple import pandas as pd -from hamilton_sdk.tracking import stats +from hamilton_sdk.tracking import data_observation def test_compute_stats_namedtuple(): config = namedtuple("config", ["secret_key"]) result = config("secret_value") - actual = stats.compute_stats(result, "test_node", {}) + actual = data_observation.compute_stats(result, "test_node", {}) assert actual == { "observability_type": "unsupported", "observability_value": { @@ -21,7 +21,7 @@ def test_compute_stats_namedtuple(): def test_compute_stats_dict(): - actual = stats.compute_stats({"a": 1}, "test_node", {}) + actual = data_observation.compute_stats({"a": 1}, "test_node", {}) assert actual == { "observability_type": "dict", "observability_value": { @@ -34,7 +34,7 @@ def test_compute_stats_dict(): def test_compute_stats_tuple_dataloader(): """tests case of a dataloader""" - actual = stats.compute_stats( + actual = data_observation.compute_stats( ( pd.DataFrame({"a": [1, 2, 3]}), {"SQL_QUERY": "SELECT * FROM FOO.BAR.TABLE", "CONNECTION_INFO": {"URL": "SOME_URL"}}, @@ -75,7 +75,7 @@ class Foo(NamedTuple): y: str f = Foo(1, "a") - actual = stats.compute_stats( + actual = data_observation.compute_stats( f, "test_node", {"some_tag": "foo-bar"}, From 38bef2dc530226ae612b2b81a11eca15e02e9a65 Mon Sep 17 00:00:00 2001 From: elijahbenizzy Date: Fri, 12 Jul 2024 14:54:58 -0700 Subject: [PATCH 5/6] Adds string diff view This just treats all primitives as strings, which can show the view. This works with strings and will actually just look fine otherwise. It uses the ReactDiffViewer component which is simple, and we use elsewhere in the app. We can probably tune this a bit (the interaction is a little jumpy), but for now this is OK. 
--- ui/frontend/package-lock.json | 198 ++++++++++++++++-- ui/frontend/package.json | 5 +- .../result-summaries/DataObservability.tsx | 161 ++++++++++---- 3 files changed, 311 insertions(+), 53 deletions(-) diff --git a/ui/frontend/package-lock.json b/ui/frontend/package-lock.json index f206e334c..bf3394340 100644 --- a/ui/frontend/package-lock.json +++ b/ui/frontend/package-lock.json @@ -8,7 +8,7 @@ "name": "frontend", "version": "0.1.0", "dependencies": { - "@headlessui/react": "^1.7.7", + "@headlessui/react": "^2.1.2", "@heroicons/react": "^2.0.13", "@propelauth/react": "^2.0.6", "@reduxjs/toolkit": "^1.9.7", @@ -30,13 +30,14 @@ "dayjs": "^1.11.9", "elkjs": "^0.8.2", "fuse.js": "^6.6.2", + "headlessui": "^0.0.0", "http-proxy-middleware": "^2.0.6", "moment-timezone": "^0.5.40", "posthog-js": "^1.82.1", "prism-react-renderer": "^1.3.5", "react": "^18.2.0", "react-chartjs-2": "^5.2.0", - "react-diff-viewer-continued": "^3.2.6", + "react-diff-viewer-continued": "^3.4.0", "react-dom": "^18.2.0", "react-draggable": "^4.4.5", "react-icons": "^4.10.1", @@ -2749,24 +2750,58 @@ "@floating-ui/utils": "^0.1.3" } }, + "node_modules/@floating-ui/react": { + "version": "0.26.19", + "resolved": "https://registry.npmjs.org/@floating-ui/react/-/react-0.26.19.tgz", + "integrity": "sha512-Jk6zITdjjIvjO/VdQFvpRaD3qPwOHH6AoDHxjhpy+oK4KFgaSP871HYWUAPdnLmx1gQ+w/pB312co3tVml+BXA==", + "dependencies": { + "@floating-ui/react-dom": "^2.1.1", + "@floating-ui/utils": "^0.2.4", + "tabbable": "^6.0.0" + }, + "peerDependencies": { + "react": ">=16.8.0", + "react-dom": ">=16.8.0" + } + }, + "node_modules/@floating-ui/react-dom": { + "version": "2.1.1", + "resolved": "https://registry.npmjs.org/@floating-ui/react-dom/-/react-dom-2.1.1.tgz", + "integrity": "sha512-4h84MJt3CHrtG18mGsXuLCHMrug49d7DFkU0RMIyshRveBeyV2hmV/pDaF2Uxtu8kgq5r46llp5E5FQiR0K2Yg==", + "dependencies": { + "@floating-ui/dom": "^1.0.0" + }, + "peerDependencies": { + "react": ">=16.8.0", + "react-dom": ">=16.8.0" + } + }, + "node_modules/@floating-ui/react/node_modules/@floating-ui/utils": { + "version": "0.2.4", + "resolved": "https://registry.npmjs.org/@floating-ui/utils/-/utils-0.2.4.tgz", + "integrity": "sha512-dWO2pw8hhi+WrXq1YJy2yCuWoL20PddgGaqTgVe4cOS9Q6qklXCiA1tJEqX6BEwRNSCP84/afac9hd4MS+zEUA==" + }, "node_modules/@floating-ui/utils": { "version": "0.1.6", "resolved": "https://registry.npmjs.org/@floating-ui/utils/-/utils-0.1.6.tgz", "integrity": "sha512-OfX7E2oUDYxtBvsuS4e/jSn4Q9Qb6DzgeYtsAdkPZ47znpoNsMgZw0+tVijiv3uGNR6dgNlty6r9rzIzHjtd/A==" }, "node_modules/@headlessui/react": { - "version": "1.7.17", - "resolved": "https://registry.npmjs.org/@headlessui/react/-/react-1.7.17.tgz", - "integrity": "sha512-4am+tzvkqDSSgiwrsEpGWqgGo9dz8qU5M3znCkC4PgkpY4HcCZzEDEvozltGGGHIKl9jbXbZPSH5TWn4sWJdow==", + "version": "2.1.2", + "resolved": "https://registry.npmjs.org/@headlessui/react/-/react-2.1.2.tgz", + "integrity": "sha512-Kb3hgk9gRNRcTZktBrKdHhF3xFhYkca1Rk6e1/im2ENf83dgN54orMW0uSKTXFnUpZOUFZ+wcY05LlipwgZIFQ==", "dependencies": { - "client-only": "^0.0.1" + "@floating-ui/react": "^0.26.16", + "@react-aria/focus": "^3.17.1", + "@react-aria/interactions": "^3.21.3", + "@tanstack/react-virtual": "^3.8.1" }, "engines": { "node": ">=10" }, "peerDependencies": { - "react": "^16 || ^17 || ^18", - "react-dom": "^16 || ^17 || ^18" + "react": "^18", + "react-dom": "^18" } }, "node_modules/@heroicons/react": { @@ -3400,6 +3435,99 @@ "react-dom": "^16.8.0 || ^17.0.0 || ^18.0.0" } }, + "node_modules/@react-aria/focus": { + "version": "3.17.1", + "resolved": 
"https://registry.npmjs.org/@react-aria/focus/-/focus-3.17.1.tgz", + "integrity": "sha512-FLTySoSNqX++u0nWZJPPN5etXY0WBxaIe/YuL/GTEeuqUIuC/2bJSaw5hlsM6T2yjy6Y/VAxBcKSdAFUlU6njQ==", + "dependencies": { + "@react-aria/interactions": "^3.21.3", + "@react-aria/utils": "^3.24.1", + "@react-types/shared": "^3.23.1", + "@swc/helpers": "^0.5.0", + "clsx": "^2.0.0" + }, + "peerDependencies": { + "react": "^16.8.0 || ^17.0.0-rc.1 || ^18.0.0" + } + }, + "node_modules/@react-aria/focus/node_modules/clsx": { + "version": "2.1.1", + "resolved": "https://registry.npmjs.org/clsx/-/clsx-2.1.1.tgz", + "integrity": "sha512-eYm0QWBtUrBWZWG0d386OGAw16Z995PiOVo2B7bjWSbHedGl5e0ZWaq65kOGgUSNesEIDkB9ISbTg/JK9dhCZA==", + "engines": { + "node": ">=6" + } + }, + "node_modules/@react-aria/interactions": { + "version": "3.21.3", + "resolved": "https://registry.npmjs.org/@react-aria/interactions/-/interactions-3.21.3.tgz", + "integrity": "sha512-BWIuf4qCs5FreDJ9AguawLVS0lV9UU+sK4CCnbCNNmYqOWY+1+gRXCsnOM32K+oMESBxilAjdHW5n1hsMqYMpA==", + "dependencies": { + "@react-aria/ssr": "^3.9.4", + "@react-aria/utils": "^3.24.1", + "@react-types/shared": "^3.23.1", + "@swc/helpers": "^0.5.0" + }, + "peerDependencies": { + "react": "^16.8.0 || ^17.0.0-rc.1 || ^18.0.0" + } + }, + "node_modules/@react-aria/ssr": { + "version": "3.9.4", + "resolved": "https://registry.npmjs.org/@react-aria/ssr/-/ssr-3.9.4.tgz", + "integrity": "sha512-4jmAigVq409qcJvQyuorsmBR4+9r3+JEC60wC+Y0MZV0HCtTmm8D9guYXlJMdx0SSkgj0hHAyFm/HvPNFofCoQ==", + "dependencies": { + "@swc/helpers": "^0.5.0" + }, + "engines": { + "node": ">= 12" + }, + "peerDependencies": { + "react": "^16.8.0 || ^17.0.0-rc.1 || ^18.0.0" + } + }, + "node_modules/@react-aria/utils": { + "version": "3.24.1", + "resolved": "https://registry.npmjs.org/@react-aria/utils/-/utils-3.24.1.tgz", + "integrity": "sha512-O3s9qhPMd6n42x9sKeJ3lhu5V1Tlnzhu6Yk8QOvDuXf7UGuUjXf9mzfHJt1dYzID4l9Fwm8toczBzPM9t0jc8Q==", + "dependencies": { + "@react-aria/ssr": "^3.9.4", + "@react-stately/utils": "^3.10.1", + "@react-types/shared": "^3.23.1", + "@swc/helpers": "^0.5.0", + "clsx": "^2.0.0" + }, + "peerDependencies": { + "react": "^16.8.0 || ^17.0.0-rc.1 || ^18.0.0" + } + }, + "node_modules/@react-aria/utils/node_modules/clsx": { + "version": "2.1.1", + "resolved": "https://registry.npmjs.org/clsx/-/clsx-2.1.1.tgz", + "integrity": "sha512-eYm0QWBtUrBWZWG0d386OGAw16Z995PiOVo2B7bjWSbHedGl5e0ZWaq65kOGgUSNesEIDkB9ISbTg/JK9dhCZA==", + "engines": { + "node": ">=6" + } + }, + "node_modules/@react-stately/utils": { + "version": "3.10.1", + "resolved": "https://registry.npmjs.org/@react-stately/utils/-/utils-3.10.1.tgz", + "integrity": "sha512-VS/EHRyicef25zDZcM/ClpzYMC5i2YGN6uegOeQawmgfGjb02yaCX0F0zR69Pod9m2Hr3wunTbtpgVXvYbZItg==", + "dependencies": { + "@swc/helpers": "^0.5.0" + }, + "peerDependencies": { + "react": "^16.8.0 || ^17.0.0-rc.1 || ^18.0.0" + } + }, + "node_modules/@react-types/shared": { + "version": "3.23.1", + "resolved": "https://registry.npmjs.org/@react-types/shared/-/shared-3.23.1.tgz", + "integrity": "sha512-5d+3HbFDxGZjhbMBeFHRQhexMFt4pUce3okyRtUVKbbedQFUrtXSBg9VszgF2RTeQDKDkMCIQDtz5ccP/Lk1gw==", + "peerDependencies": { + "react": "^16.8.0 || ^17.0.0-rc.1 || ^18.0.0" + } + }, "node_modules/@reactflow/background": { "version": "11.3.6", "resolved": "https://registry.npmjs.org/@reactflow/background/-/background-11.3.6.tgz", @@ -3861,6 +3989,14 @@ "url": "https://github.com/sponsors/gregberge" } }, + "node_modules/@swc/helpers": { + "version": "0.5.12", + "resolved": 
"https://registry.npmjs.org/@swc/helpers/-/helpers-0.5.12.tgz", + "integrity": "sha512-KMZNXiGibsW9kvZAO1Pam2JPTDBm+KSHMMHWdsyI/1DbIZjT2A6Gy3hblVXUMEDvUAKq+e0vL0X0o54owWji7g==", + "dependencies": { + "tslib": "^2.4.0" + } + }, "node_modules/@tailwindcss/forms": { "version": "0.5.7", "resolved": "https://registry.npmjs.org/@tailwindcss/forms/-/forms-0.5.7.tgz", @@ -3886,6 +4022,31 @@ "tailwindcss": ">=3.0.0 || insiders" } }, + "node_modules/@tanstack/react-virtual": { + "version": "3.8.3", + "resolved": "https://registry.npmjs.org/@tanstack/react-virtual/-/react-virtual-3.8.3.tgz", + "integrity": "sha512-9ICwbDUUzN99CJIGc373i8NLoj6zFTKI2Hlcmo0+lCSAhPQ5mxq4dGOMKmLYoEFyHcGQ64Bd6ZVbnPpM6lNK5w==", + "dependencies": { + "@tanstack/virtual-core": "3.8.3" + }, + "funding": { + "type": "github", + "url": "https://github.com/sponsors/tannerlinsley" + }, + "peerDependencies": { + "react": "^16.8.0 || ^17.0.0 || ^18.0.0", + "react-dom": "^16.8.0 || ^17.0.0 || ^18.0.0" + } + }, + "node_modules/@tanstack/virtual-core": { + "version": "3.8.3", + "resolved": "https://registry.npmjs.org/@tanstack/virtual-core/-/virtual-core-3.8.3.tgz", + "integrity": "sha512-vd2A2TnM5lbnWZnHi9B+L2gPtkSeOtJOAw358JqokIH1+v2J7vUAzFVPwB/wrye12RFOurffXu33plm4uQ+JBQ==", + "funding": { + "type": "github", + "url": "https://github.com/sponsors/tannerlinsley" + } + }, "node_modules/@testing-library/jest-dom": { "version": "5.17.0", "resolved": "https://registry.npmjs.org/@testing-library/jest-dom/-/jest-dom-5.17.0.tgz", @@ -6299,11 +6460,6 @@ "url": "https://github.com/sponsors/sindresorhus" } }, - "node_modules/client-only": { - "version": "0.0.1", - "resolved": "https://registry.npmjs.org/client-only/-/client-only-0.0.1.tgz", - "integrity": "sha512-IV3Ou0jSMzZrd3pZ48nLkT9DA7Ag1pnPzaiQhpW7c3RbcqqzvzzVu+L8gfqMp/8IM2MQtSiqaCxrrcfu8I8rMA==" - }, "node_modules/cliui": { "version": "7.0.4", "resolved": "https://registry.npmjs.org/cliui/-/cliui-7.0.4.tgz", @@ -9620,6 +9776,11 @@ "he": "bin/he" } }, + "node_modules/headlessui": { + "version": "0.0.0", + "resolved": "https://registry.npmjs.org/headlessui/-/headlessui-0.0.0.tgz", + "integrity": "sha512-CHvacVPbl8AqIg2sBNKySUmumu7o15jSrCaTrIh9GW2Eq4y/krCN/vZFOsKCwlrhWQbO4267a8xvvP8bs+qREQ==" + }, "node_modules/highlight.js": { "version": "10.7.3", "resolved": "https://registry.npmjs.org/highlight.js/-/highlight.js-10.7.3.tgz", @@ -15396,9 +15557,9 @@ } }, "node_modules/react-diff-viewer-continued": { - "version": "3.3.1", - "resolved": "https://registry.npmjs.org/react-diff-viewer-continued/-/react-diff-viewer-continued-3.3.1.tgz", - "integrity": "sha512-YhjWjCUq6cs8k9iErpWh/xB2jFCndigGAz2TKubdqrSTtDH5Ib+tdQgzBWVXMMqgtEwoPLi+WFmSsdSoYbDVpw==", + "version": "3.4.0", + "resolved": "https://registry.npmjs.org/react-diff-viewer-continued/-/react-diff-viewer-continued-3.4.0.tgz", + "integrity": "sha512-kMZmUyb3Pv5L9vUtCfIGYsdOHs8mUojblGy1U1Sm0D7FhAOEsH9QhnngEIRo5hXWIPNGupNRJls1TJ6Eqx84eg==", "dependencies": { "@emotion/css": "^11.11.2", "classnames": "^2.3.2", @@ -17562,6 +17723,11 @@ "resolved": "https://registry.npmjs.org/symbol-tree/-/symbol-tree-3.2.4.tgz", "integrity": "sha512-9QNk5KwDF+Bvz+PyObkmSYjI5ksVUYtjW7AU22r2NKcfLJcXp96hkDWU3+XndOsUb+AQ9QhfzfCT2O+CNWT5Tw==" }, + "node_modules/tabbable": { + "version": "6.2.0", + "resolved": "https://registry.npmjs.org/tabbable/-/tabbable-6.2.0.tgz", + "integrity": "sha512-Cat63mxsVJlzYvN51JmVXIgNoUokrIaT2zLclCXjRd8boZ0004U4KCs/sToJ75C6sdlByWxpYnb5Boif1VSFew==" + }, "node_modules/tailwind-scrollbar-hide": { "version": "1.1.7", "resolved": 
"https://registry.npmjs.org/tailwind-scrollbar-hide/-/tailwind-scrollbar-hide-1.1.7.tgz", diff --git a/ui/frontend/package.json b/ui/frontend/package.json index 660d031bb..7cc72462c 100644 --- a/ui/frontend/package.json +++ b/ui/frontend/package.json @@ -3,7 +3,7 @@ "version": "0.1.0", "private": true, "dependencies": { - "@headlessui/react": "^1.7.7", + "@headlessui/react": "^2.1.2", "@heroicons/react": "^2.0.13", "@propelauth/react": "^2.0.6", "@reduxjs/toolkit": "^1.9.7", @@ -25,13 +25,14 @@ "dayjs": "^1.11.9", "elkjs": "^0.8.2", "fuse.js": "^6.6.2", + "headlessui": "^0.0.0", "http-proxy-middleware": "^2.0.6", "moment-timezone": "^0.5.40", "posthog-js": "^1.82.1", "prism-react-renderer": "^1.3.5", "react": "^18.2.0", "react-chartjs-2": "^5.2.0", - "react-diff-viewer-continued": "^3.2.6", + "react-diff-viewer-continued": "^3.4.0", "react-dom": "^18.2.0", "react-draggable": "^4.4.5", "react-icons": "^4.10.1", diff --git a/ui/frontend/src/components/dashboard/Runs/Task/result-summaries/DataObservability.tsx b/ui/frontend/src/components/dashboard/Runs/Task/result-summaries/DataObservability.tsx index 5fd29a026..cadcbf0c5 100644 --- a/ui/frontend/src/components/dashboard/Runs/Task/result-summaries/DataObservability.tsx +++ b/ui/frontend/src/components/dashboard/Runs/Task/result-summaries/DataObservability.tsx @@ -15,7 +15,10 @@ import { getNodeRunAttributes, } from "../../../../../state/api/friendlyApi"; import { parsePythonType } from "../../../../../utils"; -import { GenericTable } from "../../../../common/GenericTable"; +import { + GenericGroupedTable, + GenericTable, +} from "../../../../common/GenericTable"; import { RunLink } from "../../../../common/CommonLinks"; import { PandasDescribe1View } from "./PandasDescribe"; import { Dict1View, Dict2View } from "./DictView"; @@ -23,68 +26,156 @@ import { DAGWorksDescribe3View } from "./DAGWorksDescribe"; import { HTML1View } from "./HTMLView"; import { Schema1View } from "./SchemaView"; import { HiChevronDown, HiChevronUp } from "react-icons/hi"; +import ReactDiffViewer from "react-diff-viewer-continued"; +import { Field, Label, Switch } from "@headlessui/react"; +const DiffView = (props: { oldValue: string; newValue: string }) => { + const [splitView, setSplitView] = useState(false); + return ( +
+
+ + { + setSplitView(!splitView); + e.stopPropagation(); + }} + className="group relative inline-flex h-6 w-11 flex-shrink-0 cursor-pointer rounded-full border-2 border-transparent bg-gray-200 transition-colors duration-200 ease-in-out focus:outline-none focus:ring-2 focus:ring-dwdarkblue focus:ring-offset-2 data-[checked]:bg-dwdarkblue" + > + + + +
+ +
+ ); +}; const Primitive1View = (props: { taskName: string; runIds: number[]; values: AttributePrimitive1[]; projectId: number; }) => { + const valuesWithRunID = props.values.map((item, i) => { + return { ...item, runId: props.runIds[i] }; + }); return (
- { - return [props.runIds[i].toString() || "", item]; + { + return ["", item]; })} columns={[ { displayName: "type", - Render: (item: AttributePrimitive1) => { - return ( + Render: ( + items: AttributePrimitive1[], + isSummaryRow: boolean, + isExpanded: boolean + ) => { + const uniqueTypes = Array.from( + new Set( + items.map((item) => parsePythonType({ type_name: item.type })) + ) + ); + return isSummaryRow && isExpanded ? ( + <> + ) : (
- - {parsePythonType({ type_name: item.type })} - + {uniqueTypes.map((type, i) => { + return ( +
+ {type} +
+ ); + })}
); }, }, { displayName: "value", - Render: (item: AttributePrimitive1) => { + Render: ( + items: AttributePrimitive1[], + isSummaryRow: boolean, + isExpanded: boolean + ) => { const [expanded, setExpanded] = useState(false); return ( -
-
 {
-                      setExpanded(!expanded);
-                      e.stopPropagation();
-                    }}
-                    className={`${
-                      expanded ? "break-word whitespace-pre-wrap" : "truncate"
-                    }  text-gray-500 cursor-cell`}
-                  >
-                    {item.value.toString()}
-                  
- {/* - {item.value.toString()} - */} +
+ {isSummaryRow && isExpanded ? ( + <> + ) : isSummaryRow && !isExpanded ? ( + + ) : ( +
 {
+                        setExpanded(!expanded);
+                        e.stopPropagation();
+                      }}
+                      className={`w- ${
+                        expanded ? "break-word whitespace-pre-wrap" : "truncate"
+                      }  text-gray-500 cursor-cell`}
+                    >
+                      {items[0].value.toString()}
+                    
+ )} +
+ ); + }, + }, + { + displayName: "runs", + Render: ( + value: AttributePrimitive1[], + isSummaryRow: boolean, + isExpanded: boolean + ) => { + if (isSummaryRow && isExpanded) { + return <>; + } + return ( +
+ {props.runIds.map((taskRun, i) => ( + void 0} + /> + ))}
); }, }, ]} dataTypeName={"Run"} - dataTypeDisplay={(item: string) => { - return ( - void 0} - highlightedRun={null} - > - ); - }} />
  );

From 161b0288d70a28dae3949018540a80f29ec12b7e Mon Sep 17 00:00:00 2001
From: elijahbenizzy
Date: Mon, 15 Jul 2024 16:49:45 -0700
Subject: [PATCH 6/6] Adds back in pyspark individual metrics

We had captured all of them before. Now we capture the individual data as
well, which allows for easy comparison. The introspection is duplicated, so
we use a functools.lru_cache (which should cache based on the PySpark
dataframe object's identity).
---
 .../hamilton_sdk/tracking/pyspark_stats.py    | 32 ++++++++++++++++++-
 1 file changed, 31 insertions(+), 1 deletion(-)

diff --git a/ui/sdk/src/hamilton_sdk/tracking/pyspark_stats.py b/ui/sdk/src/hamilton_sdk/tracking/pyspark_stats.py
index 947b42525..32baaa406 100644
--- a/ui/sdk/src/hamilton_sdk/tracking/pyspark_stats.py
+++ b/ui/sdk/src/hamilton_sdk/tracking/pyspark_stats.py
@@ -1,4 +1,5 @@
-from typing import Any, Dict, Optional
+import functools
+from typing import Any, Dict, List, Optional
 
 import pyspark.sql as ps
 from hamilton_sdk.tracking import data_observation
@@ -43,6 +44,8 @@
 }
 
 
+# quick cache to ensure we don't compute twice
+@functools.lru_cache(maxsize=128)
 def _introspect(df: ps.DataFrame) -> Dict[str, Any]:
     """Introspect a PySpark dataframe and return a dictionary of statistics.
 
@@ -105,6 +108,33 @@ def compute_schema_psdf(
     return None
 
 
+@data_observation.compute_additional_results.register
+def compute_additional_psdf(
+    result: ps.DataFrame, node_name: str, node_tags: dict
+) -> List[ObservationType]:
+    o_value = _introspect(result)
+    return [
+        {
+            "observability_type": "primitive",
+            "observability_value": {
+                "type": str(str),
+                "value": o_value["cost_explain"],
+            },
+            "observability_schema_version": "0.0.1",
+            "name": "Cost Explain",
+        },
+        {
+            "observability_type": "primitive",
+            "observability_value": {
+                "type": str(str),
+                "value": o_value["extended_explain"],
+            },
+            "observability_schema_version": "0.0.1",
+            "name": "Extended Explain",
+        },
+    ]
+
+
 if __name__ == "__main__":
     import numpy as np
     import pandas as pd
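
For context on the caching note in the commit message above, here is a minimal,
self-contained sketch (not part of the patch) of why functools.lru_cache
deduplicates this work: lru_cache keys entries on its arguments' __hash__/__eq__,
and an object that defines neither falls back to identity-based hashing, which is
what the commit message is relying on for the PySpark dataframe. FakeDataFrame
below is a hypothetical stand-in for ps.DataFrame so the snippet runs without Spark.

    import functools

    class FakeDataFrame:
        """Hypothetical stand-in for ps.DataFrame; hashed by object identity."""

    @functools.lru_cache(maxsize=128)
    def introspect(df):
        # Pretend this runs the expensive explain()/describe() calls.
        print("introspecting...")
        return {"cost_explain": "...", "extended_explain": "..."}

    df = FakeDataFrame()
    first = introspect(df)    # prints "introspecting..."
    second = introspect(df)   # cache hit: same object, nothing recomputed
    assert first is second

One trade-off worth noting: lru_cache keeps strong references to the dataframes it
has seen, so they are not garbage-collected until their entries are evicted from
the cache.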