Skip to content

Commit

Permalink
Rebase
Browse files Browse the repository at this point in the history
  • Loading branch information
nfcampos committed Dec 12, 2024
1 parent da0aac7 commit a783901
Showing 1 changed file with 113 additions and 0 deletions.
113 changes: 113 additions & 0 deletions libs/langgraph/eval.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
from collections import defaultdict

Check notice on line 1 in libs/langgraph/eval.py

View workflow job for this annotation

GitHub Actions / benchmark

Benchmark results

......................................... fanout_to_subgraph_10x: Mean +- std dev: 60.7 ms +- 1.3 ms ......................................... fanout_to_subgraph_10x_sync: Mean +- std dev: 51.9 ms +- 0.8 ms ......................................... fanout_to_subgraph_10x_checkpoint: Mean +- std dev: 73.9 ms +- 1.2 ms ......................................... fanout_to_subgraph_10x_checkpoint_sync: Mean +- std dev: 94.1 ms +- 1.2 ms ......................................... fanout_to_subgraph_100x: Mean +- std dev: 617 ms +- 29 ms ......................................... fanout_to_subgraph_100x_sync: Mean +- std dev: 509 ms +- 7 ms ......................................... fanout_to_subgraph_100x_checkpoint: Mean +- std dev: 778 ms +- 33 ms ......................................... fanout_to_subgraph_100x_checkpoint_sync: Mean +- std dev: 934 ms +- 17 ms ......................................... react_agent_10x: Mean +- std dev: 30.5 ms +- 0.6 ms ......................................... react_agent_10x_sync: Mean +- std dev: 22.8 ms +- 0.4 ms ......................................... react_agent_10x_checkpoint: Mean +- std dev: 37.6 ms +- 0.6 ms ......................................... react_agent_10x_checkpoint_sync: Mean +- std dev: 36.6 ms +- 0.4 ms ......................................... react_agent_100x: Mean +- std dev: 337 ms +- 6 ms ......................................... react_agent_100x_sync: Mean +- std dev: 273 ms +- 2 ms ......................................... react_agent_100x_checkpoint: Mean +- std dev: 836 ms +- 5 ms ......................................... react_agent_100x_checkpoint_sync: Mean +- std dev: 828 ms +- 6 ms ......................................... wide_state_25x300: Mean +- std dev: 22.7 ms +- 0.5 ms ......................................... wide_state_25x300_sync: Mean +- std dev: 14.6 ms +- 0.1 ms ......................................... 
wide_state_25x300_checkpoint: Mean +- std dev: 273 ms +- 13 ms ......................................... wide_state_25x300_checkpoint_sync: Mean +- std dev: 274 ms +- 13 ms ......................................... wide_state_15x600: Mean +- std dev: 26.5 ms +- 0.5 ms ......................................... wide_state_15x600_sync: Mean +- std dev: 16.8 ms +- 0.3 ms ......................................... wide_state_15x600_checkpoint: Mean +- std dev: 458 ms +- 14 ms ......................................... wide_state_15x600_checkpoint_sync: Mean +- std dev: 460 ms +- 14 ms ......................................... wide_state_9x1200: Mean +- std dev: 25.9 ms +- 0.7 ms ......................................... wide_state_9x1200_sync: Mean +- std dev: 16.8 ms +- 0.3 ms ......................................... wide_state_9x1200_checkpoint: Mean +- std dev: 302 ms +- 15 ms ......................................... wide_state_9x1200_checkpoint_sync: Mean +- std dev: 300 ms +- 12 ms

Check notice on line 1 in libs/langgraph/eval.py

View workflow job for this annotation

GitHub Actions / benchmark

Comparison against main

+-----------------------------------------+---------+-----------------------+ | Benchmark | main | changes | +=========================================+=========+=======================+ | wide_state_15x600_checkpoint | 487 ms | 458 ms: 1.06x faster | +-----------------------------------------+---------+-----------------------+ | wide_state_9x1200_checkpoint | 317 ms | 302 ms: 1.05x faster | +-----------------------------------------+---------+-----------------------+ | wide_state_9x1200 | 26.9 ms | 25.9 ms: 1.04x faster | +-----------------------------------------+---------+-----------------------+ | wide_state_25x300_checkpoint | 283 ms | 273 ms: 1.04x faster | +-----------------------------------------+---------+-----------------------+ | wide_state_15x600_checkpoint_sync | 475 ms | 460 ms: 1.03x faster | +-----------------------------------------+---------+-----------------------+ | wide_state_15x600_sync | 17.3 ms | 16.8 ms: 1.03x faster | +-----------------------------------------+---------+-----------------------+ | wide_state_9x1200_checkpoint_sync | 307 ms | 300 ms: 1.03x faster | +-----------------------------------------+---------+-----------------------+ | wide_state_9x1200_sync | 17.2 ms | 16.8 ms: 1.02x faster | +-----------------------------------------+---------+-----------------------+ | fanout_to_subgraph_100x_checkpoint_sync | 950 ms | 934 ms: 1.02x faster | +-----------------------------------------+---------+-----------------------+ | fanout_to_subgraph_10x_checkpoint | 75.1 ms | 73.9 ms: 1.02x faster | +-----------------------------------------+---------+-----------------------+ | wide_state_15x600 | 26.8 ms | 26.5 ms: 1.01x faster | +-----------------------------------------+---------+-----------------------+ | react_agent_100x | 342 ms | 337 ms: 1.01x faster | +-----------------------------------------+---------+-----------------------+ | react_agent_10x | 30.9 ms | 30.5 ms: 1.01x faster | 
+-----------------------------------------+---------+-----------------------+ | fanout_to_subgraph_10x_checkpoint_sync | 95.2 ms | 94.1 ms: 1.01x faster | +-----------------------------------------+---------+-----------------------+ | wide_state_25x300 | 23.0 ms | 22.7 ms: 1.01x faster | +-----------------------------------------+---------+-----------------------+ | react_agent_10x_checkpoint | 38.1 ms | 37.6 ms: 1.01x faster | +-----------------------------------------+---------+-----------------------+ | react_agent_10x_checkpoint_sync | 37.0 ms | 36.6 ms: 1.01x faster | +-----------------------------------------+---------+-----------------------+ | fanout_to_subgraph_10x | 61.3 ms | 60.7 ms: 1.01x faster | +-----------------------------------------+---------+-----------------------+ | react_agent_100x_checkpoint_sync | 835 ms | 828 ms: 1.01x faster | +-----------------------------------------+---------+-----------------------+ | wide_state_25x300_sync | 14.7 ms | 14.6 ms: 1.01x faster | +-----------------------------------------+---------+-----------------------+ | react_agent_100x_sync | 275 ms | 273 ms: 1.01x faster | +-----------------------------------------+---------+-----------------------+ | fanout_to_subgraph_10x_sync | 52.2 ms | 51.9 ms: 1.01x faster | +-----------------------------------------+---------+-----------------------+ | react_agent_100x_checkpoint | 840 ms | 836 ms: 1.00x faster | +-----------------------------------------+---------+-----------------------+ | react_agent_10x_sync | 22.9 ms | 22.8 ms: 1.00x faster | +-----------------------------------------+---------+-----------------------+ | fanout_to_subgraph_100x_checkpoint | 769 ms | 778 ms: 1.01x slower | +---------------------------------------
from typing import Any, Dict, Iterable, List, TypedDict

from langchain_core.runnables import RunnableConfig

from langgraph.pregel import Pregel
from langgraph.pregel.types import StateSnapshot


class TestCase(TypedDict):
    """One evaluation example: an identifier plus inputs, outputs and metadata.

    At runtime this is a plain ``dict``; the annotations below only describe
    the expected keys for static type checkers.
    """

    # Identifier of the example.
    id: str
    # Payload the unit under test received.
    inputs: Dict[str, Any]
    # Payload the unit under test produced.
    outputs: Dict[str, Any]
    # Free-form bookkeeping attached to the example.
    metadata: Dict[str, Any]


def _checkpoint_key(config: Dict[str, Any]) -> Any:
    """Return the checkpoint identifier stored under ``config["configurable"]``.

    The legacy ``thread_ts`` key is preferred when present, so inputs that
    already carry it behave exactly as before; otherwise ``checkpoint_id``
    is used.

    Raises:
        KeyError: If ``configurable`` holds neither key.
    """
    configurable = config["configurable"]
    if "thread_ts" in configurable:
        return configurable["thread_ts"]
    return configurable["checkpoint_id"]


def _node_test_cases(snapshots: Iterable[StateSnapshot]) -> Dict[str, List[TestCase]]:
    """Derive one test case per node execution from a checkpoint history.

    A snapshot whose metadata has ``source == "loop"`` and a dict of
    ``writes`` (node name -> output) contributes the *outputs* of those nodes.
    The matching *inputs* are the ``values`` of its parent snapshot, so a case
    is only emitted once the parent is reached; the child must therefore be
    iterated before its parent (i.e. newest snapshot first).

    Args:
        snapshots: State snapshots of a single thread, children before parents.

    Returns:
        A plain dict mapping node name to the list of test cases collected
        for that node, in the order their parent snapshots were encountered.

    Raises:
        KeyError: If a snapshot config has neither ``thread_ts`` nor
            ``checkpoint_id`` under ``configurable``.
    """
    test_cases: Dict[str, List[TestCase]] = defaultdict(list)
    # parent checkpoint key -> node name -> case still waiting for its inputs
    pending: Dict[str, Dict[str, TestCase]] = defaultdict(dict)
    for snapshot in snapshots:
        checkpoint_key = _checkpoint_key(snapshot.config)
        # ``pop`` both consumes the waiting cases and avoids leaving an empty
        # entry behind for every snapshot that nobody was waiting on.
        for node, partial in pending.pop(checkpoint_key, {}).items():
            test_cases[node].append(
                {
                    "id": partial["id"],
                    "inputs": snapshot.values,
                    "outputs": partial["outputs"],
                    "metadata": partial["metadata"],
                }
            )

        # Snapshots without metadata cannot describe a node execution.
        metadata = snapshot.metadata or {}
        writes = metadata.get("writes")
        if not (
            writes
            and snapshot.parent_config
            and isinstance(writes, dict)
            and metadata.get("source") == "loop"
        ):
            continue

        parent_key = _checkpoint_key(snapshot.parent_config)
        for node, outputs in writes.items():
            pending[parent_key][node] = {
                "id": checkpoint_key,
                # Filled in from the parent snapshot's values when it is seen.
                "inputs": None,
                "outputs": outputs,
                "metadata": {
                    "source": metadata["source"],
                    "step": metadata.get("step"),
                    # Spread last: config entries win over the keys above.
                    **snapshot.config["configurable"],
                },
            }
    return dict(test_cases)


def extract_node_test_cases_from_thread(
    graph: Pregel, config: RunnableConfig
) -> Dict[str, List[TestCase]]:
    """Collect per-node test cases from a thread's state history.

    Args:
        graph: Graph whose ``get_state_history`` is queried.
        config: Config passed unchanged to ``get_state_history``.

    Returns:
        Whatever ``_node_test_cases`` builds from that history: a mapping of
        node name to its list of test cases.
    """
    history = graph.get_state_history(config)
    return _node_test_cases(history)


async def aextract_node_test_cases_from_thread(
    graph: Pregel, config: RunnableConfig
) -> Dict[str, List[TestCase]]:
    """Async variant: collect per-node test cases from a thread's history.

    The history is read through ``graph.aget_state_history`` because
    ``async for`` needs an async iterator; the synchronous
    ``get_state_history`` cannot be consumed that way.

    Args:
        graph: Graph whose ``aget_state_history`` is queried.
        config: Config passed unchanged to ``aget_state_history``.

    Returns:
        Whatever ``_node_test_cases`` builds from the fully drained history:
        a mapping of node name to its list of test cases.
    """
    history = [snapshot async for snapshot in graph.aget_state_history(config)]
    return _node_test_cases(history)


def _graph_test_case(snapshots: Iterable[StateSnapshot]) -> TestCase:
    """Fold a thread's checkpoint history into a single graph-level test case.

    Snapshots are consumed in the order given and every collected list is
    reversed at the end, so a newest-first history yields oldest-first results.
    Collection starts at the first snapshot with an empty ``next``; each such
    snapshot opens a new run whose ``values`` are recorded as an output.

    Args:
        snapshots: State snapshots of a single thread.

    Returns:
        A test case where
        ``inputs["input"]`` holds the ``writes`` of every ``"input"`` snapshot,
        ``outputs["output"]`` holds the ``values`` of every finished snapshot,
        ``outputs["steps"]`` holds, per run, the node names written by
        ``"loop"`` snapshots, and ``id`` is the ``thread_id`` of the first
        snapshot (``None`` when ``snapshots`` is empty). ``metadata`` is always
        present and stays ``{}`` until a finished snapshot supplies its
        ``configurable`` mapping.
    """
    inputs: List[Any] = []
    outputs: List[Any] = []
    steps: List[List[Any]] = []
    # Built as a literal so every key declared on TestCase exists up front.
    test_case: TestCase = {
        "id": None,
        "inputs": {"input": inputs},
        "outputs": {"output": outputs, "steps": steps},
        "metadata": {},
    }
    collecting = False
    for snapshot in snapshots:
        if not test_case["id"]:
            test_case["id"] = snapshot.config["configurable"]["thread_id"]
        if not snapshot.next:
            # Nothing left to run: this snapshot closes a run.
            collecting = True
            outputs.append(snapshot.values)
            steps.append([])
            # NOTE(review): the source lost its indentation, so it is unclear
            # whether this assignment was nested under the finished-snapshot
            # branch or ran for every snapshot - confirm against the original.
            if not test_case["metadata"]:
                test_case["metadata"] = snapshot.config["configurable"]
        if not collecting:
            continue

        # Snapshots without metadata contribute neither steps nor inputs.
        metadata = snapshot.metadata or {}
        source = metadata.get("source")
        writes = metadata.get("writes")
        if source == "loop" and writes:
            # Iterating the writes yields the node names that produced them.
            steps[-1].extend(writes)
        elif source == "input":
            inputs.append(writes)

    inputs.reverse()
    outputs.reverse()
    steps.reverse()
    for run_steps in steps:
        run_steps.reverse()
    return test_case


def extract_graph_test_case_from_thread(
    graph: Pregel, config: RunnableConfig
) -> TestCase:
    """Build a graph-level test case from a thread's state history.

    Args:
        graph: Graph whose ``get_state_history`` is queried.
        config: Config passed unchanged to ``get_state_history``.

    Returns:
        Whatever ``_graph_test_case`` builds from that history.
    """
    history = graph.get_state_history(config)
    return _graph_test_case(history)


async def aextract_graph_test_case_from_thread(
    graph: Pregel, config: RunnableConfig
) -> TestCase:
    """Async variant: build a graph-level test case from a thread's history.

    The history is read through ``graph.aget_state_history`` because
    ``async for`` needs an async iterator; the synchronous
    ``get_state_history`` cannot be consumed that way.

    Args:
        graph: Graph whose ``aget_state_history`` is queried.
        config: Config passed unchanged to ``aget_state_history``.

    Returns:
        Whatever ``_graph_test_case`` builds from the fully drained history.
    """
    history = [snapshot async for snapshot in graph.aget_state_history(config)]
    return _graph_test_case(history)

0 comments on commit a783901

Please sign in to comment.