From fdeca4da6a0e1d272ed298b8dce3ff49795b77d8 Mon Sep 17 00:00:00 2001 From: Leo Meyerovich Date: Mon, 23 Dec 2024 21:36:06 -0800 Subject: [PATCH 1/5] fix(mypy): enum vals are unannotated --- graphistry/Engine.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/graphistry/Engine.py b/graphistry/Engine.py index 57b0a3cbce..1f962b992b 100644 --- a/graphistry/Engine.py +++ b/graphistry/Engine.py @@ -7,10 +7,10 @@ class Engine(Enum): - PANDAS : str = 'pandas' - CUDF : str = 'cudf' - DASK : str = 'dask' - DASK_CUDF : str = 'dask_cudf' + PANDAS = 'pandas' + CUDF = 'cudf' + DASK = 'dask' + DASK_CUDF = 'dask_cudf' class EngineAbstract(Enum): PANDAS = Engine.PANDAS.value From 63db90b3991034c53910901d65d90e0d36652f4c Mon Sep 17 00:00:00 2001 From: Leo Meyerovich Date: Mon, 23 Dec 2024 21:38:59 -0800 Subject: [PATCH 2/5] feat(gfql ast): type edge by direction --- graphistry/compute/ast.py | 60 +++++++++++++++++++++++++- graphistry/tests/compute/test_chain.py | 25 +++++++++-- 2 files changed, 81 insertions(+), 4 deletions(-) diff --git a/graphistry/compute/ast.py b/graphistry/compute/ast.py index 22683c76dd..59601cf474 100644 --- a/graphistry/compute/ast.py +++ b/graphistry/compute/ast.py @@ -400,6 +400,22 @@ def __init__(self, edge_query=edge_query ) + @classmethod + def from_json(cls, d: dict) -> 'ASTEdge': + out = ASTEdgeForward( + edge_match=maybe_filter_dict_from_json(d, 'edge_match'), + hops=d['hops'] if 'hops' in d else None, + to_fixed_point=d['to_fixed_point'] if 'to_fixed_point' in d else DEFAULT_FIXED_POINT, + source_node_match=maybe_filter_dict_from_json(d, 'source_node_match'), + destination_node_match=maybe_filter_dict_from_json(d, 'destination_node_match'), + source_node_query=d['source_node_query'] if 'source_node_query' in d else None, + destination_node_query=d['destination_node_query'] if 'destination_node_query' in d else None, + edge_query=d['edge_query'] if 'edge_query' in d else None, + name=d['name'] if 'name' in d else None + ) + out.validate() + return out + e_forward = ASTEdgeForward # noqa: E305 class ASTEdgeReverse(ASTEdge): @@ -430,6 +446,22 @@ def __init__(self, edge_query=edge_query ) + @classmethod + def from_json(cls, d: dict) -> 'ASTEdge': + out = ASTEdgeReverse( + edge_match=maybe_filter_dict_from_json(d, 'edge_match'), + hops=d['hops'] if 'hops' in d else None, + to_fixed_point=d['to_fixed_point'] if 'to_fixed_point' in d else DEFAULT_FIXED_POINT, + source_node_match=maybe_filter_dict_from_json(d, 'source_node_match'), + destination_node_match=maybe_filter_dict_from_json(d, 'destination_node_match'), + source_node_query=d['source_node_query'] if 'source_node_query' in d else None, + destination_node_query=d['destination_node_query'] if 'destination_node_query' in d else None, + edge_query=d['edge_query'] if 'edge_query' in d else None, + name=d['name'] if 'name' in d else None + ) + out.validate() + return out + e_reverse = ASTEdgeReverse # noqa: E305 class ASTEdgeUndirected(ASTEdge): @@ -460,6 +492,22 @@ def __init__(self, edge_query=edge_query ) + @classmethod + def from_json(cls, d: dict) -> 'ASTEdge': + out = ASTEdgeUndirected( + edge_match=maybe_filter_dict_from_json(d, 'edge_match'), + hops=d['hops'] if 'hops' in d else None, + to_fixed_point=d['to_fixed_point'] if 'to_fixed_point' in d else DEFAULT_FIXED_POINT, + source_node_match=maybe_filter_dict_from_json(d, 'source_node_match'), + destination_node_match=maybe_filter_dict_from_json(d, 'destination_node_match'), + source_node_query=d['source_node_query'] if 'source_node_query' in d else None, + destination_node_query=d['destination_node_query'] if 'destination_node_query' in d else None, + edge_query=d['edge_query'] if 'edge_query' in d else None, + name=d['name'] if 'name' in d else None + ) + out.validate() + return out + e_undirected = ASTEdgeUndirected # noqa: E305 e = ASTEdgeUndirected # noqa: E305 @@ -472,7 +520,17 @@ def from_json(o: JSONVal) -> Union[ASTNode, ASTEdge]: if o['type'] == 'Node': out = ASTNode.from_json(o) elif o['type'] == 'Edge': - out = ASTEdge.from_json(o) + if 'direction' in o: + if o['direction'] == 'forward': + out = ASTEdgeForward.from_json(o) + elif o['direction'] == 'reverse': + out = ASTEdgeReverse.from_json(o) + elif o['direction'] == 'undirected': + out = ASTEdgeUndirected.from_json(o) + else: + raise ValueError(f'Edge has unknown direction {o["direction"]}') + else: + raise ValueError('Edge missing direction') else: raise ValueError(f'Unknown type {o["type"]}') return out diff --git a/graphistry/tests/compute/test_chain.py b/graphistry/tests/compute/test_chain.py index d38e0af1a2..9b34ae4cb3 100644 --- a/graphistry/tests/compute/test_chain.py +++ b/graphistry/tests/compute/test_chain.py @@ -1,11 +1,11 @@ import os import pandas as pd -from graphistry.compute.predicates.is_in import is_in -from graphistry.compute.predicates.numeric import gt import pytest -from graphistry.compute.ast import ASTNode, ASTEdge, n, e, e_undirected, e_forward +from graphistry.compute.ast import ASTEdgeUndirected, ASTNode, ASTEdge, n, e, e_undirected, e_forward from graphistry.compute.chain import Chain +from graphistry.compute.predicates.is_in import IsIn, is_in +from graphistry.compute.predicates.numeric import gt from graphistry.tests.test_compute import CGFull @@ -298,6 +298,25 @@ def test_chain_serialization_pred(): o2 = d.to_json() assert o == o2 +def test_chain_serialize_pred_is_in(): + + #from graphistry.compute.chain import Chain + #from graphistry import e_undirected, is_in + o = Chain([ + e_undirected( + hops=1, + edge_match={"source": is_in(options=[ + "Oakville Square", + "Maplewood Square" + ])}) + ]).to_json() + d = Chain.from_json(o) + assert isinstance(d.chain[0], ASTEdgeUndirected), f'got: {type(d.chain[0])}' + assert d.chain[0].direction == 'undirected' + assert d.chain[0].hops == 1 + assert isinstance(d.chain[0].edge_match['source'], IsIn) + assert d.chain[0].edge_match['source'].options == ['Oakville Square', 'Maplewood Square'] + def test_chain_simple_cudf_pd(): nodes_df = pd.DataFrame({'id': [0, 1, 2], 'label': ['a', 'b', 'c']}) edges_df = pd.DataFrame({'src': [0, 1, 2], 'dst': [1, 2, 0]}) From 54faed3ae1f9c584937a75ef543eae22faca4d0d Mon Sep 17 00:00:00 2001 From: Leo Meyerovich Date: Tue, 24 Dec 2024 01:01:45 -0800 Subject: [PATCH 3/5] fix(chain): flag unhandled binding collision #614 --- CHANGELOG.md | 5 +++++ graphistry/compute/hop.py | 5 +++++ graphistry/tests/compute/test_hop.py | 22 ++++++++++++++++++++++ 3 files changed, 32 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 821d106096..70f78251c0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,11 @@ The changelog format is based on [Keep a Changelog](https://keepachangelog.com/e This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html) and all PyGraphistry-specific breaking changes are explictly noted here. ## [Development] + +### Fixes + +* Hop: Detect #614 of node id column name colliding with edge src/dst id column name and raise `NotImplementedError` + ### Docs * Python remote mode notebook: Fixed engine results diff --git a/graphistry/compute/hop.py b/graphistry/compute/hop.py index e14da1aca7..0610514944 100644 --- a/graphistry/compute/hop.py +++ b/graphistry/compute/hop.py @@ -116,6 +116,11 @@ def hop(self: Plottable, g2 = self.materialize_nodes(engine=EngineAbstract(engine_concrete.value)) logger.debug('materialized node/eddge types: %s, %s', type(g2._nodes), type(g2._edges)) + if g2._node == g2._source: + raise NotImplementedError(f'Not supported: Node id column cannot currently have the same name as edge src column: {g2._node}') + if g2._node == g2._destination: + raise NotImplementedError(f'Not supported: Node id column cannot currently have the same name as edge dst column: {g2._node}') + starting_nodes = nodes if nodes is not None else g2._nodes if g2._edge is None: diff --git a/graphistry/tests/compute/test_hop.py b/graphistry/tests/compute/test_hop.py index 232421c88d..ce1c82d417 100644 --- a/graphistry/tests/compute/test_hop.py +++ b/graphistry/tests/compute/test_hop.py @@ -415,6 +415,28 @@ def test_hop_predicates_fail_destination_forward(self, g_long_forwards_chain: CG assert g2._edges[['s', 'd']].sort_values(['s', 'd']).to_dict(orient='records') == [] +def test_hop_binding_reuse(): + edges_df = pd.DataFrame({'s': ['a', 'b'], 'd': ['b', 'c']}) + nodes1_df = pd.DataFrame({'v': ['a', 'b', 'c']}) + nodes2_df = pd.DataFrame({'s': ['a', 'b', 'c']}) + nodes3_df = pd.DataFrame({'d': ['a', 'b', 'c']}) + + g1 = CGFull().nodes(nodes1_df, 'v').edges(edges_df, 's', 'd') + g2 = CGFull().nodes(nodes2_df, 's').edges(edges_df, 's', 'd') + g3 = CGFull().nodes(nodes3_df, 'd').edges(edges_df, 's', 'd') + + try: + g1_hop = g1.hop() + g2_hop = g2.hop() + g3_hop = g3.hop() + except NotImplementedError: + return + + assert g1_hop._nodes.shape == g2_hop._nodes.shape + assert g1_hop._edges.shape == g2_hop._edges.shape + assert g1_hop._nodes.shape == g3_hop._nodes.shape + assert g1_hop._edges.shape == g3_hop._edges.shape + def test_hop_simple_cudf_pd(): nodes_df = pd.DataFrame({'id': [0, 1, 2], 'label': ['a', 'b', 'c']}) edges_df = pd.DataFrame({'src': [0, 1, 2], 'dst': [1, 2, 0]}) From 4a45a28d1bf7900b6fd110158894f28c18a05df4 Mon Sep 17 00:00:00 2001 From: Leo Meyerovich Date: Tue, 24 Dec 2024 01:04:47 -0800 Subject: [PATCH 4/5] docs(changelog) --- CHANGELOG.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 70f78251c0..1242fd360a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,9 +7,14 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.htm ## [Development] +### Feat + +* GFQL chain edge AST node deserialization as more precise `ASTEdge` subclasses + ### Fixes * Hop: Detect #614 of node id column name colliding with edge src/dst id column name and raise `NotImplementedError` +* MyPy: Remove explicit type annotation from Engine ### Docs From 501e51de8c2fbc2d391855c467ae066a4129629d Mon Sep 17 00:00:00 2001 From: Leo Meyerovich Date: Tue, 24 Dec 2024 01:21:44 -0800 Subject: [PATCH 5/5] test(gfql): 614 aliasing bug test --- graphistry/tests/compute/test_chain.py | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/graphistry/tests/compute/test_chain.py b/graphistry/tests/compute/test_chain.py index 9b34ae4cb3..1487ff0934 100644 --- a/graphistry/tests/compute/test_chain.py +++ b/graphistry/tests/compute/test_chain.py @@ -435,3 +435,26 @@ def test_preds_more_pd_2(): ) assert len(g2._nodes) == 2 assert set(g2._nodes[g._node].tolist()) == set(['b2', 'c2']) + + +def test_chain_binding_reuse(): + edges_df = pd.DataFrame({'s': ['a', 'b'], 'd': ['b', 'c']}) + nodes1_df = pd.DataFrame({'v': ['a', 'b', 'c']}) + nodes2_df = pd.DataFrame({'s': ['a', 'b', 'c']}) + nodes3_df = pd.DataFrame({'d': ['a', 'b', 'c']}) + + g1 = CGFull().nodes(nodes1_df, 'v').edges(edges_df, 's', 'd') + g2 = CGFull().nodes(nodes2_df, 's').edges(edges_df, 's', 'd') + g3 = CGFull().nodes(nodes3_df, 'd').edges(edges_df, 's', 'd') + + try: + g1_hop = g1.chain([n(), e(), n()]) + g2_hop = g2.chain([n(), e(), n()]) + g3_hop = g3.chain([n(), e(), n()]) + except NotImplementedError: + return + + assert g1_hop._nodes.shape == g2_hop._nodes.shape + assert g1_hop._edges.shape == g2_hop._edges.shape + assert g1_hop._nodes.shape == g3_hop._nodes.shape + assert g1_hop._edges.shape == g3_hop._edges.shape