Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Dev/gfql fixes #623

Closed
wants to merge 5 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,16 @@ The changelog format is based on [Keep a Changelog](https://keepachangelog.com/e
This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html) and all PyGraphistry-specific breaking changes are explictly noted here.

## [Development]

### Feat

* GFQL chain edge AST node deserialization as more precise `ASTEdge` subclasses

### Fixes

* Hop: Detect #614 of node id column name colliding with edge src/dst id column name and raise `NotImplementedError`
* MyPy: Remove explicit type annotation from Engine

### Docs

* Python remote mode notebook: Fixed engine results
Expand Down
8 changes: 4 additions & 4 deletions graphistry/Engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,10 @@


class Engine(Enum):
PANDAS : str = 'pandas'
CUDF : str = 'cudf'
DASK : str = 'dask'
DASK_CUDF : str = 'dask_cudf'
PANDAS = 'pandas'
CUDF = 'cudf'
DASK = 'dask'
DASK_CUDF = 'dask_cudf'

class EngineAbstract(Enum):
PANDAS = Engine.PANDAS.value
Expand Down
60 changes: 59 additions & 1 deletion graphistry/compute/ast.py
Original file line number Diff line number Diff line change
Expand Up @@ -400,6 +400,22 @@ def __init__(self,
edge_query=edge_query
)

@classmethod
def from_json(cls, d: dict) -> 'ASTEdge':
out = ASTEdgeForward(
edge_match=maybe_filter_dict_from_json(d, 'edge_match'),
hops=d['hops'] if 'hops' in d else None,
to_fixed_point=d['to_fixed_point'] if 'to_fixed_point' in d else DEFAULT_FIXED_POINT,
source_node_match=maybe_filter_dict_from_json(d, 'source_node_match'),
destination_node_match=maybe_filter_dict_from_json(d, 'destination_node_match'),
source_node_query=d['source_node_query'] if 'source_node_query' in d else None,
destination_node_query=d['destination_node_query'] if 'destination_node_query' in d else None,
edge_query=d['edge_query'] if 'edge_query' in d else None,
name=d['name'] if 'name' in d else None
)
out.validate()
return out

e_forward = ASTEdgeForward # noqa: E305

class ASTEdgeReverse(ASTEdge):
Expand Down Expand Up @@ -430,6 +446,22 @@ def __init__(self,
edge_query=edge_query
)

@classmethod
def from_json(cls, d: dict) -> 'ASTEdge':
out = ASTEdgeReverse(
edge_match=maybe_filter_dict_from_json(d, 'edge_match'),
hops=d['hops'] if 'hops' in d else None,
to_fixed_point=d['to_fixed_point'] if 'to_fixed_point' in d else DEFAULT_FIXED_POINT,
source_node_match=maybe_filter_dict_from_json(d, 'source_node_match'),
destination_node_match=maybe_filter_dict_from_json(d, 'destination_node_match'),
source_node_query=d['source_node_query'] if 'source_node_query' in d else None,
destination_node_query=d['destination_node_query'] if 'destination_node_query' in d else None,
edge_query=d['edge_query'] if 'edge_query' in d else None,
name=d['name'] if 'name' in d else None
)
out.validate()
return out

e_reverse = ASTEdgeReverse # noqa: E305

class ASTEdgeUndirected(ASTEdge):
Expand Down Expand Up @@ -460,6 +492,22 @@ def __init__(self,
edge_query=edge_query
)

@classmethod
def from_json(cls, d: dict) -> 'ASTEdge':
out = ASTEdgeUndirected(
edge_match=maybe_filter_dict_from_json(d, 'edge_match'),
hops=d['hops'] if 'hops' in d else None,
to_fixed_point=d['to_fixed_point'] if 'to_fixed_point' in d else DEFAULT_FIXED_POINT,
source_node_match=maybe_filter_dict_from_json(d, 'source_node_match'),
destination_node_match=maybe_filter_dict_from_json(d, 'destination_node_match'),
source_node_query=d['source_node_query'] if 'source_node_query' in d else None,
destination_node_query=d['destination_node_query'] if 'destination_node_query' in d else None,
edge_query=d['edge_query'] if 'edge_query' in d else None,
name=d['name'] if 'name' in d else None
)
out.validate()
return out

e_undirected = ASTEdgeUndirected # noqa: E305
e = ASTEdgeUndirected # noqa: E305

Expand All @@ -472,7 +520,17 @@ def from_json(o: JSONVal) -> Union[ASTNode, ASTEdge]:
if o['type'] == 'Node':
out = ASTNode.from_json(o)
elif o['type'] == 'Edge':
out = ASTEdge.from_json(o)
if 'direction' in o:
if o['direction'] == 'forward':
out = ASTEdgeForward.from_json(o)
elif o['direction'] == 'reverse':
out = ASTEdgeReverse.from_json(o)
elif o['direction'] == 'undirected':
out = ASTEdgeUndirected.from_json(o)
else:
raise ValueError(f'Edge has unknown direction {o["direction"]}')
else:
raise ValueError('Edge missing direction')
else:
raise ValueError(f'Unknown type {o["type"]}')
return out
5 changes: 5 additions & 0 deletions graphistry/compute/hop.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,11 @@ def hop(self: Plottable,
g2 = self.materialize_nodes(engine=EngineAbstract(engine_concrete.value))
logger.debug('materialized node/eddge types: %s, %s', type(g2._nodes), type(g2._edges))

if g2._node == g2._source:
raise NotImplementedError(f'Not supported: Node id column cannot currently have the same name as edge src column: {g2._node}')
if g2._node == g2._destination:
raise NotImplementedError(f'Not supported: Node id column cannot currently have the same name as edge dst column: {g2._node}')

starting_nodes = nodes if nodes is not None else g2._nodes

if g2._edge is None:
Expand Down
48 changes: 45 additions & 3 deletions graphistry/tests/compute/test_chain.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
import os
import pandas as pd
from graphistry.compute.predicates.is_in import is_in
from graphistry.compute.predicates.numeric import gt
import pytest

from graphistry.compute.ast import ASTNode, ASTEdge, n, e, e_undirected, e_forward
from graphistry.compute.ast import ASTEdgeUndirected, ASTNode, ASTEdge, n, e, e_undirected, e_forward
from graphistry.compute.chain import Chain
from graphistry.compute.predicates.is_in import IsIn, is_in
from graphistry.compute.predicates.numeric import gt
from graphistry.tests.test_compute import CGFull


Expand Down Expand Up @@ -298,6 +298,25 @@ def test_chain_serialization_pred():
o2 = d.to_json()
assert o == o2

def test_chain_serialize_pred_is_in():

#from graphistry.compute.chain import Chain
#from graphistry import e_undirected, is_in
o = Chain([
e_undirected(
hops=1,
edge_match={"source": is_in(options=[
"Oakville Square",
"Maplewood Square"
])})
]).to_json()
d = Chain.from_json(o)
assert isinstance(d.chain[0], ASTEdgeUndirected), f'got: {type(d.chain[0])}'
assert d.chain[0].direction == 'undirected'
assert d.chain[0].hops == 1
assert isinstance(d.chain[0].edge_match['source'], IsIn)
assert d.chain[0].edge_match['source'].options == ['Oakville Square', 'Maplewood Square']

def test_chain_simple_cudf_pd():
nodes_df = pd.DataFrame({'id': [0, 1, 2], 'label': ['a', 'b', 'c']})
edges_df = pd.DataFrame({'src': [0, 1, 2], 'dst': [1, 2, 0]})
Expand Down Expand Up @@ -416,3 +435,26 @@ def test_preds_more_pd_2():
)
assert len(g2._nodes) == 2
assert set(g2._nodes[g._node].tolist()) == set(['b2', 'c2'])


def test_chain_binding_reuse():
edges_df = pd.DataFrame({'s': ['a', 'b'], 'd': ['b', 'c']})
nodes1_df = pd.DataFrame({'v': ['a', 'b', 'c']})
nodes2_df = pd.DataFrame({'s': ['a', 'b', 'c']})
nodes3_df = pd.DataFrame({'d': ['a', 'b', 'c']})

g1 = CGFull().nodes(nodes1_df, 'v').edges(edges_df, 's', 'd')
g2 = CGFull().nodes(nodes2_df, 's').edges(edges_df, 's', 'd')
g3 = CGFull().nodes(nodes3_df, 'd').edges(edges_df, 's', 'd')

try:
g1_hop = g1.chain([n(), e(), n()])
g2_hop = g2.chain([n(), e(), n()])
g3_hop = g3.chain([n(), e(), n()])
except NotImplementedError:
return

assert g1_hop._nodes.shape == g2_hop._nodes.shape
assert g1_hop._edges.shape == g2_hop._edges.shape
assert g1_hop._nodes.shape == g3_hop._nodes.shape
assert g1_hop._edges.shape == g3_hop._edges.shape
22 changes: 22 additions & 0 deletions graphistry/tests/compute/test_hop.py
Original file line number Diff line number Diff line change
Expand Up @@ -415,6 +415,28 @@ def test_hop_predicates_fail_destination_forward(self, g_long_forwards_chain: CG
assert g2._edges[['s', 'd']].sort_values(['s', 'd']).to_dict(orient='records') == []


def test_hop_binding_reuse():
edges_df = pd.DataFrame({'s': ['a', 'b'], 'd': ['b', 'c']})
nodes1_df = pd.DataFrame({'v': ['a', 'b', 'c']})
nodes2_df = pd.DataFrame({'s': ['a', 'b', 'c']})
nodes3_df = pd.DataFrame({'d': ['a', 'b', 'c']})

g1 = CGFull().nodes(nodes1_df, 'v').edges(edges_df, 's', 'd')
g2 = CGFull().nodes(nodes2_df, 's').edges(edges_df, 's', 'd')
g3 = CGFull().nodes(nodes3_df, 'd').edges(edges_df, 's', 'd')

try:
g1_hop = g1.hop()
g2_hop = g2.hop()
g3_hop = g3.hop()
except NotImplementedError:
return

assert g1_hop._nodes.shape == g2_hop._nodes.shape
assert g1_hop._edges.shape == g2_hop._edges.shape
assert g1_hop._nodes.shape == g3_hop._nodes.shape
assert g1_hop._edges.shape == g3_hop._edges.shape

def test_hop_simple_cudf_pd():
nodes_df = pd.DataFrame({'id': [0, 1, 2], 'label': ['a', 'b', 'c']})
edges_df = pd.DataFrame({'src': [0, 1, 2], 'dst': [1, 2, 0]})
Expand Down
Loading