Skip to content

Commit

Permalink
fix(cugraph): breaking - handle 26.10
Browse files Browse the repository at this point in the history
  • Loading branch information
lmeyerov committed Feb 3, 2025
1 parent afff91d commit dfd7d35
Show file tree
Hide file tree
Showing 3 changed files with 60 additions and 19 deletions.
10 changes: 10 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,16 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.htm

## [Development]

### Breaking

* `from_cugraph` now binds the returned graph's source/destination to the column names of the input `cugraph.Graph` object instead of those of the base `Plottable`

### Feat

* Switch to `skrub` for feature engineering
* More AI methods support GPU path
* Support cugraph 26.10+

## [0.35.4 - 2024-12-28]

### Fixes
Expand Down
41 changes: 35 additions & 6 deletions graphistry/plugins/cugraph.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
import pandas as pd
from typing import Any, Dict, List, Optional, Union
import pandas as pd
import warnings

from graphistry.constants import NODE
from graphistry.Engine import EngineAbstract
from graphistry.Plottable import Plottable
Expand All @@ -8,7 +10,6 @@
logger = setup_logger(__name__)



#import logging
#logger.setLevel(logging.DEBUG)

Expand Down Expand Up @@ -38,7 +39,7 @@ def from_cugraph(self,
) -> Plottable:
"""
If bound IDs, use the same IDs in the returned graph.
Take input cugraph.Graph object and load in data and bindings (source, destination, edge_weight)
If non-empty nodes/edges, instead of returning G's topology, use existing topology and merge in G's attributes
Expand All @@ -50,8 +51,28 @@ def from_cugraph(self,

####

src = self._source or SRC_CUGRAPH
dst = self._destination or DST_CUGRAPH
# Resolve the source column name. When G exposes a non-None
# `source_columns`, that name wins over the binding already on `self`;
# otherwise fall back to the existing binding, then to SRC_CUGRAPH.
if hasattr(G, 'source_columns') and G.source_columns is not None:
    s = G.source_columns
    if isinstance(s, list):
        # Only the first entry of a list is used.
        s = s[0]
    # The message formats the real attribute: it previously read the
    # misspelled `G.souurce_columns`, which raised AttributeError instead of
    # the intended AssertionError. The 1-tuple keeps `%` from unpacking a
    # tuple-valued attribute.
    assert isinstance(s, str), "Found G.source_columns, and expected it to be a string or a list of one string, but was: %s" % (G.source_columns,)
    if self._source is not None and self._source != s:
        # The caller's binding is being overridden; make that visible.
        warnings.warn('Switching g source column name to G source column name')
else:
    s = self._source or SRC_CUGRAPH
src = s

# Resolve the destination column name with the same precedence as the source.
if hasattr(G, 'destination_columns') and G.destination_columns is not None:
    d = G.destination_columns
    if isinstance(d, list):
        # Only the first entry of a list is used.
        d = d[0]
    # 1-tuple for the same reason as the source-side message above.
    assert isinstance(d, str), "Found G.destination_columns, and expected it to be a string or a list of one string, but was: %s" % (G.destination_columns,)
    if self._destination is not None and self._destination != d:
        warnings.warn('Switching g destination column name to G destination column name')
else:
    d = self._destination or DST_CUGRAPH
dst = d

edges_gdf = G.view_edge_list() # src, dst

if g._nodes is not None and load_nodes:
Expand Down Expand Up @@ -326,7 +347,15 @@ def compute_cugraph_core(
out = out[0]
if out_col is not None:
raise ValueError('Graph returned, but out_col was specified')
return from_cugraph(self, out, load_nodes=False)
# A cugraph.Graph came back from the algorithm. Its source column name may
# differ from the one bound on `self`, so temporarily rename our edge columns
# to match before merging, then restore the caller's edges and bindings.
self2 = self
# Read the attribute defensively, mirroring the hasattr guard and list
# unwrapping in from_cugraph: a Graph without `source_columns` must not
# raise AttributeError, and a one-element list should compare as its name.
out_src = getattr(out, 'source_columns', None)
if isinstance(out_src, list) and len(out_src) == 1:
    out_src = out_src[0]
if out_src is not None and self._source != out_src:
    logger.debug('Switching g source column name to G source column name to work around cugraph inconsistency')
    if out_src == 'src':
        # Only the 'src'/'dst' naming is handled by this workaround.
        self2 = self.edges(self._edges.rename(columns={self._source: 'src', self._destination: 'dst'}), 'src', 'dst')
res = from_cugraph(self2, out, load_nodes=False)
if self2 is not self:
    # Undo the temporary rename: hand back the caller's original edges/bindings.
    res = res.edges(self._edges, self._source, self._destination)
return res

# No branch handled `alg`. Exceptions do not apply logging-style lazy
# %-formatting, so interpolate explicitly; the 1-tuple keeps a tuple-valued
# `alg` from being unpacked by `%`.
raise ValueError('Unsupported algorithm: %s' % (alg,))

Expand Down
28 changes: 15 additions & 13 deletions graphistry/tests/plugins/test_cugraph.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,8 +69,8 @@ def test_minimal_edges(self):
g = graphistry.from_cugraph(G, load_nodes=False)
assert g._nodes is None and g._node is None
assert g._source is not None and g._destination is not None
assert g._source == SRC_CUGRAPH
assert g._destination == DST_CUGRAPH
assert g._source == 'a'
assert g._destination == 'b'
assert g._edges is not None
assert isinstance(g._edges, cudf.DataFrame)
assert len(g._edges) == len(edges)
Expand All @@ -88,14 +88,14 @@ def test_minimal_attributed_edges(self):
assert g._nodes is None and g._node is None
assert len(g._edges) == len(edges)
assert g._source is not None and g._destination is not None
assert g._source == SRC_CUGRAPH
assert g._destination == DST_CUGRAPH
assert g._source == 'a'
assert g._destination == 'b'
assert g._edges is not None
assert isinstance(g._edges, cudf.DataFrame)
assert len(g._edges) == len(edges)
assert len(g._edges[g._source].dropna()) == len(edges)
assert len(g._edges[g._destination].dropna()) == len(edges)
assert (g._edges['weights'].to_pandas() == edges_w['w']).all()
assert (g._edges['w'].to_pandas() == edges_w['w']).all()

def test_merge_existing_edges_pandas(self):

Expand Down Expand Up @@ -191,8 +191,8 @@ def test_minimal_edges(self):
logger.debug('G: %s', G)
g2 = graphistry.from_cugraph(G)
assert g2._edges.shape == g._edges.shape
assert g2._source == SRC_CUGRAPH
assert g2._destination == DST_CUGRAPH
assert g2._source == g._source
assert g2._destination == g._destination
assert g2._edge is None
assert g2._nodes is None and g2._node is None
#logger.debug('g2._nodes: %s', g2._nodes)
Expand Down Expand Up @@ -249,8 +249,8 @@ def test_minimal_edges_str(self):
logger.debug('G: %s', G)
g2 = graphistry.from_cugraph(G)
assert g2._edges.shape == g._edges.shape
assert g2._source == SRC_CUGRAPH
assert g2._destination == DST_CUGRAPH
assert g2._source == g._source
assert g2._destination == g._destination
assert g2._edge is None
assert (
g2._edges
Expand Down Expand Up @@ -283,8 +283,8 @@ def test_nodes(self):
logger.debug('ig: %s', G)
g2 = graphistry.from_cugraph(G).materialize_nodes()
assert g2._edges.shape == g._edges.shape
assert g2._source == SRC_CUGRAPH
assert g2._destination == DST_CUGRAPH
assert g2._source == g._source
assert g2._destination == g._destination
assert g2._edge is None
assert g2._node == 'id'
logger.debug('g2._nodes: %s', g2._nodes)
Expand Down Expand Up @@ -336,8 +336,8 @@ def test_drop_nodes(self):
logger.debug('G: %s', G)
g2 = graphistry.from_cugraph(G).materialize_nodes()
assert g2._edges.shape == g._edges.shape
assert g2._source == SRC_CUGRAPH
assert g2._destination == DST_CUGRAPH
assert g2._source == g._source
assert g2._destination == g._destination
assert g2._edge is None
logger.debug('g2._nodes: %s', g2._nodes)
logger.debug('other: %s', nodes)
Expand Down Expand Up @@ -604,6 +604,8 @@ def test_all_calls(self):

edges3_gdf = cudf.from_pandas(edges3_df)
g = graphistry.edges(edges3_gdf, 'a', 'b').bind(edge_weight='f').materialize_nodes()
assert g._source == 'a'
assert g._destination == 'b'
for alg in [x for x in compute_algs]:
if alg not in skiplist:
opts = overrides[alg] if alg in overrides else {}
Expand Down

0 comments on commit dfd7d35

Please sign in to comment.