Skip to content

Commit

Permalink
feat(graph validation): add graph validation and tests
Browse files Browse the repository at this point in the history
  • Loading branch information
defendable-sindre committed Jan 8, 2025
1 parent 7f5b2db commit 25b35ef
Show file tree
Hide file tree
Showing 2 changed files with 107 additions and 0 deletions.
46 changes: 46 additions & 0 deletions graphistry/tests/validate/test_validate_graph.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
from graphistry.validate.validate_graph import validate_graph
import graphistry
import pandas as pd


def test_validate_graph_good():
    """A graph whose node table has a bound, unique, non-null ID column is valid."""
    edges_df = pd.DataFrame({'s': ['a', 'b'], 'd': ['b', 'c']})
    nodes_df = pd.DataFrame({'id': ['a', 'b', 'c'], 'name': ['A', 'B', 'C']})
    g = graphistry.edges(edges_df, 's', 'd').nodes(nodes_df, node='id')
    assert validate_graph(g) is True


def test_validate_graph_undefined_nodeid():
    """Attaching a node table without naming the ID column is reported as invalid."""
    edges_df = pd.DataFrame({'s': ['a', 'b'], 'd': ['b', 'c']})
    nodes_df = pd.DataFrame({'id': ['a', 'b', 'c'], 'name': ['A', 'B', 'C']})
    # No ``node=`` argument: the node ID binding is left unspecified.
    g = graphistry.edges(edges_df, 's', 'd').nodes(nodes_df)
    assert validate_graph(g) is False


def test_validate_graph_duplicate_nodeid():
    """A node table that repeats an ID value is reported as invalid."""
    edges_df = pd.DataFrame({'s': ['a', 'b'], 'd': ['b', 'c']})
    # 'a' appears twice in the ID column.
    nodes_df = pd.DataFrame({
        'id': ['a', 'a', 'b', 'c'],
        'name': ['A', 'A2', 'B', 'C'],
    })
    g = graphistry.edges(edges_df, 's', 'd').nodes(nodes_df, node='id')
    assert validate_graph(g) is False


def test_validate_graph_missing_nodes():
    """A graph built from edges only (no node table) is reported as invalid."""
    edges_df = pd.DataFrame({'s': ['a', 'b'], 'd': ['b', 'c']})
    g = graphistry.edges(edges_df)
    assert validate_graph(g) is False


def test_validate_graph_nan_nodes():
    """A node table with a null entry in its ID column is reported as invalid."""
    edges_df = pd.DataFrame({'s': ['a', 'b'], 'd': ['b', 'c']})
    # First node ID is missing.
    nodes_df = pd.DataFrame({'id': [None, 'b', 'c'], 'name': ['A', 'B', 'C']})
    g = graphistry.edges(edges_df, 's', 'd').nodes(nodes_df, node='id')
    assert validate_graph(g) is False


def test_validate_graph_missing_src_node():
    """An edge source absent from the node table only warns; the graph stays valid."""
    edges_df = pd.DataFrame({'s': ['a', 'b'], 'd': ['b', 'c']})
    # Source 'a' has no matching row in the node table.
    nodes_df = pd.DataFrame({'id': ['b', 'c'], 'name': ['B', 'C']})
    g = graphistry.edges(edges_df, 's', 'd').nodes(nodes_df, node='id')
    # Only returns warning
    assert validate_graph(g) is True


def test_validate_graph_missing_dst_node():
    """An edge destination absent from the node table only warns; the graph stays valid."""
    edges_df = pd.DataFrame({'s': ['a', 'b'], 'd': ['b', 'c']})
    # Destination 'c' has no matching row in the node table.
    nodes_df = pd.DataFrame({'id': ['a', 'b'], 'name': ['A', 'B']})
    g = graphistry.edges(edges_df, 's', 'd').nodes(nodes_df, node='id')
    # Only returns warning
    assert validate_graph(g) is True
61 changes: 61 additions & 0 deletions graphistry/validate/validate_graph.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
def check_node_dataframe_exists(g, verbose=True):
    """Report whether the graph carries a node DataFrame.

    Args:
        g: Graph-like object exposing a ``_nodes`` attribute.
        verbose: When true, print a warning if ``g._nodes`` is ``None``.

    Returns:
        ``True`` when ``g._nodes`` is not ``None``, ``False`` otherwise.
    """
    has_nodes = g._nodes is not None
    if not has_nodes and verbose:
        print("Warning: graph was created with only edges. Skipping Node ID check if Node IDs match edge IDs. Use g2 = g.materialize_nodes() to force node df creation. Exiting.")
    return has_nodes


def check_node_id_defined(g, verbose=True):
    """Check that the graph has a usable node ID binding.

    The binding is usable when ``g._node`` is set and, if a node DataFrame
    is attached, names one of its columns.

    Args:
        g: Graph-like object exposing ``_node`` and (optionally) ``_nodes``.
        verbose: When true, print the reason for a failed check.

    Returns:
        ``False`` when ``g._node`` is ``None`` or names a column that is
        absent from ``g._nodes``; ``True`` otherwise.
    """
    if g._node is None:
        if verbose:
            print("Invalid graph: Missing Node ID. Did you forget to specify the node ID in the .nodes() function? Exiting.")
        return False

    # A binding that names a non-existent column would make the NaN and
    # duplicate checks raise KeyError on ``g._nodes[g._node]``; report it as
    # an invalid graph instead. Skipped when there is no node table (or it
    # has no ``columns`` attribute) so the original result is preserved.
    columns = getattr(getattr(g, '_nodes', None), 'columns', None)
    if columns is not None and g._node not in columns:
        if verbose:
            print("Invalid graph: Node ID column {!r} not found in the node DataFrame.".format(g._node))
        return False

    return True


def check_nan_node_ids(g, verbose=True):
    """Check that the node ID column contains no null values.

    Args:
        g: Graph-like object whose ``_nodes`` table has a column named by
            ``_node``.
        verbose: When true, print a message if a null ID is found.

    Returns:
        ``False`` if any entry of ``g._nodes[g._node]`` is null, else ``True``.
    """
    node_ids = g._nodes[g._node]
    has_null_id = node_ids.isnull().any()
    if not has_null_id:
        return True
    if verbose:
        print("Invalid graph: Contains NaN Node IDs.")
    return False


def check_duplicate_node_ids(g, verbose=True):
    """Check that every value in the node ID column is unique.

    Args:
        g: Graph-like object whose ``_nodes`` table has a column named by
            ``_node``.
        verbose: When true, print a message if a repeated ID is found.

    Returns:
        ``False`` if ``g._nodes[g._node]`` contains a repeated value, else
        ``True``.
    """
    node_ids = g._nodes[g._node]
    has_repeats = node_ids.duplicated().any()
    if not has_repeats:
        return True
    if verbose:
        print("Invalid graph: Contains duplicate Node IDs.")
    return False


def check_edge_sources_exist_in_nodes(g, verbose=True):
    """Warn when edge source IDs are missing from the node ID column.

    This check is advisory: it never fails validation.

    Args:
        g: Graph-like object exposing ``_edges``, ``_source``, ``_nodes``
            and ``_node``.
        verbose: When true, print a warning if a source ID is not found in
            ``g._nodes[g._node]``, or if the check has to be skipped.

    Returns:
        Always ``True``.
    """
    edges = g._edges
    source = g._source
    # Without an edge table or a source binding there is nothing to compare;
    # indexing would raise TypeError/KeyError, which a warning-only check
    # should not do.
    if edges is None or source is None:
        if verbose:
            print("Warning: No edge DataFrame or source binding defined. Skipping source edge ID check.")
        return True

    if not edges[source].isin(g._nodes[g._node]).all():
        if verbose:
            print("Warning: Contains source edge IDs that do not exist in the node DataFrame. This can cause unexpected results.")
    return True


def check_edge_destinations_exist_in_nodes(g, verbose=True):
    """Warn when edge destination IDs are missing from the node ID column.

    This check is advisory: it never fails validation.

    Args:
        g: Graph-like object exposing ``_edges``, ``_destination``,
            ``_nodes`` and ``_node``.
        verbose: When true, print a warning if a destination ID is not found
            in ``g._nodes[g._node]``, or if the check has to be skipped.

    Returns:
        Always ``True``.
    """
    edges = g._edges
    destination = g._destination
    # Without an edge table or a destination binding there is nothing to
    # compare; indexing would raise TypeError/KeyError, which a warning-only
    # check should not do.
    if edges is None or destination is None:
        if verbose:
            print("Warning: No edge DataFrame or destination binding defined. Skipping destination edge ID check.")
        return True

    if not edges[destination].isin(g._nodes[g._node]).all():
        if verbose:
            print("Warning: Contains destination edge IDs that do not exist in the node DataFrame. This can cause unexpected results.")
    return True


def validate_graph(g, verbose=True):
    """Run the node checks, then the advisory edge checks, on a graph.

    The four node checks run in order and the first failure stops
    validation. The two edge-endpoint checks run afterwards and their
    results are ignored, so they can only emit warnings.

    Args:
        g: Graph-like object passed unchanged to every check.
        verbose: Forwarded to every check; when true, also print
            "Graph is valid." on success.

    Returns:
        ``False`` as soon as a node check fails, ``True`` otherwise.
    """
    blocking_checks = (
        check_node_dataframe_exists,
        check_node_id_defined,
        check_nan_node_ids,
        check_duplicate_node_ids,
    )
    for check in blocking_checks:
        if not check(g, verbose):
            return False

    # Warnings only: return values are intentionally discarded.
    check_edge_sources_exist_in_nodes(g, verbose)
    check_edge_destinations_exist_in_nodes(g, verbose)

    if verbose:
        print("Graph is valid.")
    return True

0 comments on commit 25b35ef

Please sign in to comment.