-
Notifications
You must be signed in to change notification settings - Fork 210
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat(graph validation): add graph validation and tests
- Loading branch information
1 parent
7f5b2db
commit 25b35ef
Showing
2 changed files
with
107 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,46 @@ | ||
from graphistry.validate.validate_graph import validate_graph | ||
import graphistry | ||
import pandas as pd | ||
|
||
|
||
def test_validate_graph_good():
    """A graph with unique, non-null node IDs covering every edge endpoint validates."""
    edges_df = pd.DataFrame({'s': ['a', 'b'], 'd': ['b', 'c']})
    nodes_df = pd.DataFrame({'id': ['a', 'b', 'c'], 'name': ['A', 'B', 'C']})
    g = graphistry.edges(edges_df, 's', 'd').nodes(nodes_df, node='id')
    assert validate_graph(g) is True
|
||
|
||
def test_validate_graph_undefined_nodeid():
    """Attaching a node DataFrame without naming the node ID column is rejected."""
    edges_df = pd.DataFrame({'s': ['a', 'b'], 'd': ['b', 'c']})
    nodes_df = pd.DataFrame({'id': ['a', 'b', 'c'], 'name': ['A', 'B', 'C']})
    # No ``node=`` argument here: that omission is what the test exercises.
    g = graphistry.edges(edges_df, 's', 'd').nodes(nodes_df)
    assert validate_graph(g) is False
|
||
|
||
def test_validate_graph_duplicate_nodeid():
    """A node DataFrame that repeats an ID ('a' appears twice) is rejected."""
    edges_df = pd.DataFrame({'s': ['a', 'b'], 'd': ['b', 'c']})
    nodes_df = pd.DataFrame({
        'id': ['a', 'a', 'b', 'c'],
        'name': ['A', 'A2', 'B', 'C'],
    })
    g = graphistry.edges(edges_df, 's', 'd').nodes(nodes_df, node='id')
    assert validate_graph(g) is False
|
||
|
||
def test_validate_graph_missing_nodes():
    """A graph built from edges only, with no node DataFrame, is rejected."""
    edges_df = pd.DataFrame({'s': ['a', 'b'], 'd': ['b', 'c']})
    g = graphistry.edges(edges_df)
    assert validate_graph(g) is False
|
||
|
||
def test_validate_graph_nan_nodes():
    """A node DataFrame with a null entry in the ID column is rejected."""
    edges_df = pd.DataFrame({'s': ['a', 'b'], 'd': ['b', 'c']})
    nodes_df = pd.DataFrame({'id': [None, 'b', 'c'], 'name': ['A', 'B', 'C']})
    g = graphistry.edges(edges_df, 's', 'd').nodes(nodes_df, node='id')
    assert validate_graph(g) is False
|
||
|
||
def test_validate_graph_missing_src_node():
    """An edge source ('a') absent from the nodes still validates."""
    edges_df = pd.DataFrame({'s': ['a', 'b'], 'd': ['b', 'c']})
    nodes_df = pd.DataFrame({'id': ['b', 'c'], 'name': ['B', 'C']})
    g = graphistry.edges(edges_df, 's', 'd').nodes(nodes_df, node='id')
    # The validator only warns about this case, so the result stays True.
    assert validate_graph(g) is True
|
||
|
||
def test_validate_graph_missing_dst_node():
    """An edge destination ('c') absent from the nodes still validates."""
    edges_df = pd.DataFrame({'s': ['a', 'b'], 'd': ['b', 'c']})
    nodes_df = pd.DataFrame({'id': ['a', 'b'], 'name': ['A', 'B']})
    g = graphistry.edges(edges_df, 's', 'd').nodes(nodes_df, node='id')
    # The validator only warns about this case, so the result stays True.
    assert validate_graph(g) is True
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,61 @@ | ||
def check_node_dataframe_exists(g, verbose=True):
    """Report whether the graph has a node DataFrame attached.

    Args:
        g: Graph object exposing a ``_nodes`` attribute.
        verbose: When true, print a warning to stdout if ``_nodes`` is None.

    Returns:
        ``True`` if ``g._nodes`` is not None, otherwise ``False``.
    """
    has_nodes = g._nodes is not None
    if not has_nodes and verbose:
        print("Warning: graph was created with only edges. Skipping Node ID check if Node IDs match edge IDs. Use g2 = g.materialize_nodes() to force node df creation. Exiting.")
    return has_nodes
|
||
|
||
def check_node_id_defined(g, verbose=True):
    """Report whether a node ID column has been bound on the graph.

    Args:
        g: Graph object exposing a ``_node`` attribute.
        verbose: When true, print a diagnostic to stdout if ``_node`` is None.

    Returns:
        ``True`` if ``g._node`` is not None, otherwise ``False``.
    """
    node_id_bound = g._node is not None
    if not node_id_bound and verbose:
        print("Invalid graph: Missing Node ID. Did you forget to specify the node ID in the .nodes() function? Exiting.")
    return node_id_bound
|
||
|
||
def check_nan_node_ids(g, verbose=True):
    """Check that the node ID column exists and contains no null values.

    Args:
        g: Graph object exposing ``_nodes`` (node DataFrame) and ``_node``
            (name of the node ID column).
        verbose: When true, print a diagnostic to stdout on failure.

    Returns:
        ``False`` if the bound ID column is missing from the node DataFrame
        or holds any null value, otherwise ``True``.
    """
    nodes, node = g._nodes, g._node
    # A binding that names a column the DataFrame does not have (e.g. a typo
    # in ``node=``) is an invalid graph; report it instead of raising KeyError.
    if node not in nodes.columns:
        if verbose:
            print("Invalid graph: Node ID column not found in the node DataFrame.")
        return False
    if nodes[node].isnull().any():
        if verbose:
            print("Invalid graph: Contains NaN Node IDs.")
        return False
    return True
|
||
|
||
def check_duplicate_node_ids(g, verbose=True):
    """Check that no node ID appears more than once.

    Args:
        g: Graph object exposing ``_nodes`` (node DataFrame) and ``_node``
            (name of the node ID column).
        verbose: When true, print a diagnostic to stdout on failure.

    Returns:
        ``False`` if any value in the ID column is repeated, otherwise ``True``.
    """
    node_ids = g._nodes[g._node]
    has_duplicates = node_ids.duplicated().any()
    if not has_duplicates:
        return True
    if verbose:
        print("Invalid graph: Contains duplicate Node IDs.")
    return False
|
||
|
||
def check_edge_sources_exist_in_nodes(g, verbose=True):
    """Warn when an edge source ID has no matching row in the node DataFrame.

    This is an advisory check: it only prints warnings and never fails
    validation.

    Args:
        g: Graph object exposing ``_edges``, ``_source``, ``_nodes`` and
            ``_node``.
        verbose: When true, print warnings to stdout.

    Returns:
        Always ``True``.
    """
    edges, source = g._edges, g._source
    nodes, node = g._nodes, g._node
    # The comparison needs both tables and both bound columns. Indexing with a
    # missing binding would raise, which a warning-only check must not do.
    comparable = (
        edges is not None
        and source is not None
        and source in edges.columns
        and nodes is not None
        and node is not None
        and node in nodes.columns
    )
    if not comparable:
        if verbose:
            print("Warning: Cannot compare source edge IDs against the node DataFrame because the edge or node bindings are incomplete. Skipping check.")
        return True
    if not edges[source].isin(nodes[node]).all():
        if verbose:
            print("Warning: Contains source edge IDs that do not exist in the node DataFrame. This can cause unexpected results.")
    return True
|
||
|
||
def check_edge_destinations_exist_in_nodes(g, verbose=True):
    """Warn when an edge destination ID has no matching row in the node DataFrame.

    This is an advisory check: it only prints warnings and never fails
    validation.

    Args:
        g: Graph object exposing ``_edges``, ``_destination``, ``_nodes`` and
            ``_node``.
        verbose: When true, print warnings to stdout.

    Returns:
        Always ``True``.
    """
    edges, destination = g._edges, g._destination
    nodes, node = g._nodes, g._node
    # The comparison needs both tables and both bound columns. Indexing with a
    # missing binding would raise, which a warning-only check must not do.
    comparable = (
        edges is not None
        and destination is not None
        and destination in edges.columns
        and nodes is not None
        and node is not None
        and node in nodes.columns
    )
    if not comparable:
        if verbose:
            print("Warning: Cannot compare destination edge IDs against the node DataFrame because the edge or node bindings are incomplete. Skipping check.")
        return True
    if not edges[destination].isin(nodes[node]).all():
        if verbose:
            print("Warning: Contains destination edge IDs that do not exist in the node DataFrame. This can cause unexpected results.")
    return True
|
||
|
||
def validate_graph(g, verbose=True):
    """Run all graph checks and report whether the graph is valid.

    The blocking checks run in order and validation stops at the first one
    that fails. The edge-endpoint checks are advisory: they may print
    warnings but do not affect the result.

    Args:
        g: Graph object to validate.
        verbose: When true, the checks print diagnostics to stdout, and a
            final confirmation is printed on success.

    Returns:
        ``False`` if any blocking check fails, otherwise ``True``.
    """
    blocking_checks = (
        check_node_dataframe_exists,
        check_node_id_defined,
        check_nan_node_ids,
        check_duplicate_node_ids,
    )
    for check in blocking_checks:
        if not check(g, verbose):
            return False

    # Warnings only: results are intentionally ignored.
    advisory_checks = (
        check_edge_sources_exist_in_nodes,
        check_edge_destinations_exist_in_nodes,
    )
    for advisory in advisory_checks:
        advisory(g, verbose)

    if verbose:
        print("Graph is valid.")
    return True