Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add support for google spanner graph #622

Merged
merged 34 commits into from
Jan 22, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
34 commits
Select commit Hold shift + click to select a range
f6ac839
(feature): new support for Google Spanner Graph
DataBoyTX Dec 21, 2024
fbfff6c
renamed spannergraph file
DataBoyTX Dec 24, 2024
53bd5ab
changed dir for spannergraph.py, added module in setup.py
DataBoyTX Jan 10, 2025
5d23ae3
added lazy imports
DataBoyTX Jan 10, 2025
c5fc0d4
removed import from Plotterbase.py
DataBoyTX Jan 10, 2025
5a5d211
fix pydocs
DataBoyTX Jan 13, 2025
af75ce9
remove database_clients dir, moved to plugins/
DataBoyTX Jan 13, 2025
ab3e5e7
various changes to pass in spanner config to register()
DataBoyTX Jan 15, 2025
d2f6b57
added debugging
DataBoyTX Jan 16, 2025
6c7324a
added demo notebook, debug output
DataBoyTX Jan 16, 2025
4e76820
minor changes
DataBoyTX Jan 16, 2025
e000018
various changes for error handling, imports and pydocs
DataBoyTX Jan 16, 2025
70a038d
fixed register and uncomment gcloud
DataBoyTX Jan 16, 2025
f374279
fixed typo in register
DataBoyTX Jan 16, 2025
116334a
added spanner_query_to_df and other fixes from PR comments
DataBoyTX Jan 17, 2025
ed364ee
updated notebook with more examples
DataBoyTX Jan 17, 2025
8a2df08
fix lint issues
DataBoyTX Jan 17, 2025
5e15cbe
fix linting errors
DataBoyTX Jan 17, 2025
4a8e244
fix linting errors
DataBoyTX Jan 17, 2025
46daae6
Merge branch 'master' into tcook-add-spanner
DataBoyTX Jan 17, 2025
fecc260
fix more lint issue
DataBoyTX Jan 17, 2025
ddbfc12
fix more lint issue
DataBoyTX Jan 17, 2025
886f23d
fix more lint issue
DataBoyTX Jan 17, 2025
0d6d2f3
fix more lint issue
DataBoyTX Jan 17, 2025
63598fd
updated notebook with CTA and other docs
DataBoyTX Jan 20, 2025
c8941a7
fix for readthedocs markdown
DataBoyTX Jan 20, 2025
1611c0e
updates from PR comments
DataBoyTX Jan 22, 2025
6491cdc
removed None assignment for _spannergraph - per PR comments
DataBoyTX Jan 22, 2025
b78c56b
changes to pass Plottable dynamically to SpannerGraph.gql_to_g
DataBoyTX Jan 22, 2025
64cdc8d
various PR review changes
DataBoyTX Jan 22, 2025
bc71b2b
fix lint error and add plot output back to notebook
DataBoyTX Jan 22, 2025
cc0c88c
fix lint error and add plot output back to notebook
DataBoyTX Jan 22, 2025
1bbe1b1
fix lint error about blank line at end of file
DataBoyTX Jan 22, 2025
c9dc546
remove stray comment
DataBoyTX Jan 22, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1,173 changes: 1,173 additions & 0 deletions demos/demos_databases_apis/spanner/google_spanner_finance_graph.ipynb

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions docs/source/plugins.rst
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ Graph
* `Gremlin <https://tinkerpop.apache.org>`_ (:class:`graphistry.gremlin.GremlinMixin`)
* `Memgraph <https://memgraph.com>`_ (:meth:`graphistry.PlotterBase.PlotterBase.cypher`)
* `Neo4j <https://neo4j.com>`_ (:meth:`graphistry.PlotterBase.PlotterBase.cypher`)
* `Google Spanner Graph <https://cloud.google.com/spanner/docs/graph/overview>`_ (:meth:`graphistry.PlotterBase.PlotterBase.spanner_gql_to_g`)
* `TigerGraph <https://www.tigergraph.com>`_ (:meth:`graphistry.PlotterBase.PlotterBase.gsql`)
* `Trovares <https://trovares.com>`_

Expand Down
3 changes: 2 additions & 1 deletion graphistry/Plottable.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,8 @@ class Plottable(object):
_complex_encodings : dict
_bolt_driver : Any
_tigergraph : Any

_spannergraph: Any

_dataset_id: Optional[str]
_url: Optional[str]
_nodes_file_id: Optional[str]
Expand Down
133 changes: 133 additions & 0 deletions graphistry/PlotterBase.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
end_node_id_key,
to_bolt_driver)


from .arrow_uploader import ArrowUploader
from .nodexlistry import NodeXLGraphistry
from .tigeristry import Tigeristry
Expand Down Expand Up @@ -176,6 +177,7 @@ def __init__(self, *args: Any, **kwargs: Any) -> None:
# Integrations
self._bolt_driver : Any = None
self._tigergraph : Any = None
self._spannergraph: Any

# feature engineering
self._node_embedding = None
Expand Down Expand Up @@ -2269,7 +2271,31 @@ def bolt(self, driver):
res = copy.copy(self)
res._bolt_driver = to_bolt_driver(driver)
return res

def spanner_init(self: Plottable, spanner_config: Dict[str, str]) -> Plottable:
    """
    Initialize a SpannerGraph object with the provided configuration and attach it to a copy of this Plottable.

    ``spanner_config`` must include the following keys (``credentials_file`` is optional):
        - "project_id": The GCP project ID.
        - "instance_id": The Spanner instance ID.
        - "database_id": The Spanner database ID.
        - "credentials_file": JSON key file for a service account.

    :param spanner_config: A dictionary containing the Spanner configuration.
    :type spanner_config: Dict[str, str]
    :return: Plottable with a Spanner connection
    :rtype: Plottable
    :raises ValueError: If any of the required keys in `spanner_config` are missing or have invalid values.

    """
    # Function-scope import: the Spanner plugin module is only loaded when this method is called.
    from .plugins.spannergraph import SpannerGraph

    # Work on a shallow copy so the receiver itself is left untouched.
    res = copy.copy(self)

    res._spannergraph = SpannerGraph(res, spanner_config)
    # Lazy %-style args: the original message lacked the f-prefix and logged the
    # literal text "{res._spannergraph}" instead of the object.
    logger.debug("Created SpannerGraph object: %s", res._spannergraph)
    return res
DataBoyTX marked this conversation as resolved.
Show resolved Hide resolved

def infer_labels(self):
"""
Expand Down Expand Up @@ -2458,6 +2484,113 @@ def cypher(self, query: str, params: Dict[str, Any] = {}) -> Plottable:
)\
.nodes(nodes)\
.edges(edges)


def spanner_gql_to_g(self: Plottable, query: str) -> Plottable:
    """
    Submit GQL query to google spanner graph database and return Plottable with nodes and edges populated

    GQL must be a path query with a syntax similar to the following. It's recommended to return the path with
    SAFE_TO_JSON(p); TO_JSON() can also be used, but is not recommended. LIMIT is optional, but for large graphs
    with millions of edges or more, it's best to filter either in the query or use LIMIT so as not to exhaust
    GPU memory.

    query='''GRAPH my_graph
    MATCH p = (a)-[b]->(c) LIMIT 100000 return SAFE_TO_JSON(p) as path'''

    :param query: GQL query string
    :type query: str

    :returns: Plottable with the results of GQL query as a graph
    :rtype: Plottable
    :raises ValueError: If no Spanner connection is attached and the registered spanner config is None.

    **Example: calling spanner_gql_to_g**
        ::

            import graphistry

            # credentials_file is optional, all others are required
            SPANNER_CONF = { "project_id": PROJECT_ID,
                             "instance_id": INSTANCE_ID,
                             "database_id": DATABASE_ID,
                             "credentials_file": CREDENTIALS_FILE }

            graphistry.register(..., spanner_config=SPANNER_CONF)

            query='''GRAPH my_graph
            MATCH p = (a)-[b]->(c) LIMIT 100000 return SAFE_TO_JSON(p) as path'''

            g = graphistry.spanner_gql_to_g(query)

            g.plot()

    """
    from .pygraphistry import PyGraphistry

    res = copy.copy(self)

    # Treat both "attribute never set" and "attribute set to None" as "no connection yet",
    # so a None placeholder cannot fall through to an AttributeError below.
    if getattr(res, '_spannergraph', None) is None:
        # Fall back to the config stored on PyGraphistry; read it once and reuse it.
        spanner_config = PyGraphistry._config["spanner"]
        if spanner_config is None:
            raise ValueError('spanner_config is None, use spanner_init() or register() passing spanner_config')
        logger.debug("Spanner Config: %s", spanner_config)

        res = res.spanner_init(spanner_config)  # type: ignore[attr-defined]

    return res._spannergraph.gql_to_graph(res, query)

def spanner_query_to_df(self: Plottable, query: str) -> pd.DataFrame:
    """
    Submit query to google spanner database and return a df of the results

    query can be SQL or GQL as long as a table of results is returned

    query='SELECT * from Account limit 10000'

    :param query: query string
    :type query: str

    :returns: Pandas DataFrame with the results of query
    :rtype: pd.DataFrame
    :raises ValueError: If no Spanner connection is attached and the registered spanner config is None.

    **Example: calling spanner_query_to_df**
        ::

            import graphistry

            # credentials_file is optional, all others are required
            SPANNER_CONF = { "project_id": PROJECT_ID,
                             "instance_id": INSTANCE_ID,
                             "database_id": DATABASE_ID,
                             "credentials_file": CREDENTIALS_FILE }

            graphistry.register(..., spanner_config=SPANNER_CONF)

            query='SELECT * from Account limit 10000'

            df = graphistry.spanner_query_to_df(query)

            df.head()

    """

    from .pygraphistry import PyGraphistry

    res = copy.copy(self)

    # Treat both "attribute never set" and "attribute set to None" as "no connection yet".
    if getattr(res, '_spannergraph', None) is None:
        # Fall back to the config stored on PyGraphistry; read it once and reuse it.
        spanner_config = PyGraphistry._config["spanner"]
        if spanner_config is None:
            # Fail with the same explicit error as spanner_gql_to_g rather than
            # warning and then handing None to spanner_init().
            raise ValueError('spanner_config is None, use spanner_init() or register() passing spanner_config')
        logger.debug("Spanner Config: %s", spanner_config)

        res = res.spanner_init(spanner_config)  # type: ignore[attr-defined]

    return res._spannergraph.query_to_df(query)


def nodexl(self, xls_or_url, source='default', engine=None, verbose=False):

Expand Down
3 changes: 3 additions & 0 deletions graphistry/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,9 @@
bolt,
cypher,
tigergraph,
spanner_gql_to_g,
spanner_query_to_df,
spanner_init,
gsql,
gsql_endpoint,
cosmos,
Expand Down
Loading
Loading