Skip to content

Commit

Permalink
Update README.md
Browse files Browse the repository at this point in the history
  • Loading branch information
aMahanna committed Oct 26, 2023
1 parent 03f3e48 commit dbee0fc
Showing 1 changed file with 80 additions and 50 deletions.
130 changes: 80 additions & 50 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -49,12 +49,17 @@ import torch
import pandas
from torch_geometric.datasets import FakeHeteroDataset

from arango import ArangoClient # Python-Arango driver

from arango import ArangoClient
from adbpyg_adapter import ADBPyG_Adapter, ADBPyG_Controller
from adbpyg_adapter.encoders import IdentityEncoder, CategoricalEncoder

# Load some fake PyG data for demo purposes
# Connect to ArangoDB
db = ArangoClient().db()

# Instantiate the adapter
adbpyg_adapter = ADBPyG_Adapter(db)

# Create a PyG Heterogeneous Graph
data = FakeHeteroDataset(
num_node_types=2,
num_edge_types=3,
Expand All @@ -63,43 +68,24 @@ data = FakeHeteroDataset(
edge_dim=2, # number of features per edge
num_classes=3, # number of unique label values
)[0]

# Let's assume that the ArangoDB "IMDB" dataset is imported to this endpoint
db = ArangoClient(hosts="http://localhost:8529").db("_system", username="root", password="")

adbpyg_adapter = ADBPyG_Adapter(db)
```

### PyG to ArangoDB

Note: If the PyG graph contains `_key`, `_v_key`, or `_e_key` properties for any node / edge types, the adapter will assume to persist those values as [ArangoDB document keys](https://www.arangodb.com/docs/stable/data-modeling-naming-conventions-document-keys.html). See the `Full Cycle (ArangoDB -> PyG -> ArangoDB)` section below for an example.

```py
# 1.1: PyG to ArangoDB
adb_g = adbpyg_adapter.pyg_to_arangodb("FakeData", data)

# 1.2: PyG to ArangoDB with a (completely optional) metagraph for customized adapter behaviour
def y_tensor_to_2_column_dataframe(pyg_tensor, adb_df):
"""
A user-defined function to create two
ArangoDB attributes out of the 'user' label tensor
:param dgl_tensor: The DGL Tensor containing the data
:type dgl_tensor: torch.Tensor
:param adb_df: The ArangoDB DataFrame to populate, whose
size is preset to the length of **dgl_tensor**.
:type adb_df: pandas.DataFrame
#############################
# 1.1: without a Metagraph #
#############################

NOTE: user-defined functions must return the modified **adb_df**
"""
label_map = {0: "Kiwi", 1: "Blueberry", 2: "Avocado"}

adb_df["label_num"] = pyg_tensor.tolist()
adb_df["label_str"] = adb_df["label_num"].map(label_map)

return adb_df
adb_g = adbpyg_adapter.pyg_to_arangodb("FakeData", data)

#########################
# 1.2: with a Metagraph #
#########################

# Specifying a Metagraph provides customized adapter behaviour
metagraph = {
"nodeTypes": {
"v0": {
Expand All @@ -117,15 +103,39 @@ metagraph = {
},
}

def y_tensor_to_2_column_dataframe(pyg_tensor: torch.Tensor, adb_df: pandas.DataFrame) -> pandas.DataFrame:
"""A user-defined function to create two
ArangoDB attributes out of the 'user' label tensor
:param pyg_tensor: The PyG Tensor containing the data
:type pyg_tensor: torch.Tensor
:param adb_df: The ArangoDB DataFrame to populate, whose
size is preset to the length of **pyg_tensor**.
:type adb_df: pandas.DataFrame
:return: The populated ArangoDB DataFrame
:rtype: pandas.DataFrame
"""
label_map = {0: "Kiwi", 1: "Blueberry", 2: "Avocado"}

adb_df["label_num"] = pyg_tensor.tolist()
adb_df["label_str"] = adb_df["label_num"].map(label_map)

return adb_df


adb_g = adbpyg_adapter.pyg_to_arangodb("FakeData", data, metagraph, explicit_metagraph=False)

# 1.3: PyG to ArangoDB with the same (optional) metagraph, but with `explicit_metagraph=True`
#######################################################
# 1.3: with a Metagraph and `explicit_metagraph=True` #
#######################################################

# With `explicit_metagraph=True`, the node & edge types omitted from the metagraph will NOT be converted to ArangoDB.
# Only 'v0', 'v1' and ('v0', 'e0', 'v0') will be brought over (i.e 'v2', ('v0', 'e0', 'v1'), ... are ignored)
adb_g = adbpyg_adapter.pyg_to_arangodb("FakeData", data, metagraph, explicit_metagraph=True)

# 1.4: PyG to ArangoDB with a Custom Controller (more user-defined behavior)
########################################
# 1.4: with a custom ADBPyG Controller #
########################################

class Custom_ADBPyG_Controller(ADBPyG_Controller):
def _prepare_pyg_node(self, pyg_node: dict, node_type: str) -> dict:
"""Optionally modify a PyG node object before it gets inserted into its designated ArangoDB collection.
Expand Down Expand Up @@ -158,13 +168,25 @@ adb_g = ADBPyG_Adapter(db, Custom_ADBPyG_Controller()).pyg_to_arangodb("FakeData
db.delete_graph("FakeData", drop_collections=True, ignore_missing=True)
adbpyg_adapter.pyg_to_arangodb("FakeData", data)

# 2.1: ArangoDB to PyG via Graph name (does not transfer attributes)
#######################
# 2.1: via Graph name #
#######################

# Due to risk of ambiguity, this method does not transfer attributes
pyg_g = adbpyg_adapter.arangodb_graph_to_pyg("FakeData")

# 2.2: ArangoDB to PyG via Collection names (does not transfer attributes)
#############################
# 2.2: via Collection names #
#############################

# Due to risk of ambiguity, this method does not transfer attributes
pyg_g = adbpyg_adapter.arangodb_collections_to_pyg("FakeData", v_cols={"v0", "v1"}, e_cols={"e0"})

# 2.3: ArangoDB to PyG via Metagraph v1 (transfer attributes "as is", meaning they are already formatted to PyG data standards)
######################
# 2.3: via Metagraph #
######################

# Transfers attributes "as is", meaning they are already formatted to PyG data standards.
metagraph_v1 = {
"vertexCollections": {
# Move the "x" & "y" ArangoDB attributes to PyG as "x" & "y" Tensors
Expand All @@ -175,9 +197,14 @@ metagraph_v1 = {
"e0": {"edge_attr"},
},
}

pyg_g = adbpyg_adapter.arangodb_to_pyg("FakeData", metagraph_v1)

# 2.4: ArangoDB to PyG via Metagraph v2 (transfer attributes via user-defined encoders)
#################################################
# 2.4: via Metagraph with user-defined encoders #
#################################################

# Transforms attributes via user-defined encoders
# For more info on user-defined encoders in PyG, see https://pytorch-geometric.readthedocs.io/en/latest/notes/load_csv.html
metagraph_v2 = {
"vertexCollections": {
Expand All @@ -199,21 +226,14 @@ metagraph_v2 = {
"Ratings": { "edge_weight": "Rating" } # Use the 'Rating' attribute for the PyG 'edge_weight' property
},
}
pyg_g = adbpyg_adapter.arangodb_to_pyg("IMDB", metagraph_v2)

# 2.5: ArangoDB to PyG via Metagraph v3 (transfer attributes via user-defined functions)
def udf_v0_x(v0_df):
# process v0_df here to return v0 "x" feature matrix
# v0_df["x"] = ...
return torch.tensor(v0_df["x"].to_list())

pyg_g = adbpyg_adapter.arangodb_to_pyg("imdb", metagraph_v2)

def udf_v1_x(v1_df):
# process v1_df here to return v1 "x" feature matrix
# v1_df["x"] = ...
return torch.tensor(v1_df["x"].to_list())

##################################################
# 2.5: via Metagraph with user-defined functions #
##################################################

# Transforms attributes via user-defined functions
metagraph_v3 = {
"vertexCollections": {
"v0": {
Expand All @@ -226,10 +246,20 @@ metagraph_v3 = {
"e0": {"edge_attr": (lambda df: torch.tensor(df["edge_attr"].to_list()))},
},
}

def udf_v0_x(v0_df: pandas.DataFrame) -> torch.Tensor:
# v0_df["x"] = ...
return torch.tensor(v0_df["x"].to_list())


def udf_v1_x(v1_df: pandas.DataFrame) -> torch.Tensor:
# v1_df["x"] = ...
return torch.tensor(v1_df["x"].to_list())

pyg_g = adbpyg_adapter.arangodb_to_pyg("FakeData", metagraph_v3)
```

### Experimental: `preserve_adb_keys`
### Full Cycle (ArangoDB -> PyG -> ArangoDB)
```py
# With `preserve_adb_keys=True`, the adapter will preserve the ArangoDB vertex & edge _key values into the (newly created) PyG graph.
# Users can then re-import their PyG graph into ArangoDB using the same _key values
Expand Down

0 comments on commit dbee0fc

Please sign in to comment.