-
Notifications
You must be signed in to change notification settings - Fork 45
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add SQL database to graph conversion tool (Db2Graph) (#99)
* Migrate from internal * Change to delta and limit size: - delta change to avoid div by zero error - limit size max 1 billion as too large values lead to postgres error * Adding the workflow and pytest for db2graph * For verification testing * Workflow verified branch db2graph removed * Changing the position because affecting build test * Updated file name and setup.cfg; cleanup dup code, typos * Small changes after previous commit Renamed the file to marius_db2graph to align with commands like marius_preprocess; Created two new functions for get_fetch_size to avoid duplicate code; Added the marius_db2graph command to setup.cfg (but haven't tested it because I'm not sure if pip installing it right now would work); Added 'my-sql' as an option to use mysql-connector because this wasn't added previously; * Changing installation via marius path * Updating testing setup * hydra-core only 1.1.2 version working for tests * Updated the documentation with marius_db2graph * Updated the code to use only omegaconf * Resolving review comments * pushed repetitive code to a function * hydra-core added right now till other PR merges * Removed edges_entity_feature_values * Removed generate_uuid and related parts * Resolving other review comments * Workflow naming restrictions * wrong file name * Updated the workflow using matrix * Correcting a typo * Matched naming in code and documentation Renamed edge_entity_entity_queries to edge_queries, edge_entity_entity_queries_list to edge_queries_list, and edge_entity_entity_rel_list to edge_rel_list * Sample dockerfile for the Sakila dataset * Added end-to-end example; Modified basicConfig usage Added dockerfile, run.sh, sakila.yaml Modified basicConfig in marius_db2graph.py to avoid Python version issue * Updated script to correctly set password; Fixed typo * Updated validation for better info & moved files * Moved file to proper folder * Moved by mistake * Updated doc to reflect parsing and logging changes; Changed dockerfile to 
install from marius main * Fix doc typos * apply autoformatter * add optional db2graph dependency * fix linter issues * include test dependencies in github actions * update github actions Co-authored-by: mohilp1998 <[email protected]> Co-authored-by: Roger Waleffe <[email protected]> Co-authored-by: Jason Mohoney <[email protected]>
- Loading branch information
1 parent
24b396e
commit e1c0126
Showing
8 changed files
with
1,387 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,48 @@ | ||
# CI workflow: runs the db2graph pytest suite against a PostgreSQL service container. | ||
name: Testing DB2GRAPH using postgres | ||
# Triggered by pushes to main and by pull requests targeting main. | ||
on: | ||
push: | ||
branches: | ||
- main | ||
pull_request: | ||
branches: | ||
- main | ||
|
||
jobs: | ||
|
||
db2graph: | ||
runs-on: ubuntu-latest | ||
# The job runs inside the container image selected by the matrix entry below. | ||
container: ${{ matrix.python_container }} | ||
strategy: | ||
matrix: | ||
# One job per listed Python image; tags are quoted so they stay strings. | ||
python_container: ["python:3.6", "python:3.7", "python:3.8", "python:3.9", "python:3.10"] | ||
|
||
services: | ||
postgres: | ||
# Docker Hub image | ||
image: postgres | ||
# Provide the password for postgres | ||
env: | ||
POSTGRES_PASSWORD: postgres | ||
# Set health checks to wait until postgres has started | ||
options: >- | ||
--health-cmd pg_isready | ||
--health-interval 10s | ||
--health-timeout 5s | ||
--health-retries 5 | ||
steps: | ||
# Downloads a copy of the code in your repository before running CI tests | ||
- name: Check out repository code | ||
uses: actions/checkout@v3 | ||
|
||
# Installs the package with its db2graph and tests extras. | ||
# NOTE(review): MARIUS_NO_BINDINGS=1 presumably skips building the native bindings - confirm against the build scripts. | ||
- name: Installing dependencies | ||
run: MARIUS_NO_BINDINGS=1 python3 -m pip install .[db2graph,tests] | ||
|
||
# Runs only the postgres db2graph test file; -s disables pytest output capturing. | ||
- name: Running pytest | ||
run: MARIUS_NO_BINDINGS=1 pytest -s test/db2graph/test_postgres.py | ||
# Environment variables used in the test | ||
env: | ||
# The hostname used to communicate with the PostgreSQL service container | ||
POSTGRES_HOST: postgres | ||
# The default PostgreSQL port | ||
POSTGRES_PORT: 5432 |
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,48 @@ | ||
# Marius configuration for link prediction on the Sakila dataset: embedding encoder with a DistMult decoder. | ||
model: | ||
learning_task: LINK_PREDICTION # set the learning task to link prediction | ||
encoder: | ||
layers: | ||
- - type: EMBEDDING # set the encoder to be an embedding table with 50-dimensional embeddings | ||
output_dim: 50 | ||
decoder: | ||
type: DISTMULT # set the decoder to DistMult | ||
options: | ||
# matches the encoder's output_dim of 50 | ||
input_dim: 50 | ||
loss: | ||
type: SOFTMAX_CE | ||
options: | ||
reduction: SUM | ||
dense_optimizer: # optimizer to use for dense model parameters. In this case these are the DistMult relation (edge-type) embeddings | ||
type: ADAM | ||
options: | ||
learning_rate: 0.1 | ||
sparse_optimizer: # optimizer to use for node embedding table | ||
type: ADAGRAD | ||
options: | ||
learning_rate: 0.1 | ||
# Storage: CUDA device, with edges and embeddings both set to DEVICE_MEMORY; save_model is enabled. | ||
storage: | ||
device_type: cuda | ||
dataset: | ||
dataset_dir: /marius/datasets/sakila/ | ||
edges: | ||
type: DEVICE_MEMORY | ||
embeddings: | ||
type: DEVICE_MEMORY | ||
save_model: true | ||
# Training: 10 epochs, batch size 1000, 500 negatives per positive, unfiltered negative sampling. | ||
training: | ||
batch_size: 1000 | ||
negative_sampling: | ||
num_chunks: 10 | ||
negatives_per_positive: 500 | ||
degree_fraction: 0.0 | ||
filtered: false | ||
num_epochs: 10 | ||
pipeline: | ||
sync: true | ||
epochs_per_shuffle: 1 | ||
# Evaluation: batch size 1000 with filtered negative sampling. | ||
evaluation: | ||
batch_size: 1000 | ||
negative_sampling: | ||
filtered: true | ||
pipeline: | ||
sync: true |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,48 @@ | ||
# Example image for the db2graph Sakila walkthrough: CUDA base image, Marius, and a local MySQL server. | ||
# setup for Marius | ||
FROM nvidia/cuda:11.4.2-cudnn8-devel-ubuntu20.04 | ||
|
||
ENV TZ=US | ||
|
||
RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone | ||
|
||
# Refresh the package index in the same layer as the install so a stale cached index is never reused | ||
RUN apt update && apt install -y g++ \ | ||
make \ | ||
wget \ | ||
unzip \ | ||
vim \ | ||
git \ | ||
python3-pip \ | ||
build-essential \ | ||
python-dev \ | ||
libpq-dev | ||
|
||
# install cmake 3.20 | ||
RUN wget https://github.com/Kitware/CMake/releases/download/v3.20.0/cmake-3.20.0-linux-x86_64.sh \ | ||
&& mkdir /opt/cmake \ | ||
&& sh cmake-3.20.0-linux-x86_64.sh --skip-license --prefix=/opt/cmake/ \ | ||
&& ln -s /opt/cmake/bin/cmake /usr/local/bin/cmake | ||
|
||
# install pytorch | ||
RUN pip3 install torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cu113 && pip3 install docutils==0.17 | ||
|
||
# install Marius | ||
RUN git clone https://github.com/marius-team/marius.git && cd marius && pip3 install . | ||
|
||
# install debconf-set-selections & systemctl | ||
# -y keeps the build non-interactive even if apt needs to ask for confirmation | ||
RUN apt-get install -y debconf | ||
|
||
RUN apt-get install -y systemctl | ||
|
||
# install mysql-8 | ||
# Preseed the root password answers before installing the server | ||
RUN echo "mysql-community-server mysql-community-server/root-pass password password" | debconf-set-selections | ||
|
||
RUN echo "mysql-community-server mysql-community-server/re-root-pass password password" | debconf-set-selections | ||
|
||
RUN DEBIAN_FRONTEND=noninteractive apt-get -y install mysql-server | ||
|
||
# Adding a run.sh script to initialize things | ||
COPY run.sh /usr/local/bin/run.sh | ||
|
||
# Absolute path so the chmod does not depend on the current working directory | ||
RUN chmod +x /usr/local/bin/run.sh |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
#!/bin/sh | ||
# Starts MySQL, downloads and loads the Sakila sample database, then creates a user for accessing it. | ||
# NOTE(review): `-p=password` likely makes the client send the literal password `=password` | ||
# (the short option takes everything after -p) - confirm, or switch to --password=password. | ||
systemctl start mysql | ||
# -p keeps a re-run from failing when the directory already exists | ||
mkdir -p /db2graph_eg | ||
wget -O /db2graph_eg/sakila-db.tar.gz https://downloads.mysql.com/docs/sakila-db.tar.gz | ||
tar -xf /db2graph_eg/sakila-db.tar.gz -C /db2graph_eg/ | ||
# Load the schema first, then the data | ||
mysql -u root -p=password < /db2graph_eg/sakila-db/sakila-schema.sql | ||
mysql -u root -p=password < /db2graph_eg/sakila-db/sakila-data.sql | ||
## For creating a new user for accessing the data | ||
# IF NOT EXISTS keeps a re-run from erroring once the user has been created | ||
mysql -u root -p=password mysql -e "CREATE USER IF NOT EXISTS 'sakila_user'@'localhost' IDENTIFIED BY 'sakila_password';" | ||
mysql -u root -p=password mysql -e "GRANT ALL PRIVILEGES ON *.* TO 'sakila_user'@'localhost';" | ||
mysql -u root -p=password mysql -e "FLUSH PRIVILEGES;" | ||
service mysql restart |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.