diff --git a/README.md b/README.md index 9c702588c..0a5e30883 100644 --- a/README.md +++ b/README.md @@ -6,6 +6,12 @@ ## Updates +**Nov 2, 2020** + * This branch of the project is compatible with DGL 0.4.2. + * Added [ZINC-full](./data/script_download_molecules.sh) dataset (249K molecular graphs) with [scripts](./scripts/ZINC-full/). + + + **Jun 11, 2020** * Second release of the project. Major updates : + Added experimental pipeline for Weisfeiler-Lehman-GNNs operating on dense rank-2 tensors. diff --git a/data/SBMs/generate_SBM_CLUSTER.ipynb b/data/SBMs/generate_SBM_CLUSTER.ipynb index 67d81cc3d..77f4e0c14 100644 --- a/data/SBMs/generate_SBM_CLUSTER.ipynb +++ b/data/SBMs/generate_SBM_CLUSTER.ipynb @@ -208,9 +208,7 @@ { "cell_type": "code", "execution_count": 4, - "metadata": { - "scrolled": false - }, + "metadata": {}, "outputs": [ { "name": "stdout", @@ -392,199 +390,6 @@ "print('Time (sec):',time.time() - start) # 190s\n" ] }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Convert to DGL format and save with pickle" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "/Users/xbresson/Documents/Dropbox/06_NTU_2017_now/03_my_codes/34_benchmark20/GITHUB_benchmark_project/benchmarking-gnn\n" - ] - } - ], - "source": [ - "import os\n", - "os.chdir('../../') # go to root folder of the project\n", - "print(os.getcwd())\n" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [], - "source": [ - "\n", - "import pickle\n", - "\n", - "%load_ext autoreload\n", - "%autoreload 2\n", - "\n", - "from data.SBMs import SBMsDatasetDGL \n", - "\n", - "from data.data import LoadData\n", - "from torch.utils.data import DataLoader\n", - "from data.SBMs import SBMsDataset\n" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - 
"text": [ - "[I] Loading data ...\n", - "preparing 10000 graphs for the TRAIN set...\n", - "preparing 1000 graphs for the TEST set...\n", - "preparing 1000 graphs for the VAL set...\n", - "[I] Finished loading.\n", - "[I] Data load time: 3983.7924s\n", - "Time (sec): 3983.794214248657\n" - ] - } - ], - "source": [ - "DATASET_NAME = 'SBM_CLUSTER'\n", - "dataset = SBMsDatasetDGL(DATASET_NAME) #3983s\n" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "10000\n", - "1000\n", - "1000\n", - "(DGLGraph(num_nodes=117, num_edges=4104,\n", - " ndata_schemes={'feat': Scheme(shape=(), dtype=torch.int64)}\n", - " edata_schemes={'feat': Scheme(shape=(1,), dtype=torch.float32)}), tensor([0, 3, 3, 0, 4, 3, 0, 2, 0, 0, 0, 2, 2, 0, 1, 5, 3, 0, 2, 4, 2, 3, 2, 4,\n", - " 3, 1, 3, 5, 2, 3, 0, 0, 3, 5, 2, 5, 3, 2, 0, 3, 0, 3, 3, 3, 0, 3, 2, 0,\n", - " 3, 5, 2, 4, 1, 1, 3, 4, 4, 3, 3, 3, 0, 5, 2, 4, 3, 0, 0, 4, 3, 0, 0, 1,\n", - " 4, 2, 3, 2, 0, 0, 0, 4, 2, 2, 3, 3, 3, 0, 0, 2, 2, 5, 4, 0, 2, 5, 4, 0,\n", - " 0, 2, 0, 0, 0, 3, 3, 2, 2, 1, 2, 0, 0, 0, 5, 3, 1, 4, 3, 3, 5],\n", - " dtype=torch.int16))\n", - "(DGLGraph(num_nodes=90, num_edges=2396,\n", - " ndata_schemes={'feat': Scheme(shape=(), dtype=torch.int64)}\n", - " edata_schemes={'feat': Scheme(shape=(1,), dtype=torch.float32)}), tensor([1, 0, 0, 4, 4, 0, 5, 3, 4, 0, 3, 1, 0, 5, 5, 5, 1, 3, 3, 4, 1, 2, 5, 4,\n", - " 5, 5, 2, 0, 5, 3, 2, 5, 5, 5, 5, 0, 3, 3, 0, 2, 3, 3, 3, 3, 5, 3, 1, 1,\n", - " 5, 2, 5, 1, 1, 4, 5, 2, 0, 4, 4, 0, 3, 4, 0, 0, 2, 3, 5, 3, 3, 4, 0, 5,\n", - " 1, 0, 0, 0, 0, 2, 4, 0, 5, 0, 3, 0, 5, 3, 4, 3, 0, 5],\n", - " dtype=torch.int16))\n", - "(DGLGraph(num_nodes=134, num_edges=5570,\n", - " ndata_schemes={'feat': Scheme(shape=(), dtype=torch.int64)}\n", - " edata_schemes={'feat': Scheme(shape=(1,), dtype=torch.float32)}), tensor([2, 5, 4, 4, 4, 5, 2, 1, 5, 0, 0, 1, 5, 5, 4, 2, 5, 5, 0, 0, 3, 0, 1, 2,\n", - 
" 2, 5, 0, 2, 0, 5, 1, 5, 5, 1, 0, 0, 5, 2, 2, 5, 5, 1, 4, 0, 0, 5, 1, 0,\n", - " 3, 0, 5, 1, 5, 4, 0, 4, 5, 1, 5, 4, 4, 0, 2, 5, 2, 5, 0, 1, 0, 1, 2, 0,\n", - " 2, 2, 0, 3, 2, 4, 0, 5, 2, 0, 2, 2, 5, 4, 2, 0, 4, 0, 0, 5, 1, 0, 5, 3,\n", - " 2, 3, 5, 0, 1, 5, 2, 0, 1, 4, 0, 3, 2, 1, 0, 2, 1, 4, 2, 5, 2, 0, 5, 2,\n", - " 5, 5, 0, 1, 5, 4, 2, 2, 2, 0, 1, 0, 2, 1], dtype=torch.int16))\n" - ] - } - ], - "source": [ - "print(len(dataset.train))\n", - "print(len(dataset.val))\n", - "print(len(dataset.test))\n", - "\n", - "print(dataset.train[0])\n", - "print(dataset.val[0])\n", - "print(dataset.test[0])\n" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Time (sec): 15.637878656387329\n" - ] - } - ], - "source": [ - "start = time.time()\n", - "\n", - "with open('data/SBMs/SBM_CLUSTER.pkl','wb') as f:\n", - " pickle.dump([dataset.train,dataset.val,dataset.test],f)\n", - " \n", - "print('Time (sec):',time.time() - start)\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Test load function" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[I] Loading dataset SBM_CLUSTER...\n", - "train, test, val sizes : 10000 1000 1000\n", - "[I] Finished loading.\n", - "[I] Data load time: 29.6175s\n" - ] - } - ], - "source": [ - "DATASET_NAME = 'SBM_CLUSTER'\n", - "dataset = LoadData(DATASET_NAME) # 29s\n", - "trainset, valset, testset = dataset.train, dataset.val, dataset.test\n" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "Time (sec): 0.002402067184448242\n" - ] - } - ], - "source": [ - "start = time.time()\n", - "\n", - "batch_size = 10\n", - "collate = SBMsDataset.collate\n", - "print(SBMsDataset)\n", - 
"train_loader = DataLoader(trainset, batch_size=batch_size, shuffle=True, collate_fn=collate)\n", - "\n", - "print('Time (sec):',time.time() - start) #0.002s\n" - ] - }, { "cell_type": "code", "execution_count": null, @@ -620,5 +425,5 @@ } }, "nbformat": 4, - "nbformat_minor": 2 + "nbformat_minor": 4 } diff --git a/data/SBMs/generate_SBM_PATTERN.ipynb b/data/SBMs/generate_SBM_PATTERN.ipynb index ff7b137ef..60000e321 100644 --- a/data/SBMs/generate_SBM_PATTERN.ipynb +++ b/data/SBMs/generate_SBM_PATTERN.ipynb @@ -266,9 +266,7 @@ { "cell_type": "code", "execution_count": 4, - "metadata": { - "scrolled": false - }, + "metadata": {}, "outputs": [ { "name": "stdout", @@ -546,194 +544,6 @@ "outputs": [], "source": [] }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Convert to DGL format and save with pickle" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "/Users/xbresson/Documents/Dropbox/06_NTU_2017_now/03_my_codes/34_benchmark20/14_GITHUB_benchmark_project_apr20/benchmarking-gnns-dev_NEW_PATTERN\n" - ] - } - ], - "source": [ - "import os\n", - "os.chdir('../../') # go to root folder of the project\n", - "print(os.getcwd())\n" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [], - "source": [ - "\n", - "import pickle\n", - "\n", - "%load_ext autoreload\n", - "%autoreload 2\n", - "\n", - "from data.SBMs import SBMsDatasetDGL \n", - "\n", - "from data.data import LoadData\n", - "from torch.utils.data import DataLoader\n", - "from data.SBMs import SBMsDataset\n" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[I] Loading data ...\n", - "preparing 10000 graphs for the TRAIN set...\n", - "preparing 2000 graphs for the TEST set...\n", - "preparing 2000 graphs for the VAL set...\n", - "[I] 
Finished loading.\n", - "[I] Data load time: 6211.2342s\n" - ] - } - ], - "source": [ - "DATASET_NAME = 'SBM_PATTERN'\n", - "dataset = SBMsDatasetDGL(DATASET_NAME) # 6211s\n" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "10000\n", - "2000\n", - "2000\n", - "(DGLGraph(num_nodes=108, num_edges=4884,\n", - " ndata_schemes={'feat': Scheme(shape=(), dtype=torch.int64)}\n", - " edata_schemes={'feat': Scheme(shape=(1,), dtype=torch.float32)}), tensor([0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,\n", - " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0,\n", - " 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,\n", - " 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0,\n", - " 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0], dtype=torch.int16))\n", - "(DGLGraph(num_nodes=108, num_edges=4738,\n", - " ndata_schemes={'feat': Scheme(shape=(), dtype=torch.int64)}\n", - " edata_schemes={'feat': Scheme(shape=(1,), dtype=torch.float32)}), tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0,\n", - " 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", - " 1, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1,\n", - " 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0,\n", - " 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 1], dtype=torch.int16))\n", - "(DGLGraph(num_nodes=94, num_edges=3772,\n", - " ndata_schemes={'feat': Scheme(shape=(), dtype=torch.int64)}\n", - " edata_schemes={'feat': Scheme(shape=(1,), dtype=torch.float32)}), tensor([0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1,\n", - " 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", - " 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1,\n", - " 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 
0, 0, 0, 1, 0, 0],\n", - " dtype=torch.int16))\n" - ] - } - ], - "source": [ - "print(len(dataset.train))\n", - "print(len(dataset.val))\n", - "print(len(dataset.test))\n", - "\n", - "print(dataset.train[0])\n", - "print(dataset.val[0])\n", - "print(dataset.test[0])\n" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Time (sec): 24.47579312324524\n" - ] - } - ], - "source": [ - "start = time.time()\n", - "\n", - "with open('data/SBMs/SBM_PATTERN.pkl','wb') as f:\n", - " pickle.dump([dataset.train,dataset.val,dataset.test],f)\n", - " \n", - "print('Time (sec):',time.time() - start) # 21s\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Test load function" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[I] Loading dataset SBM_PATTERN...\n", - "train, test, val sizes : 10000 2000 2000\n", - "[I] Finished loading.\n", - "[I] Data load time: 47.9751s\n" - ] - } - ], - "source": [ - "DATASET_NAME = 'SBM_PATTERN'\n", - "dataset = LoadData(DATASET_NAME) # 30s\n", - "trainset, valset, testset = dataset.train, dataset.val, dataset.test\n" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Time (sec): 0.0036211013793945312\n" - ] - } - ], - "source": [ - "start = time.time()\n", - "\n", - "batch_size = 10\n", - "collate = SBMsDataset.collate\n", - "train_loader = DataLoader(trainset, batch_size=batch_size, shuffle=True, collate_fn=collate)\n", - "\n", - "print('Time (sec):',time.time() - start) #0.0006" - ] - }, { "cell_type": "code", "execution_count": null, @@ -769,5 +579,5 @@ } }, "nbformat": 4, - "nbformat_minor": 2 + "nbformat_minor": 4 } diff --git a/data/SBMs/prepare_SBM_CLUSTER.ipynb 
b/data/SBMs/prepare_SBM_CLUSTER.ipynb new file mode 100644 index 000000000..7b55bd366 --- /dev/null +++ b/data/SBMs/prepare_SBM_CLUSTER.ipynb @@ -0,0 +1,267 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Notebook for preparing and saving SBM_CLUSTER graphs in DGL form" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "import torch\n", + "import pickle\n", + "import time\n", + "import os\n", + "%matplotlib inline\n", + "import matplotlib.pyplot as plt" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Download SBM_CLUSTER dataset" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "if not os.path.isfile('SBM_CLUSTER.zip'):\n", + " print('downloading..')\n", + " !curl https://www.dropbox.com/s/e67bisl7zpqnioq/SBM_CLUSTER.zip?dl=1 -o SBM_CLUSTER.zip -J -L -k\n", + " !unzip SBM_CLUSTER.zip -d ./\n", + "else:\n", + " print('File already downloaded')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Convert to DGL format and save with pickle" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "/home/vijay/graphdeeplearning/benchmarking-gnns\n" + ] + } + ], + "source": [ + "import os\n", + "os.chdir('../../') # go to root folder of the project\n", + "print(os.getcwd())" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "WARNING:root:The OGB package is out of date. 
Your version is 1.2.2, while the latest version is 1.2.3.\n" + ] + } + ], + "source": [ + "import pickle\n", + "\n", + "%load_ext autoreload\n", + "%autoreload 2\n", + "\n", + "from data.SBMs import SBMsDatasetDGL \n", + "\n", + "from data.data import LoadData\n", + "from torch.utils.data import DataLoader\n", + "from data.SBMs import SBMsDataset" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "class DotDict(dict):\n", + " def __init__(self, **kwds):\n", + " self.update(kwds)\n", + " self.__dict__ = self" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "start = time.time()\n", + "\n", + "DATASET_NAME = 'SBM_CLUSTER'\n", + "dataset = SBMsDatasetDGL(DATASET_NAME) \n", + "\n", + "print('Time (sec):',time.time() - start) " + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "10000\n", + "1000\n", + "1000\n", + "(DGLGraph(num_nodes=117, num_edges=4104,\n", + " ndata_schemes={'feat': Scheme(shape=(), dtype=torch.int64)}\n", + " edata_schemes={'feat': Scheme(shape=(1,), dtype=torch.float32)}), tensor([0, 3, 3, 0, 4, 3, 0, 2, 0, 0, 0, 2, 2, 0, 1, 5, 3, 0, 2, 4, 2, 3, 2, 4,\n", + " 3, 1, 3, 5, 2, 3, 0, 0, 3, 5, 2, 5, 3, 2, 0, 3, 0, 3, 3, 3, 0, 3, 2, 0,\n", + " 3, 5, 2, 4, 1, 1, 3, 4, 4, 3, 3, 3, 0, 5, 2, 4, 3, 0, 0, 4, 3, 0, 0, 1,\n", + " 4, 2, 3, 2, 0, 0, 0, 4, 2, 2, 3, 3, 3, 0, 0, 2, 2, 5, 4, 0, 2, 5, 4, 0,\n", + " 0, 2, 0, 0, 0, 3, 3, 2, 2, 1, 2, 0, 0, 0, 5, 3, 1, 4, 3, 3, 5],\n", + " dtype=torch.int16))\n", + "(DGLGraph(num_nodes=90, num_edges=2396,\n", + " ndata_schemes={'feat': Scheme(shape=(), dtype=torch.int64)}\n", + " edata_schemes={'feat': Scheme(shape=(1,), dtype=torch.float32)}), tensor([1, 0, 0, 4, 4, 0, 5, 3, 4, 0, 3, 1, 0, 5, 5, 5, 1, 3, 3, 4, 1, 2, 5, 4,\n", + " 5, 5, 2, 0, 5, 3, 2, 5, 5, 5, 5, 0, 3, 3, 0, 2, 3, 3, 3, 3, 5, 3, 
1, 1,\n", + " 5, 2, 5, 1, 1, 4, 5, 2, 0, 4, 4, 0, 3, 4, 0, 0, 2, 3, 5, 3, 3, 4, 0, 5,\n", + " 1, 0, 0, 0, 0, 2, 4, 0, 5, 0, 3, 0, 5, 3, 4, 3, 0, 5],\n", + " dtype=torch.int16))\n", + "(DGLGraph(num_nodes=134, num_edges=5570,\n", + " ndata_schemes={'feat': Scheme(shape=(), dtype=torch.int64)}\n", + " edata_schemes={'feat': Scheme(shape=(1,), dtype=torch.float32)}), tensor([2, 5, 4, 4, 4, 5, 2, 1, 5, 0, 0, 1, 5, 5, 4, 2, 5, 5, 0, 0, 3, 0, 1, 2,\n", + " 2, 5, 0, 2, 0, 5, 1, 5, 5, 1, 0, 0, 5, 2, 2, 5, 5, 1, 4, 0, 0, 5, 1, 0,\n", + " 3, 0, 5, 1, 5, 4, 0, 4, 5, 1, 5, 4, 4, 0, 2, 5, 2, 5, 0, 1, 0, 1, 2, 0,\n", + " 2, 2, 0, 3, 2, 4, 0, 5, 2, 0, 2, 2, 5, 4, 2, 0, 4, 0, 0, 5, 1, 0, 5, 3,\n", + " 2, 3, 5, 0, 1, 5, 2, 0, 1, 4, 0, 3, 2, 1, 0, 2, 1, 4, 2, 5, 2, 0, 5, 2,\n", + " 5, 5, 0, 1, 5, 4, 2, 2, 2, 0, 1, 0, 2, 1], dtype=torch.int16))\n" + ] + } + ], + "source": [ + "print(len(dataset.train))\n", + "print(len(dataset.val))\n", + "print(len(dataset.test))\n", + "\n", + "print(dataset.train[0])\n", + "print(dataset.val[0])\n", + "print(dataset.test[0])" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "start = time.time()\n", + "\n", + "with open('data/SBMs/SBM_CLUSTER.pkl','wb') as f:\n", + " pickle.dump([dataset.train,dataset.val,dataset.test],f)\n", + " \n", + "print('Time (sec):',time.time() - start) " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Test Load function" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[I] Loading dataset SBM_CLUSTER...\n", + "train, test, val sizes : 10000 1000 1000\n", + "[I] Finished loading.\n", + "[I] Data load time: 15.4165s\n" + ] + } + ], + "source": [ + "DATASET_NAME = 'SBM_CLUSTER'\n", + "dataset = LoadData(DATASET_NAME) \n", + "trainset, valset, testset = dataset.train, dataset.val, dataset.test" + ] + }, + { + "cell_type": 
"code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Time (sec): 0.00022411346435546875\n" + ] + } + ], + "source": [ + "start = time.time()\n", + "\n", + "batch_size = 10\n", + "collate = SBMsDataset.collate\n", + "train_loader = DataLoader(trainset, batch_size=batch_size, shuffle=True, collate_fn=collate)\n", + "\n", + "print('Time (sec):',time.time() - start) " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.4" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/data/SBMs/prepare_SBM_PATTERN.ipynb b/data/SBMs/prepare_SBM_PATTERN.ipynb new file mode 100644 index 000000000..4898a91af --- /dev/null +++ b/data/SBMs/prepare_SBM_PATTERN.ipynb @@ -0,0 +1,259 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Notebook for preparing and saving SBM_PATTERN graphs in DGL form" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "import torch\n", + "import pickle\n", + "import time\n", + "import os\n", + "%matplotlib inline\n", + "import matplotlib.pyplot as plt" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Download SBM_PATTERN dataset" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "if not os.path.isfile('SBM_PATTERN.zip'):\n", + " print('downloading..')\n", + " !curl https://www.dropbox.com/s/qvu0r11tjyt6jyb/SBM_PATTERN.zip?dl=1 -o 
SBM_PATTERN.zip -J -L -k\n", + " !unzip SBM_PATTERN.zip -d ./\n", + " !rm -r __MACOSX/\n", + "else:\n", + " print('File already downloaded')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Convert to DGL format and save with pickle" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "/home/vijay/graphdeeplearning/benchmarking-gnns\n" + ] + } + ], + "source": [ + "import os\n", + "os.chdir('../../') # go to root folder of the project\n", + "print(os.getcwd())" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "WARNING:root:The OGB package is out of date. Your version is 1.2.2, while the latest version is 1.2.3.\n" + ] + } + ], + "source": [ + "import pickle\n", + "\n", + "%load_ext autoreload\n", + "%autoreload 2\n", + "\n", + "from data.SBMs import SBMsDatasetDGL \n", + "\n", + "from data.data import LoadData\n", + "from torch.utils.data import DataLoader\n", + "from data.SBMs import SBMsDataset" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "class DotDict(dict):\n", + " def __init__(self, **kwds):\n", + " self.update(kwds)\n", + " self.__dict__ = self" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "start = time.time()\n", + "\n", + "DATASET_NAME = 'SBM_PATTERN'\n", + "dataset = SBMsDatasetDGL(DATASET_NAME) \n", + "\n", + "print('Time (sec):',time.time() - start) " + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "10000\n", + "2000\n", + "2000\n", + "(DGLGraph(num_nodes=108, num_edges=4884,\n", + " ndata_schemes={'feat': Scheme(shape=(), dtype=torch.int64)}\n", + " edata_schemes={'feat': 
Scheme(shape=(1,), dtype=torch.float32)}), tensor([0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,\n", + " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0,\n", + " 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,\n", + " 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0,\n", + " 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0], dtype=torch.int16))\n", + "(DGLGraph(num_nodes=108, num_edges=4738,\n", + " ndata_schemes={'feat': Scheme(shape=(), dtype=torch.int64)}\n", + " edata_schemes={'feat': Scheme(shape=(1,), dtype=torch.float32)}), tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0,\n", + " 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", + " 1, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1,\n", + " 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0,\n", + " 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 1], dtype=torch.int16))\n", + "(DGLGraph(num_nodes=94, num_edges=3772,\n", + " ndata_schemes={'feat': Scheme(shape=(), dtype=torch.int64)}\n", + " edata_schemes={'feat': Scheme(shape=(1,), dtype=torch.float32)}), tensor([0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1,\n", + " 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", + " 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1,\n", + " 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0],\n", + " dtype=torch.int16))\n" + ] + } + ], + "source": [ + "print(len(dataset.train))\n", + "print(len(dataset.val))\n", + "print(len(dataset.test))\n", + "\n", + "print(dataset.train[0])\n", + "print(dataset.val[0])\n", + "print(dataset.test[0])" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "start = time.time()\n", + "\n", + "with open('data/SBMs/SBM_PATTERN.pkl','wb') as f:\n", + " 
pickle.dump([dataset.train,dataset.val,dataset.test],f)\n", + " \n", + "print('Time (sec):',time.time() - start) " + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[I] Loading dataset SBM_PATTERN...\n", + "train, test, val sizes : 10000 2000 2000\n", + "[I] Finished loading.\n", + "[I] Data load time: 22.8220s\n" + ] + } + ], + "source": [ + "DATASET_NAME = 'SBM_PATTERN'\n", + "dataset = LoadData(DATASET_NAME) \n", + "trainset, valset, testset = dataset.train, dataset.val, dataset.test" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Time (sec): 0.8683583736419678\n" + ] + } + ], + "source": [ + "start = time.time()\n", + "\n", + "batch_size = 10\n", + "collate = SBMsDataset.collate\n", + "train_loader = DataLoader(trainset, batch_size=batch_size, shuffle=True, collate_fn=collate)\n", + "\n", + "print('Time (sec):',time.time() - start) " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.4" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/data/data.py b/data/data.py index 125d33704..215274d5f 100644 --- a/data/data.py +++ b/data/data.py @@ -21,7 +21,7 @@ def LoadData(DATASET_NAME): return SuperPixDataset(DATASET_NAME) # handling for (ZINC) molecule dataset - if DATASET_NAME == 'ZINC': + if DATASET_NAME == 'ZINC' or DATASET_NAME == 'ZINC-full': return MoleculeDataset(DATASET_NAME) # handling for the TU 
Datasets diff --git a/data/molecules.py b/data/molecules.py index 5bcc90d9f..d9fd2f62c 100644 --- a/data/molecules.py +++ b/data/molecules.py @@ -20,7 +20,7 @@ class MoleculeDGL(torch.utils.data.Dataset): - def __init__(self, data_dir, split, num_graphs): + def __init__(self, data_dir, split, num_graphs=None): self.data_dir = data_dir self.split = split self.num_graphs = num_graphs @@ -28,12 +28,13 @@ def __init__(self, data_dir, split, num_graphs): with open(data_dir + "/%s.pickle" % self.split,"rb") as f: self.data = pickle.load(f) - # loading the sampled indices from file ./zinc_molecules/.index - with open(data_dir + "/%s.index" % self.split,"r") as f: - data_idx = [list(map(int, idx)) for idx in csv.reader(f)] - self.data = [ self.data[i] for i in data_idx[0] ] - - assert len(self.data)==num_graphs, "Sample num_graphs again; available idx: train/val/test => 10k/1k/1k" + if self.num_graphs in [10000, 1000]: + # loading the sampled indices from file ./zinc_molecules/.index + with open(data_dir + "/%s.index" % self.split,"r") as f: + data_idx = [list(map(int, idx)) for idx in csv.reader(f)] + self.data = [ self.data[i] for i in data_idx[0] ] + + assert len(self.data)==num_graphs, "Sample num_graphs again; available idx: train/val/test => 10k/1k/1k" """ data is a list of Molecule dict objects with following attributes @@ -103,10 +104,15 @@ def __init__(self, name='Zinc'): self.num_bond_type = 4 # known meta-info about the zinc dataset; can be calculated as well data_dir='./data/molecules' - - self.train = MoleculeDGL(data_dir, 'train', num_graphs=10000) - self.val = MoleculeDGL(data_dir, 'val', num_graphs=1000) - self.test = MoleculeDGL(data_dir, 'test', num_graphs=1000) + if self.name == 'ZINC-full': + data_dir='./data/molecules/zinc_full' + self.train = MoleculeDGL(data_dir, 'train', num_graphs=220011) + self.val = MoleculeDGL(data_dir, 'val', num_graphs=24445) + self.test = MoleculeDGL(data_dir, 'test', num_graphs=5000) + else: + self.train = 
MoleculeDGL(data_dir, 'train', num_graphs=10000) + self.val = MoleculeDGL(data_dir, 'val', num_graphs=1000) + self.test = MoleculeDGL(data_dir, 'test', num_graphs=1000) print("Time taken: {:.4f}s".format(time.time()-t0)) diff --git a/data/molecules/prepare_molecules_ZINC_full.ipynb b/data/molecules/prepare_molecules_ZINC_full.ipynb new file mode 100644 index 000000000..a47a1f2b3 --- /dev/null +++ b/data/molecules/prepare_molecules_ZINC_full.ipynb @@ -0,0 +1,401 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Notebook for preparing and saving MOLECULAR (ZINC full) graphs" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "import torch\n", + "import pickle\n", + "import time\n", + "import os\n", + "%matplotlib inline\n", + "import matplotlib.pyplot as plt\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Download ZINC full dataset" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "downloading..\n", + " % Total % Received % Xferd Average Speed Time Time Time Current\n", + " Dload Upload Total Spent Left Speed\n", + " 0 0 0 0 0 0 0 0 --:--:-- --:--:-- --:--:-- 0\n", + " 0 0 0 0 0 0 0 0 --:--:-- --:--:-- --:--:-- 0\n", + "100 43.6M 100 43.6M 0 0 7898k 0 0:00:05 0:00:05 --:--:-- 10.3M\n", + "Archive: molecules_zinc_full.zip\n", + " creating: ../molecules/zinc_full/\n", + " inflating: ../__MACOSX/molecules/._zinc_full \n", + " inflating: ../molecules/zinc_full/test.pickle \n", + " inflating: ../__MACOSX/molecules/zinc_full/._test.pickle \n", + " inflating: ../molecules/zinc_full/.DS_Store \n", + " inflating: ../__MACOSX/molecules/zinc_full/._.DS_Store \n", + " inflating: ../molecules/zinc_full/val.pickle \n", + " inflating: ../__MACOSX/molecules/zinc_full/._val.pickle \n", + " inflating: 
../molecules/zinc_full/atom_dict.pickle \n", + " inflating: ../__MACOSX/molecules/zinc_full/._atom_dict.pickle \n", + " inflating: ../molecules/zinc_full/bond_dict.pickle \n", + " inflating: ../__MACOSX/molecules/zinc_full/._bond_dict.pickle \n", + " inflating: ../molecules/zinc_full/train.pickle \n", + " inflating: ../__MACOSX/molecules/zinc_full/._train.pickle \n" + ] + } + ], + "source": [ + "if not os.path.isfile('molecules_zinc_full.zip'):\n", + " print('downloading..')\n", + " !curl https://www.dropbox.com/s/grhitgnuuixoxwl/molecules_zinc_full.zip?dl=1 -o molecules_zinc_full.zip -J -L -k\n", + " !unzip molecules_zinc_full.zip -d ../\n", + " !rm -r ../__MACOSX/\n", + " # !tar -xvf molecules_zinc_full.zip -C ../\n", + "else:\n", + " print('File already downloaded')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Convert to DGL format and save with pickle" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "/home/vijay/graphdeeplearning/benchmarking-gnns\n" + ] + } + ], + "source": [ + "import os\n", + "os.chdir('../../') # go to root folder of the project\n", + "print(os.getcwd())\n" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "WARNING:root:The OGB package is out of date. 
Your version is 1.2.2, while the latest version is 1.2.3.\n" + ] + } + ], + "source": [ + "import pickle\n", + "\n", + "%load_ext autoreload\n", + "%autoreload 2\n", + "\n", + "from data.molecules import MoleculeDatasetDGL \n", + "\n", + "from data.data import LoadData\n", + "from torch.utils.data import DataLoader\n", + "from data.molecules import MoleculeDataset\n" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "preparing 220011 graphs for the TRAIN set...\n", + "preparing 24445 graphs for the VAL set...\n", + "preparing 5000 graphs for the TEST set...\n", + "Time taken: 1075.5591s\n" + ] + } + ], + "source": [ + "DATASET_NAME = 'ZINC-full'\n", + "dataset = MoleculeDatasetDGL(DATASET_NAME)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "min/max : 6 38\n" + ] + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAX0AAAEICAYAAACzliQjAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjAsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+17YcXAAARaElEQVR4nO3df6xfdX3H8efL8kPjjxWkEGyZF2fjRLOhqcCicwQMP82KCRhwm5WQVBPMMFsyi1uCv5qUxYmaKIYNtBqxdKCDiIk2/JhzGWD54Q/oCAWqVBitKyCESQK898f3U3Ip92fv5d775fN8JDffcz7nc873fU7o63v4nHO+31QVkqQ+vGy+C5AkzR1DX5I6YuhLUkcMfUnqiKEvSR0x9CWpI4a+NEVJjk2yfb7rkGbC0JfmUJKRJJVkn/muRX0y9CWpI4a+upNkTZIr92j7YpIvJTk7yZYkjye5L8mHJ9jOx5P8uvW9O8nxrf1l7T3uTfK/STYmObCt9qP2+miSJ5L8yYuzl9LYDH316NvAKUleA5BkEfB+4HJgB/Be4DXA2cBFSd6+5waSvAn4KPCOqno1cCKwrS3+a+A04M+A1wGPAF9uy97dXhdX1auq6r9mfe+kCRj66k5V/RK4jUEwAxwHPFlVN1XVtVV1bw38O/BD4E/H2MwzwP7AEUn2raptVXVvW/Zh4O+rantVPQV8EjjdcXwtBIa+enU5cFab/kCbJ8nJSW5KsivJo8ApwEF7rlxVW4GPMQj0HUk2JHldW/x64LtJHm3b2MLgQ+KQF3OHpKkw9NWrfwWOTbIMeB9weZL9gauAzwGHVNVi4PtAxtpAVV1eVe9iEPIFXNgWPQCcXFWLR/29vKp+3fpJ88bQV5eqaidwI/A14P6q2gLsx2DIZifwdJKTgRPGWj/Jm5Ic1z4ofgf8H4OzeYCvAmuTvL71XZJkZVu2E3gWeMOLsmPSJAx99exy4D3tlap6nMFF2I0MLr5+ALhmnHX3B9YBvwH+BzgY+ERb9sW23g+TPA7cBBzd3uNJYC3wn23455jZ3y1pfPFHVCSpH57pS1JHDH1J6oihL0kdMfQlqSML+gnBgw46qEZGRua7DEkaKrfeeutvqmrJWMsWdOiPjIywefPm+S5DkoZKkl+Ot8zhHUnqiKEvSR0x9CWpI4a+JHXE0Jekjhj6ktQRQ1+SOmLoS1JHDH1J6siCfiJXeqkaWXPtXq+7bd2ps1iJeuOZviR1xNCXpI4Y+pLUEUNfkjpi6EtSRwx9SeqIoS9JHTH0Jakjhr4kdcTQl6SOGPqS1BFDX5I6YuhLUkcMfUnqiKEvSR0x9CWpI4a+JHVkyqGfZFGS25N8r80fnuTmJPckuSLJfq19/za/tS0fGbWN81v73UlOnO2dkSRNbDpn+ucBW0bNXwhcVFXLgUeAc1r7OcAjVfVG4KLWjyRHAGcCbwFOAr6SZNHMypckTceUQj/JMuBU4F/afIDjgCtbl/XAaW16ZZunLT++9V8JbKiqp6rqfmArcNRs7IQkaWqmeqb/BeDvgGfb/GuBR6vq6Ta/HVjappcCDwC05Y+1/s+1j7GOJGkOTBr6Sd4L7KiqW0c3j9G1Jlk20Tqj3291ks1JNu/cuXOy8iRJ0zCVM/13An+eZBuwgcGwzheAxUn2aX2WAQ+26e3AYQBt+e8Bu0a3j7HOc6rqkqpaUVUrlixZMu0dkiSNb9LQr6rzq2pZVY0wuBB7fVX9BXADcHrrtgq4uk1f0+Zp
y6+vqmrtZ7a7ew4HlgO3zNqeSJImtc/kXcb1cWBDks8CtwOXtvZLgW8m2crgDP9MgKq6M8lG4C7gaeDcqnpmBu8vSZqmaYV+Vd0I3Nim72OMu2+q6nfAGeOsvxZYO90ipYVoZM21812CNG0+kStJHTH0Jakjhr4kdcTQl6SOGPqS1BFDX5I6YuhLUkcMfUnqiKEvSR0x9CWpI4a+JHXE0Jekjhj6ktQRQ1+SOmLoS1JHDH1J6oihL0kdMfQlqSOGviR1xNCXpI4Y+pLUEUNfkjpi6EtSRwx9SeqIoS9JHTH0Jakjhr4kdcTQl6SOGPqS1BFDX5I6YuhLUkcMfUnqiKEvSR0x9CWpI/vMdwHSTIysuXZG629bd+osVSINB8/0Jakjhr4kdcTQl6SOTBr6SV6e5JYkP01yZ5JPtfbDk9yc5J4kVyTZr7Xv3+a3tuUjo7Z1fmu/O8mJL9ZOSZLGNpUz/aeA46rqj4EjgZOSHANcCFxUVcuBR4BzWv9zgEeq6o3ARa0fSY4AzgTeApwEfCXJotncGUnSxCYN/Rp4os3u2/4KOA64srWvB05r0yvbPG358UnS2jdU1VNVdT+wFThqVvZCkjQlUxrTT7IoyR3ADmATcC/waFU93bpsB5a26aXAAwBt+WPAa0e3j7HO6PdanWRzks07d+6c/h5JksY1pdCvqmeq6khgGYOz8zeP1a29Zpxl47Xv+V6XVNWKqlqxZMmSqZQnSZqiad29U1WPAjcCxwCLk+x+uGsZ8GCb3g4cBtCW/x6wa3T7GOtIkubAVO7eWZJkcZt+BfAeYAtwA3B667YKuLpNX9Pmacuvr6pq7We2u3sOB5YDt8zWjkiSJjeVr2E4FFjf7rR5GbCxqr6X5C5gQ5LPArcDl7b+lwLfTLKVwRn+mQBVdWeSjcBdwNPAuVX1zOzujiRpIpOGflX9DHjbGO33McbdN1X1O+CMcba1Flg7/TIlSbPBJ3IlqSOGviR1xNCXpI4Y+pLUEUNfkjpi6EtSRwx9SeqIoS9JHTH0Jakjhr4kdcTQl6SOGPqS1BFDX5I6YuhLUkcMfUnqiKEvSR0x9CWpI1P5uURJLxEja66d0frb1p06S5VovnimL0kdMfQlqSOGviR1xNCXpI4Y+pLUEUNfkjpi6EtSRwx9SeqIoS9JHTH0Jakjhr4kdcTQl6SOGPqS1BFDX5I6YuhLUkcMfUnqiKEvSR0x9CWpI4a+JHVk0tBPcliSG5JsSXJnkvNa+4FJNiW5p70e0NqT5EtJtib5WZK3j9rWqtb/niSrXrzdkiSNZSpn+k8Df1tVbwaOAc5NcgSwBriuqpYD17V5gJOB5e1vNXAxDD4kgAuAo4GjgAt2f1BIkubGpKFfVQ9V1W1t+nFgC7AUWAmsb93WA6e16ZXAN2rgJmBxkkOBE4FNVbWrqh4BNgEnzereSJImNK0x/SQjwNuAm4FDquohGHwwAAe3bkuBB0attr21jde+53usTrI5yeadO3dOpzxJ0iT2mWrHJK8CrgI+VlW/TTJu1zHaaoL25zdUXQJcArBixYoXLJdm08iaa+e7BGlOTelMP8m+DAL/W1X1ndb8cBu2ob3uaO3bgcNGrb4MeHCCdknSHJnK3TsBLgW2VNXnRy26Bth9B84q4OpR7R9sd/EcAzzWhn9+AJyQ5IB2AfeE1iZJmiNTGd55J/BXwM+T3NHaPgGsAzYmOQf4FXBGW/Z94BRgK/AkcDZAVe1K8hngJ63fp6tq16zshdQRh6Q0E5OGflX9mLHH4wGOH6N/AeeOs63LgMumU6Akafb4RK4kdcTQl6SOGPqS1BFDX5I6YuhLUkcMfUnqiKEvSR0x9CWpI4a+JHXE0Jekjhj6ktQRQ1+SOmLoS1JHDH1J6oihL0kdMfQlqSOGviR1xNCXpI5M5TdypUnN5Hdbt607dRYrkTQRz/QlqSOGviR1xNCXpI4Y+pLUEUNfkjpi6EtSRwx9SeqIoS9JHTH0Jakjhr4k
dcTQl6SOGPqS1BFDX5I6YuhLUkf8amXNu5l8LbOk6fFMX5I6YuhLUkcMfUnqyKRj+kkuA94L7Kiqt7a2A4ErgBFgG/D+qnokSYAvAqcATwIfqqrb2jqrgH9om/1sVa2f3V2RtJD5k5oLw1TO9L8OnLRH2xrguqpaDlzX5gFOBpa3v9XAxfDch8QFwNHAUcAFSQ6YafGSpOmZNPSr6kfArj2aVwK7z9TXA6eNav9GDdwELE5yKHAisKmqdlXVI8AmXvhBIkl6ke3tmP4hVfUQQHs9uLUvBR4Y1W97axuvXZI0h2b7Qm7GaKsJ2l+4gWR1ks1JNu/cuXNWi5Ok3u1t6D/chm1orzta+3bgsFH9lgEPTtD+AlV1SVWtqKoVS5Ys2cvyJElj2dvQvwZY1aZXAVePav9gBo4BHmvDPz8ATkhyQLuAe0JrkyTNoancsvlt4FjgoCTbGdyFsw7YmOQc4FfAGa379xncrrmVwS2bZwNU1a4knwF+0vp9uqr2vDgsSXqRTRr6VXXWOIuOH6NvAeeOs53LgMumVZ0kaVb5RK4kdcTQl6SOGPqS1BFDX5I6YuhLUkf85SxJU+avnA0/z/QlqSOGviR1xNCXpI44pq/nOF4rvfR5pi9JHTH0Jakjhr4kdcQxfUkL3kyuN21bd+osVjL8PNOXpI4Y+pLUEUNfkjpi6EtSRwx9SeqIoS9JHTH0Jakjhr4kdcTQl6SOGPqS1BFDX5I6YuhLUkcMfUnqiKEvSR0x9CWpI4a+JHXE0Jekjhj6ktQRfy5xgZnJz8JJ0mQMfUkvaf6+7vMZ+pI0jpn+n/dC/NBwTF+SOmLoS1JHDH1J6sich36Sk5LcnWRrkjVz/f6S1LM5Df0ki4AvAycDRwBnJTliLmuQpJ7N9d07RwFbq+o+gCQbgJXAXXNcx6S8X17STC3E20XnOvSXAg+Mmt8OHD26Q5LVwOo2+0SSu8fYzkHAb16UCufGMNc/zLXDcNc/zLXDcNc/57Xnwhmt/vrxFsx16GeMtnreTNUlwCUTbiTZXFUrZrOwuTTM9Q9z7TDc9Q9z7TDc9Q9z7Xua6wu524HDRs0vAx6c4xokqVtzHfo/AZYnOTzJfsCZwDVzXIMkdWtOh3eq6ukkHwV+ACwCLquqO/diUxMO/wyBYa5/mGuH4a5/mGuH4a5/mGt/nlTV5L0kSS8JPpErSR0x9CWpI0MX+km2Jfl5kjuSbJ7veiaS5LIkO5L8YlTbgUk2JbmnvR4wnzVOZJz6P5nk1+3435HklPmscTxJDktyQ5ItSe5Mcl5rH4rjP0H9C/74J3l5kluS/LTV/qnWfniSm9uxv6LdzLHgTFD/15PcP+rYHznfte6NoRvTT7INWFFVC/4hjyTvBp4AvlFVb21t/wjsqqp17buHDqiqj89nneMZp/5PAk9U1efms7bJJDkUOLSqbkvyauBW4DTgQwzB8Z+g/vezwI9/kgCvrKonkuwL/Bg4D/gb4DtVtSHJV4GfVtXF81nrWCao/yPA96rqynktcIaG7kx/mFTVj4BdezSvBNa36fUM/iEvSOPUPxSq6qGquq1NPw5sYfBE+FAc/wnqX/Bq4Ik2u2/7K+A4YHdgLuRjP179LwnDGPoF/DDJre0rG4bNIVX1EAz+YQMHz3M9e+OjSX7Whn8W5PDIaElGgLcBNzOEx3+P+mEIjn+SRUnuAHYAm4B7gUer6unWZTsL+ENsz/qravexX9uO/UVJ9p/HEvfaMIb+O6vq7Qy+qfPcNgShuXMx8AfAkcBDwD/NbzkTS/Iq4CrgY1X12/muZ7rGqH8ojn9VPVNVRzJ46v4o4M1jdZvbqqZuz/qTvBU4H/hD4B3AgcCCGxaciqEL/ap6sL3uAL7L4D+oYfJwG6/dPW67Y57rmZaqerj9g3gW+GcW8PFv47FXAd+qqu+05qE5/mPVP0zHH6CqHgVuBI4BFifZ/UDoUHwFy6j6T2pDblVVTwFfY4Ef+/EMVegneWW7qEWS
VwInAL+YeK0F5xpgVZteBVw9j7VM2+7AbN7HAj3+7WLcpcCWqvr8qEVDcfzHq38Yjn+SJUkWt+lXAO9hcE3iBuD01m0hH/ux6v/vUScLYXA9YsEd+6kYqrt3kryBwdk9DL5C4vKqWjuPJU0oybeBYxl8LevDwAXAvwEbgd8HfgWcUVUL8mLpOPUfy2BooYBtwId3j5EvJEneBfwH8HPg2db8CQbj4gv++E9Q/1ks8OOf5I8YXKhdxODEcmNVfbr9+93AYGjkduAv21nzgjJB/dcDSxh8W/AdwEdGXfAdGkMV+pKkmRmq4R1J0swY+pLUEUNfkjpi6EtSRwx9SeqIoS9JHTH0Jakj/w/W4xvb2cxkgQAAAABJRU5ErkJggg==\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "min/max : 6 38\n" + ] + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXcAAAEICAYAAACktLTqAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjAsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+17YcXAAAQZ0lEQVR4nO3df4ylVX3H8fdHEH8guvwYCO6uHaxENMQqWRGrsURsKqAuTcUfFV0MLSXVFoutrKaJ2sR0bayoscGiaFdrRIJEqBANFWw1xq0LIgqrYYsrrKwwBBZEoxX59o97Ng7L/Li7M7Mz98z7lUzmuec597nfw7PzmcO5z3MnVYUkqS+PWewCJEnzz3CXpA4Z7pLUIcNdkjpkuEtShwx3SeqQ4S5JHTLc1Z0k25K8bI7HODPJN5ZKPdKeMtwlqUOGu7qS5DPA04D/SPJgknckOSHJN5PsTPLdJCdO6n9mktuS/CzJj5K8IcmzgI8BL2zH2Nn6npLkltb3J0n+dtJxXpHkxvYa30zynOnq2Yf/ObSMxY8fUG+SbAP+rKr+M8lK4CbgjcCXgZOAS4BjgF8AO4DnV9UPkxwJHFJVNyc5sx3jxZOOuwN4TVV9PcnBwFFVdUOS49qxXwlsBs4A3gs8s6p+NbmefTF+CZy5q39nAFdX1dVV9XBVXcMggE9p+x8Gjk3yhKraUVU3z3CsXwPPTvLkqrqvqm5o7X8O/GtVbaqq31TVRuBXwAkLNCZpVoa7evc7wOltuWRnW2J5MXBkVf0ceC1wDrAjyVVJjpnhWH/C4JfCj5P8V5IXTnqNt+/2GquBpy7YqKRZGO7q0eS1xjuAz1TViklfB1bVBoCq+kpV/SFwJPAD4ONTHIPW99tVtRY4HPgicOmk13jfbq/xxKr63HTHkhaa4a4e3QU8vW3/O/DKJH+UZL8kj09yYpJVSY5I8qokBzJYRnkQ+M2kY6xKcgBAkgPam61PqapfAw9M6vtx4JwkL8jAgUlOTXLQFPVI+4Thrh79I/D3bXnktcBa4F3ABINZ9t8x+Lf/GODtwJ3AvcAfAH/ZjnEtcDPw0yT3tLY3AtuSPMBgKecMgKrazGDd/aPAfcBW4Myp6pl8hY20kLxaRpI65MxdkjpkuEtShwx3SeqQ4S5JHdp/sQsAOOyww2p8fHyxy5CkkXL99dffU1VjU+1bEuE+Pj7O5s2bF7sMSRopSX483T6XZSSpQ4a7JHXIcJekDhnuktQhw12SOmS4S1KHDHdJ6pDhLkkdMtwlqUNL4g5VqVfj66/a6+du23DqPFai5caZuyR1yHCXpA4Z7pLUIcNdkjpkuEtShwx3SeqQ4S5JHTLcJalDhrskdchwl6QOGe6S1CHDXZI6ZLhLUocMd0nqkOEuSR0y3CWpQ4a7JHXIcJekDhnuktQhw12SOjRUuCf5myQ3J/l+ks8leXySo5JsSnJrks8nOaD1fVx7vLXtH1/IAUiSHm3WcE+yEvhrYE1VHQvsB7wOeD9wQVUdDdwHnNWechZwX1U9A7ig9ZMk7UPDLsvsDzwhyf7AE4EdwEuBy9r+jcBpbXtte0zbf1KSzE+5kqRhzBruVfUT4APA7QxC/X7gemBnVT3Uum0HVrbtlcAd7bkPtf6H7n7cJGcn2Zxk88TExFzHIUmaZJhlmYMZzMaPAp4KHAicPEXX2vWUGfb9tqHqoqpaU1VrxsbGhq9YkjSrYZZlXgb8qKomqurXwOXA7wMr2jINwCrgzra9HVgN0PY/
Bbh3XquWJM1omHC/HTghyRPb2vlJwC3AdcCrW591wBVt+8r2mLb/2qp61MxdkrRwhllz38TgjdEbgO+151wEnA+cl2QrgzX1i9tTLgYObe3nAesXoG5J0gz2n70LVNW7gXfv1nwbcPwUfX8JnD730iRJe8s7VCWpQ4a7JHXIcJekDhnuktQhw12SOmS4S1KHDHdJ6pDhLkkdMtwlqUOGuyR1aKiPH5AW2/j6q/b6uds2nDqPlUijwZm7JHXIcJekDhnuktQhw12SOmS4S1KHDHdJ6pDhLkkdMtwlqUOGuyR1yHCXpA4Z7pLUIcNdkjpkuEtShwx3SeqQ4S5JHTLcJalDhrskdchwl6QOGe6S1CHDXZI6ZLhLUocMd0nqkOEuSR0y3CWpQ4a7JHXIcJekDg0V7klWJLksyQ+SbEnywiSHJLkmya3t+8Gtb5J8JMnWJDclOW5hhyBJ2t2wM/cPA1+uqmOA3wO2AOuBr1bV0cBX22OAk4Gj29fZwIXzWrEkaVazhnuSJwMvAS4GqKr/q6qdwFpgY+u2ETitba8FPl0D3wJWJDly3iuXJE1rmJn704EJ4FNJvpPkE0kOBI6oqh0A7fvhrf9K4I5Jz9/e2h4hydlJNifZPDExMadBSJIeaZhw3x84Driwqp4H/JzfLsFMJVO01aMaqi6qqjVVtWZsbGyoYiVJwxkm3LcD26tqU3t8GYOwv2vXckv7fvek/qsnPX8VcOf8lCtJGsas4V5VPwXuSPLM1nQScAtwJbCuta0DrmjbVwJvalfNnADcv2v5RpK0b+w/ZL+/Aj6b5ADgNuDNDH4xXJrkLOB24PTW92rgFGAr8IvWV5K0Dw0V7lV1I7Bmil0nTdG3gLfMsS5J0hx4h6okdchwl6QODbvmLmmEjK+/aq+fu23DqfNYiRaLM3dJ6pDhLkkdMtwlqUOGuyR1yHCXpA4Z7pLUIcNdkjpkuEtShwx3SeqQ4S5JHTLcJalDhrskdchwl6QOGe6S1CHDXZI6ZLhLUocMd0nqkOEuSR0y3CWpQ4a7JHXIcJekDhnuktQhw12SOmS4S1KHDHdJ6pDhLkkd2n+xC5CWsvH1Vy12CdJeceYuSR0y3CWpQ4a7JHXIcJekDhnuktQhw12SOjR0uCfZL8l3knypPT4qyaYktyb5fJIDWvvj2uOtbf/4wpQuSZrOnszczwW2THr8fuCCqjoauA84q7WfBdxXVc8ALmj9JEn70FA3MSVZBZwKvA84L0mAlwJ/2rpsBN4DXAisbdsAlwEfTZKqqvkrW4tlLjf1bNtw6jxWImkmw96h+iHgHcBB7fGhwM6qeqg93g6sbNsrgTsAquqhJPe3/vfMS8XSHvIuUy1Hsy7LJHkFcHdVXT+5eYquNcS+ycc9O8nmJJsnJiaGKlaSNJxh1txfBLwqyTbgEgbLMR8CViTZNfNfBdzZtrcDqwHa/qcA9+5+0Kq6qKrWVNWasbGxOQ1CkvRIs4Z7Vb2zqlZV1TjwOuDaqnoDcB3w6tZtHXBF276yPabtv9b1dknat+Zynfv5DN5c3cpgTf3i1n4xcGhrPw9YP7cSJUl7ao8+8reqvgZ8rW3fBhw/RZ9fAqfPQ22SpL3kHaqS1CHDXZI6ZLhLUocMd0nqkOEuSR0y3CWpQ4a7JHXIcJekDhnuktQhw12SOmS4S1KH9uizZSTtO/6REc2FM3dJ6pDhLkkdMtwlqUOGuyR1yHCXpA4Z7pLUIS+F1D7jpX3SvuPMXZI6ZLhLUocMd0nqkOEuSR0y3CWpQ4a7JHXIcJekDhnuktQhw12SOmS4S1KHDHdJ6pDhLkkdMtwlqUOGuyR1yHCXpA4Z7pLUIcNdkjpkuEtSh2YN9ySrk1yXZEuSm5Oc29oPSXJNklvb94Nbe5J8JMnWJDclOW6hByFJeqRh/obqQ8Dbq+qGJAcB1ye5BjgT+GpVbUiyHlgPnA+cDBzdvl4AXNi+S1oG5vK3crdtOHUeK1neZp25V9WOqrqh
bf8M2AKsBNYCG1u3jcBpbXst8Oka+BawIsmR8165JGlae7TmnmQceB6wCTiiqnbA4BcAcHjrthK4Y9LTtre23Y91dpLNSTZPTEzseeWSpGkNHe5JngR8AXhbVT0wU9cp2upRDVUXVdWaqlozNjY2bBmSpCEMFe5JHssg2D9bVZe35rt2Lbe073e39u3A6klPXwXcOT/lSpKGMczVMgEuBrZU1Qcn7boSWNe21wFXTGp/U7tq5gTg/l3LN5KkfWOYq2VeBLwR+F6SG1vbu4ANwKVJzgJuB05v+64GTgG2Ar8A3jyvFUuSZjVruFfVN5h6HR3gpCn6F/CWOdYlSZoD71CVpA4Z7pLUoWHW3CUtI3O5w1RLh+E+gry9W9JsXJaRpA4Z7pLUIcNdkjpkuEtShwx3SeqQ4S5JHTLcJalDhrskdchwl6QOeYfqMuOt5dLy4MxdkjpkuEtShwx3SeqQ4S5JHTLcJalDhrskdchwl6QOGe6S1CFvYpLUBf/85CM5c5ekDjlzl7Rk+PEY88eZuyR1yHCXpA4Z7pLUIcNdkjpkuEtSh7xaZpF4VYCkheTMXZI65Mx9Dpx9S1qqnLlLUocMd0nqkOEuSR1yzV3SstfjJ0ouSLgneTnwYWA/4BNVtWEhXkeSFttcL6xYqF8O874sk2Q/4F+Ak4FnA69P8uz5fh1J0vQWYuZ+PLC1qm4DSHIJsBa4ZQFey8sRJWkKCxHuK4E7Jj3eDrxg905JzgbObg8fTPLDBahlbx0G3LPYRcyz3sbU23igvzH1Nh5YgDHl/XN6+u9Mt2Mhwj1TtNWjGqouAi5agNefsySbq2rNYtcxn3obU2/jgf7G1Nt4YLTGtBCXQm4HVk96vAq4cwFeR5I0jYUI928DRyc5KskBwOuAKxfgdSRJ05j3ZZmqeijJW4GvMLgU8pNVdfN8v84CW5LLRXPU25h6Gw/0N6bexgMjNKZUPWo5XJI04vz4AUnqkOEuSR1a9uGe5JNJ7k7y/UlthyS5Jsmt7fvBi1njnppmTO9J8pMkN7avUxazxj2RZHWS65JsSXJzknNb+0iepxnGM8rn6PFJ/ifJd9uY3tvaj0qyqZ2jz7eLLJa8Gcbzb0l+NOkcPXexa53Osl9zT/IS4EHg01V1bGv7J+DeqtqQZD1wcFWdv5h17olpxvQe4MGq+sBi1rY3khwJHFlVNyQ5CLgeOA04kxE8TzOM5zWM7jkKcGBVPZjkscA3gHOB84DLq+qSJB8DvltVFy5mrcOYYTznAF+qqssWtcAhLPuZe1X9N3Dvbs1rgY1teyODH7yRMc2YRlZV7aiqG9r2z4AtDO6EHsnzNMN4RlYNPNgePrZ9FfBSYFcQjtI5mm48I2PZh/s0jqiqHTD4QQQOX+R65stbk9zUlm1GYgljd0nGgecBm+jgPO02Hhjhc5RkvyQ3AncD1wD/C+ysqodal+2M0C+x3cdTVbvO0fvaObogyeMWscQZGe7Lx4XA7wLPBXYA/7y45ey5JE8CvgC8raoeWOx65mqK8Yz0Oaqq31TVcxnclX488Kypuu3bqvbe7uNJcizwTuAY4PnAIcCSXQY03Kd2V1sX3bU+evci1zNnVXVX+8f6MPBxBj98I6Ote34B+GxVXd6aR/Y8TTWeUT9Hu1TVTuBrwAnAiiS7bpYcyY8imTSel7cltaqqXwGfYgmfI8N9alcC69r2OuCKRaxlXuwKweaPge9P13epaW9uXQxsqaoPTto1kudpuvGM+DkaS7KibT8BeBmD9xKuA17duo3SOZpqPD+YNJkIg/cPluw58mqZ5HPAiQw+yvMu4N3AF4FLgacBtwOnV9XIvEE5zZhOZPC/+wVsA/5i13r1UpfkxcDXge8BD7fmdzFYpx658zTDeF7P6J6j5zB4w3Q/BpPGS6vqH5I8HbiEwRLGd4Az2qx3SZthPNcCYww+/fZG4JxJb7wuKcs+3CWpRy7LSFKHDHdJ6pDhLkkdMtwlqUOGuyR1yHCX
pA4Z7pLUof8HI7e4iGV4ejgAAAAASUVORK5CYII=\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "min/max : 8 37\n" + ] + } + ], + "source": [ + "def plot_histo_graphs(dataset, title):\n", + " # histogram of graph sizes\n", + " graph_sizes = []\n", + " for graph in dataset:\n", + " graph_sizes.append(graph[0].number_of_nodes())\n", + " plt.figure(1)\n", + " plt.hist(graph_sizes, bins=20)\n", + " plt.title(title)\n", + " plt.show()\n", + " graph_sizes = torch.Tensor(graph_sizes)\n", + " print('min/max :',graph_sizes.min().long().item(),graph_sizes.max().long().item())\n", + " \n", + "plot_histo_graphs(dataset.train,'trainset')\n", + "plot_histo_graphs(dataset.val,'valset')\n", + "plot_histo_graphs(dataset.test,'testset')\n" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "220011\n", + "24445\n", + "5000\n", + "(DGLGraph(num_nodes=33, num_edges=72,\n", + " ndata_schemes={'feat': Scheme(shape=(), dtype=torch.int64)}\n", + " edata_schemes={'feat': Scheme(shape=(), dtype=torch.int64)}), tensor([3.0464]))\n", + "(DGLGraph(num_nodes=21, num_edges=44,\n", + " ndata_schemes={'feat': Scheme(shape=(), dtype=torch.int64)}\n", + " edata_schemes={'feat': Scheme(shape=(), dtype=torch.int64)}), tensor([2.0992]))\n", + "(DGLGraph(num_nodes=24, num_edges=52,\n", + " ndata_schemes={'feat': Scheme(shape=(), dtype=torch.int64)}\n", + " edata_schemes={'feat': Scheme(shape=(), dtype=torch.int64)}), tensor([3.1382]))\n" + ] + } + ], + "source": [ + "print(len(dataset.train))\n", + "print(len(dataset.val))\n", + "print(len(dataset.test))\n", + "\n", + "print(dataset.train[0])\n", + "print(dataset.val[0])\n", + "print(dataset.test[0])\n" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "num_atom_type = 28\n", + "num_bond_type = 4\n" + ] + }, + { + 
"cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/vijay/miniconda3/envs/benchmark_gnn/lib/python3.7/site-packages/torch/storage.py:34: FutureWarning: pickle support for Storage will be removed in 1.5. Use `torch.save` instead\n", + " warnings.warn(\"pickle support for Storage will be removed in 1.5. Use `torch.save` instead\", FutureWarning)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Time (sec): 131.04525065422058\n" + ] + } + ], + "source": [ + "start = time.time()\n", + "with open('data/molecules/ZINC-full.pkl','wb') as f:\n", + " pickle.dump([dataset.train,dataset.val,dataset.test,num_atom_type,num_bond_type],f)\n", + "print('Time (sec):',time.time() - start) #131s" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Test load function" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[I] Loading dataset ZINC-full...\n", + "train, test, val sizes : 220011 5000 24445\n", + "[I] Finished loading.\n", + "[I] Data load time: 154.6146s\n" + ] + } + ], + "source": [ + "DATASET_NAME = 'ZINC-full'\n", + "dataset = LoadData(DATASET_NAME)\n", + "trainset, valset, testset = dataset.train, dataset.val, dataset.test #154s" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n" + ] + } + ], + "source": [ + "batch_size = 10\n", + "collate = MoleculeDataset.collate\n", + "print(MoleculeDataset)\n", + "train_loader = DataLoader(trainset, batch_size=batch_size, shuffle=True, collate_fn=collate)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + 
"source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.4" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/data/script_download_all_datasets.sh b/data/script_download_all_datasets.sh index b0f6e292e..e5947ae9f 100644 --- a/data/script_download_all_datasets.sh +++ b/data/script_download_all_datasets.sh @@ -20,6 +20,14 @@ else curl https://www.dropbox.com/s/bhimk9p1xst6dvo/ZINC.pkl?dl=1 -o ZINC.pkl -J -L -k fi +FILE=ZINC-full.pkl +if test -f "$FILE"; then + echo -e "$FILE already downloaded." +else + echo -e "\ndownloading $FILE..." + curl https://www.dropbox.com/s/2m4iywux4debbvy/ZINC-full.pkl?dl=1 -o ZINC-full.pkl -J -L -k +fi + cd .. diff --git a/data/script_download_molecules.sh b/data/script_download_molecules.sh index e7a35b73c..3e49a102d 100644 --- a/data/script_download_molecules.sh +++ b/data/script_download_molecules.sh @@ -17,5 +17,10 @@ else fi - - +FILE=ZINC-full.pkl +if test -f "$FILE"; then + echo -e "$FILE already downloaded." +else + echo -e "\ndownloading $FILE..." + curl https://www.dropbox.com/s/2m4iywux4debbvy/ZINC-full.pkl?dl=1 -o ZINC-full.pkl -J -L -k +fi \ No newline at end of file diff --git a/docs/02_download_datasets.md b/docs/02_download_datasets.md index 911133217..dae2136fb 100644 --- a/docs/02_download_datasets.md +++ b/docs/02_download_datasets.md @@ -30,12 +30,13 @@ Script [script_download_superpixels.sh](../data/script_download_superpixels.sh) ## 3. 
ZINC molecular dataset ZINC size is 58.9MB. +ZINC-full size is 1.14GB. ``` # At the root of the project cd data/ bash script_download_molecules.sh ``` -Script [script_download_molecules.sh](../data/script_download_molecules.sh) is located here. Code to reproduce the ZINC dataset is [here](../data/molecules/prepare_molecules.ipynb). +Script [script_download_molecules.sh](../data/script_download_molecules.sh) is located here. Code to reproduce the ZINC dataset is [here](../data/molecules/prepare_molecules.ipynb) and the ZINC-full dataset is [here](../data/molecules/prepare_molecules_ZINC_full.ipynb).
diff --git a/scripts/ZINC-full/script_main_molecules_graph_regression_ZINC-full_100k.sh b/scripts/ZINC-full/script_main_molecules_graph_regression_ZINC-full_100k.sh new file mode 100644 index 000000000..9d7cf8684 --- /dev/null +++ b/scripts/ZINC-full/script_main_molecules_graph_regression_ZINC-full_100k.sh @@ -0,0 +1,116 @@ +#!/bin/bash + + +############ +# Usage +############ + +# bash script_main_molecules_graph_regression_ZINC-full_100k.sh + + + +############ +# GNNs +############ + +#MLP +#GCN +#GraphSage +#GatedGCN +#GAT +#MoNet +#GIN +#3WLGNN +#RingGNN + + + +############ +# ZINC-full - 4 RUNS +############ + +seed0=41 +seed1=95 +seed2=12 +seed3=35 +code=main_molecules_graph_regression.py +dataset=ZINC-full +out_dir=out/molecules_graph_regression/ZINC_full/ +tmux new -s benchmark -d +tmux send-keys "source activate benchmark_gnn" C-m +tmux send-keys " +python $code --dataset $dataset --out_dir $out_dir --gpu_id 0 --seed $seed0 --config 'configs/molecules_graph_regression_MLP_ZINC_100k.json' & +python $code --dataset $dataset --out_dir $out_dir --gpu_id 1 --seed $seed1 --config 'configs/molecules_graph_regression_MLP_ZINC_100k.json' & +python $code --dataset $dataset --out_dir $out_dir --gpu_id 2 --seed $seed2 --config 'configs/molecules_graph_regression_MLP_ZINC_100k.json' & +python $code --dataset $dataset --out_dir $out_dir --gpu_id 3 --seed $seed3 --config 'configs/molecules_graph_regression_MLP_ZINC_100k.json' & +wait" C-m +tmux send-keys " +python $code --dataset $dataset --out_dir $out_dir --gpu_id 0 --seed $seed0 --config 'configs/molecules_graph_regression_GCN_ZINC_100k.json' & +python $code --dataset $dataset --out_dir $out_dir --gpu_id 1 --seed $seed1 --config 'configs/molecules_graph_regression_GCN_ZINC_100k.json' & +python $code --dataset $dataset --out_dir $out_dir --gpu_id 2 --seed $seed2 --config 'configs/molecules_graph_regression_GCN_ZINC_100k.json' & +python $code --dataset $dataset --out_dir $out_dir --gpu_id 3 --seed $seed3 --config 
'configs/molecules_graph_regression_GCN_ZINC_100k.json' & +wait" C-m +tmux send-keys " +python $code --dataset $dataset --out_dir $out_dir --gpu_id 0 --seed $seed0 --config 'configs/molecules_graph_regression_GraphSage_ZINC_100k.json' & +python $code --dataset $dataset --out_dir $out_dir --gpu_id 1 --seed $seed1 --config 'configs/molecules_graph_regression_GraphSage_ZINC_100k.json' & +python $code --dataset $dataset --out_dir $out_dir --gpu_id 2 --seed $seed2 --config 'configs/molecules_graph_regression_GraphSage_ZINC_100k.json' & +python $code --dataset $dataset --out_dir $out_dir --gpu_id 3 --seed $seed3 --config 'configs/molecules_graph_regression_GraphSage_ZINC_100k.json' & +wait" C-m +tmux send-keys " +python $code --dataset $dataset --out_dir $out_dir --gpu_id 0 --seed $seed0 --config 'configs/molecules_graph_regression_GatedGCN_ZINC_100k.json' & +python $code --dataset $dataset --out_dir $out_dir --gpu_id 1 --seed $seed1 --config 'configs/molecules_graph_regression_GatedGCN_ZINC_100k.json' & +python $code --dataset $dataset --out_dir $out_dir --gpu_id 2 --seed $seed2 --config 'configs/molecules_graph_regression_GatedGCN_ZINC_100k.json' & +python $code --dataset $dataset --out_dir $out_dir --gpu_id 3 --seed $seed3 --config 'configs/molecules_graph_regression_GatedGCN_ZINC_100k.json' & +wait" C-m +tmux send-keys " +python $code --dataset $dataset --out_dir $out_dir --gpu_id 0 --seed $seed0 --config 'configs/molecules_graph_regression_GAT_ZINC_100k.json' & +python $code --dataset $dataset --out_dir $out_dir --gpu_id 1 --seed $seed1 --config 'configs/molecules_graph_regression_GAT_ZINC_100k.json' & +python $code --dataset $dataset --out_dir $out_dir --gpu_id 2 --seed $seed2 --config 'configs/molecules_graph_regression_GAT_ZINC_100k.json' & +python $code --dataset $dataset --out_dir $out_dir --gpu_id 3 --seed $seed3 --config 'configs/molecules_graph_regression_GAT_ZINC_100k.json' & +wait" C-m +tmux send-keys " +python $code --dataset $dataset --out_dir $out_dir 
--gpu_id 0 --seed $seed0 --config 'configs/molecules_graph_regression_MoNet_ZINC_100k.json' & +python $code --dataset $dataset --out_dir $out_dir --gpu_id 1 --seed $seed1 --config 'configs/molecules_graph_regression_MoNet_ZINC_100k.json' & +python $code --dataset $dataset --out_dir $out_dir --gpu_id 2 --seed $seed2 --config 'configs/molecules_graph_regression_MoNet_ZINC_100k.json' & +python $code --dataset $dataset --out_dir $out_dir --gpu_id 3 --seed $seed3 --config 'configs/molecules_graph_regression_MoNet_ZINC_100k.json' & +wait" C-m +tmux send-keys " +python $code --dataset $dataset --out_dir $out_dir --gpu_id 0 --seed $seed0 --config 'configs/molecules_graph_regression_GIN_ZINC_100k.json' & +python $code --dataset $dataset --out_dir $out_dir --gpu_id 1 --seed $seed1 --config 'configs/molecules_graph_regression_GIN_ZINC_100k.json' & +python $code --dataset $dataset --out_dir $out_dir --gpu_id 2 --seed $seed2 --config 'configs/molecules_graph_regression_GIN_ZINC_100k.json' & +python $code --dataset $dataset --out_dir $out_dir --gpu_id 3 --seed $seed3 --config 'configs/molecules_graph_regression_GIN_ZINC_100k.json' & +wait" C-m +tmux send-keys " +python $code --dataset $dataset --out_dir $out_dir --gpu_id 0 --seed $seed0 --config 'configs/molecules_graph_regression_3WLGNN_ZINC_100k.json' & +python $code --dataset $dataset --out_dir $out_dir --gpu_id 1 --seed $seed1 --config 'configs/molecules_graph_regression_3WLGNN_ZINC_100k.json' & +python $code --dataset $dataset --out_dir $out_dir --gpu_id 2 --seed $seed2 --config 'configs/molecules_graph_regression_3WLGNN_ZINC_100k.json' & +python $code --dataset $dataset --out_dir $out_dir --gpu_id 3 --seed $seed3 --config 'configs/molecules_graph_regression_3WLGNN_ZINC_100k.json' & +wait" C-m +tmux send-keys " +python $code --dataset $dataset --out_dir $out_dir --gpu_id 0 --seed $seed0 --config 'configs/molecules_graph_regression_RingGNN_ZINC_100k.json' & +python $code --dataset $dataset --out_dir $out_dir --gpu_id 1 
--seed $seed1 --config 'configs/molecules_graph_regression_RingGNN_ZINC_100k.json' & +python $code --dataset $dataset --out_dir $out_dir --gpu_id 2 --seed $seed2 --config 'configs/molecules_graph_regression_RingGNN_ZINC_100k.json' & +python $code --dataset $dataset --out_dir $out_dir --gpu_id 3 --seed $seed3 --config 'configs/molecules_graph_regression_RingGNN_ZINC_100k.json' & +wait" C-m +tmux send-keys " +python $code --dataset $dataset --out_dir $out_dir --gpu_id 0 --seed $seed0 --edge_feat True --config 'configs/molecules_graph_regression_GatedGCN_ZINC_100k.json' & +python $code --dataset $dataset --out_dir $out_dir --gpu_id 1 --seed $seed1 --edge_feat True --config 'configs/molecules_graph_regression_GatedGCN_ZINC_100k.json' & +python $code --dataset $dataset --out_dir $out_dir --gpu_id 2 --seed $seed2 --edge_feat True --config 'configs/molecules_graph_regression_GatedGCN_ZINC_100k.json' & +python $code --dataset $dataset --out_dir $out_dir --gpu_id 3 --seed $seed3 --edge_feat True --config 'configs/molecules_graph_regression_GatedGCN_ZINC_100k.json' & +wait" C-m +tmux send-keys " +python $code --dataset $dataset --out_dir $out_dir --gpu_id 0 --seed $seed0 --edge_feat True --config 'configs/molecules_graph_regression_3WLGNN_ZINC_100k.json' & +python $code --dataset $dataset --out_dir $out_dir --gpu_id 1 --seed $seed1 --edge_feat True --config 'configs/molecules_graph_regression_3WLGNN_ZINC_100k.json' & +python $code --dataset $dataset --out_dir $out_dir --gpu_id 2 --seed $seed2 --edge_feat True --config 'configs/molecules_graph_regression_3WLGNN_ZINC_100k.json' & +python $code --dataset $dataset --out_dir $out_dir --gpu_id 3 --seed $seed3 --edge_feat True --config 'configs/molecules_graph_regression_3WLGNN_ZINC_100k.json' & +wait" C-m +tmux send-keys " +python $code --dataset $dataset --out_dir $out_dir --gpu_id 0 --seed $seed0 --edge_feat True --config 'configs/molecules_graph_regression_RingGNN_ZINC_100k.json' & +python $code --dataset $dataset --out_dir 
$out_dir --gpu_id 1 --seed $seed1 --edge_feat True --config 'configs/molecules_graph_regression_RingGNN_ZINC_100k.json' & +python $code --dataset $dataset --out_dir $out_dir --gpu_id 2 --seed $seed2 --edge_feat True --config 'configs/molecules_graph_regression_RingGNN_ZINC_100k.json' & +python $code --dataset $dataset --out_dir $out_dir --gpu_id 3 --seed $seed3 --edge_feat True --config 'configs/molecules_graph_regression_RingGNN_ZINC_100k.json' & +wait" C-m +tmux send-keys "tmux kill-session -t benchmark" C-m + + + diff --git a/scripts/ZINC-full/script_main_molecules_graph_regression_ZINC-full_500k.sh b/scripts/ZINC-full/script_main_molecules_graph_regression_ZINC-full_500k.sh new file mode 100644 index 000000000..7532cae48 --- /dev/null +++ b/scripts/ZINC-full/script_main_molecules_graph_regression_ZINC-full_500k.sh @@ -0,0 +1,119 @@ +#!/bin/bash + + +############ +# Usage +############ + +# bash script_main_molecules_graph_regression_ZINC-full_500k.sh + + + +############ +# GNNs +############ + +#MLP +#GCN +#GraphSage +#GatedGCN +#GAT +#MoNet +#GIN +#3WLGNN +#RingGNN + + + +############ +# ZINC-full - 4 RUNS +############ + +seed0=41 +seed1=95 +seed2=12 +seed3=35 +code=main_molecules_graph_regression.py +dataset=ZINC-full +out_dir=out/molecules_graph_regression/ZINC_full/ +tmux new -s benchmark -d +tmux send-keys "source activate benchmark_gnn" C-m +tmux send-keys " +python $code --dataset $dataset --out_dir $out_dir --gpu_id 0 --seed $seed0 --config 'configs/molecules_graph_regression_GCN_ZINC_500k.json' & +python $code --dataset $dataset --out_dir $out_dir --gpu_id 1 --seed $seed1 --config 'configs/molecules_graph_regression_GCN_ZINC_500k.json' & +python $code --dataset $dataset --out_dir $out_dir --gpu_id 2 --seed $seed2 --config 'configs/molecules_graph_regression_GCN_ZINC_500k.json' & +python $code --dataset $dataset --out_dir $out_dir --gpu_id 3 --seed $seed3 --config 'configs/molecules_graph_regression_GCN_ZINC_500k.json' & +wait" C-m +tmux send-keys " 
+python $code --dataset $dataset --out_dir $out_dir --gpu_id 0 --seed $seed0 --config 'configs/molecules_graph_regression_GraphSage_ZINC_500k.json' & +python $code --dataset $dataset --out_dir $out_dir --gpu_id 1 --seed $seed1 --config 'configs/molecules_graph_regression_GraphSage_ZINC_500k.json' & +python $code --dataset $dataset --out_dir $out_dir --gpu_id 2 --seed $seed2 --config 'configs/molecules_graph_regression_GraphSage_ZINC_500k.json' & +python $code --dataset $dataset --out_dir $out_dir --gpu_id 3 --seed $seed3 --config 'configs/molecules_graph_regression_GraphSage_ZINC_500k.json' & +wait" C-m +tmux send-keys " +python $code --dataset $dataset --out_dir $out_dir --gpu_id 0 --seed $seed0 --config 'configs/molecules_graph_regression_GatedGCN_ZINC_500k.json' & +python $code --dataset $dataset --out_dir $out_dir --gpu_id 1 --seed $seed1 --config 'configs/molecules_graph_regression_GatedGCN_ZINC_500k.json' & +python $code --dataset $dataset --out_dir $out_dir --gpu_id 2 --seed $seed2 --config 'configs/molecules_graph_regression_GatedGCN_ZINC_500k.json' & +python $code --dataset $dataset --out_dir $out_dir --gpu_id 3 --seed $seed3 --config 'configs/molecules_graph_regression_GatedGCN_ZINC_500k.json' & +wait" C-m +tmux send-keys " +python $code --dataset $dataset --out_dir $out_dir --gpu_id 0 --seed $seed0 --config 'configs/molecules_graph_regression_GAT_ZINC_500k.json' & +python $code --dataset $dataset --out_dir $out_dir --gpu_id 1 --seed $seed1 --config 'configs/molecules_graph_regression_GAT_ZINC_500k.json' & +python $code --dataset $dataset --out_dir $out_dir --gpu_id 2 --seed $seed2 --config 'configs/molecules_graph_regression_GAT_ZINC_500k.json' & +python $code --dataset $dataset --out_dir $out_dir --gpu_id 3 --seed $seed3 --config 'configs/molecules_graph_regression_GAT_ZINC_500k.json' & +wait" C-m +tmux send-keys " +python $code --dataset $dataset --out_dir $out_dir --gpu_id 0 --seed $seed0 --config 
'configs/molecules_graph_regression_MoNet_ZINC_500k.json' & +python $code --dataset $dataset --out_dir $out_dir --gpu_id 1 --seed $seed1 --config 'configs/molecules_graph_regression_MoNet_ZINC_500k.json' & +python $code --dataset $dataset --out_dir $out_dir --gpu_id 2 --seed $seed2 --config 'configs/molecules_graph_regression_MoNet_ZINC_500k.json' & +python $code --dataset $dataset --out_dir $out_dir --gpu_id 3 --seed $seed3 --config 'configs/molecules_graph_regression_MoNet_ZINC_500k.json' & +wait" C-m +tmux send-keys " +python $code --dataset $dataset --out_dir $out_dir --gpu_id 0 --seed $seed0 --config 'configs/molecules_graph_regression_GIN_ZINC_500k.json' & +python $code --dataset $dataset --out_dir $out_dir --gpu_id 1 --seed $seed1 --config 'configs/molecules_graph_regression_GIN_ZINC_500k.json' & +python $code --dataset $dataset --out_dir $out_dir --gpu_id 2 --seed $seed2 --config 'configs/molecules_graph_regression_GIN_ZINC_500k.json' & +python $code --dataset $dataset --out_dir $out_dir --gpu_id 3 --seed $seed3 --config 'configs/molecules_graph_regression_GIN_ZINC_500k.json' & +wait" C-m +tmux send-keys " +python $code --dataset $dataset --out_dir $out_dir --gpu_id 0 --seed $seed0 --config 'configs/molecules_graph_regression_3WLGNN_ZINC_500k.json' & +python $code --dataset $dataset --out_dir $out_dir --gpu_id 1 --seed $seed1 --config 'configs/molecules_graph_regression_3WLGNN_ZINC_500k.json' & +python $code --dataset $dataset --out_dir $out_dir --gpu_id 2 --seed $seed2 --config 'configs/molecules_graph_regression_3WLGNN_ZINC_500k.json' & +python $code --dataset $dataset --out_dir $out_dir --gpu_id 3 --seed $seed3 --config 'configs/molecules_graph_regression_3WLGNN_ZINC_500k.json' & +wait" C-m +tmux send-keys " +python $code --dataset $dataset --out_dir $out_dir --gpu_id 0 --seed $seed0 --config 'configs/molecules_graph_regression_RingGNN_ZINC_500k.json' & +python $code --dataset $dataset --out_dir $out_dir --gpu_id 1 --seed $seed1 --config 
'configs/molecules_graph_regression_RingGNN_ZINC_500k.json' & +python $code --dataset $dataset --out_dir $out_dir --gpu_id 2 --seed $seed2 --config 'configs/molecules_graph_regression_RingGNN_ZINC_500k.json' & +python $code --dataset $dataset --out_dir $out_dir --gpu_id 3 --seed $seed3 --config 'configs/molecules_graph_regression_RingGNN_ZINC_500k.json' & +wait" C-m +tmux send-keys " +python $code --dataset $dataset --out_dir $out_dir --gpu_id 0 --seed $seed0 --edge_feat True --config 'configs/molecules_graph_regression_GatedGCN_ZINC_500k.json' & +python $code --dataset $dataset --out_dir $out_dir --gpu_id 1 --seed $seed1 --edge_feat True --config 'configs/molecules_graph_regression_GatedGCN_ZINC_500k.json' & +python $code --dataset $dataset --out_dir $out_dir --gpu_id 2 --seed $seed2 --edge_feat True --config 'configs/molecules_graph_regression_GatedGCN_ZINC_500k.json' & +python $code --dataset $dataset --out_dir $out_dir --gpu_id 3 --seed $seed3 --edge_feat True --config 'configs/molecules_graph_regression_GatedGCN_ZINC_500k.json' & +wait" C-m +tmux send-keys " +python $code --dataset $dataset --out_dir $out_dir --gpu_id 0 --seed $seed0 --edge_feat True --config 'configs/molecules_graph_regression_3WLGNN_ZINC_500k.json' & +python $code --dataset $dataset --out_dir $out_dir --gpu_id 1 --seed $seed1 --edge_feat True --config 'configs/molecules_graph_regression_3WLGNN_ZINC_500k.json' & +python $code --dataset $dataset --out_dir $out_dir --gpu_id 2 --seed $seed2 --edge_feat True --config 'configs/molecules_graph_regression_3WLGNN_ZINC_500k.json' & +python $code --dataset $dataset --out_dir $out_dir --gpu_id 3 --seed $seed3 --edge_feat True --config 'configs/molecules_graph_regression_3WLGNN_ZINC_500k.json' & +wait" C-m +tmux send-keys " +python $code --dataset $dataset --out_dir $out_dir --gpu_id 0 --seed $seed0 --edge_feat True --config 'configs/molecules_graph_regression_RingGNN_ZINC_500k.json' & +python $code --dataset $dataset --out_dir $out_dir --gpu_id 1 --seed 
$seed1 --edge_feat True --config 'configs/molecules_graph_regression_RingGNN_ZINC_500k.json' & +python $code --dataset $dataset --out_dir $out_dir --gpu_id 2 --seed $seed2 --edge_feat True --config 'configs/molecules_graph_regression_RingGNN_ZINC_500k.json' & +python $code --dataset $dataset --out_dir $out_dir --gpu_id 3 --seed $seed3 --edge_feat True --config 'configs/molecules_graph_regression_RingGNN_ZINC_500k.json' & +wait" C-m +tmux send-keys " +python $code --dataset $dataset --out_dir $out_dir --gpu_id 0 --seed $seed0 --edge_feat True --init_lr 1e-4 --config 'configs/molecules_graph_regression_3WLGNN_ZINC_L8_500k.json' & +python $code --dataset $dataset --out_dir $out_dir --gpu_id 1 --seed $seed1 --edge_feat True --init_lr 1e-4 --config 'configs/molecules_graph_regression_3WLGNN_ZINC_L8_500k.json' & +python $code --dataset $dataset --out_dir $out_dir --gpu_id 2 --seed $seed2 --edge_feat True --init_lr 1e-4 --config 'configs/molecules_graph_regression_3WLGNN_ZINC_L8_500k.json' & +python $code --dataset $dataset --out_dir $out_dir --gpu_id 3 --seed $seed3 --edge_feat True --init_lr 1e-4 --config 'configs/molecules_graph_regression_3WLGNN_ZINC_L8_500k.json' & +wait" C-m +tmux send-keys " +python $code --dataset $dataset --out_dir $out_dir --gpu_id 0 --seed $seed0 --edge_feat True --init_lr 1e-5 --config 'configs/molecules_graph_regression_RingGNN_ZINC_L8_500k.json' & +python $code --dataset $dataset --out_dir $out_dir --gpu_id 1 --seed $seed1 --edge_feat True --init_lr 1e-5 --config 'configs/molecules_graph_regression_RingGNN_ZINC_L8_500k.json' & +python $code --dataset $dataset --out_dir $out_dir --gpu_id 2 --seed $seed2 --edge_feat True --init_lr 1e-5 --config 'configs/molecules_graph_regression_RingGNN_ZINC_L8_500k.json' & +python $code --dataset $dataset --out_dir $out_dir --gpu_id 3 --seed $seed3 --edge_feat True --init_lr 1e-5 --config 'configs/molecules_graph_regression_RingGNN_ZINC_L8_500k.json' & +wait" C-m +tmux send-keys "tmux kill-session -t 
benchmark" C-m diff --git a/scripts/ZINC-full/script_main_molecules_graph_regression_ZINC-full_PE_GatedGCN_500k.sh b/scripts/ZINC-full/script_main_molecules_graph_regression_ZINC-full_PE_GatedGCN_500k.sh new file mode 100644 index 000000000..38ff30276 --- /dev/null +++ b/scripts/ZINC-full/script_main_molecules_graph_regression_ZINC-full_PE_GatedGCN_500k.sh @@ -0,0 +1,47 @@ +#!/bin/bash + + +############ +# Usage +############ + +# bash script_main_molecules_graph_regression_ZINC-full_PE_GatedGCN_500k.sh + + + +############ +# GNNs +############ + +#MLP +#GCN +#GraphSage +#GatedGCN +#GAT +#MoNet +#GIN +#3WLGNN +#RingGNN + + + +############ +# ZINC-full - 4 RUNS +############ + +seed0=41 +seed1=95 +seed2=12 +seed3=35 +code=main_molecules_graph_regression.py +dataset=ZINC-full +out_dir=out/molecules_graph_regression/ZINC_full/ +tmux new -s benchmark -d +tmux send-keys "source activate benchmark_gnn" C-m +tmux send-keys " +python $code --dataset $dataset --out_dir $out_dir --gpu_id 0 --seed $seed0 --edge_feat True --config 'configs/molecules_graph_regression_GatedGCN_ZINC_PE_500k.json' & +python $code --dataset $dataset --out_dir $out_dir --gpu_id 1 --seed $seed1 --edge_feat True --config 'configs/molecules_graph_regression_GatedGCN_ZINC_PE_500k.json' & +python $code --dataset $dataset --out_dir $out_dir --gpu_id 2 --seed $seed2 --edge_feat True --config 'configs/molecules_graph_regression_GatedGCN_ZINC_PE_500k.json' & +python $code --dataset $dataset --out_dir $out_dir --gpu_id 3 --seed $seed3 --edge_feat True --config 'configs/molecules_graph_regression_GatedGCN_ZINC_PE_500k.json' & +wait" C-m +tmux send-keys "tmux kill-session -t benchmark" C-m