Skip to content

Commit

Permalink
Merge branch 'v1.0.0' into main
Browse files Browse the repository at this point in the history
  • Loading branch information
laserkelvin committed Aug 31, 2023
2 parents ad85d3f + 02aa1d1 commit e3e8c8b
Show file tree
Hide file tree
Showing 268 changed files with 186,479 additions and 5,279 deletions.
9 changes: 8 additions & 1 deletion MANIFEST.in
Original file line number Diff line number Diff line change
@@ -1,2 +1,9 @@

recursive-include ocpmodels/datasets/dev-min *
recursive-include matsciml/datasets/dev-s2ef-dgl *
recursive-include matsciml/datasets/dev-is2re-dgl *
recursive-include matsciml/datasets/materials_project/devset *
recursive-include matsciml/datasets/lips/devset *
recursive-include matsciml/datasets/carolina_db/devset *
recursive-include matsciml/datasets/nomad/devset *
recursive-include matsciml/datasets/oqmd/devset *
recursive-include matsciml/datasets/symmetry/devset *
327 changes: 226 additions & 101 deletions README.md

Large diffs are not rendered by default.

46 changes: 23 additions & 23 deletions docker/Dockerfile.cobalt_dl → docker/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,9 @@
# SPDX-License-Identifier: MIT License

# CUDA version from nvidia docker: https://hub.docker.com/r/nvidia/cuda/tags
ARG CUDA_VERSION=11.3.0-cudnn8-runtime-ubuntu20.04
ARG CUDA_VERSION=11.8.0-cudnn8-runtime-ubuntu20.04
FROM nvidia/cuda:$CUDA_VERSION

RUN rm /etc/apt/sources.list.d/cuda.list
RUN rm /etc/apt/sources.list.d/nvidia-ml.list

ENV LANG=C.UTF-8 LC_ALL=C.UTF-8
# Avoids some interactive prompts during apt-get install
ARG DEBIAN_FRONTEND=noninteractive
Expand All @@ -17,7 +14,6 @@ RUN apt-get update && \
apt-get autoremove -y && \
apt-get clean && rm -rf /var/cache/apt/archives /var/lib/apt/lists/*


# Install system tools
RUN apt-get update --fix-missing && \
apt-get install -yq --no-install-recommends \
Expand All @@ -32,20 +28,21 @@ RUN apt-get update --fix-missing && \
openssh-client && \
apt-get clean && rm -rf /var/cache/apt/archives /var/lib/apt/lists/*



# install system tools - round 2
RUN apt-get update --fix-missing && \
apt-get install -yq --no-install-recommends \
apt-utils \
python3-pip \
python3-dev \
sudo \
software-properties-common \
python3.9 \
python3-pip \
virtualenv && \
apt-get clean && rm -rf /var/cache/apt/archives /var/lib/apt/lists/*

RUN apt-get update \
&& apt-get install -y wget tmux vim libxrender-dev libsm6 libxext6 && \
apt-get clean && rm -rf /var/cache/apt/archives /var/lib/apt/lists/*

# Create py3 virtualenv in order to use python3 for the project
RUN virtualenv -p python3 /venv
RUN virtualenv -p python3.9 /venv
ENV PATH=/venv/bin:$PATH
RUN pip install --upgrade pip
RUN pip install -q \
Expand All @@ -54,14 +51,17 @@ RUN pip install -q \
matplotlib \
scikit-learn

COPY requirements_clean.txt .
RUN pip install -r requirements_clean.txt


RUN apt-get update \
&& apt-get install -y wget tmux vim libxrender-dev libsm6 libxext6 && \
apt-get clean && rm -rf /var/cache/apt/archives /var/lib/apt/lists/*




# Python dependencies, installed with pip one layer at a time.
# NOTE(review): presumably these land in the /venv virtualenv put on PATH
# earlier in this Dockerfile — confirm against the full file.

# general scientific / utility packages (unpinned)
RUN pip install numba lmdb munch geometric_algebra_attention jsonargparse[signatures] sympy pymatgen
# torch is pinned to the 2.0.0 CUDA 11.8 build and fetched from the PyTorch cu118 wheel index
RUN pip install torch==2.0.0+cu118 --index-url https://download.pytorch.org/whl/cu118
# DGL is pinned and resolved against the cu118 wheel listing; dglgo uses the wheels-test listing
RUN pip install dgl==0.9.1 -f https://data.dgl.ai/wheels/cu118/repo.html
RUN pip install dglgo -f https://data.dgl.ai/wheels-test/repo.html
RUN pip install dgllife==0.3.2 rdkit==2023.3.1 rowan
# torch-scatter / torch-sparse are looked up in the PyG wheel listing for torch 2.0.0+cu118
RUN pip install torch-scatter -f https://data.pyg.org/whl/torch-2.0.0+cu118.html
RUN pip install torch-sparse -f https://data.pyg.org/whl/torch-2.0.0+cu118.html
RUN pip install torch-geometric
RUN pip install smact
RUN pip install matminer
RUN pip install p_tqdm
# -U upgrades (or downgrades) whatever version an earlier layer pulled in to the pinned one
RUN pip install -U pytorch-lightning==1.8.6
RUN pip install -U torchmetrics==0.11.4
RUN pip install -U pytest
24 changes: 0 additions & 24 deletions docker/requirements_clean.txt

This file was deleted.

Binary file added docs/MatSci-ML-Benchmark-Table.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
68 changes: 68 additions & 0 deletions examples/datasets/carolina_db/single_task_devset.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
import pytorch_lightning as pl
from torch.nn import LayerNorm, SiLU

from matsciml.lightning.data_utils import MatSciMLDataModule
from matsciml.models import PLEGNNBackbone
from matsciml.models.base import ScalarRegressionTask
from matsciml.datasets.transforms import PointCloudToGraphTransform


# keyword arguments forwarded verbatim to PLEGNNBackbone
model_args = dict(
    embed_in_dim=128,
    embed_hidden_dim=32,
    embed_out_dim=128,
    embed_depth=5,
    embed_feat_dims=[128, 128, 128],
    embed_message_dims=[128, 128, 128],
    embed_position_dims=[64, 64],
    embed_edge_attributes_dim=0,
    embed_activation="relu",
    embed_residual=True,
    embed_normalize=True,
    embed_tanh=True,
    embed_activate_last=False,
    embed_k_linears=1,
    embed_use_attention=False,
    embed_attention_norm="sigmoid",
    readout="sum",
    node_projection_depth=3,
    node_projection_hidden_dim=128,
    node_projection_activation="relu",
    prediction_out_dim=1,
    prediction_depth=3,
    prediction_hidden_dim=128,
    prediction_activation="relu",
    encoder_only=True,
)

# build the encoder, then wrap it in a scalar regression task on "energy"
model = PLEGNNBackbone(**model_args)
output_head_kwargs = {
    "norm": LayerNorm(128),
    "hidden_dim": 128,
    "activation": SiLU,
    "lazy": False,
    "input_dim": 128,
}
task = ScalarRegressionTask(
    model,
    output_kwargs=output_head_kwargs,
    lr=1e-3,
    task_keys=["energy"],
)

# load "CMDataset" through the devset constructor; each sample is passed
# through a point cloud -> DGL graph transform
graph_transform = PointCloudToGraphTransform(
    "dgl",
    cutoff_dist=20.0,
    node_keys=["pos", "atomic_numbers"],
)
dm = MatSciMLDataModule.from_devset(
    "CMDataset",
    dset_kwargs={"transforms": [graph_transform]},
)

# quick smoke run: fast_dev_run=10, with checkpointing and logging switched off
trainer_kwargs = {
    "fast_dev_run": 10,
    "enable_checkpointing": False,
    "logger": False,
}
trainer = pl.Trainer(**trainer_kwargs)

trainer.fit(task, datamodule=dm)
36 changes: 36 additions & 0 deletions examples/datasets/materials_project/single_task_base.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
import pytorch_lightning as pl
from torch.nn import LayerNorm, SiLU

from matsciml.lightning.data_utils import MatSciMLDataModule
from matsciml.datasets.transforms import PointCloudToGraphTransform
from matsciml.models import GraphConvModel
from matsciml.models.base import ScalarRegressionTask

# fix the global seed before anything is constructed
pl.seed_everything(21616)


# encoder plus a scalar regression task that targets "band_gap"
model = GraphConvModel(100, 1, encoder_only=True)
output_head_kwargs = {
    "norm": LayerNorm(128),
    "hidden_dim": 128,
    "activation": SiLU,
    "lazy": False,
    "input_dim": 1,
}
task = ScalarRegressionTask(
    model,
    output_kwargs=output_head_kwargs,
    lr=1e-3,
    task_keys=["band_gap"],
)


# datamodule reading the devset path below, with val_split=0.2;
# samples go through a point cloud -> DGL graph transform
graph_transform = PointCloudToGraphTransform("dgl", cutoff_dist=20.0)
dm = MatSciMLDataModule(
    "MaterialsProjectDataset",
    train_path="./matsciml/datasets/materials_project/devset",
    dset_kwargs={"transforms": [graph_transform]},
    val_split=0.2,
)

# train for at most 10 epochs without writing checkpoints
trainer = pl.Trainer(
    max_epochs=10,
    enable_checkpointing=False,
)

trainer.fit(task, datamodule=dm)
22 changes: 22 additions & 0 deletions examples/datasets/materials_project/single_task_devset.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
import pytorch_lightning as pl

from matsciml.lightning.data_utils import MatSciMLDataModule
from matsciml.models import GraphConvModel
from matsciml.models.base import ScalarRegressionTask
from matsciml.datasets.transforms import PointCloudToGraphTransform


# minimal encoder and a scalar regression task on "band_gap" (task defaults otherwise)
model = GraphConvModel(100, 1, encoder_only=True)
task = ScalarRegressionTask(
    model,
    task_keys=["band_gap"],
)

# load "MaterialsProjectDataset" through the devset constructor; samples go
# through a point cloud -> DGL graph transform
graph_transform = PointCloudToGraphTransform("dgl", cutoff_dist=20.0)
dm = MatSciMLDataModule.from_devset(
    "MaterialsProjectDataset",
    dset_kwargs={"transforms": [graph_transform]},
)

# quick smoke run: fast_dev_run=10, with checkpointing and logging switched off
trainer_kwargs = {
    "fast_dev_run": 10,
    "enable_checkpointing": False,
    "logger": False,
}
trainer = pl.Trainer(**trainer_kwargs)

trainer.fit(task, datamodule=dm)
74 changes: 74 additions & 0 deletions examples/datasets/materials_project/single_task_egnn.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
import pytorch_lightning as pl
from torch.nn import LayerNorm, SiLU

from matsciml.lightning.data_utils import MatSciMLDataModule
from matsciml.datasets.transforms import PointCloudToGraphTransform
from matsciml.models import PLEGNNBackbone
from matsciml.models.base import ScalarRegressionTask, BinaryClassificationTask

# fix the global seed before anything is constructed
pl.seed_everything(21616)

# keyword arguments forwarded verbatim to PLEGNNBackbone
model_args = dict(
    embed_in_dim=128,
    embed_hidden_dim=32,
    embed_out_dim=128,
    embed_depth=5,
    embed_feat_dims=[128, 128, 128],
    embed_message_dims=[128, 128, 128],
    embed_position_dims=[64, 64],
    embed_edge_attributes_dim=0,
    embed_activation="relu",
    embed_residual=True,
    embed_normalize=True,
    embed_tanh=True,
    embed_activate_last=False,
    embed_k_linears=1,
    embed_use_attention=False,
    embed_attention_norm="sigmoid",
    readout="sum",
    node_projection_depth=3,
    node_projection_hidden_dim=128,
    node_projection_activation="relu",
    prediction_out_dim=1,
    prediction_depth=3,
    prediction_hidden_dim=128,
    prediction_activation="relu",
    encoder_only=True,
)

# build the encoder, then wrap it in a scalar regression task on "band_gap"
model = PLEGNNBackbone(**model_args)
output_head_kwargs = {
    "norm": LayerNorm(128),
    "hidden_dim": 128,
    "activation": SiLU,
    "lazy": False,
    "input_dim": 128,
}
task = ScalarRegressionTask(
    model,
    output_kwargs=output_head_kwargs,
    lr=1e-3,
    task_keys=["band_gap"],
)

# datamodule reading the devset path below: val_split=0.2, batch_size=16,
# num_workers=0; samples go through a point cloud -> DGL graph transform
graph_transform = PointCloudToGraphTransform(
    "dgl",
    cutoff_dist=20.0,
    node_keys=["pos", "atomic_numbers"],
)
dm = MatSciMLDataModule(
    dataset="MaterialsProjectDataset",
    train_path="./matsciml/datasets/materials_project/devset",
    dset_kwargs={"transforms": [graph_transform]},
    val_split=0.2,
    batch_size=16,
    num_workers=0,
)

# single-device CPU run with fast_dev_run=100
trainer_kwargs = {
    "fast_dev_run": 100,
    "accelerator": "cpu",
    "devices": 1,
}
trainer = pl.Trainer(**trainer_kwargs)

trainer.fit(task, datamodule=dm)
62 changes: 62 additions & 0 deletions examples/datasets/materials_project/single_task_gala.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
import pytorch_lightning as pl
from torch.nn import LayerNorm, SiLU

from matsciml.lightning.data_utils import MatSciMLDataModule
from matsciml.models import GalaPotential
from matsciml.models.base import ScalarRegressionTask


# keyword arguments handed to GalaPotential via ``encoder_kwargs`` below
model_args = {
    "D_in": 100,
    "hidden_dim": 128,
    "merge_fun": "concat",
    "join_fun": "concat",
    "invariant_mode": "full",
    "covariant_mode": "full",
    "include_normalized_products": True,
    "invar_value_normalization": "momentum",
    "eqvar_value_normalization": "momentum_layer",
    "value_normalization": "layer",
    "score_normalization": "layer",
    "block_normalization": "layer",
    "equivariant_attention": False,
    "tied_attention": True,
    "encoder_only": True,
}

# target normalization statistics, named "<target>_mean" / "<target>_std"
# NOTE(review): these are keyed for formation_energy_per_atom while the task
# below targets "band_gap" — confirm whether they are meant to apply here.
mp_norms = {
    "formation_energy_per_atom_mean": -1.454,
    "formation_energy_per_atom_std": 1.206,
}

# The encoder is specified through encoder_class/encoder_kwargs. The statistics
# are passed by keyword: given positionally they would occupy the first
# argument, which the other example scripts use for the encoder model.
task = ScalarRegressionTask(
    encoder_class=GalaPotential,
    encoder_kwargs=model_args,
    output_kwargs={
        "norm": LayerNorm(128),
        "hidden_dim": 128,
        "activation": SiLU,
        "lazy": False,
        "input_dim": 128,
    },
    lr=1e-4,
    task_keys=["band_gap"],
    normalize_kwargs=mp_norms,
)


# datamodule reading the devset path below: val_split=0.2, batch_size=16,
# num_workers=0; no transforms are configured
dm = MatSciMLDataModule(
    dataset="MaterialsProjectDataset",
    train_path="./matsciml/datasets/materials_project/devset",
    val_split=0.2,
    batch_size=16,
    num_workers=0,
)

# short CPU run: two epochs, capped at two train and two validation batches each
trainer = pl.Trainer(
    limit_train_batches=2,
    limit_val_batches=2,
    max_epochs=2,
    accelerator="cpu",
)
trainer.fit(task, datamodule=dm)
Loading

0 comments on commit e3e8c8b

Please sign in to comment.