Skip to content

Commit

Permalink
feat: start working on docker (#5)
Browse files Browse the repository at this point in the history
* feat: start working on docker

* fix: paths git clone certs etc.

* fix: docker base img

* ci: add docker workflow

* fix: path

* refactor: put pipeline in separate script

* ci: run script

* ci: dont run pipeline in build

* ci: use quay

* ci: entry point script

* ci: entrypoint

* fix: forgot shebang

* ci: cleanup

* ci: use secrets
  • Loading branch information
fiskrt authored Jul 1, 2024
1 parent a6123ed commit d6d5a27
Show file tree
Hide file tree
Showing 3 changed files with 96 additions and 70 deletions.
78 changes: 8 additions & 70 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,77 +6,15 @@ on:
# NOTE(review): this is the rendered diff of .github/workflows/ci.yml with all
# indentation stripped and removed/added lines interleaved, so the text below
# is not loadable YAML as shown. The page header reports "8 additions & 70
# deletions"; the conda-based steps below appear to be on the removed side.
jobs:
build:
runs-on: ubuntu-latest

# Default shell for every `run:` step: bash with -e (exit on first error) and
# -l (login shell).
defaults:
run:
shell: bash -el {0}

steps:
- uses: actions/checkout@v4

# Clone gt4sd-core next to the checked-out repository; its
# conda_cpu_linux.yml is used as the environment file below.
- name: Clone GT4SD
run: |
git clone https://github.com/GT4SD/gt4sd-core.git
# Create and activate the `gt4sd` conda environment from the cloned repo.
- uses: conda-incubator/setup-miniconda@v3
with:
activate-environment: gt4sd
environment-file: gt4sd-core/conda_cpu_linux.yml
# NOTE(review): 3.8 is unquoted, so YAML reads it as a float; quoting
# version numbers ('3.8') avoids surprises such as 3.10 becoming 3.1.
python-version: 3.8
auto-activate-base: false
use-only-tar-bz2: true

# Install gt4sd-core without its dependencies, then reinstall toxsmi via pip.
- name: Install dependencies
run: |
conda activate gt4sd
cd gt4sd-core
pip install --no-deps .
pip uninstall --yes toxsmi && pip install toxsmi
# Run scripts/load_data.py for UniProt ID P05067, writing into data/.
- name: Load datasets
run: |
conda activate gt4sd
mkdir data
python scripts/load_data.py \
--uniprot P05067 \
--affinity_type IC50 \
--affinity_cutoff 10000 \
--output_dir data/ \
--train_size 0.8 \
--binary_labels
# Train the toxsmi model on the files written to data/ by the previous step;
# the model is stored under models/ with the name toxsmi_model.
- name: Train toxsmi model
run: |
conda activate gt4sd
python scripts/train_toxsmi.py \
--train data/train.csv \
--test data/valid.csv \
--smi data/mols.smi \
--language tokenizer \
--params config/toxsmi_conf.json \
--model models \
--name toxsmi_model
# Fetch the repository so the Dockerfile is available as the build context.
# Pinned to v4 to match the checkout version this workflow already used.
- name: Checkout code
  uses: actions/checkout@v4

# NOTE(review): appears to be on the removed side of the diff (indentation
# lost); the same commands are present in example_pipeline.sh.
# Takes the first 5 lines of data/mols.smi as seeds and runs the MoLeR
# generation script with the trained toxsmi checkpoint as predictor.
- name: Generate molecules with MoLeR
run: |
conda activate gt4sd
head -n 5 data/mols.smi > data/good_docks.smi
python scripts/moler_generate_toxsmi.py \
--smi_path data/good_docks.smi \
--param_path config/moler_conf.json \
--output_path data/moler_filtered \
--predictor_path models/toxsmi_model/weights/best_F1.pt
# Install Docker Buildx on the runner before the image build step.
# Uses the current major version of the action instead of the outdated v1.
- name: Set up Docker Buildx
  uses: docker/setup-buildx-action@v3

# NOTE(review): appears to be on the removed side of the diff (indentation
# lost); the same commands are present in example_pipeline.sh.
# Prepares the MoLeR output for the Regression Transformer, keeps the first
# 10 lines of the prepared file, and runs rt_generate.py writing to data/rt.
- name: Generate more diverse molecules with Regression Transformer
run: |
conda activate gt4sd
python scripts/prepare_rt_data.py \
--smi_path data/moler_filtered/generated.csv \
--output_path data/moler_filtered/generated_qed.csv
head -n 10 data/moler_filtered/generated_qed.csv > data/moler_filtered/generated_qed_selected.csv
python scripts/rt_generate.py \
--smi_path data/moler_filtered/generated_qed_selected.csv \
--param_path config/rt_conf.json \
--output_path data/rt
# Build the image from the Dockerfile at the repository root.
- name: Build Docker image
  run: docker build -t my-image:latest .

# Run the container with the RXN credentials.
# The secrets are exposed to the step through `env:` and forwarded by name
# (`-e VAR` with no value copies VAR from the calling environment), so their
# values are never spliced into the shell command line where whitespace or
# shell metacharacters in a secret could break or alter the command.
- name: Run Docker container
  env:
    API_KEY: ${{ secrets.RXN_API_KEY }}
    PROJ_ID: ${{ secrets.RXN_PROJ_ID }}
  run: docker run -e API_KEY -e PROJ_ID my-image:latest
28 changes: 28 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# Image that runs the molecular-design example pipeline on top of the
# pre-built gt4sd CPU environment.
#
# Build:  docker build -t my-image:latest .
# Run:    docker run -e API_KEY=... -e PROJ_ID=... my-image:latest

# Base image containing the installed gt4sd environment
#FROM drugilsberg/gt4sd-base:v1.4.2-cpu
FROM quay.io/gt4sd/gt4sd-base:v1.4.2-cpu

# Branch of GT4SD/molecular-design baked into the image. Defaults to the
# branch this Dockerfile was introduced on; override once it is merged/deleted:
#   docker build --build-arg MOLECULAR_DESIGN_BRANCH=main .
ARG MOLECULAR_DESIGN_BRANCH=add-docker

# git and CA certificates are required for the HTTPS clone below.
# The apt package lists are removed in the same layer to keep the image small.
RUN apt-get update && \
    apt-get install -y --no-install-recommends git ca-certificates && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/*

# Pin the clone location explicitly instead of relying on whatever working
# directory the base image happens to define; the WORKDIR after the clone
# must point at the directory git creates.
WORKDIR /workspace
RUN git clone --branch "${MOLECULAR_DESIGN_BRANCH}" https://github.com/GT4SD/molecular-design.git
WORKDIR /workspace/molecular-design

# hack: We need to use the pypi toxsmi package, not the default one
RUN pip uninstall --yes toxsmi && \
    pip install --no-cache-dir toxsmi && \
    mkdir -p data

# hack: should be done in base gt4sd
# Reinstall the PyTorch Geometric stack from the wheel index that matches
# torch 1.12.0 (CPU).
RUN pip uninstall --yes torch-scatter torch-sparse torch-cluster torch-geometric && \
    pip install --no-cache-dir --no-index torch-scatter -f https://pytorch-geometric.com/whl/torch-1.12.0+cpu.html && \
    pip install --no-cache-dir --no-index torch-sparse -f https://pytorch-geometric.com/whl/torch-1.12.0+cpu.html && \
    pip install --no-cache-dir --no-index torch-cluster -f https://pytorch-geometric.com/whl/torch-1.12.0+cpu.html && \
    pip install --no-cache-dir torch-geometric -f https://pytorch-geometric.com/whl/torch-1.12.0+cpu.html

# The entrypoint script must be executable for the exec-form ENTRYPOINT below.
RUN chmod +x example_pipeline.sh

# Running the container executes the whole example pipeline.
ENTRYPOINT ["./example_pipeline.sh"]

#CMD ["bash"]
60 changes: 60 additions & 0 deletions example_pipeline.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
#!/bin/bash
#
# Example molecular-design pipeline. Stages run strictly in sequence, each one
# consuming files written by the previous one:
#   load data -> train toxsmi -> MoLeR generation -> Regression Transformer
#   generation -> toxsmi inference -> property calculation -> RXN retrosynthesis
#
# Must be run from the repository root (paths are relative).
#
# Environment (needed only by the final retrosynthesis stage):
#   API_KEY  - RXN API key
#   PROJ_ID  - RXN project id

# Stop at the first failing stage, on use of an unset variable, or on a failed
# pipeline element. Without this, later stages would run on missing inputs and
# the script's exit status would reflect only the last command.
set -euo pipefail

# Load datasets
python scripts/load_data.py \
    --uniprot P05067 \
    --affinity_type IC50 \
    --affinity_cutoff 10000 \
    --output_dir data/ \
    --train_size 0.8 \
    --binary_labels

# Train toxsmi model
python scripts/train_toxsmi.py \
    --train data/train.csv \
    --test data/valid.csv \
    --smi data/mols.smi \
    --language tokenizer \
    --params config/toxsmi_conf.json \
    --model models \
    --name toxsmi_model

# Generate molecules with MoLeR
# Only the first 5 molecules are used as seeds to keep the example short.
head -n 5 data/mols.smi > data/good_docks.smi
python scripts/moler_generate_toxsmi.py \
    --smi_path data/good_docks.smi \
    --param_path config/moler_conf.json \
    --output_path data/moler_filtered \
    --predictor_path models/toxsmi_model/weights/best_F1.pt

# Generate more diverse molecules with Regression Transformer
python scripts/prepare_rt_data.py \
    --smi_path data/moler_filtered/generated.csv \
    --output_path data/moler_filtered/generated_qed.csv
# Keep only the first 10 lines of the prepared file for the RT run.
head -n 10 data/moler_filtered/generated_qed.csv > data/moler_filtered/generated_qed_selected.csv
python scripts/rt_generate.py \
    --smi_path data/moler_filtered/generated_qed_selected.csv \
    --param_path config/rt_conf.json \
    --output_path data/rt

# NOTE(review): presumably writes generated.smi and dummy_data.csv, which the
# toxsmi test step below reads - confirm against scripts/inference_dataset.py.
python scripts/inference_dataset.py -i data/rt/qed_rt_conf_generated_qed_selected/generated.csv

# Run the trained toxsmi model (F1 checkpoint) on the generated molecules.
python scripts/test_toxsmi.py \
    --model_path models/toxsmi_model \
    --smi_filepath generated.smi \
    --label_filepath dummy_data.csv \
    --checkpoint_name F1

# Calculate properties
python scripts/mol_properties.py \
    --smi_path models/toxsmi_model/results/dummy_data_F1_results_flat.csv \
    --output_path mol_props.csv

# Run RXN
# Check the credentials here rather than at the top, so the earlier stages can
# still be run without an RXN account; fail with a clear message if missing.
: "${API_KEY:?API_KEY must be set to run the RXN retrosynthesis step}"
: "${PROJ_ID:?PROJ_ID must be set to run the RXN retrosynthesis step}"

pip install rxn4chemistry
# Only the first 2 lines of the generated file are sent to retrosynthesis.
head -n 2 data/rt/qed_rt_conf_generated_qed_selected/generated.csv > selected_for_retro.csv
# Credentials are quoted so values containing whitespace or glob characters
# are passed through as single arguments.
python scripts/retrosynthesis.py selected_for_retro.csv \
    --api_key "$API_KEY" \
    --project_id "$PROJ_ID" \
    --steps 4 \
    --timeout 100 \
    --name my_retrosynthesis

0 comments on commit d6d5a27

Please sign in to comment.