Skip to content

Commit

Permalink
Merge pull request #92 from Irlirion/fix_deepchem
Browse files (browse the repository at this point in the history)
fix deepchem
  • Loading branch information
InnopolisUni authored Jan 17, 2023
2 parents 50496f4 + 678af7e commit f0bfa99
Show file tree
Hide file tree
Showing 2 changed files with 26 additions and 27 deletions.
49 changes: 24 additions & 25 deletions innofw/core/datasets/smiles_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,60 +3,59 @@
from numbers import Number
from typing import List, Optional, Sequence

import deepchem as dc
import numpy as np
import pandas as pd
from innofw.utils.data_utils.preprocessing import clean_salts
from torch.utils.data import Dataset
from tqdm import tqdm

from innofw.utils.data_utils.preprocessing import clean_salts

logging.getLogger("deepchem").propagate = False


class SmilesDataset(Dataset):
"""
A class to represent SMILES Dataset.
https://www.kaggle.com/c/smiles/data
A class to represent SMILES Dataset.
https://www.kaggle.com/c/smiles/data
smiles: Sequence[str]
property_list: Sequence[Number]
property_name: str
smiles: Sequence[str]
property_list: Sequence[Number]
property_name: str
Methods
-------
__getitem__(self, idx):
returns X - features and Y - targets
Methods
-------
__getitem__(self, idx):
returns X - features and Y - targets
generate_descriptors(self, featurizers: List[dc.feat.MolecularFeaturizer]):
creates descriptions out of featurizers
init_features(self, features: Optional[List[str]] = None):
initialize X-features
from_df(cls, df: pd.DataFrame, property_name: str, smiles_col: str = "smiles", property_col: Optional[str] = None):
initializes class object using data frame
generate_descriptors(self, featurizers: List[dc.feat.MolecularFeaturizer]):
creates descriptions out of featurizers
init_features(self, features: Optional[List[str]] = None):
initialize X-features
from_df(cls, df: pd.DataFrame, property_name: str, smiles_col: str = "smiles", property_col: Optional[str] = None):
initializes class object using data frame
"""

cf_featurizer = dc.feat.CircularFingerprint(size=1024)
maccs_descriptor = dc.feat.MACCSKeysFingerprint()

def __init__(
self, smiles: Sequence[str], property_list: Sequence[Number], property_name: str
):
import deepchem.feat

cf_featurizer = deepchem.feat.CircularFingerprint(size=1024)
maccs_descriptor = deepchem.feat.MACCSKeysFingerprint()
self.smiles = smiles
self.y = np.array(property_list)
self.property_name = property_name

self._convert_smiles()

self.generate_descriptors([self.cf_featurizer, self.maccs_descriptor])
self.generate_descriptors([cf_featurizer, maccs_descriptor])

def _convert_smiles(self):
with Pool(cpu_count()) as pool:
pre_clean = tqdm(
zip(
pool.map(clean_salts, self.smiles), self.y, self.smiles
),
zip(pool.map(clean_salts, self.smiles), self.y, self.smiles),
desc="Cleaning salts...",
total=len(self.smiles),
)
Expand All @@ -76,7 +75,7 @@ def __getitem__(self, idx):
def __len__(self):
return len(self.y)

def generate_descriptors(self, featurizers: List[dc.feat.MolecularFeaturizer]):
def generate_descriptors(self, featurizers):
self.smiles_features = {}
self.featurizer_names = []
with Pool(cpu_count()) as pool:
Expand Down
4 changes: 2 additions & 2 deletions run_tests.sh
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#!/bin/bash
export PYTHONPATH=.
python -m pytest --cov=innofw --cov-report=xml --junitxml=/code/out_report.xml
# coverage report --fail-under=40
python -m pytest --cov=innofw --cov-report=xml --junitxml=out_report.xml
coverage report --fail-under=40
if [ $? -ne 0 ]; then
echo ERROR
exit 2
Expand Down

0 comments on commit f0bfa99

Please sign in to comment.