update function name

sky1ove · May 15, 2024 · 4bda744 · 4bda744
1 parent a580f7e
commit 4bda744
Show file tree

Hide file tree

Showing 8 changed files with 3,797 additions and 6,623 deletions.
diff --git a/katlas/core.py b/katlas/core.py
diff --git a/katlas/dl.py b/katlas/dl.py
@@ -1,8 +1,8 @@
 # AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/04_DL.ipynb.
 
 # %% auto 0
-__all__ = ['seed_everything', 'GeneralDataset', 'get_sampler', 'MLP_1', 'CNN1D_1', 'init_weights', 'lin_wn', 'conv_wn', 'CNN1D_2',
-           'train_dl', 'train_dl_cv', 'predict_dl']
+__all__ = ['def_device', 'seed_everything', 'GeneralDataset', 'get_sampler', 'MLP_1', 'CNN1D_1', 'init_weights', 'lin_wn',
+           'conv_wn', 'CNN1D_2', 'train_dl', 'train_dl_cv', 'predict_dl']
 
 # %% ../nbs/04_DL.ipynb 4
 from fastbook import *
@@ -30,7 +30,10 @@ def seed_everything(seed=123):
     torch.backends.cudnn.deterministic = True
     torch.backends.cudnn.benchmark = False
 
-# %% ../nbs/04_DL.ipynb 11
+# %% ../nbs/04_DL.ipynb 8
+def_device = 'mps' if torch.backends.mps.is_available() else 'cuda' if torch.cuda.is_available() else 'cpu'
+
+# %% ../nbs/04_DL.ipynb 13
 class GeneralDataset:
     def __init__(self, 
                  df, # a dataframe of values
@@ -57,7 +60,7 @@ def __getitem__(self, index):
             y = torch.Tensor(self.y[index])
             return X, y
 
-# %% ../nbs/04_DL.ipynb 15
+# %% ../nbs/04_DL.ipynb 17
 def get_sampler(info,col):
 
     "For imbalanced data, get higher weights for less-represented samples"
@@ -77,7 +80,7 @@ def get_sampler(info,col):
 
     return sampler
 
-# %% ../nbs/04_DL.ipynb 21
+# %% ../nbs/04_DL.ipynb 23
 def MLP_1(num_features, 
           num_targets,
           hidden_units = [512, 218],
@@ -107,7 +110,7 @@ def MLP_1(num_features,
 
     return model
 
-# %% ../nbs/04_DL.ipynb 27
+# %% ../nbs/04_DL.ipynb 29
 class CNN1D_1(Module):
 
     def __init__(self, 
@@ -132,12 +135,12 @@ def forward(self, x):
         x = self.fc2(x)
         return x
 
-# %% ../nbs/04_DL.ipynb 31
+# %% ../nbs/04_DL.ipynb 33
 def init_weights(m, leaky=0.):
     "Initiate any Conv layer with Kaiming norm."
     if isinstance(m, (nn.Conv1d,nn.Conv2d,nn.Conv3d)): init.kaiming_normal_(m.weight, a=leaky)
 
-# %% ../nbs/04_DL.ipynb 32
+# %% ../nbs/04_DL.ipynb 34
 def lin_wn(ni,nf,dp=0.1,act=nn.SiLU):
     "Weight norm of linear."
     layers =  nn.Sequential(
@@ -147,7 +150,7 @@ def lin_wn(ni,nf,dp=0.1,act=nn.SiLU):
     if act: layers.append(act())
     return layers
 
-# %% ../nbs/04_DL.ipynb 33
+# %% ../nbs/04_DL.ipynb 35
 def conv_wn(ni, nf, ks=3, stride=1, padding=1, dp=0.1,act=nn.ReLU):
     "Weight norm of conv."
     layers =  nn.Sequential(
@@ -157,7 +160,7 @@ def conv_wn(ni, nf, ks=3, stride=1, padding=1, dp=0.1,act=nn.ReLU):
     if act: layers.append(act())
     return layers
 
-# %% ../nbs/04_DL.ipynb 34
+# %% ../nbs/04_DL.ipynb 36
 class CNN1D_2(nn.Module):
 
     def __init__(self, ni, nf, amp_scale = 16):
@@ -207,7 +210,7 @@ def forward(self, x):
 
         return x
 
-# %% ../nbs/04_DL.ipynb 39
+# %% ../nbs/04_DL.ipynb 40
 def train_dl(df, 
             feat_col, 
             target_col,
@@ -244,7 +247,7 @@ def train_dl(df,
 
     model = model_func()
 
-    learn = Learner(dls.cuda(), model.cuda(), loss, 
+    learn = Learner(dls.to(def_device), model.to(def_device), loss, 
                     metrics= [PearsonCorrCoef(),SpearmanCorrCoef()],
                     cbs = [GradientClip(1.0)] # clip gradient norm at 1.0 to keep training stable
                    )
@@ -270,7 +273,7 @@ def train_dl(df,
 
     return target, pred
 
-# %% ../nbs/04_DL.ipynb 43
+# %% ../nbs/04_DL.ipynb 45
 @fc.delegates(train_dl)
 def train_dl_cv(df, 
                 feat_col, 
@@ -320,7 +323,7 @@ def train_dl_cv(df,
 
     return oof, metrics
 
-# %% ../nbs/04_DL.ipynb 51
+# %% ../nbs/04_DL.ipynb 53
 def predict_dl(df, 
                feat_col, 
                target_col,
@@ -334,7 +337,7 @@ def predict_dl(df,
     test_dl = DataLoader(test_dset,bs=512)
 
 
-    learn = Learner(None, model.cuda(), loss_func=1)
+    learn = Learner(None, model.to(def_device), loss_func=1)
     learn.load(model_pth)
 
     learn.model.eval()

diff --git a/katlas/feature.py b/katlas/feature.py
@@ -50,10 +50,11 @@ def get_rdkit(df: pd.DataFrame, # a dataframe that contains smiles
 # %% ../nbs/01_feature.ipynb 11
 def get_morgan(df: pd.DataFrame, # a dataframe that contains smiles
                col: str = "SMILES", # colname of smile
+               radius=3
               ):
     "Get 2048 morgan fingerprint (binary feature) from smiles in a dataframe"
     mols = [Chem.MolFromSmiles(smi) for smi in df[col]]
-    morgan_fps = [AllChem.GetMorganFingerprintAsBitVect(mol, 2, nBits=2048) for mol in mols]
+    morgan_fps = [AllChem.GetMorganFingerprintAsBitVect(mol, radius=radius, nBits=2048) for mol in mols]
     fp_df = pd.DataFrame(np.array(morgan_fps), index=df.index)
     fp_df.columns = "morgan_" + fp_df.columns.astype(str)
     return fp_df
@@ -209,7 +210,7 @@ def T5_embeddings_bfd(sequence, device = 'cuda'):
 
     return T5_feature
 
-# %% ../nbs/01_feature.ipynb 27
+# %% ../nbs/01_feature.ipynb 26
 def reduce_feature(df: pd.DataFrame, 
                    method: str='pca', # dimensionality reduction method, accept both capital and lower case
                    complexity: int=20, # None for PCA; perplexity for TSNE, recommend: 30; n_neighbors for UMAP, recommend: 15
@@ -255,7 +256,7 @@ def reduce_feature(df: pd.DataFrame,
 
     return embedding_df
 
-# %% ../nbs/01_feature.ipynb 30
+# %% ../nbs/01_feature.ipynb 29
 def remove_hi_corr(df: pd.DataFrame, 
                    thr: float=0.98 # threshold
                    ):
@@ -275,7 +276,7 @@ def remove_hi_corr(df: pd.DataFrame,
 
     return df
 
-# %% ../nbs/01_feature.ipynb 34
+# %% ../nbs/01_feature.ipynb 33
 def preprocess(df: pd.DataFrame,
                thr: float=0.98):