Skip to content

Commit

Permalink
update function name
Browse files Browse the repository at this point in the history
  • Loading branch information
sky1ove committed May 15, 2024
1 parent a580f7e commit 4bda744
Show file tree
Hide file tree
Showing 8 changed files with 3,797 additions and 6,623 deletions.
485 changes: 174 additions & 311 deletions katlas/core.py

Large diffs are not rendered by default.

33 changes: 18 additions & 15 deletions katlas/dl.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
# AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/04_DL.ipynb.

# %% auto 0
__all__ = ['seed_everything', 'GeneralDataset', 'get_sampler', 'MLP_1', 'CNN1D_1', 'init_weights', 'lin_wn', 'conv_wn', 'CNN1D_2',
'train_dl', 'train_dl_cv', 'predict_dl']
__all__ = ['def_device', 'seed_everything', 'GeneralDataset', 'get_sampler', 'MLP_1', 'CNN1D_1', 'init_weights', 'lin_wn',
'conv_wn', 'CNN1D_2', 'train_dl', 'train_dl_cv', 'predict_dl']

# %% ../nbs/04_DL.ipynb 4
from fastbook import *
Expand Down Expand Up @@ -30,7 +30,10 @@ def seed_everything(seed=123):
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

# %% ../nbs/04_DL.ipynb 11
# %% ../nbs/04_DL.ipynb 8
# Default torch device, chosen in priority order: MPS, then CUDA, then CPU.
if torch.backends.mps.is_available():
    def_device = 'mps'
elif torch.cuda.is_available():
    def_device = 'cuda'
else:
    def_device = 'cpu'

# %% ../nbs/04_DL.ipynb 13
class GeneralDataset:
def __init__(self,
df, # a dataframe of values
Expand All @@ -57,7 +60,7 @@ def __getitem__(self, index):
y = torch.Tensor(self.y[index])
return X, y

# %% ../nbs/04_DL.ipynb 15
# %% ../nbs/04_DL.ipynb 17
def get_sampler(info,col):

"For imbalanced data, get higher weights for less-represented samples"
Expand All @@ -77,7 +80,7 @@ def get_sampler(info,col):

return sampler

# %% ../nbs/04_DL.ipynb 21
# %% ../nbs/04_DL.ipynb 23
def MLP_1(num_features,
num_targets,
hidden_units = [512, 218],
Expand Down Expand Up @@ -107,7 +110,7 @@ def MLP_1(num_features,

return model

# %% ../nbs/04_DL.ipynb 27
# %% ../nbs/04_DL.ipynb 29
class CNN1D_1(Module):

def __init__(self,
Expand All @@ -132,12 +135,12 @@ def forward(self, x):
x = self.fc2(x)
return x

# %% ../nbs/04_DL.ipynb 31
# %% ../nbs/04_DL.ipynb 33
def init_weights(m, leaky=0.):
    "Apply Kaiming-normal init to the weight of a 1d/2d/3d conv layer; any other module is left untouched."
    conv_types = (nn.Conv1d, nn.Conv2d, nn.Conv3d)
    # Guard clause: only convolution layers are (re)initialised.
    if not isinstance(m, conv_types):
        return
    # `leaky` is forwarded as the negative slope `a` of the rectifier.
    init.kaiming_normal_(m.weight, a=leaky)

# %% ../nbs/04_DL.ipynb 32
# %% ../nbs/04_DL.ipynb 34
def lin_wn(ni,nf,dp=0.1,act=nn.SiLU):
"Weight norm of linear."
layers = nn.Sequential(
Expand All @@ -147,7 +150,7 @@ def lin_wn(ni,nf,dp=0.1,act=nn.SiLU):
if act: layers.append(act())
return layers

# %% ../nbs/04_DL.ipynb 33
# %% ../nbs/04_DL.ipynb 35
def conv_wn(ni, nf, ks=3, stride=1, padding=1, dp=0.1,act=nn.ReLU):
"Weight norm of conv."
layers = nn.Sequential(
Expand All @@ -157,7 +160,7 @@ def conv_wn(ni, nf, ks=3, stride=1, padding=1, dp=0.1,act=nn.ReLU):
if act: layers.append(act())
return layers

# %% ../nbs/04_DL.ipynb 34
# %% ../nbs/04_DL.ipynb 36
class CNN1D_2(nn.Module):

def __init__(self, ni, nf, amp_scale = 16):
Expand Down Expand Up @@ -207,7 +210,7 @@ def forward(self, x):

return x

# %% ../nbs/04_DL.ipynb 39
# %% ../nbs/04_DL.ipynb 40
def train_dl(df,
feat_col,
target_col,
Expand Down Expand Up @@ -244,7 +247,7 @@ def train_dl(df,

model = model_func()

learn = Learner(dls.cuda(), model.cuda(), loss,
learn = Learner(dls.to(def_device), model.to(def_device), loss,
metrics= [PearsonCorrCoef(),SpearmanCorrCoef()],
cbs = [GradientClip(1.0)] # clip gradients to keep training stable
)
Expand All @@ -270,7 +273,7 @@ def train_dl(df,

return target, pred

# %% ../nbs/04_DL.ipynb 43
# %% ../nbs/04_DL.ipynb 45
@fc.delegates(train_dl)
def train_dl_cv(df,
feat_col,
Expand Down Expand Up @@ -320,7 +323,7 @@ def train_dl_cv(df,

return oof, metrics

# %% ../nbs/04_DL.ipynb 51
# %% ../nbs/04_DL.ipynb 53
def predict_dl(df,
feat_col,
target_col,
Expand All @@ -334,7 +337,7 @@ def predict_dl(df,
test_dl = DataLoader(test_dset,bs=512)


learn = Learner(None, model.cuda(), loss_func=1)
learn = Learner(None, model.to(def_device), loss_func=1)
learn.load(model_pth)

learn.model.eval()
Expand Down
9 changes: 5 additions & 4 deletions katlas/feature.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,10 +50,11 @@ def get_rdkit(df: pd.DataFrame, # a dataframe that contains smiles
# %% ../nbs/01_feature.ipynb 11
def get_morgan(df: pd.DataFrame, # a dataframe that contains smiles
               col: str = "SMILES", # colname of smiles
               radius: int = 3 # radius passed to the morgan fingerprint
               ):
    "Get 2048 morgan fingerprint (binary feature) from smiles in a dataframe"
    # Parse every entry of `col` into an RDKit molecule.
    mols = [Chem.MolFromSmiles(smi) for smi in df[col]]
    # Compute the fingerprints exactly once, honouring the caller-supplied
    # `radius` (a stale pass with a hard-coded radius of 2 used to run first
    # and was immediately overwritten).
    morgan_fps = [AllChem.GetMorganFingerprintAsBitVect(mol, radius=radius, nBits=2048) for mol in mols]
    # One row per input row (original index preserved), one column per bit.
    fp_df = pd.DataFrame(np.array(morgan_fps), index=df.index)
    # Prefix the integer column labels so they read "morgan_0", "morgan_1", ...
    fp_df.columns = "morgan_" + fp_df.columns.astype(str)
    return fp_df
Expand Down Expand Up @@ -209,7 +210,7 @@ def T5_embeddings_bfd(sequence, device = 'cuda'):

return T5_feature

# %% ../nbs/01_feature.ipynb 27
# %% ../nbs/01_feature.ipynb 26
def reduce_feature(df: pd.DataFrame,
method: str='pca', # dimensionality reduction method, accept both capital and lower case
complexity: int=20, # None for PCA; perplexity for TSNE, recommend: 30; n_neighbors for UMAP, recommend: 15
Expand Down Expand Up @@ -255,7 +256,7 @@ def reduce_feature(df: pd.DataFrame,

return embedding_df

# %% ../nbs/01_feature.ipynb 30
# %% ../nbs/01_feature.ipynb 29
def remove_hi_corr(df: pd.DataFrame,
thr: float=0.98 # threshold
):
Expand All @@ -275,7 +276,7 @@ def remove_hi_corr(df: pd.DataFrame,

return df

# %% ../nbs/01_feature.ipynb 34
# %% ../nbs/01_feature.ipynb 33
def preprocess(df: pd.DataFrame,
thr: float=0.98):

Expand Down
Loading

0 comments on commit 4bda744

Please sign in to comment.