Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: fix: Add fingerprint, radius commands #21

Open
wants to merge 4 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 15 additions & 9 deletions chemplot/chemplot.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ class Plotter(object):

_target_types = {'R', 'C'}

def __init__(self, encoding_list, target, target_type, sim_type, get_desc, get_fingerprints):
def __init__(self, encoding_list, target, target_type, sim_type, get_desc, get_fingerprints, radius, nBits):

# Error handling for sim_type
if sim_type not in self._sim_types:
Expand Down Expand Up @@ -136,7 +136,7 @@ def __init__(self, encoding_list, target, target_type, sim_type, get_desc, get_f
raise Exception("Descriptors could not be computed for given molecules")
self.__df_descriptors, self.__target = desc.select_descriptors_lasso(df_descriptors,target,kind=self.__target_type)
elif self.__sim_type == "structural":
self.__mols, self.__df_descriptors, self.__target = get_fingerprints(encoding_list,target,2,2048)
self.__mols, self.__df_descriptors, self.__target = get_fingerprints(encoding_list,target, radius, nBits)

if len(self.__mols) < 2 or len(self.__df_descriptors.columns) < 2:
raise Exception("Plotter object cannot be instantiated for given molecules")
Expand All @@ -146,14 +146,16 @@ def __init__(self, encoding_list, target, target_type, sim_type, get_desc, get_f


@classmethod
def from_smiles(cls, smiles_list, target=[], target_type=None, sim_type=None):
def from_smiles(cls, smiles_list, target=[], target_type=None, sim_type=None, radius=2, nBits=2048):
"""
Class method to construct a Plotter object from a list of SMILES.

:param smiles_list: List of the SMILES representation of the molecules to plot.
:param target: target values
:param target_type: target type R (regression) or C (classification)
:param sim_type: similarity type structural or tailored
:param sim_type: similarity type structural or tailored
:param radius: The ECFP fingerprint radius.
:param nBits: The number of bits of the fingerprint vector.
:type smiles_list: list
:type target: list
:type target_type: string
Expand All @@ -162,11 +164,11 @@ def from_smiles(cls, smiles_list, target=[], target_type=None, sim_type=None):
:rtype: Plotter
"""

return cls(smiles_list, target, target_type, sim_type, desc.get_mordred_descriptors, desc.get_ecfp)
return cls(smiles_list, target, target_type, sim_type, desc.get_mordred_descriptors, desc.get_ecfp, radius, nBits)


@classmethod
def from_inchi(cls, inchi_list, target=[], target_type=None, sim_type=None):
def from_inchi(cls, inchi_list, target=[], target_type=None, sim_type=None, radius=2, nBits=2048):
"""
Class method to construct a Plotter object from a list of InChi.

Expand All @@ -178,11 +180,15 @@ def from_inchi(cls, inchi_list, target=[], target_type=None, sim_type=None):
:type target_type: string
:param sim_type: similarity type structural or tailored
:type sim_type: string
:param radius: The ECFP fingerprint radius.
:type radius: int
:param nBits: The number of bits of the fingerprint vector.
:type nBits: int
:returns: A Plotter object for the molecules given as input.
:rtype: Plotter
"""

return cls(inchi_list, target, target_type, sim_type, desc.get_mordred_descriptors_from_inchi, desc.get_ecfp_from_inchi)
return cls(inchi_list, target, target_type, sim_type, desc.get_mordred_descriptors_from_inchi, desc.get_ecfp_from_inchi, radius, nBits)


def pca(self, **kwargs):
Expand Down Expand Up @@ -257,7 +263,7 @@ def tsne(self, perplexity=None, pca=False, random_state=None, **kwargs):

# Embed the data in two dimensions
self.tsne_fit = TSNE(n_components=2, perplexity=perplexity, random_state=random_state, **kwargs)
ecfp_tsne_embedding = self.tsne_fit.fit_transform(self.__data)
ecfp_tsne_embedding = self.tsne_fit.fit_transform(np.array(self.__data))
# Create a dataframe containing the first 2 t-SNE components of ECFP
self.__df_2_components = pd.DataFrame(data = ecfp_tsne_embedding
, columns = ['t-SNE-1', 't-SNE-2'])
Expand Down Expand Up @@ -722,4 +728,4 @@ def __open_plot(self, p):
show(p)

def get_target(self):
return self.__target
return self.__target
8 changes: 4 additions & 4 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

setup(
name="chemplot",
version="1.2.1",
version="1.2.2",
author="Murat Cihan Sorkun, Dajt Mullaj",
author_email="[email protected], [email protected]",
description="A python library for chemical space visualization.",
Expand Down Expand Up @@ -35,10 +35,10 @@
install_requires=[
"pandas>=1.1.3",
"numpy>=1.19.2",
"matplotlib==3.3.2",
"seaborn==0.11.1",
"matplotlib>=3.3.2",
"seaborn>=0.11.1",
"umap-learn>=0.5.1",
"scikit-learn==0.24.2",
"scikit-learn>=0.24.2",
"bokeh>=2.2.3",
"scipy>=1.5.2",
"mordred>=1.2.0",
Expand Down