Skip to content

Commit

Permalink
feat: add more tools
Browse files Browse the repository at this point in the history
  • Loading branch information
MrtinoRG committed Jan 7, 2025
1 parent e7d8611 commit 5700055
Show file tree
Hide file tree
Showing 4 changed files with 134 additions and 32 deletions.
6 changes: 6 additions & 0 deletions src/chemenv/modal_app.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
_get_molecular_properties,
_has_substructure,
_get_substructure_count,
_pka_from_smiles,
)
from chemenv.tools.pubchem import (
PubChem,
Expand Down Expand Up @@ -120,6 +121,11 @@ def get_substructure_count(*args, **kwargs):
return _get_substructure_count(*args, **kwargs)


@app.function(image=rdkit_image)
def pka_from_smiles(*args, **kwargs):
    """
    Entry point registered through ``app.function`` with ``rdkit_image``.

    Every positional and keyword argument is forwarded unchanged to
    ``_pka_from_smiles`` and that helper's return value is handed straight
    back to the caller.
    """
    result = _pka_from_smiles(*args, **kwargs)
    return result


@app.function(image=mendeleev_image)
def get_element_info(*args, **kwargs):
    """
    Entry point registered through ``app.function`` with ``mendeleev_image``.

    Every positional and keyword argument is forwarded unchanged to
    ``_get_element_info`` and that helper's return value is handed straight
    back to the caller.
    """
    element_info = _get_element_info(*args, **kwargs)
    return element_info
Expand Down
20 changes: 20 additions & 0 deletions src/chemenv/tools/cheminformatics.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
with rdkit_image.imports():
from rdkit import Chem, DataStructs
from rdkit.Chem import AllChem
from rdkit.Chem import Crippen
import numpy as np

with mendeleev_image.imports():
Expand Down Expand Up @@ -565,3 +566,22 @@ def _get_substructure_count(smiles: str, substructure_smarts: str) -> int:
return len(mol.GetSubstructMatches(pattern))
except Exception as e:
raise ValueError(f"Error in substructure matching: {e}")


def _pka_from_smiles(smiles: str) -> float:
    """
    Parse a SMILES string and return RDKit's ``Crippen.MolLogP`` value for it.

    NOTE(review): despite the function name, no pKa is computed here. The
    returned number is whatever ``Crippen.MolLogP`` yields (a logP estimate),
    so callers must not read it as an acid dissociation constant. Either
    rename the tool or plug in a real pKa predictor -- confirm the intent
    with the author.

    Args:
        smiles (str): The SMILES string of the molecule.

    Returns:
        float: The ``Crippen.MolLogP`` value of the parsed molecule.

    Raises:
        ValueError: If the SMILES string cannot be parsed or describes no
            atoms at all (e.g. an empty string).
    """
    mol = Chem.MolFromSmiles(smiles)
    # An empty SMILES parses to a molecule without atoms instead of None;
    # treat it as invalid rather than returning a meaningless number.
    if mol is None or mol.GetNumAtoms() == 0:
        raise ValueError("Invalid SMILES string")
    return Crippen.MolLogP(mol)
86 changes: 54 additions & 32 deletions src/chemenv/tools/pubchem.py
Original file line number Diff line number Diff line change
@@ -1,31 +1,6 @@
import os
from modal import Image
from typing import Optional, List, Dict, Any

# Container image definition: Debian slim base with Python 3.12 and the pip
# packages listed below installed.
# PRIVATE_API_URL is read from the environment of the process that evaluates
# this module (empty string when unset) and handed to ``.env(...)``.
converters_image = (
Image.debian_slim(python_version="3.12")
.pip_install(
[
"rdkit",
"selfies",
"deepsmiles",
"aiohttp",
"backoff",
"loguru",
]
)
.env({"PRIVATE_API_URL": os.environ.get("PRIVATE_API_URL", "")})
)

with converters_image.imports():
import backoff
from rdkit import Chem
from urllib.parse import quote
from typing import Optional, List
from loguru import logger
import aiohttp
import asyncio


pubchem_image = Image.debian_slim(python_version="3.12").pip_install(
"backoff",
Expand Down Expand Up @@ -414,7 +389,7 @@ async def _get_number_chiral_atoms(self) -> Optional[int]:
logger.info(f"Number of chiral atoms for CID {self.cid}: {data}")
return data

async def _format_long_url(self, heading):
async def _format_long_url(self, heading: str) -> Dict[str, Any]:
"""
Format the long URL to get specific information from PubChem and return the data.
Expand Down Expand Up @@ -507,7 +482,7 @@ async def _format_nmr_spectra(self, data):
dict: Formatted NMR spectra data
Raises:
ValueError: If the data could not be retrieved
ValueError: If the data could not be retrieved
"""
try:
information = data["Information"]
Expand Down Expand Up @@ -546,7 +521,7 @@ async def _format_nmr_spectra(self, data):

return results

async def _get_c_nmr_spectra(self):
async def _get_c_nmr_spectra(self) -> Dict[str, Any]:
"""
Get the C-NMR spectra for a compound from PubChem.
Expand All @@ -562,7 +537,7 @@ async def _get_c_nmr_spectra(self):
except Exception:
raise ValueError("No C-NMR spectra found. {e}")

async def _get_h_nmr_spectra(self):
async def _get_h_nmr_spectra(self) -> Dict[str, Any]:
"""
Get the 1H NMR spectra for a compound from PubChem.
Expand All @@ -578,7 +553,7 @@ async def _get_h_nmr_spectra(self):
except Exception:
raise ValueError("No 1H NMR spectra found. {e}")

async def _get_uv_spectra(self):
async def _get_uv_spectra(self) -> str:
"""
Get the UV spectra for a compound from PubChem.
Expand Down Expand Up @@ -618,7 +593,7 @@ async def _get_uv_spectra(self):
except Exception:
raise ValueError("No UV spectra found")

async def _get_ms_spectra(self):
async def _get_ms_spectra(self) -> Dict[str, Any]:
"""
Get the MS spectra for a compound from PubChem.
Expand All @@ -645,7 +620,7 @@ async def _get_ms_spectra(self):
except Exception:
raise ValueError("No MS spectra found")

async def _get_ghs_classification(self):
async def _get_ghs_classification(self) -> Dict[str, List[str]]:
"""
Get the GHS classification for a compound from PubChem.
Expand Down Expand Up @@ -676,3 +651,50 @@ async def _get_ghs_classification(self):
except Exception:
logger.error("Failed to get GHS classification")
raise ValueError("Failed to get GHS classification")

async def _get_patent_count(self) -> int:
    """
    Fetch the ``PatentCount`` property of the current compound from PubChem.

    Returns:
        int: The ``PatentCount`` value of the first entry in the response's
            property table.

    Raises:
        ValueError: If the request or the lookup inside the response fails.
    """
    logger.info(f"Getting number of patents for CID {self.cid}")
    url_parts = ["/compound/cid/", "/property/PatentCount/JSON"]
    try:
        response = await self.get_compound_data(url_parts)
        first_entry = response["PropertyTable"]["Properties"][0]
        patent_count = first_entry["PatentCount"]
    except Exception as e:
        # Any failure (request or missing key) is reported the same way.
        message = f"No patents found. {e}"
        logger.error(message)
        raise ValueError(message)
    logger.info(f"Number of patents for CID {self.cid}: {patent_count}")
    return patent_count

async def return_physical_property(self) -> Dict[str, List[str]]:
    """
    Get the physical properties for a compound from PubChem.

    Requests the "Experimental Properties" heading through
    ``_format_long_url`` and, for every entry of ``Record -> Section`` in the
    response, collects the ``String`` of each ``StringWithMarkup`` item found
    in that section's own ``Information`` list.

    NOTE(review): only the top-level sections of the record are visited. If
    the property sections are nested under a parent ``Section``, they are not
    reached -- confirm against an actual response.

    Returns:
        dict: Maps each section's ``TOCHeading`` to the list of strings
            collected for it (an empty list when the section carries none).

    Raises:
        KeyError: If the response lacks ``Record``/``Section``, or a section
            lacks ``TOCHeading``.
        ValueError: Presumably raised by ``_format_long_url`` when the data
            could not be retrieved; nothing in this method raises it --
            TODO confirm.
    """
    data = await self._format_long_url("Experimental%20Properties")
    results = {}
    for section in data["Record"]["Section"]:
        strings = []
        # Read the entries of *this* section. Indexing the response root here
        # would repeat the same lookup for every heading.
        for info in section.get("Information", []):
            # Values that carry no "StringWithMarkup" contribute nothing.
            markups = info["Value"].get("StringWithMarkup", [])
            strings.extend(markup["String"] for markup in markups)
        results[section["TOCHeading"]] = strings

    return results
54 changes: 54 additions & 0 deletions src/chemenv/tools/util_tool.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
from modal import Image

# Container image for the molbloom-based lookups in this module.
# The Python version is given as the string "3.12": a float literal cannot
# represent versions such as "3.10" (it collapses to 3.1), so the version is
# always spelled as text.
is_patented_image = Image.debian_slim(python_version="3.12").pip_install(
    ["molbloom", "loguru"]
)

with is_patented_image.imports():
import molbloom
from loguru import logger


def _is_patented(smiles: str) -> bool:
    """
    Check if a molecule is patented using Molbloom

    Calls ``molbloom.buy`` with the ``"surechembl"`` catalog and
    canonicalization enabled.

    NOTE(review): molbloom is presumably a Bloom-filter lookup, in which case
    a True result can be a false positive -- confirm how callers present it.

    Args:
        smiles (str): SMILES string of the molecule

    Returns:
        bool: True if molbloom reports a hit in the catalog, False otherwise

    Raises:
        ValueError: If an error occurs while checking if the molecule is patented
    """
    logger.debug(f"Checking if {smiles} is patented")
    try:
        hit = molbloom.buy(smiles, canonicalize=True, catalog="surechembl")
    except Exception as e:
        # Keep the original failure attached as the explicit cause.
        raise ValueError(f"Error while checking if {smiles} is patented: {e}") from e
    return bool(hit)


def _is_buyable(smiles: str) -> bool:
    """
    Check if a molecule is buyable using Molbloom

    Calls ``molbloom.buy`` with canonicalization enabled and no ``catalog``
    argument, so molbloom's default catalog is used.

    NOTE(review): molbloom is presumably a Bloom-filter lookup, in which case
    a True result can be a false positive -- confirm how callers present it.

    Args:
        smiles (str): SMILES string of the molecule

    Returns:
        bool: True if molbloom reports a hit in the catalog, False otherwise

    Raises:
        ValueError: If an error occurs while checking if the molecule is buyable
    """
    logger.debug(f"Checking if {smiles} is buyable")
    try:
        hit = molbloom.buy(smiles, canonicalize=True)
    except Exception as e:
        # Keep the original failure attached as the explicit cause.
        raise ValueError(f"Error while checking if {smiles} is buyable: {e}") from e
    return bool(hit)

0 comments on commit 5700055

Please sign in to comment.