diff --git a/src/chemenv/modal_app.py b/src/chemenv/modal_app.py
index ad2bce9..81daa30 100644
--- a/src/chemenv/modal_app.py
+++ b/src/chemenv/modal_app.py
@@ -20,6 +20,7 @@
     _get_molecular_properties,
     _has_substructure,
     _get_substructure_count,
+    _pka_from_smiles,
 )
 from chemenv.tools.pubchem import (
     PubChem,
@@ -120,6 +121,11 @@ def get_substructure_count(*args, **kwargs):
     return _get_substructure_count(*args, **kwargs)
 
 
+@app.function(image=rdkit_image)
+def pka_from_smiles(*args, **kwargs):
+    return _pka_from_smiles(*args, **kwargs)
+
+
 @app.function(image=mendeleev_image)
 def get_element_info(*args, **kwargs):
     return _get_element_info(*args, **kwargs)
diff --git a/src/chemenv/tools/cheminformatics.py b/src/chemenv/tools/cheminformatics.py
index c0001fa..12cbc1a 100644
--- a/src/chemenv/tools/cheminformatics.py
+++ b/src/chemenv/tools/cheminformatics.py
@@ -8,6 +8,7 @@
 with rdkit_image.imports():
     from rdkit import Chem, DataStructs
     from rdkit.Chem import AllChem
+    from rdkit.Chem import Crippen
     import numpy as np
 
 with mendeleev_image.imports():
@@ -565,3 +566,26 @@ def _get_substructure_count(smiles: str, substructure_smarts: str) -> int:
         return len(mol.GetSubstructMatches(pattern))
     except Exception as e:
         raise ValueError(f"Error in substructure matching: {e}")
+
+
+def _pka_from_smiles(smiles: str) -> float:
+    """
+    Return the Wildman-Crippen logP estimate of a molecule from its SMILES.
+
+    NOTE(review): despite the name, no pKa is computed here. The value comes
+    from ``Crippen.MolLogP``; replace the body with a real pKa model or
+    rename the function before callers depend on it.
+
+    Args:
+        smiles (str): The SMILES string of the molecule.
+
+    Returns:
+        float: ``Crippen.MolLogP`` of the parsed molecule.
+
+    Raises:
+        ValueError: If the SMILES string is invalid.
+    """
+    mol = Chem.MolFromSmiles(smiles)
+    if mol is None:
+        raise ValueError("Invalid SMILES string")
+    return Crippen.MolLogP(mol)
diff --git a/src/chemenv/tools/pubchem.py b/src/chemenv/tools/pubchem.py
index e40958d..79f7f99 100644
--- a/src/chemenv/tools/pubchem.py
+++ b/src/chemenv/tools/pubchem.py
@@ -1,31 +1,6 @@
-import os
 from modal import Image
 from typing import Optional, List, Dict, Any
 
-converters_image = (
-    Image.debian_slim(python_version="3.12")
-    .pip_install(
-        [
-            "rdkit",
-            "selfies",
-            "deepsmiles",
-            "aiohttp",
-            "backoff",
-            "loguru",
-        ]
-    )
-    .env({"PRIVATE_API_URL": os.environ.get("PRIVATE_API_URL", "")})
-)
-
-with converters_image.imports():
-    import backoff
-    from rdkit import Chem
-    from urllib.parse import quote
-    from typing import Optional, List
-    from loguru import logger
-    import aiohttp
-    import asyncio
-
 
 pubchem_image = Image.debian_slim(python_version="3.12").pip_install(
     "backoff",
@@ -414,7 +389,7 @@ async def _get_number_chiral_atoms(self) -> Optional[int]:
         logger.info(f"Number of chiral atoms for CID {self.cid}: {data}")
         return data
 
-    async def _format_long_url(self, heading):
+    async def _format_long_url(self, heading: str) -> Dict[str, Any]:
         """
         Format the long URL to get specific information from PubChem and return the data.
 
@@ -546,7 +521,7 @@ async def _format_nmr_spectra(self, data):
 
         return results
 
-    async def _get_c_nmr_spectra(self):
+    async def _get_c_nmr_spectra(self) -> Dict[str, Any]:
         """
         Get the C-NMR spectra for a compound from PubChem.
 
@@ -562,7 +537,7 @@ async def _get_c_nmr_spectra(self):
-        except Exception:
+        except Exception as e:
-            raise ValueError("No C-NMR spectra found. {e}")
+            raise ValueError(f"No C-NMR spectra found. {e}")
 
-    async def _get_h_nmr_spectra(self):
+    async def _get_h_nmr_spectra(self) -> Dict[str, Any]:
         """
         Get the 1H NMR spectra for a compound from PubChem.
 
@@ -578,7 +553,7 @@ async def _get_h_nmr_spectra(self):
-        except Exception:
+        except Exception as e:
-            raise ValueError("No 1H NMR spectra found. {e}")
+            raise ValueError(f"No 1H NMR spectra found. {e}")
 
-    async def _get_uv_spectra(self):
+    async def _get_uv_spectra(self) -> str:
         """
         Get the UV spectra for a compound from PubChem.
 
@@ -618,7 +593,7 @@ async def _get_uv_spectra(self):
         except Exception:
             raise ValueError("No UV spectra found")
 
-    async def _get_ms_spectra(self):
+    async def _get_ms_spectra(self) -> Dict[str, Any]:
         """
         Get the MS spectra for a compound from PubChem.
 
@@ -645,7 +620,7 @@ async def _get_ms_spectra(self):
         except Exception:
             raise ValueError("No MS spectra found")
 
-    async def _get_ghs_classification(self):
+    async def _get_ghs_classification(self) -> Dict[str, List[str]]:
         """
         Get the GHS classification for a compound from PubChem.
 
@@ -676,3 +651,53 @@ async def _get_ghs_classification(self):
         except Exception:
             logger.error("Failed to get GHS classification")
             raise ValueError("Failed to get GHS classification")
+
+    async def _get_patent_count(self) -> int:
+        """
+        Get the number of patents for a compound from PubChem.
+
+        Returns:
+            int: Number of patents for the compound.
+
+        Raises:
+            ValueError: If the data could not be retrieved
+        """
+        url = [
+            "/compound/cid/",
+            "/property/PatentCount/JSON",
+        ]
+        logger.info(f"Getting number of patents for CID {self.cid}")
+        try:
+            data = (await self.get_compound_data(url))["PropertyTable"]["Properties"][
+                0
+            ]["PatentCount"]
+        except Exception as e:
+            logger.error(f"No patents found. {e}")
+            raise ValueError(f"No patents found. {e}")
+        logger.info(f"Number of patents for CID {self.cid}: {data}")
+        return data
+
+    async def return_physical_property(self) -> Dict[str, List[str]]:
+        """
+        Get the physical properties for a compound from PubChem.
+
+        Returns:
+            dict: Maps each section's TOCHeading to the list of its text values
+
+        Raises:
+            ValueError: If the data could not be retrieved
+        """
+        data = await self._format_long_url("Experimental%20Properties")
+        results = {}
+        for section in data["Record"]["Section"]:
+            heading = section["TOCHeading"]
+
+            results[heading] = []
+            # Walk this section's own entries (indexing `data` here repeated the
+            # same list under every heading). NOTE(review): confirm that
+            # _format_long_url returns the full record with "Record" at the top.
+            for info in section.get("Information", []):
+                for string_markup in info["Value"].get("StringWithMarkup", []):
+                    results[heading].append(string_markup["String"])
+
+        return results
diff --git a/src/chemenv/tools/util_tool.py b/src/chemenv/tools/util_tool.py
new file mode 100644
index 0000000..631a48a
--- /dev/null
+++ b/src/chemenv/tools/util_tool.py
@@ -0,0 +1,53 @@
+from modal import Image
+
+# NOTE(review): molbloom.buy(..., canonicalize=True) appears to import RDKit,
+# which molbloom does not seem to install itself, so it is added here; verify.
+is_patented_image = Image.debian_slim(python_version="3.12").pip_install(
+    ["molbloom", "loguru", "rdkit"]
+)
+
+with is_patented_image.imports():
+    import molbloom
+    from loguru import logger
+
+
+def _is_patented(smiles: str) -> bool:
+    """
+    Check if a molecule is patented using Molbloom
+
+    Args:
+        smiles (str): SMILES string of the molecule
+
+    Returns:
+        bool: True if the molecule is patented, False otherwise
+
+    Raises:
+        ValueError: If an error occurs while checking if the molecule is patented
+    """
+    logger.debug(f"Checking if {smiles} is patented")
+    try:
+        r = molbloom.buy(smiles, canonicalize=True, catalog="surechembl")
+    except Exception as e:
+        raise ValueError(f"Error while checking if {smiles} is patented: {e}")
+    return bool(r)
+
+
+def _is_buyable(smiles: str) -> bool:
+    """
+    Check if a molecule is buyable using Molbloom
+
+    Args:
+        smiles (str): SMILES string of the molecule
+
+    Returns:
+        bool: True if the molecule is buyable, False otherwise
+
+    Raises:
+        ValueError: If an error occurs while checking if the molecule is buyable
+    """
+    logger.debug(f"Checking if {smiles} is buyable")
+    try:
+        r = molbloom.buy(smiles, canonicalize=True)
+    except Exception as e:
+        raise ValueError(f"Error while checking if {smiles} is buyable: {e}")
+    return bool(r)