feat: add more tools

ur-whitelab · Jan 7, 2025 · 5700055 · 5700055
1 parent e7d8611
commit 5700055
Show file tree

Hide file tree

Showing 4 changed files with 134 additions and 32 deletions.
diff --git a/src/chemenv/modal_app.py b/src/chemenv/modal_app.py
@@ -20,6 +20,7 @@
     _get_molecular_properties,
     _has_substructure,
     _get_substructure_count,
+    _pka_from_smiles,
 )
 from chemenv.tools.pubchem import (
     PubChem,
@@ -120,6 +121,11 @@ def get_substructure_count(*args, **kwargs):
     return _get_substructure_count(*args, **kwargs)
 
 
+@app.function(image=rdkit_image)
+def pka_from_smiles(*args, **kwargs):
+    """Forward every argument to ``_pka_from_smiles`` and return its result."""
+    result = _pka_from_smiles(*args, **kwargs)
+    return result
+
+
 @app.function(image=mendeleev_image)
 def get_element_info(*args, **kwargs):
     return _get_element_info(*args, **kwargs)

diff --git a/src/chemenv/tools/cheminformatics.py b/src/chemenv/tools/cheminformatics.py
@@ -8,6 +8,7 @@
 with rdkit_image.imports():
     from rdkit import Chem, DataStructs
     from rdkit.Chem import AllChem
+    from rdkit.Chem import Crippen
     import numpy as np
 
 with mendeleev_image.imports():
@@ -565,3 +566,22 @@ def _get_substructure_count(smiles: str, substructure_smarts: str) -> int:
         return len(mol.GetSubstructMatches(pattern))
     except Exception as e:
         raise ValueError(f"Error in substructure matching: {e}")
+
+
+def _pka_from_smiles(smiles: str) -> float:
+    """
+    Return the value of RDKit's ``Crippen.MolLogP`` for a SMILES string.
+
+    NOTE(review): despite the function name, no pKa is computed here. The
+    number returned is whatever ``Crippen.MolLogP`` yields, which RDKit
+    documents as the Wildman-Crippen logP estimate. The name is kept so that
+    existing callers keep working.
+    TODO: replace with a real pKa predictor or rename the tool.
+
+    Args:
+        smiles (str): The SMILES string of the molecule.
+
+    Returns:
+        float: ``Crippen.MolLogP(mol)`` for the parsed molecule.
+
+    Raises:
+        ValueError: If ``smiles`` is not a non-empty string or RDKit cannot
+            parse it.
+    """
+    # Reject empty input up front instead of scoring a molecule with no atoms.
+    if not isinstance(smiles, str) or not smiles.strip():
+        raise ValueError("Invalid SMILES string")
+    mol = Chem.MolFromSmiles(smiles)
+    if mol is None:
+        raise ValueError("Invalid SMILES string")
+    return Crippen.MolLogP(mol)
diff --git a/src/chemenv/tools/pubchem.py b/src/chemenv/tools/pubchem.py
@@ -1,31 +1,6 @@
-import os
 from modal import Image
 from typing import Optional, List, Dict, Any
 
-converters_image = (
-    Image.debian_slim(python_version="3.12")
-    .pip_install(
-        [
-            "rdkit",
-            "selfies",
-            "deepsmiles",
-            "aiohttp",
-            "backoff",
-            "loguru",
-        ]
-    )
-    .env({"PRIVATE_API_URL": os.environ.get("PRIVATE_API_URL", "")})
-)
-
-with converters_image.imports():
-    import backoff
-    from rdkit import Chem
-    from urllib.parse import quote
-    from typing import Optional, List
-    from loguru import logger
-    import aiohttp
-    import asyncio
-
 
 pubchem_image = Image.debian_slim(python_version="3.12").pip_install(
     "backoff",
@@ -414,7 +389,7 @@ async def _get_number_chiral_atoms(self) -> Optional[int]:
         logger.info(f"Number of chiral atoms for CID {self.cid}: {data}")
         return data
 
-    async def _format_long_url(self, heading):
+    async def _format_long_url(self, heading: str) -> Dict[str, Any]:
         """
         Format the long URL to get specific information from PubChem and return the data.
 
@@ -507,7 +482,7 @@ async def _format_nmr_spectra(self, data):
             dict: Formatted NMR spectra data
 
         Raises:
-            ValueError: If the data could not be retrieved
+            ValueError: If the data could not be retrieved
         """
         try:
             information = data["Information"]
@@ -546,7 +521,7 @@ async def _format_nmr_spectra(self, data):
 
         return results
 
-    async def _get_c_nmr_spectra(self):
+    async def _get_c_nmr_spectra(self) -> Dict[str, Any]:
         """
         Get the C-NMR spectra for a compound from PubChem.
 
@@ -562,7 +537,7 @@ async def _get_c_nmr_spectra(self):
         except Exception:
             raise ValueError("No C-NMR spectra found. {e}")
 
-    async def _get_h_nmr_spectra(self):
+    async def _get_h_nmr_spectra(self) -> Dict[str, Any]:
         """
         Get the 1H NMR spectra for a compound from PubChem.
 
@@ -578,7 +553,7 @@ async def _get_h_nmr_spectra(self):
         except Exception:
             raise ValueError("No 1H NMR spectra found. {e}")
 
-    async def _get_uv_spectra(self):
+    async def _get_uv_spectra(self) -> str:
         """
         Get the UV spectra for a compound from PubChem.
 
@@ -618,7 +593,7 @@ async def _get_uv_spectra(self):
         except Exception:
             raise ValueError("No UV spectra found")
 
-    async def _get_ms_spectra(self):
+    async def _get_ms_spectra(self) -> Dict[str, Any]:
         """
         Get the MS spectra for a compound from PubChem.
 
@@ -645,7 +620,7 @@ async def _get_ms_spectra(self):
         except Exception:
             raise ValueError("No MS spectra found")
 
-    async def _get_ghs_classification(self):
+    async def _get_ghs_classification(self) -> Dict[str, List[str]]:
         """
         Get the GHS classification for a compound from PubChem.
 
@@ -676,3 +651,50 @@ async def _get_ghs_classification(self):
         except Exception:
             logger.error("Failed to get GHS classification")
             raise ValueError("Failed to get GHS classification")
+
+    async def _get_patent_count(self) -> int:
+        """
+        Look up the ``PatentCount`` property of this compound on PubChem.
+
+        The two path fragments are handed to ``get_compound_data``; the value
+        is read from ``PropertyTable -> Properties[0] -> PatentCount`` of the
+        response.
+
+        Returns:
+            int: Number of patents for the compound.
+
+        Raises:
+            ValueError: If the request or the extraction of the value fails
+        """
+        endpoint = ["/compound/cid/", "/property/PatentCount/JSON"]
+        logger.info(f"Getting number of patents for CID {self.cid}")
+        try:
+            response = await self.get_compound_data(endpoint)
+            first_entry = response["PropertyTable"]["Properties"][0]
+            patent_count = first_entry["PatentCount"]
+        except Exception as e:
+            logger.error(f"No patents found. {e}")
+            raise ValueError(f"No patents found. {e}")
+        logger.info(f"Number of patents for CID {self.cid}: {patent_count}")
+        return patent_count
+
+    async def return_physical_property(self) -> Dict[str, List[str]]:
+        """
+        Get the physical properties for a compound from PubChem.
+
+        Requests the "Experimental Properties" heading through
+        ``_format_long_url`` and groups the text values by section heading.
+
+        Returns:
+            Dict[str, List[str]]: Maps each section's ``TOCHeading`` to the
+                ``String`` entries found under that section's ``Information``.
+
+        Raises:
+            ValueError: If the retrieved data does not have the expected layout
+        """
+        data = await self._format_long_url("Experimental%20Properties")
+        results: Dict[str, List[str]] = {}
+        try:
+            # NOTE(review): assumes ``_format_long_url`` returns a payload with
+            # a top-level "Record" key - confirm against that helper.
+            for section in data["Record"]["Section"]:
+                values: List[str] = []
+                # Read this section's own entries; taking them from the
+                # top-level payload would give every heading the same values.
+                for info in section["Information"]:
+                    for string_markup in info["Value"]["StringWithMarkup"]:
+                        values.append(string_markup["String"])
+                results[section["TOCHeading"]] = values
+        except (KeyError, TypeError) as e:
+            raise ValueError(f"Failed to parse physical properties. {e}") from e
+
+        return results
diff --git a/src/chemenv/tools/util_tool.py b/src/chemenv/tools/util_tool.py
@@ -0,0 +1,54 @@
+from modal import Image
+
+# Modal image for the molbloom-based lookups in this module. The Python version
+# is given as the string "3.12"; a float literal cannot represent versions such
+# as 3.10 faithfully.
+is_patented_image = Image.debian_slim(python_version="3.12").pip_install(
+    ["molbloom", "loguru"]
+)
+
+with is_patented_image.imports():
+    import molbloom
+    from loguru import logger
+
+
+def _is_patented(smiles: str) -> bool:
+    """
+    Check if a molecule is patented using Molbloom
+
+    The lookup is ``molbloom.buy`` against the ``surechembl`` catalog with
+    canonicalization enabled.
+    NOTE(review): molbloom is presumably a Bloom-filter lookup, so a ``True``
+    may be a false positive - confirm against the molbloom documentation.
+
+    Args:
+        smiles (str): SMILES string of the molecule
+
+    Returns:
+        bool: True if the lookup reports a hit, False otherwise
+
+    Raises:
+        ValueError: If an error occurs while checking if the molecule is patented
+    """
+    logger.debug(f"Checking if {smiles} is patented")
+    try:
+        r = molbloom.buy(smiles, canonicalize=True, catalog="surechembl")
+    except Exception as e:
+        # Chain the cause so the underlying molbloom failure stays visible.
+        raise ValueError(f"Error while checking if {smiles} is patented: {e}") from e
+    return bool(r)
+
+
+def _is_buyable(smiles: str) -> bool:
+    """
+    Check if a molecule is buyable using Molbloom
+
+    The lookup is ``molbloom.buy`` with canonicalization enabled and no
+    explicit catalog, so molbloom's default catalog is used.
+    NOTE(review): molbloom is presumably a Bloom-filter lookup, so a ``True``
+    may be a false positive - confirm against the molbloom documentation.
+
+    Args:
+        smiles (str): SMILES string of the molecule
+
+    Returns:
+        bool: True if the lookup reports a hit, False otherwise
+
+    Raises:
+        ValueError: If an error occurs while checking if the molecule is buyable
+    """
+    logger.debug(f"Checking if {smiles} is buyable")
+    try:
+        r = molbloom.buy(smiles, canonicalize=True)
+    except Exception as e:
+        # Chain the cause so the underlying molbloom failure stays visible.
+        raise ValueError(f"Error while checking if {smiles} is buyable: {e}") from e
+    return bool(r)