From e7d8611b84308c7ce0242553e78c13ba5a3177e7 Mon Sep 17 00:00:00 2001
From: MrtinoRG <martinriosgarcia@gmail.com>
Date: Tue, 7 Jan 2025 16:23:01 +0100
Subject: [PATCH] feat: add rdkit functions + add pubchem spectra and GHS
 functions

---
 src/chemenv/modal_app.py             | 174 ++++++++--
 src/chemenv/tools/cheminformatics.py | 453 ++++++++++++++++++++++++++-
 src/chemenv/tools/pubchem.py         | 335 +++++++++++++++-----
 3 files changed, 843 insertions(+), 119 deletions(-)

diff --git a/src/chemenv/modal_app.py b/src/chemenv/modal_app.py
index dff20ab..ad2bce9 100644
--- a/src/chemenv/modal_app.py
+++ b/src/chemenv/modal_app.py
@@ -1,25 +1,32 @@
-from modal import App, Image
+from modal import App
 from chemenv.tools.cheminformatics import (
+    rdkit_image,
+    mendeleev_image,
     get_tanimoto_similarity as _get_tanimoto_similarity,
     get_number_of_topologically_distinct_atoms as _get_topologically_distinct_atoms,
     get_element_info as _get_element_info,
+    _get_number_atoms,
+    _get_number_heavy_atoms,
+    _get_canonical_smiles,
+    _get_compound_charge,
+    _get_number_rings,
+    _get_number_aromatic_rings,
+    _get_aromatic_rings,
+    _get_ring_sizes,
+    _get_chiral_centers,
+    _get_number_chiral_centers,
+    _get_number_cis_bonds,
+    _get_number_trans_bonds,
+    _get_molecular_properties,
+    _has_substructure,
+    _get_substructure_count,
 )
 from chemenv.tools.pubchem import (
     PubChem,
     pubchem_image as _pubchem_image,
 )
-from chemenv.tools.pubchem import (
-    Smiles2Name as _Smiles2Name,
-    converters_image as _converters_image,
-)
 import os
 
-# Define the images
-rdkit_image = (
-    Image.debian_slim(python_version="3.12").pip_install("rdkit").pip_install("numpy")
-)
-mendeleev_image = Image.debian_slim().pip_install("mendeleev")
-
 chemenv_name = os.getenv("CHEMENV_NAME", "")
 if chemenv_name and not chemenv_name.startswith("-"):
     chemenv_name = f"-{chemenv_name}"
@@ -38,13 +45,88 @@ def get_number_of_topologically_distinct_atoms(*args, **kwargs):
     return _get_topologically_distinct_atoms(*args, **kwargs)
 
 
+@app.function(image=rdkit_image)
+def get_number_atoms(*args, **kwargs):
+    return _get_number_atoms(*args, **kwargs)
+
+
+@app.function(image=rdkit_image)
+def get_number_heavy_atoms(*args, **kwargs):
+    return _get_number_heavy_atoms(*args, **kwargs)
+
+
+@app.function(image=rdkit_image)
+def get_canonical_smiles(*args, **kwargs):
+    return _get_canonical_smiles(*args, **kwargs)
+
+
+@app.function(image=rdkit_image)
+def get_compound_charge(*args, **kwargs):
+    return _get_compound_charge(*args, **kwargs)
+
+
+@app.function(image=rdkit_image)
+def get_number_rings(*args, **kwargs):
+    return _get_number_rings(*args, **kwargs)
+
+
+@app.function(image=rdkit_image)
+def get_ring_sizes(*args, **kwargs):
+    return _get_ring_sizes(*args, **kwargs)
+
+
+@app.function(image=rdkit_image)
+def get_number_aromatic_rings(*args, **kwargs):
+    return _get_number_aromatic_rings(*args, **kwargs)
+
+
+@app.function(image=rdkit_image)
+def get_aromatic_rings(*args, **kwargs):
+    return _get_aromatic_rings(*args, **kwargs)
+
+
+@app.function(image=rdkit_image)
+def get_chiral_centers(*args, **kwargs):
+    return _get_chiral_centers(*args, **kwargs)
+
+
+@app.function(image=rdkit_image)
+def get_number_chiral_centers(*args, **kwargs):
+    return _get_number_chiral_centers(*args, **kwargs)
+
+
+@app.function(image=rdkit_image)
+def get_number_cis_bonds(*args, **kwargs):
+    return _get_number_cis_bonds(*args, **kwargs)
+
+
+@app.function(image=rdkit_image)
+def get_number_trans_bonds(*args, **kwargs):
+    return _get_number_trans_bonds(*args, **kwargs)
+
+
+@app.function(image=rdkit_image)
+def get_molecular_properties(*args, **kwargs):
+    return _get_molecular_properties(*args, **kwargs)
+
+
+@app.function(image=rdkit_image)
+def has_substructure(*args, **kwargs):
+    return _has_substructure(*args, **kwargs)
+
+
+@app.function(image=rdkit_image)
+def get_substructure_count(*args, **kwargs):
+    return _get_substructure_count(*args, **kwargs)
+
+
 @app.function(image=mendeleev_image)
 def get_element_info(*args, **kwargs):
     return _get_element_info(*args, **kwargs)
 
 
 @app.function(image=_pubchem_image)
-async def get_number_atoms(compound_id: str) -> int:
+async def get_number_atoms_pubchem(compound_id: str) -> int:
     pubchem = await PubChem.create(compound_id)
     number_atoms = await pubchem._get_number_atoms()
     if number_atoms is None:
@@ -53,7 +135,7 @@ async def get_number_atoms(compound_id: str) -> int:
 
 
 @app.function(image=_pubchem_image)
-async def get_isomeric_smiles(compound_id: str) -> str:
+async def get_isomeric_smiles_pubchem(compound_id: str) -> str:
     pubchem = await PubChem.create(compound_id)
     isomeric_smiles = await pubchem._get_isomeric_smiles()
     if isomeric_smiles is None:
@@ -62,7 +144,7 @@ async def get_isomeric_smiles(compound_id: str) -> str:
 
 
 @app.function(image=_pubchem_image)
-async def get_canonical_smiles(compound_id: str) -> str:
+async def get_canonical_smiles_pubchem(compound_id: str) -> str:
     pubchem = await PubChem.create(compound_id)
     smiles = await pubchem._get_canonical_smiles()
     if smiles is None:
@@ -71,7 +153,7 @@ async def get_canonical_smiles(compound_id: str) -> str:
 
 
 @app.function(image=_pubchem_image)
-async def get_compound_mass(compound_id: str) -> float:
+async def get_compound_mass_pubchem(compound_id: str) -> float:
     pubchem = await PubChem.create(compound_id)
     molecular_weight = await pubchem._get_compound_mass()
     if molecular_weight is None:
@@ -80,7 +162,7 @@ async def get_compound_mass(compound_id: str) -> float:
 
 
 @app.function(image=_pubchem_image)
-async def get_compound_charge(compound_id: str) -> int:
+async def get_compound_charge_pubchem(compound_id: str) -> int:
     pubchem = await PubChem.create(compound_id)
     charge = await pubchem._get_compound_charge()
     if charge is None:
@@ -89,7 +171,7 @@ async def get_compound_charge(compound_id: str) -> int:
 
 
 @app.function(image=_pubchem_image)
-async def get_compound_formula(compound_id: str) -> str:
+async def get_compound_formula_pubchem(compound_id: str) -> str:
     pubchem = await PubChem.create(compound_id)
     formula = await pubchem._get_compound_formula()
     if formula is None:
@@ -98,7 +180,7 @@ async def get_compound_formula(compound_id: str) -> str:
 
 
 @app.function(image=_pubchem_image)
-async def get_number_isomers(compound_id: str) -> int:
+async def get_number_isomers_pubchem(compound_id: str) -> int:
     pubchem = await PubChem.create(compound_id)
     number_isomers = await pubchem._get_number_isomers()
     if number_isomers is None:
@@ -107,7 +189,7 @@ async def get_number_isomers(compound_id: str) -> int:
 
 
 @app.function(image=_pubchem_image, timeout=86399)
-async def get_compound_isomers(*args, **kwargs):
+async def get_compound_isomers_pubchem(*args, **kwargs):
     pubchem = await PubChem.create(*args, **kwargs)
     data = await pubchem._get_compound_isomers()
     if data is None:
@@ -116,7 +198,7 @@ async def get_compound_isomers(*args, **kwargs):
 
 
 @app.function(image=_pubchem_image)
-async def get_number_heavy_atoms(compound_id: str) -> int:
+async def get_number_heavy_atoms_pubchem(compound_id: str) -> int:
     pubchem = await PubChem.create(compound_id)
     number_heavy_atoms = await pubchem._get_number_heavy_atoms()
     if number_heavy_atoms is None:
@@ -125,7 +207,7 @@ async def get_number_heavy_atoms(compound_id: str) -> int:
 
 
 @app.function(image=_pubchem_image)
-async def _get_number_chiral_atoms(compound_id: str) -> int:
+async def get_number_chiral_atoms_pubchem(compound_id: str) -> int:
     pubchem = await PubChem.create(compound_id)
     number_chiral_atoms = await pubchem._get_number_chiral_atoms()
     if number_chiral_atoms is None:
@@ -133,10 +215,46 @@ async def _get_number_chiral_atoms(compound_id: str) -> int:
     return number_chiral_atoms
 
 
-@app.function(image=_converters_image)
-async def get_iupac_name(smiles: str) -> str:
-    converter = _Smiles2Name(smiles)
-    name = await converter.get_name()
-    if name is None:
-        return ""
-    return name
+@app.function(image=_pubchem_image)
+async def get_c_nmr_spectra_pubchem(compound_id: str) -> dict:
+    pubchem = await PubChem.create(compound_id)
+    c_nmr_spectra = await pubchem._get_c_nmr_spectra()
+    if c_nmr_spectra is None:
+        raise ValueError("No C-NMR spectra found")
+    return c_nmr_spectra
+
+
+@app.function(image=_pubchem_image)
+async def get_h_nmr_spectra_pubchem(compound_id: str) -> dict:
+    pubchem = await PubChem.create(compound_id)
+    h_nmr_spectra = await pubchem._get_h_nmr_spectra()
+    if h_nmr_spectra is None:
+        raise ValueError("No H-NMR spectra found")
+    return h_nmr_spectra
+
+
+@app.function(image=_pubchem_image)
+async def get_uv_spectra_pubchem(compound_id: str) -> dict:
+    pubchem = await PubChem.create(compound_id)
+    uv_spectra = await pubchem._get_uv_spectra()
+    if uv_spectra is None:
+        raise ValueError("No UV spectra found")
+    return uv_spectra
+
+
+@app.function(image=_pubchem_image)
+async def get_ms_spectra_pubchem(compound_id: str) -> dict:
+    pubchem = await PubChem.create(compound_id)
+    ms_spectra = await pubchem._get_ms_spectra()
+    if ms_spectra is None:
+        raise ValueError("No MS spectra found")
+    return ms_spectra
+
+
+@app.function(image=_pubchem_image)
+async def get_ghs_classification_pubchem(compound_id: str) -> str:
+    pubchem = await PubChem.create(compound_id)
+    ghs_classification = await pubchem._get_ghs_classification()
+    if ghs_classification is None:
+        raise ValueError("No GHS classification found")
+    return ghs_classification
diff --git a/src/chemenv/tools/cheminformatics.py b/src/chemenv/tools/cheminformatics.py
index afffe19..c0001fa 100644
--- a/src/chemenv/tools/cheminformatics.py
+++ b/src/chemenv/tools/cheminformatics.py
@@ -1,3 +1,19 @@
+from modal import Image
+
+rdkit_image = (
+    Image.debian_slim(python_version="3.12").pip_install("rdkit").pip_install("numpy")
+)
+mendeleev_image = Image.debian_slim().pip_install("mendeleev")
+
+with rdkit_image.imports():
+    from rdkit import Chem, DataStructs
+    from rdkit.Chem import AllChem
+    import numpy as np
+
+with mendeleev_image.imports():
+    from mendeleev import element
+
+
 def get_tanimoto_similarity(s1: str, s2: str) -> float:
     """
     Calculate the Tanimoto similarity of two SMILES strings.
@@ -20,9 +36,6 @@ def get_tanimoto_similarity(s1: str, s2: str) -> float:
         >>> get_tanimoto_similarity("CCO", "CC")
         0.143
     """
-    from rdkit import Chem, DataStructs
-    from rdkit.Chem import AllChem
-
     try:
         mol1 = Chem.MolFromSmiles(s1)
         mol2 = Chem.MolFromSmiles(s2)
@@ -53,10 +66,6 @@ def get_number_of_topologically_distinct_atoms(smiles: str, atomic_number: int =
         >>> get_number_of_topologically_distinct_atoms("CCO", 6)
         2
     """
-
-    from rdkit import Chem
-    import numpy as np
-
     try:
         molecule = Chem.MolFromSmiles(smiles)
 
@@ -104,8 +113,6 @@ def get_element_info(identifier: str) -> dict:
         >>> get_element_info("H")["name"]
         'Hydrogen'
     """
-    from mendeleev import element
-
     try:
         # Try to get the element
         if isinstance(identifier, int) or identifier.isdigit():
@@ -130,3 +137,431 @@ def get_element_info(identifier: str) -> dict:
 
     except ValueError:
         raise ValueError(f"Error: '{identifier}' is not a valid element identifier.")
+
+
+def _get_number_atoms(smiles: str) -> int:
+    """
+    Get the number of atoms in the RDKit molecule parsed from a SMILES string.
+
+    Args:
+        smiles (str): The SMILES string of the molecule.
+
+    Returns:
+        int: Atom count of the parsed molecule; implicit hydrogens are not counted.
+
+    Raises:
+        ValueError: If the SMILES cannot be parsed or any other error occurs.
+
+    Example:
+        >>> _get_number_atoms("CCO")
+        3
+    """
+    try:
+        mol = Chem.MolFromSmiles(smiles)
+        if mol is None:
+            raise ValueError("Invalid SMILES string")
+        return mol.GetNumAtoms()
+    except Exception as e:
+        raise ValueError(f"Invalid SMILES string: {e}")
+
+
+def _get_number_heavy_atoms(smiles: str) -> int:
+    """
+    Get the number of heavy atoms in a molecule given its SMILES string.
+
+    Args:
+        smiles (str): The SMILES string of the molecule.
+
+    Returns:
+        int: The number of heavy atoms in the molecule.
+
+    Raises:
+        ValueError: If the SMILES string is invalid.
+
+    Example:
+        >>> _get_number_heavy_atoms("CCO")
+        3
+    """
+    try:
+        mol = Chem.MolFromSmiles(smiles)
+        if mol is None:
+            raise ValueError("Invalid SMILES string")
+        return mol.GetNumHeavyAtoms()
+    except Exception as e:
+        raise ValueError(f"Invalid SMILES string: {e}")
+
+
+def _get_canonical_smiles(smiles: str) -> str:
+    """
+    Get the canonical SMILES string of a molecule given its SMILES string.
+
+    Args:
+        smiles (str): The SMILES string of the molecule.
+
+    Returns:
+        str: The canonical SMILES string of the molecule.
+
+    Raises:
+        ValueError: If the SMILES string is invalid.
+
+    Example:
+        >>> _get_canonical_smiles("CCO")
+        'CCO'
+    """
+    try:
+        mol = Chem.MolFromSmiles(smiles)
+        if mol is None:
+            raise ValueError("Invalid SMILES string")
+        return Chem.MolToSmiles(mol)
+    except Exception as e:
+        raise ValueError(f"Invalid SMILES string: {e}")
+
+
+def _get_compound_charge(smiles: str) -> int:
+    """
+    Get the charge of a molecule given its SMILES string.
+
+    Args:
+        smiles (str): The SMILES string of the molecule.
+
+    Returns:
+        int: The charge of the molecule.
+
+    Raises:
+        ValueError: If the SMILES string is invalid.
+
+    Example:
+        >>> _get_compound_charge("CCO")
+        0
+    """
+    try:
+        mol = Chem.MolFromSmiles(smiles)
+        if mol is None:
+            raise ValueError("Invalid SMILES string")
+        return Chem.GetFormalCharge(mol)
+    except Exception as e:
+        raise ValueError(f"Invalid SMILES string: {e}")
+
+
+def _get_number_rings(smiles: str) -> int:
+    """
+    Get the number of rings in a molecule given its SMILES string.
+
+    Args:
+        smiles (str): The SMILES string of the molecule.
+
+    Returns:
+        int: The number of rings (aromatic or not) reported by RDKit's ring info.
+
+    Raises:
+        ValueError: If the SMILES string is invalid.
+
+    Example:
+        >>> _get_number_rings("c1ccccc1")
+        1
+    """
+    try:
+        mol = Chem.MolFromSmiles(smiles)
+        if mol is None:
+            raise ValueError("Invalid SMILES string")
+        rings = mol.GetRingInfo()  # ring info is a Mol method, not a Chem function
+        return rings.NumRings()
+    except Exception as e:
+        raise ValueError(f"Invalid SMILES string: {e}") from e
+
+
+def _get_ring_sizes(smiles: str) -> list:
+    """
+    Get the sizes of rings in a molecule given its SMILES string.
+
+    Args:
+        smiles (str): The SMILES string of the molecule.
+
+    Returns:
+        list: One integer per ring: the number of atoms in that ring.
+
+    Raises:
+        ValueError: If the SMILES string is invalid.
+
+    Example:
+        >>> _get_ring_sizes("C1CCCCC1")
+        [6]
+    """
+    try:
+        mol = Chem.MolFromSmiles(smiles)
+        if mol is None:
+            raise ValueError("Invalid SMILES string")
+        rings = mol.GetRingInfo()  # ring info is a Mol method, not a Chem function
+        return [len(ring) for ring in rings.AtomRings()]
+    except Exception as e:
+        raise ValueError(f"Invalid SMILES string: {e}") from e
+
+
+def _get_number_aromatic_rings(smiles: str) -> int:
+    """
+    Get the number of aromatic rings in a molecule given its SMILES string.
+
+    Args:
+        smiles (str): The SMILES string of the molecule.
+
+    Returns:
+        int: The number of rings whose atoms are all flagged aromatic.
+
+    Raises:
+        ValueError: If the SMILES string is invalid.
+
+    Example:
+        >>> _get_number_aromatic_rings("c1ccccc1")
+        1
+    """
+    try:
+        mol = Chem.MolFromSmiles(smiles)
+        if mol is None:
+            raise ValueError("Invalid SMILES string")
+        rings = mol.GetRingInfo()  # ring info is a Mol method, not a Chem function
+        return len(
+            [
+                ring
+                for ring in rings.AtomRings()
+                if all(mol.GetAtomWithIdx(idx).GetIsAromatic() for idx in ring)
+            ]
+        )
+    except Exception as e:
+        raise ValueError(f"Invalid SMILES string: {e}") from e
+
+
+def _get_aromatic_rings(smiles: str) -> list:
+    """
+    Get the aromatic rings in a molecule given its SMILES string.
+
+    Args:
+        smiles (str): The SMILES string of the molecule.
+
+    Returns:
+        list: One list of atom indices per ring whose atoms are all aromatic.
+
+    Raises:
+        ValueError: If the SMILES string is invalid.
+
+    Example:
+        >>> [sorted(ring) for ring in _get_aromatic_rings("c1ccccc1")]
+        [[0, 1, 2, 3, 4, 5]]
+    """
+    try:
+        mol = Chem.MolFromSmiles(smiles)
+        if mol is None:
+            raise ValueError("Invalid SMILES string")
+        rings = mol.GetRingInfo()  # ring info is a Mol method, not a Chem function
+        return [
+            list(ring)  # AtomRings() yields tuples; return plain lists as documented
+            for ring in rings.AtomRings()
+            if all(mol.GetAtomWithIdx(idx).GetIsAromatic() for idx in ring)
+        ]
+    except Exception as e:
+        raise ValueError(f"Invalid SMILES string: {e}") from e
+
+
+def _get_chiral_centers(smiles: str) -> list:
+    """
+    Get the chiral centers of a molecule given its SMILES string.
+
+    Args:
+        smiles (str): The SMILES string of the molecule.
+
+    Returns:
+        list: Result of RDKit's FindMolChiralCenters, unassigned centers included.
+
+    Raises:
+        ValueError: If the SMILES string is invalid.
+
+    Example:
+        >>> _get_chiral_centers("CCO")
+        []
+    """
+    try:
+        mol = Chem.MolFromSmiles(smiles)
+        if mol is None:
+            raise ValueError("Invalid SMILES string")
+        Chem.AssignStereochemistry(mol)
+        return Chem.FindMolChiralCenters(mol, includeUnassigned=True)
+    except Exception as e:
+        raise ValueError(f"Invalid SMILES string: {e}")
+
+
+def _get_number_chiral_centers(smiles: str) -> int:
+    """
+    Get the number of chiral centers in a molecule given its SMILES string.
+
+    Args:
+        smiles (str): The SMILES string of the molecule.
+
+    Returns:
+        int: The number of chiral centers in the molecule.
+
+    Raises:
+        ValueError: If the SMILES string is invalid.
+
+    Example:
+        >>> _get_number_chiral_centers("CCO")
+        0
+    """
+    try:
+        mol = Chem.MolFromSmiles(smiles)
+        if mol is None:
+            raise ValueError("Invalid SMILES string")
+        Chem.AssignStereochemistry(mol)
+        return len(Chem.FindMolChiralCenters(mol, includeUnassigned=True))
+    except Exception as e:
+        raise ValueError(f"Invalid SMILES string: {e}")
+
+
+def _get_number_cis_bonds(smiles: str) -> int:
+    """
+    Get the number of cis (Z) double bonds in a molecule given its SMILES string.
+
+    Args:
+        smiles (str): The SMILES string of the molecule.
+
+    Returns:
+        int: The number of bonds whose stereo label is Z or cis.
+
+    Raises:
+        ValueError: If the SMILES string is invalid.
+
+    Example:
+        >>> _get_number_cis_bonds("C/C=C/C")
+        0
+    """
+    try:
+        mol = Chem.MolFromSmiles(smiles)
+        if mol is None:
+            raise ValueError("Invalid SMILES string")
+        Chem.AssignStereochemistry(mol)
+        # RDKit's default perception labels double bonds read from SMILES as
+        # STEREOZ/STEREOE, so testing STEREOCIS alone never matches. Z is
+        # treated as cis here; STEREOCIS is still accepted in case it is set.
+        cis_labels = (Chem.BondStereo.STEREOZ, Chem.BondStereo.STEREOCIS)
+        return sum(
+            1 for bond in mol.GetBonds() if bond.GetStereo() in cis_labels
+        )
+    except Exception as e:
+        raise ValueError(f"Invalid SMILES string: {e}") from e
+
+
+def _get_number_trans_bonds(smiles: str) -> int:
+    """
+    Get the number of trans (E) double bonds in a molecule given its SMILES string.
+
+    Args:
+        smiles (str): The SMILES string of the molecule.
+
+    Returns:
+        int: The number of bonds whose stereo label is E or trans.
+
+    Raises:
+        ValueError: If the SMILES string is invalid.
+
+    Example:
+        >>> _get_number_trans_bonds("C/C=C/C")
+        1
+    """
+    try:
+        mol = Chem.MolFromSmiles(smiles)
+        if mol is None:
+            raise ValueError("Invalid SMILES string")
+        Chem.AssignStereochemistry(mol)
+        # RDKit's default perception labels double bonds read from SMILES as
+        # STEREOZ/STEREOE, so testing STEREOTRANS alone never matches. E is
+        # treated as trans here; STEREOTRANS is still accepted in case it is set.
+        trans_labels = (Chem.BondStereo.STEREOE, Chem.BondStereo.STEREOTRANS)
+        return sum(
+            1 for bond in mol.GetBonds() if bond.GetStereo() in trans_labels
+        )
+    except Exception as e:
+        raise ValueError(f"Invalid SMILES string: {e}") from e
+
+
+def _get_molecular_properties(smiles: str) -> dict:
+    """Get basic molecular properties.
+
+    Args:
+        smiles (str): SMILES string of the molecule
+
+    Returns:
+        dict: "logp", "tpsa", "molecular_weight" (exact mass), "rotatable_bonds", "hbd", "hba"
+
+    Raises:
+        ValueError: If the SMILES string is invalid
+
+    Example:
+        >>> props = _get_molecular_properties("CCO")  # props["hbd"] == 1
+    """
+    from rdkit.Chem import Descriptors  # submodule; `from rdkit import Chem` does not load it
+    try:
+        mol = Chem.MolFromSmiles(smiles)
+        if mol is None:
+            raise ValueError("Invalid SMILES string")
+        return {
+            "logp": Descriptors.MolLogP(mol),
+            "tpsa": Descriptors.TPSA(mol),
+            "molecular_weight": Descriptors.ExactMolWt(mol),
+            "rotatable_bonds": Descriptors.NumRotatableBonds(mol),
+            "hbd": Descriptors.NumHDonors(mol),
+            "hba": Descriptors.NumHAcceptors(mol),
+        }
+    except Exception as e:
+        raise ValueError(f"Invalid SMILES string: {e}") from e
+
+
+def _has_substructure(smiles: str, substructure_smarts: str) -> bool:
+    """Check if a molecule contains a specific substructure.
+
+    Args:
+        smiles (str): SMILES string of the molecule
+        substructure_smarts (str): SMARTS pattern of the substructure
+
+    Returns:
+        bool: True if substructure is present
+
+    Raises:
+        ValueError: If the SMILES or SMARTS pattern is invalid
+
+    Example:
+        >>> _has_substructure("CCO", "CO")
+        True
+    """
+    try:
+        mol = Chem.MolFromSmiles(smiles)
+        pattern = Chem.MolFromSmarts(substructure_smarts)
+        if mol is None or pattern is None:
+            raise ValueError("Invalid SMILES or SMARTS pattern")
+        return mol.HasSubstructMatch(pattern)
+    except Exception as e:
+        raise ValueError(f"Error in substructure matching: {e}")
+
+
+def _get_substructure_count(smiles: str, substructure_smarts: str) -> int:
+    """Get the count of a specific substructure in a molecule.
+
+    Args:
+        smiles (str): SMILES string of the molecule
+        substructure_smarts (str): SMARTS pattern of the substructure
+
+    Returns:
+        int: Number of occurrences of the substructure
+
+    Raises:
+        ValueError: If the SMILES string or SMARTS pattern is invalid
+
+    Example:
+        >>> _get_substructure_count("CCO", "C")
+        2
+    """
+    try:
+        mol = Chem.MolFromSmiles(smiles)
+        pattern = Chem.MolFromSmarts(substructure_smarts)
+        if mol is None or pattern is None:
+            raise ValueError("Invalid SMILES or SMARTS pattern")
+        return len(mol.GetSubstructMatches(pattern))
+    except Exception as e:
+        raise ValueError(f"Error in substructure matching: {e}")
diff --git a/src/chemenv/tools/pubchem.py b/src/chemenv/tools/pubchem.py
index 383974c..e40958d 100644
--- a/src/chemenv/tools/pubchem.py
+++ b/src/chemenv/tools/pubchem.py
@@ -43,6 +43,7 @@
     import pubchempy as pcp
     from rdkit import Chem
     from urllib.parse import quote
+    from time import sleep
     from loguru import logger
 
 
@@ -59,6 +60,7 @@ def __init__(self):
         """
         self.cid = None
         self.base_url = "https://pubchem.ncbi.nlm.nih.gov/rest/pug"
+        self.long_url = "https://pubchem.ncbi.nlm.nih.gov/rest/pug_view/data/compound/{cid}/JSON/?response_type=display&heading="
         logger.info(f"Initialized PubChem handler for CID {self.cid}")
 
     @classmethod
@@ -330,7 +332,7 @@ async def _get_number_isomers(self) -> Optional[int]:
         logger.info(f"Number of compound isomers for CID {self.cid}: {data}")
         return data
 
-    async def _get_compound_isomers(self) -> List[Optional[str]]:
+    async def _get_compound_isomers(self) -> List[str]:
         """
         Get the compound isomers for a compound from PubChem.
         This function can take some time depending on the number of isomers.
@@ -349,9 +351,19 @@ async def _get_compound_isomers(self) -> List[Optional[str]]:
             isomers_cids = (await self.get_data_from_url(url))["IdentifierList"]["CID"]
             data = []
             for i in isomers_cids:
+                sleep(0.5)
                 self.cid = i
-                # Isomeric SMILES to capture enantiomers
-                data.append(await self._get_isomeric_smiles())
+                try:
+                    # Isomeric SMILES to capture enantiomers
+                    smiles = await self._get_isomeric_smiles()
+                    if smiles:
+                        data.append(smiles)
+                except ValueError as ve:
+                    logger.warning(f"Could not get SMILES for CID {i}: {ve}")
+                    continue
+                except Exception as e:
+                    logger.error(f"Unexpected error getting SMILES for CID {i}: {e}")
+                    continue
         except Exception as e:
             logger.error(f"No compound isomers found. {e}")
             raise ValueError(f"No compound isomers found. {e}")
@@ -402,106 +414,265 @@ async def _get_number_chiral_atoms(self) -> Optional[int]:
         logger.info(f"Number of chiral atoms for CID {self.cid}: {data}")
         return data
 
+    async def _format_long_url(self, heading: str) -> dict:
+        """
+        Fetch the PUG View data for one heading of the current CID.
 
-class Smiles2Name:
-    def __init__(self, smiles):
-        """Initialize Name2Smiles converter with a chemical compound name.
-        Takes a chemical compound name and prepares it for API queries by URL-encoding.
-        Sets default timeout for API requests to 10 seconds.
         Args:
-            name (str): Chemical compound name to convert to SMILES notation.
-                Should be a valid IUPAC or common chemical name.
+            heading (str): Unencoded heading name; it is URL-quoted here
+
+        Returns:
+            dict: Data returned by ``get_data_from_url`` for that heading
+
         Raises:
-            ValueError: If the name cannot be URL-encoded or contains invalid characters.
+            ValueError: If the request fails (the original error is chained)
+
         Example:
-            >>> converter = Name2Smiles("ethanol")
-            >>> await converter.get_smiles()
-            'CCO'
+            >>> await self._format_long_url("Mass Spectrometry")
+            {'Information': [{'Name': 'Mass bank ID', 'ReferenceNumber': 1, 'Value': {'StringWithMarkup': [{'String...
         """
-        mol = Chem.MolFromSmiles(smiles)
-        if mol is None:
-            raise ValueError(f"Invalid SMILES: {smiles}")
+        url = self.long_url.format(cid=self.cid) + quote(heading)
+        logger.info(f"Getting '{heading}' data for CID {self.cid}")
+        try:
+            return await self.get_data_from_url(url)
+        except Exception as e:
+            logger.error(f"Failed to get '{heading}' data: {e}")
+            raise ValueError(f"Failed to get '{heading}' data: {e}") from e
 
-        self.smiles = smiles
-        self.timeout = 10  # seconds
+    async def _format_ms_spectra(self, data):
+        """
+        Collect the recognised MS fields from one PubChem section.
 
-    @backoff.on_exception(
-        backoff.expo,
-        (aiohttp.ClientError, asyncio.TimeoutError),
-        max_time=10,
-        logger=logger,
-    )
-    async def pubchem(self) -> Optional[str]:
+        Args:
+            data (dict): Mapping with an "Information" list of spectrum rows
+
+        Returns:
+            dict: Field name to value; "Top Peaks" holds a list of strings
+
+        Raises:
+            ValueError: If ``data`` has no "Information" entry
+        """
+        try:
+            information = data["Information"]
+        except KeyError:
+            raise ValueError("No MS spectra found")
+
+        field_mapping = {
+            "Mass bank ID": "MoNA ID",
+            "Spectra type": "MS Category",
+            "MS Type": "MS Type",
+            "MS Level": "MS Level",
+            "Instrument": "Instrument",
+            "Instrument Type": "Instrument Type",
+            "Ionization Mode": "Ionization Mode",
+            "Top Peaks": "StringWithMarkup",
+        }
+
+        results = {}
+
+        counter = 0
+        for info in information:
+            if counter > 5:
+                return results
+            ref_num = info.get("ReferenceNumber")
+            if not ref_num:
+                continue
+
+            value = info.get("Value", {})
+
+            # Fill only the field this row names (output key or its alias).
+            name = info.get("Name")
+            markup = value.get("StringWithMarkup", [])
+            strings = [item["String"] for item in markup if "String" in item]
+            for orig_key, mapped_key in field_mapping.items():
+                if name not in (orig_key, mapped_key):
+                    continue
+                if mapped_key == "StringWithMarkup":
+                    results[orig_key] = strings
+                else:
+                    results[orig_key] = strings[0] if strings else None
+
+            counter += 1
+
+        return results
+
+    async def _format_nmr_spectra(self, data):
         """
-        Query PubChem API to get IUPAC name from SMILES.
+        Group the NMR rows of one PubChem section by reference number.
+
+        Args:
+            data (dict): Mapping with an "Information" list of spectrum rows
+
         Returns:
-            Optional[str]: IUPAC name if found, None if the query failed.
+            dict: Maps each reference number to its recognised NMR fields
+
         Raises:
-            aiohttp.ClientError: If the API request fails.
-            asyncio.TimeoutError: If the request times out.
+            ValueError: If ``data`` has no "Information" entry
         """
-        smiles = quote(self.smiles)
-        url = f"https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/smiles/{smiles}/property/IUPACName/TXT"
-        async with aiohttp.ClientSession() as session:
-            try:
-                async with session.get(url, timeout=self.timeout) as response:
-                    if response.status == 200:
-                        return await response.text()
-                    raise ValueError(
-                        f"PubChem API failed with status {response.status}"
-                    )
-            except Exception as e:
-                raise e
+        try:
+            information = data["Information"]
+        except KeyError:
+            raise ValueError("No NMR spectra found")
+
+        # Maps the row names used by PubChem to the keys of the result dict
+        field_mapping = {
+            "Instrument Type": "instrument",
+            "Frequency": "frequency",
+            "Solvent": "solvent",
+            "pH": "ph",
+            "Shifts [ppm]:Intensity": "shifts",
+        }
+
+        results = {}
+
+        for info in information:
+            ref_num = info.get("ReferenceNumber")
+            if not ref_num:
+                continue
 
-    @backoff.on_exception(
-        backoff.expo,
-        (aiohttp.ClientError, asyncio.TimeoutError),
-        max_time=10,
-        logger=logger,
-    )
-    async def cactus(self) -> Optional[str]:
+            name = info.get("Name")
+            # `or [{}]` also covers an empty list, which would break `[0]`.
+            markup = info.get("Value", {}).get("StringWithMarkup") or [{}]
+            string_value = markup[0].get("String")
+
+            if not (name and string_value):
+                continue
+
+            if ref_num not in results:
+                results[ref_num] = {}
+
+            if name in field_mapping:
+                results[ref_num][field_mapping[name]] = string_value
+
+        return results
+
+    async def _get_c_nmr_spectra(self):
         """
-        Query CACTUS API to get IUPAC name from SMILES.
+        Get the 13C NMR spectra for the current CID from PubChem.
+
         Returns:
-            Optional[str]: IUPAC name if found, None if the query failed.
+            dict: Result of ``_format_nmr_spectra`` for "13C NMR Spectra"
+
         Raises:
-            aiohttp.ClientError: If the API request fails.
-            asyncio.TimeoutError: If the request times out.
+            ValueError: If the data could not be retrieved or formatted
         """
-        inchi = Chem.MolToInchi(Chem.MolFromSmiles(self.smiles))
-        url = f"https://cactus.nci.nih.gov/chemical/structure/{inchi}/iupac_name"
+        try:
+            data = await self._format_long_url("13C NMR Spectra")
+            return await self._format_nmr_spectra(data)
+        except Exception as e:
+            raise ValueError(f"No C-NMR spectra found. {e}") from e
 
-        async with aiohttp.ClientSession() as session:
-            try:
-                async with session.get(url, timeout=self.timeout) as response:
-                    if response.status == 200:
-                        return await response.text()
-                    raise ValueError(f"CACTUS API failed with status {response.status}")
-            except Exception as e:
-                raise e
+    async def _get_h_nmr_spectra(self):
+        """
+        Get the 1H NMR spectra for the current CID from PubChem.
+
+        Returns:
+            dict: Result of ``_format_nmr_spectra`` for "1H NMR Spectra"
+
+        Raises:
+            ValueError: If the data could not be retrieved or formatted
+        """
+        try:
+            data = await self._format_long_url("1H NMR Spectra")
+            return await self._format_nmr_spectra(data)
+        except Exception as e:
+            raise ValueError(f"No 1H NMR spectra found. {e}") from e
 
-    async def get_name(self) -> Optional[str]:
+    async def _get_uv_spectra(self):
         """
-        Query multiple chemical APIs in parallel to get IUPAC name.
-        Attempts to retrieve the IUPAC name by querying multiple chemical databases
-        concurrently (CACTUS and PubChem). Returns the first successful result.
+        Get the UV absorption maxima for the current CID from PubChem.
+
         Returns:
-            str: The IUPAC name of the chemical compound.
+            str: One "Reference N:" block per reference with matching rows
+
         Raises:
-            ValueError: If no name could be found in any of the chemical databases.
+            ValueError: If the data is missing or has no matching rows
         """
-        tasks = [
-            self.cactus(),
-            self.pubchem(),
-        ]
+        data = await self._format_long_url("UV Spectra")
+        results = {}
+        try:
+            for info in data["Information"]:
+                ref_num = info["ReferenceNumber"]
+                string_value = info["Value"]["StringWithMarkup"][0]["String"]
 
-        for result in asyncio.as_completed(tasks):
-            try:
-                name = await result
-                if name:
-                    return name.strip()
-            except Exception:
-                continue
+                # Decide relevance before touching `results`, so references
+                # without absorption maxima never yield an empty entry.
+                text_upper = string_value.upper()
+                if "MAX ABSORPTION" not in text_upper and "UV MAX" not in text_upper:
+                    continue
+
+                if ref_num in results:
+                    results[ref_num] += "\n" + string_value
+                else:
+                    results[ref_num] = string_value
+
+            if not results:
+                raise ValueError("No UV spectra found")
+
+            output = []
+            for ref_num, value in sorted(results.items()):
+                output.append(f"Reference {ref_num}:\n{value}")
+
+            return "\n\n".join(output)
+
+        except Exception as e:
+            raise ValueError("No UV spectra found") from e
+
+    async def _get_ms_spectra(self):
+        """
+        Get the MS spectra for the current CID from PubChem.
+
+        Returns:
+            dict: Maps each section's TOCHeading to its formatted MS fields
+
+        Raises:
+            ValueError: If the data could not be retrieved or formatted
+        """
+        try:
+            data = await self._format_long_url("Mass Spectrometry")
+            results = {}
+            for section in data["Record"]["Section"]:
+                title = section["TOCHeading"]
+                spectra = await self._format_ms_spectra(section)
+                if spectra:
+                    results[title] = spectra
+
+            if not results:
+                raise ValueError("No MS spectra found")
+
+            return results
+
+        except Exception as e:
+            raise ValueError("No MS spectra found") from e
+
+    async def _get_ghs_classification(self):
+        """
+        Get the GHS hazard statements for the current CID from PubChem.
+
+        Returns:
+            dict: Maps each reference number to its list of hazard statements
+
+        Raises:
+            ValueError: If the data could not be retrieved or parsed
+        """
+        data = await self._format_long_url("GHS Classification")
+        logger.info(f"Getting GHS classification for CID {self.cid}")
+        try:
+            information_list = data["Record"]["Section"][0]["Section"][0]["Section"][0][
+                "Information"
+            ]
+
+            hazard_statements = {}
+
+            for info in information_list:
+                if info.get("Name") == "GHS Hazard Statements":
+                    ref_number = info.get("ReferenceNumber")
+                    string_values = [
+                        markup["String"] for markup in info["Value"]["StringWithMarkup"]
+                    ]
+                    hazard_statements[ref_number] = string_values
 
-        logger.error(f"Could not find name for {self.smiles}")
-        raise ValueError(f"Could not find name for {self.smiles}")
+            return hazard_statements
+        except Exception as e:
+            logger.error(f"Failed to get GHS classification: {e}")
+            raise ValueError("Failed to get GHS classification") from e