ur-whitelab · kjappelbaum · Jul 15, 2024 · Jul 15, 2024 · Aug 10, 2024
diff --git a/tools/cheminf/cheminf.py b/tools/cheminf/cheminf.py
@@ -5,6 +5,7 @@
 with rdkit_image.imports():
     from rdkit import Chem, DataStructs
     from rdkit.Chem import AllChem
+    import numpy as np
 
 
 app = App()
@@ -21,3 +22,51 @@ def tanimoto(s1: str, s2: str) -> float:
         return DataStructs.TanimotoSimilarity(fp1, fp2)
     except (TypeError, ValueError, AttributeError) as e:
         raise ValueError("Invalid SMILES strings") from e
+
+
+@app.function(image=rdkit_image)
+def get_number_of_topologically_distinct_atoms(smiles: str, atomic_number: int = 1):
+    """Return the number of unique `element` environments based on environmental topology.
+    This corresponds to the number of peaks one could maximally observe in an NMR spectrum.
+
+    Example:
+
+        get_number_of_topologically_distinct_atoms("CCO", 6)
+        # Output: 2
+        # Explanation: There are two unique carbon environments in ethanol.
+
+        get_number_of_topologically_distinct_atoms("CCO", 1)
+        # Output: 3
+        # Explanation: There are three unique hydrogen environments in ethanol.
+    Args:
+        smiles (str): SMILES string
+        atomic_number (int, optional): Atomic number. Defaults to 1.
+
+    Returns:
+        int: Number of unique environments.
+    """
+    try:
+        molecule = Chem.MolFromSmiles(smiles)
+
+        if atomic_number == 1:
+            # add hydrogen
+            mol = Chem.AddHs(molecule)
+        else:
+            mol = molecule
+
+        # Get unique canonical atom rankings
+        atom_ranks = list(Chem.rdmolfiles.CanonicalRankAtoms(mol, breakTies=False))
+
+        # Select the unique element environments
+        atom_ranks = np.array(atom_ranks)
+
+        # Atom indices
+        atom_indices = [
+            atom.GetIdx()
+            for atom in mol.GetAtoms()
+            if atom.GetAtomicNum() == atomic_number
+        ]
+        # Count them
+        return len(set(atom_ranks[atom_indices]))
+    except (TypeError, ValueError, AttributeError):
+        return "Error: Not a valid SMILES string"