Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: implement 1H NMR endpoint #4

Closed
wants to merge 3 commits into from
Closed
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
49 changes: 49 additions & 0 deletions tools/cheminf/cheminf.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
with rdkit_image.imports():
from rdkit import Chem, DataStructs
from rdkit.Chem import AllChem
import numpy as np


# Application object; the functions below are registered on it via @app.function.
app = App()
Expand All @@ -21,3 +22,51 @@ def tanimoto(s1: str, s2: str) -> float:
return DataStructs.TanimotoSimilarity(fp1, fp2)
except (TypeError, ValueError, AttributeError) as e:
raise ValueError("Invalid SMILES strings") from e


@app.function(image=rdkit_image)
def get_number_of_topologically_distinct_atoms(smiles: str, atomic_number: int = 1):
    """Return the number of topologically distinct atoms of one element.

    Atoms are grouped by their canonical rank computed without tie breaking,
    so atoms that share a rank are counted as one environment. The count
    corresponds to the number of peaks one could maximally observe in an NMR
    spectrum of that element.

    Example:

        get_number_of_topologically_distinct_atoms("CCO", 6)
        # Output: 2
        # Explanation: There are two unique carbon environments in ethanol.

        get_number_of_topologically_distinct_atoms("CCO", 1)
        # Output: 3
        # Explanation: There are three unique hydrogen environments in ethanol.

    Args:
        smiles (str): SMILES string of the molecule.
        atomic_number (int, optional): Atomic number of the element whose
            environments are counted. Defaults to 1 (hydrogen).

    Returns:
        int: Number of unique environments of the requested element, or the
        string "Error: Not a valid SMILES string" when the SMILES cannot be
        processed (kept as a return value for backward compatibility).
    """
    invalid_smiles = "Error: Not a valid SMILES string"
    try:
        molecule = Chem.MolFromSmiles(smiles)
        # A failed parse yields None instead of raising; handle it explicitly
        # rather than relying on a later RDKit call to choke on None.
        if molecule is None:
            return invalid_smiles

        # Hydrogens must be explicit atoms in the graph to be ranked and
        # counted; for any other element the parsed molecule is used as is.
        mol = Chem.AddHs(molecule) if atomic_number == 1 else molecule

        # breakTies=False leaves equivalent atoms on the same rank.
        atom_ranks = list(Chem.rdmolfiles.CanonicalRankAtoms(mol, breakTies=False))

        # Distinct ranks among atoms of the requested element.
        distinct_ranks = {
            atom_ranks[atom.GetIdx()]
            for atom in mol.GetAtoms()
            if atom.GetAtomicNum() == atomic_number
        }
        return len(distinct_ranks)
    except (TypeError, ValueError, AttributeError):
        return invalid_smiles
Loading