Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: allow ccd residues with missing coords #730

Merged
merged 6 commits into from
Jan 8, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 11 additions & 2 deletions src/biotite/structure/info/atoms.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
# fmt: on


def residue(res_name):
def residue(res_name, allow_missing_coord=False):
"""
Get an atom array, representing the residue with the given name.

Expand All @@ -30,6 +30,11 @@ def residue(res_name):
----------
res_name : str
The up to 3-letter name of the residue.
allow_missing_coord : bool, optional
Whether to allow missing coordinate values in the residue.
If ``True``, these will be represented as ``nan`` values.
If ``False``, a ``ValueError`` is raised when missing coordinates
are encountered.

Returns
-------
Expand Down Expand Up @@ -74,7 +79,11 @@ def residue(res_name):
from biotite.structure.io.pdbx import get_component

try:
component = get_component(get_ccd(), res_name=res_name)
component = get_component(
get_ccd(),
res_name=res_name,
allow_missing_coord=allow_missing_coord,
)
except KeyError:
raise KeyError(f"No atom information found for residue '{res_name}' in CCD")
component.hetero[:] = res_name not in NON_HETERO_RESIDUES
Expand Down
32 changes: 25 additions & 7 deletions src/biotite/structure/io/pdbx/convert.py
Original file line number Diff line number Diff line change
Expand Up @@ -1181,7 +1181,13 @@ def _filter_canonical_links(array, bond_array):
) # fmt: skip


def get_component(pdbx_file, data_block=None, use_ideal_coord=True, res_name=None):
def get_component(
pdbx_file,
data_block=None,
use_ideal_coord=True,
res_name=None,
allow_missing_coord=False,
):
"""
Create an :class:`AtomArray` for a chemical component from the
``chem_comp_atom`` and, if available, the ``chem_comp_bond``
Expand Down Expand Up @@ -1209,6 +1215,11 @@ def get_component(pdbx_file, data_block=None, use_ideal_coord=True, res_name=Non
In this case, the component with the given residue name is
read.
By default, all rows would be read in this case.
allow_missing_coord : bool, optional
Whether to allow missing coordinate values in components.
If ``True``, these will be represented as ``nan`` values.
If ``False``, a ``ValueError`` is raised when missing coordinates
are encountered.

Returns
-------
Expand Down Expand Up @@ -1299,7 +1310,8 @@ def get_component(pdbx_file, data_block=None, use_ideal_coord=True, res_name=Non
else:
raise
array.coord = _parse_component_coordinates(
[atom_category[field] for field in alt_coord_fields]
[atom_category[field] for field in alt_coord_fields],
allow_missing=allow_missing_coord,
)

try:
Expand Down Expand Up @@ -1330,14 +1342,20 @@ def get_component(pdbx_file, data_block=None, use_ideal_coord=True, res_name=Non
return array


def _parse_component_coordinates(coord_columns, allow_missing=False):
    """
    Assemble the coordinate array of a component from its coordinate
    columns.

    Parameters
    ----------
    coord_columns : sequence
        The columns holding the coordinate values, one per axis.
        Each column must provide a ``mask`` attribute (``None`` or an
        object whose ``array`` marks masked rows) and an ``as_array()``
        method accepting a dtype and a ``masked_value``.
        The column at index ``i`` fills ``coord[:, i]``.
    allow_missing : bool, optional
        If ``True``, masked (missing) values are set to ``nan`` and a
        single ``UserWarning`` is emitted.
        If ``False``, a ``ValueError`` is raised as soon as any column
        contains a masked value.

    Returns
    -------
    coord : ndarray, shape=(n,3), dtype=np.float32
        The coordinates, where ``n`` is the length of the first column.

    Raises
    ------
    ValueError
        If coordinates are missing and `allow_missing` is ``False``.
    """
    coord = np.zeros((len(coord_columns[0]), 3), dtype=np.float32)

    # Inspect all columns up front, so the warning is emitted only once
    # per call instead of once per affected axis
    has_missing = any(
        column.mask is not None and column.mask.array.any()
        for column in coord_columns
    )
    if has_missing:
        if not allow_missing:
            raise ValueError(
                "Missing coordinates for some atoms",
            )
        warnings.warn(
            "Missing coordinates for some atoms. Those will be set to nan",
            UserWarning,
        )

    for i, column in enumerate(coord_columns):
        coord[:, i] = column.as_array(np.float32, masked_value=np.nan)
    return coord


Expand Down
21 changes: 21 additions & 0 deletions tests/structure/test_info.py
Original file line number Diff line number Diff line change
Expand Up @@ -192,3 +192,24 @@ def test_set_ccd_path(fake_ccd_path):

# The new fake CCD has only a single compound
assert strucinfo.all_residues() == ["FOO"]


@pytest.mark.parametrize(
    ("res_name", "allow_missing_coord"),
    [
        ("ALA", False),
        ("A1IQW", True),
        ("RRE", True),
    ],
)
def test_residue(res_name, allow_missing_coord):
    """
    Check that :func:`residue()` yields a non-empty atom array carrying
    the requested residue name.

    - ``ALA`` is a standard amino acid and is requested with
      ``allow_missing_coord=False``.
    - ``A1IQW`` and ``RRE`` are requested with
      ``allow_missing_coord=True``, as they were only obtainable with
      this setting as of Jan 6, 2025.
    """
    atoms = strucinfo.residue(res_name, allow_missing_coord=allow_missing_coord)

    assert isinstance(atoms, struc.AtomArray)
    assert atoms.array_length() > 0
    # Every atom must be annotated with the requested residue name
    assert np.all(atoms.res_name == res_name)
Loading