Skip to content

Commit

Permalink
changing Intensities (work in progress)
Browse files Browse the repository at this point in the history
store isym operator in Refln

changed functions that get intensities from Mtz/Refln,
for instance, this:
  intensities.read_merged_intensities_from_mtz(mtz);
was replaced with:
  intensities.read_mtz(mtz, gemmi::DataType::MergedAM);

MergedAM = anomalous intensities if available, otherwise mean
MergedMA = mean intensities if available, otherwise anomalous

and various related changes
  • Loading branch information
wojdyr committed Feb 10, 2025
1 parent 17f8333 commit 5c50955
Show file tree
Hide file tree
Showing 10 changed files with 116 additions and 84 deletions.
2 changes: 1 addition & 1 deletion docs/gemmi-help.txt
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ Commands:
map print info or modify a CCP4 map
map2sf transform CCP4 map to map coefficients (in MTZ or mmCIF)
mask make a bulk-solvent mask in the CCP4 format
merge merge intensities from multi-record reflection file
merge merge or compare intensities from reflection file(s)
mondiff compare two monomer CIF files
mtz print info about MTZ reflection file
mtz2cif convert MTZ to structure factor mmCIF
Expand Down
85 changes: 54 additions & 31 deletions include/gemmi/intensit.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,8 @@ GEMMI_DLL std::string read_staraniso_b_from_mtz(const Mtz& mtz, SMat33<double>&
struct GEMMI_DLL Intensities {
struct Refl {
Miller hkl;
short isign; // 1 for I(+), -1 for I(-), 0 for mean
std::int8_t isign; // 1 for I(+), -1 for I(-), 0 for mean or unmerged
std::int8_t isym; // for unmerged data: encodes symmetry op like M/ISYM in MTZ
short nobs;
double value;
double sigma;
Expand Down Expand Up @@ -63,14 +64,17 @@ struct GEMMI_DLL Intensities {
double unit_cell_rmsd[6] = {0., 0., 0., 0., 0., 0.};
double wavelength;
DataType type = DataType::Unknown;
std::vector<Op> isym_ops;
AnisoScaling staraniso_b;

static const char* type_str(DataType data_type) {
switch (data_type) {
case DataType::Unknown: return "n/a";
case DataType::Unmerged: return "I";
case DataType::Mean: return "<I>";
case DataType::Anomalous: return "I+/I-";
case DataType::MergedAM:
case DataType::MergedMA:
case DataType::Unknown: return "n/a";
}
unreachable();
}
Expand All @@ -85,6 +89,13 @@ struct GEMMI_DLL Intensities {
// pre: both are sorted
Correlation calculate_correlation(const Intensities& other) const;

void add_if_valid(const Miller& hkl, int8_t isign, int8_t isym, double value, double sigma) {
// XDS marks rejected reflections with negative sigma.
// Sigma 0.0 rarely happens (e.g. 5tkn), but is also problematic.
if (!std::isnan(value) && sigma > 0)
data.push_back({hkl, isign, isym, /*nobs=*/0, value, sigma});
}

void remove_systematic_absences() {
if (!spacegroup)
return;
Expand All @@ -96,52 +107,64 @@ struct GEMMI_DLL Intensities {

void merge_in_place(DataType data_type);

// for unmerged centric reflections set isign=1.
void switch_to_asu_indices(bool merged=false);
void switch_to_asu_indices();

void read_unmerged_intensities_from_mtz(const Mtz& mtz);
void read_mean_intensities_from_mtz(const Mtz& mtz);
// with check_complete=true, throw if anomalous data is null where it shouldn't be
void read_anomalous_intensities_from_mtz(const Mtz& mtz, bool check_complete=false);

void read_merged_intensities_from_mtz(const Mtz& mtz) {
if (mtz.column_with_label("I(+)"))
read_anomalous_intensities_from_mtz(mtz, true);
else
read_mean_intensities_from_mtz(mtz);
}

void read_mtz(const Mtz& mtz, DataType data_type) {
switch (data_type) {
case DataType::Unmerged: read_unmerged_intensities_from_mtz(mtz); break;
case DataType::Mean: read_mean_intensities_from_mtz(mtz); break;
case DataType::Anomalous: read_anomalous_intensities_from_mtz(mtz); break;
case DataType::Unknown: assert(0);
bool check_anom_complete = false;
if (data_type == DataType::Unknown)
data_type = mtz.batches.empty() ? DataType::MergedMA : DataType::Unmerged;
if (data_type == DataType::MergedAM) {
data_type = mtz.iplus_column() ? DataType::Anomalous : DataType::Mean;
// if I(+) and I(-) is empty where IMEAN is not, throw error
check_anom_complete = true;
}
if (data_type == DataType::MergedMA)
data_type = mtz.imean_column() ? DataType::Mean : DataType::Anomalous;

if (data_type == DataType::Unmerged)
read_unmerged_intensities_from_mtz(mtz);
else if (data_type == DataType::Mean)
read_mean_intensities_from_mtz(mtz);
else // (data_type == DataType::Anomalous)
read_anomalous_intensities_from_mtz(mtz, check_anom_complete);
}

void read_unmerged_intensities_from_mmcif(const ReflnBlock& rb);
void read_mean_intensities_from_mmcif(const ReflnBlock& rb);
void read_anomalous_intensities_from_mmcif(const ReflnBlock& rb, bool check_complete=false);

void read_merged_intensities_from_mmcif(const ReflnBlock& rb) {
if (rb.find_column_index("pdbx_I_plus") != -1)
read_anomalous_intensities_from_mmcif(rb, true);
else if (rb.find_column_index("intensity_meas") != -1)
read_mean_intensities_from_mmcif(rb);
else
fail("Intensities not found in the mmCIF file, block ", rb.block.name,
" has neither intensity_meas nor pdbx_I_plus/minus");
}

void read_f_squared_from_mmcif(const ReflnBlock& rb);

void read_mmcif(const ReflnBlock& rb, DataType data_type) {
switch (data_type) {
case DataType::Unmerged: read_unmerged_intensities_from_mmcif(rb); break;
case DataType::Mean: read_mean_intensities_from_mmcif(rb); break;
case DataType::Anomalous: read_anomalous_intensities_from_mmcif(rb); break;
case DataType::Unknown: break;
bool check_anom_complete = false;
if (data_type == DataType::Unknown)
data_type = rb.is_unmerged() ? DataType::Unmerged : DataType::MergedMA;

if (data_type == DataType::MergedAM || data_type == DataType::MergedMA) {
bool has_anom = rb.find_column_index("pdbx_I_plus") != -1;
bool has_mean = rb.find_column_index("intensity_meas") != -1;
if (!has_anom && !has_mean)
fail("Intensities not found in the mmCIF file, block ", rb.block.name,
" has neither intensity_meas nor pdbx_I_plus/minus");
if (data_type == DataType::MergedAM) {
data_type = has_anom ? DataType::Anomalous : DataType::Mean;
// if both I(+) and I(-) is empty where IMEAN has value, throw error
check_anom_complete = true;
}
if (data_type == DataType::MergedMA)
data_type = has_mean ? DataType::Mean : DataType::Anomalous;
}
if (data_type == DataType::Unmerged)
read_unmerged_intensities_from_mmcif(rb);
else if (data_type == DataType::Mean)
read_mean_intensities_from_mmcif(rb);
else // (data_type == DataType::Anomalous)
read_anomalous_intensities_from_mmcif(rb, check_anom_complete);
}

void read_unmerged_intensities_from_xds(const XdsAscii& xds);
Expand Down
5 changes: 4 additions & 1 deletion include/gemmi/refln.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,10 @@

namespace gemmi {

enum class DataType { Unknown, Unmerged, Mean, Anomalous };
// If used to request a particular data type:
// MergedMA = Mean if available, otherwise Anomalous,
// MergedAM = Anomalous if available, otherwise Mean.
enum class DataType { Unknown, Unmerged, Mean, Anomalous, MergedMA, MergedAM };

struct ReflnBlock {
cif::Block block;
Expand Down
11 changes: 8 additions & 3 deletions include/gemmi/symmetry.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@

#include "fail.hpp" // for fail, unreachable

// we use brace elision with std:array's
// we use brace elision for Op::Rot = std:array<std::array<int,3>,3>
#ifdef __clang__
# pragma clang diagnostic push
# pragma clang diagnostic ignored "-Wmissing-braces"
Expand Down Expand Up @@ -1456,9 +1456,9 @@ struct ReciprocalAsu {

/// Returns hkl in asu and MTZ ISYM - 2*n-1 for reflections in the positive
/// asu (I+ of a Friedel pair), 2*n for reflections in the negative asu (I-).
std::pair<Op::Miller,int> to_asu(const Op::Miller& hkl, const GroupOps& gops) const {
std::pair<Op::Miller,int> to_asu(const Op::Miller& hkl, const std::vector<Op>& sym_ops) const {
int isym = 0;
for (const Op& op : gops.sym_ops) {
for (const Op& op : sym_ops) {
++isym;
Op::Miller new_hkl = op.apply_to_hkl_without_division(hkl);
if (is_in(new_hkl))
Expand All @@ -1470,6 +1470,11 @@ struct ReciprocalAsu {
}
fail("Oops, maybe inconsistent GroupOps?");
}

std::pair<Op::Miller,int> to_asu(const Op::Miller& hkl, const GroupOps& gops) const {
return to_asu(hkl, gops.sym_ops);
}

/// Similar to to_asu(), but the second returned value is sign: true for + or centric
std::pair<Op::Miller,bool> to_asu_sign(const Op::Miller& hkl, const GroupOps& gops) const {
std::pair<Op::Miller,bool> neg = {{0,0,0}, true};
Expand Down
2 changes: 1 addition & 1 deletion prog/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ SubCmd subcommands[] = {
CMD(map, "print info or modify a CCP4 map"),
CMD(map2sf, "transform CCP4 map to map coefficients (in MTZ or mmCIF)"),
CMD(mask, "make a bulk-solvent mask in the CCP4 format"),
CMD(merge, "merge intensities from multi-record reflection file"),
CMD(merge, "merge or compare intensities from reflection file(s)"),
CMD(mondiff, "compare two monomer CIF files"),
CMD(mtz, "print info about MTZ reflection file"),
CMD(mtz2cif, "convert MTZ to structure factor mmCIF"),
Expand Down
5 changes: 3 additions & 2 deletions prog/mtz2cif.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
#include <gemmi/intensit.hpp> // for Intensities
#include <gemmi/read_cif.hpp> // for read_cif_gz
#include <gemmi/xds_ascii.hpp>
#include "gemmi/refln.hpp"
#define GEMMI_PROG mtz2cif
#include "options.h"

Expand Down Expand Up @@ -279,7 +280,7 @@ int GEMMI_MAIN(int argc, char **argv) {
else
std::cerr << ". B tensor is unknown. Intensities won't be checked.\n";
}
mi.read_merged_intensities_from_mtz(*mtz[0]);
mi.read_mtz(*mtz[0], gemmi::DataType::MergedAM);
} else {
gemmi::ReflnBlock rblock = gemmi::get_refln_block(
gemmi::read_cif_from_memory(cif_buf.data(), cif_buf.size(), cif_input).blocks,
Expand All @@ -290,7 +291,7 @@ int GEMMI_MAIN(int argc, char **argv) {
mtz_to_cif.entry_id = rblock.entry_id;
}
mi.take_staraniso_b_from_mmcif(rblock.block);
mi.read_merged_intensities_from_mmcif(rblock);
mi.read_mmcif(rblock, gemmi::DataType::MergedAM);
}
gemmi::Intensities ui;
if (mtz[1])
Expand Down
6 changes: 2 additions & 4 deletions python/hkl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -234,11 +234,9 @@ void add_hkl(nb::module_& m) {
self.data.clear();
self.data.reserve(h.shape(0));
for (size_t i = 0; i < h.shape(0); ++i)
if (!std::isnan(v(i)) && s(i) > 0)
self.data.push_back({{{h(i, 0), h(i, 1), h(i, 2)}}, 1, 0, v(i), s(i)});

self.switch_to_asu_indices();
self.add_if_valid({h(i, 0), h(i, 1), h(i, 2)}, 0, 0, v(i), s(i));
self.type = DataType::Unmerged;
self.switch_to_asu_indices();
}, nb::arg("cell"), nb::arg("sg").none(false),
nb::arg("miller_array"), nb::arg("value_array"), nb::arg("sigma_array"))
;
Expand Down
4 changes: 3 additions & 1 deletion python/sym.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -218,7 +218,9 @@ void add_symmetry(nb::module_& m) {
.def(nb::init<const SpaceGroup*, bool>(), nb::arg(), nb::arg("tnt")=false)
.def("is_in", &ReciprocalAsu::is_in, nb::arg("hkl"))
.def("condition_str", &ReciprocalAsu::condition_str)
.def("to_asu", &ReciprocalAsu::to_asu, nb::arg("hkl"), nb::arg("group_ops"))
.def("to_asu",
nb::overload_cast<const Op::Miller&, const GroupOps&>(&ReciprocalAsu::to_asu, nb::const_),
nb::arg("hkl"), nb::arg("group_ops"))
;

nb::handle mod = m;
Expand Down
Loading

0 comments on commit 5c50955

Please sign in to comment.