Skip to content

Commit

Permalink
merging statistics, starting with Rmerge, Rmeas, Rpim, ...
Browse files Browse the repository at this point in the history
  • Loading branch information
wojdyr committed Feb 20, 2025
1 parent bf2021d commit e59f194
Show file tree
Hide file tree
Showing 4 changed files with 152 additions and 15 deletions.
13 changes: 8 additions & 5 deletions docs/merge-help.txt
Original file line number Diff line number Diff line change
@@ -1,20 +1,23 @@
$ gemmi merge -h
Usage:
gemmi merge [options] INPUT_FILE OUTPUT_FILE
gemmi merge --stats [options] UNMERGED_FILE
gemmi merge --compare [options] UNMERGED_FILE MERGED_FILE
gemmi merge --compare [options] MMCIF_FILE_WITH_BOTH
Options:
-h, --help Print usage and exit.
-V, --version Print version and exit.
-v, --verbose Verbose output.
--anom output/compare I(+) and I(-) instead of IMEAN.
--anom Output/compare I(+) and I(-) instead of IMEAN.
--no-sysabs Do not output systematic absences.
--nobs Add MTZ column NOBS with the number of merged
reflections.
--input-block=NAME input mmCIF block name (default: first unmerged).
-b NAME, --block=NAME output mmCIF block name: data_NAME (default: merged).
--compare compare unmerged and merged data (no output file).
--print-all print all compared reflections.
--input-block=NAME Input mmCIF block name (default: first unmerged).
-b NAME, --block=NAME Output mmCIF block name: data_NAME (default: merged).
--stats[=N] Print merging statistics in N resolution shells
(default: N=1).
--compare Compare unmerged and merged data (no output file).
--print-all Print all compared reflections.

The input file can be SF-mmCIF with _diffrn_refln, MTZ or XDS_ASCII.HKL.
The output file can be either SF-mmCIF or MTZ.
38 changes: 38 additions & 0 deletions include/gemmi/intensit.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@

namespace gemmi {

struct Binner;
struct Mtz;
struct XdsAscii;
struct ReflnBlock;
Expand All @@ -29,6 +30,40 @@ using std::int8_t;
enum class DataType { Unknown, Unmerged, Mean, Anomalous,
MergedMA, MergedAM, UAM };

struct MergingR {
int all_refl = 0;
int unique_refl = 0;
double r_merge_num = 0; // numerator
double r_meas_num = 0;
double r_pim_num = 0;
double intensity_sum = 0; // denominator

double r_merge() const { return r_merge_num / intensity_sum; }
double r_meas() const { return r_meas_num / intensity_sum; }
double r_pim() const { return r_pim_num / intensity_sum; }

void add(double r_merge_num_, int nobs, double intensity_sum_) {
all_refl += nobs;
unique_refl += 1;
if (nobs > 1) { // for nobs==1, r_merge_num_ must be 0
r_merge_num += r_merge_num_;
double t = r_merge_num_ / std::sqrt(nobs - 1);
r_pim_num += t;
r_meas_num += std::sqrt(nobs) * t;
}
intensity_sum += intensity_sum_;
}

void add_other(const MergingR& o) {
all_refl += o.all_refl;
unique_refl += o.unique_refl;
r_merge_num += o.r_merge_num;
r_meas_num += o.r_meas_num;
r_pim_num += o.r_pim_num;
intensity_sum += o.intensity_sum;
}
};

/// Returns STARANISO version or empty string.
GEMMI_DLL std::string read_staraniso_b_from_mtz(const Mtz& mtz, SMat33<double>& output);

Expand All @@ -45,6 +80,7 @@ struct GEMMI_DLL Intensities {
return std::tie(hkl[0], hkl[1], hkl[2], isign) <
std::tie(o.hkl[0], o.hkl[1], o.hkl[2], o.isign);
}
// for merged data
const char* intensity_label() const {
if (isign == 0)
return "<I>";
Expand Down Expand Up @@ -115,6 +151,8 @@ struct GEMMI_DLL Intensities {

void merge_in_place(DataType data_type);

std::vector<MergingR> calculate_merging_rs(const Binner* binner) const;

void switch_to_asu_indices();

void read_unmerged_intensities_from_mtz(const Mtz& mtz);
Expand Down
71 changes: 61 additions & 10 deletions prog/merge.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
#include <algorithm> // for sort
#include <iostream> // for cout
#include <gemmi/asudata.hpp> // for calculate_hkl_value_correlation
#include <gemmi/binner.hpp> // for Binner
#include <gemmi/gz.hpp> // for MaybeGzipped
#include <gemmi/mtz2cif.hpp> // for MtzToCif
#include <gemmi/fstream.hpp> // for Ofstream
Expand All @@ -20,32 +21,35 @@
namespace {

enum OptionIndex {
WriteAnom=4, NoSysAbs, NumObs, InputBlock, OutputBlock, Compare, PrintAll
WriteAnom=4, NoSysAbs, NumObs, InputBlock, OutputBlock, Stats, Compare, PrintAll
};

const option::Descriptor Usage[] = {
{ NoOp, 0, "", "", Arg::None,
"Usage:\n " EXE_NAME " [options] INPUT_FILE OUTPUT_FILE"
"\n " EXE_NAME " --stats [options] UNMERGED_FILE"
"\n " EXE_NAME " --compare [options] UNMERGED_FILE MERGED_FILE"
"\n " EXE_NAME " --compare [options] MMCIF_FILE_WITH_BOTH"
"\nOptions:"},
CommonUsage[Help],
CommonUsage[Version],
CommonUsage[Verbose],
{ WriteAnom, 0, "a", "anom", Arg::None,
" --anom \toutput/compare I(+) and I(-) instead of IMEAN." },
" --anom \tOutput/compare I(+) and I(-) instead of IMEAN." },
{ NoSysAbs, 0, "", "no-sysabs", Arg::None,
" --no-sysabs \tDo not output systematic absences." },
{ NumObs, 0, "", "nobs", Arg::None,
" --nobs \tAdd MTZ column NOBS with the number of merged reflections." },
{ InputBlock, 0, "", "input-block", Arg::Required,
" --input-block=NAME \tinput mmCIF block name (default: first unmerged)." },
" --input-block=NAME \tInput mmCIF block name (default: first unmerged)." },
{ OutputBlock, 0, "b", "block", Arg::Required,
" -b NAME, --block=NAME \toutput mmCIF block name: data_NAME (default: merged)." },
" -b NAME, --block=NAME \tOutput mmCIF block name: data_NAME (default: merged)." },
{ Stats, 0, "", "stats", Arg::Optional,
" --stats[=N] \tPrint merging statistics in N resolution shells (default: N=1)." },
{ Compare, 0, "", "compare", Arg::None,
" --compare \tcompare unmerged and merged data (no output file)." },
" --compare \tCompare unmerged and merged data (no output file)." },
{ PrintAll, 0, "", "print-all", Arg::None,
" --print-all \tprint all compared reflections." },
" --print-all \tPrint all compared reflections." },
{ NoOp, 0, "", "", Arg::None,
"\nThe input file can be SF-mmCIF with _diffrn_refln, MTZ or XDS_ASCII.HKL."
"\nThe output file can be either SF-mmCIF or MTZ."
Expand Down Expand Up @@ -239,14 +243,46 @@ void compare_intensities(Intensities& intensities, Intensities& ref, bool print_
printf("Sigma CC: %.9g%% (mean ratio: %g)\n", 100 * cs.coefficient(), 1./cs.mean_ratio());
}

void print_merging_statistics(const Intensities& intensities, int nbins) {
gemmi::Binner binner;
if (nbins > 1) {
auto method = gemmi::Binner::Method::Dstar3;
binner.setup(nbins, method, gemmi::IntensitiesDataProxy{intensities});
}
auto stats = intensities.calculate_merging_rs(nbins > 1 ? &binner : nullptr);
gemmi::MergingR total;
for (const gemmi::MergingR& r : stats)
total.add_other(r);
printf("All reflections: %d\n", total.all_refl);
printf("Unique reflections: %d\n", total.unique_refl);
printf("Mean intensity: %g\n", total.intensity_sum / total.all_refl);
printf("R-merge: %.4f\n", total.r_merge());
printf("R-meas: %.4f\n", total.r_meas());
printf("R-pim: %.4f\n", total.r_pim());
if (nbins > 1) {
printf("In resolution shells:\n"
" d_max d_min #all #uniq <I> Rmerge Rmeas Rpim\n");
for (int i = 0; i < nbins; ++i) {
const gemmi::MergingR& r = stats[i];
printf(" %6.2f %5.2f %6d %6d %8.2f %8.4f %8.4f %8.4f\n",
binner.dmax_of_bin(i), binner.dmin_of_bin(i),
r.all_refl, r.unique_refl, r.intensity_sum / r.all_refl,
r.r_merge(), r.r_meas(), r.r_pim());
}
}
}

} // anonymous namespace

int GEMMI_MAIN(int argc, char **argv) {
OptParser p(EXE_NAME);
p.simple_parse(argc, argv, Usage);
if (p.nonOptionsCount() != 2 &&
!(p.nonOptionsCount() == 1 && p.options[Compare])) {
fprintf(stderr, "%s requires 2 arguments (or single arg with --compare), got %d.\n",
p.check_exclusive_pair(Stats, Compare);
int n_args = p.nonOptionsCount();
if (n_args == 1 && (p.options[Stats] || p.options[Compare])) { // ok
} else if (n_args == 2 && !p.options[Stats]) { // ok
} else {
fprintf(stderr, "%s requires 2 arguments or 1 arg with --stats/--compare, got %d.\n",
p.program_name, p.nonOptionsCount());
p.print_try_help_and_exit("");
}
Expand All @@ -273,7 +309,22 @@ int GEMMI_MAIN(int argc, char **argv) {
if (verbose)
output_intensity_statistics(intensities);

if (p.options[Compare]) {
if (p.options[Stats]) {
int nbins = 1;
if (const char* arg = p.options[Stats].arg) {
nbins = std::atoi(arg);
if (nbins <= 0) {
fprintf(stderr, "Wrong argument for option --stats (expected N>0): %s\n", arg);
return 1;
}
}
try {
print_merging_statistics(intensities, nbins);
} catch (std::exception& e) {
std::fprintf(stderr, "ERROR: %s\n", e.what());
return 1;
}
} else if (p.options[Compare]) {
if (verbose)
std::fprintf(stderr, "Reading merged reflections from %s ...\n", output_path);
auto type_we_want = to_anom ? DataType::Anomalous : DataType::MergedMA;
Expand Down
45 changes: 45 additions & 0 deletions src/intensit.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

#include <gemmi/intensit.hpp>
#include <gemmi/atof.hpp> // for fast_from_chars
#include <gemmi/binner.hpp> // for Binner
#include <gemmi/mtz.hpp> // for Mtz
#include <gemmi/refln.hpp> // for ReflnBlock
#include <gemmi/xds_ascii.hpp> // for XdsAscii
Expand Down Expand Up @@ -162,6 +163,8 @@ void Intensities::merge_in_place(DataType new_type) {
for (Refl& refl : data)
refl.isign = 0;
} else if (type == DataType::Unmerged) {
if (!spacegroup)
fail("unknown space group");
GroupOps gops = spacegroup->operations();
for (Refl& refl : data)
refl.isign = refl.isym % 2 != 0 || gops.is_reflection_centric(refl.hkl) ? 1 : -1;
Expand Down Expand Up @@ -194,6 +197,48 @@ void Intensities::merge_in_place(DataType new_type) {
type = new_type;
}

std::vector<MergingR> Intensities::calculate_merging_rs(const Binner* binner) const {
if (data.empty())
fail("no data");
if (type != DataType::Unmerged)
fail("merging statistics can be calculated only from unmerged data");
if (!std::is_sorted(data.begin(), data.end()))
fail("call Intensities.sort() before calculating merging statistics");
if (binner)
binner->ensure_limits_are_set();

size_t nbins = binner ? binner->size() : 1;

std::vector<MergingR> stats(nbins);
int bin_hint = (int)nbins - 1;
Miller hkl = data[0].hkl;
double intensity_sum = 0;
int nobs = 0;
auto add = [&](const Refl* end) {
double abs_diff_sum = 0;
if (nobs > 1) {
double avg = intensity_sum / nobs;
for (const Refl* r = end - nobs; r != end; ++r)
abs_diff_sum += std::fabs(r->value - avg);
}
int bin = binner ? binner->get_bin_hinted(hkl, bin_hint) : 0;
stats[bin].add(abs_diff_sum, nobs, intensity_sum);
};
for (const Refl& refl : data) {
// TODO: optional outlier rejection
if (refl.hkl != hkl) {
add(&refl);
hkl = refl.hkl;
intensity_sum = 0;
nobs = 0;
}
intensity_sum += refl.value;
++nobs;
}
add(data.data() + data.size());
return stats;
}

void Intensities::switch_to_asu_indices() {
if (!spacegroup)
return;
Expand Down

0 comments on commit e59f194

Please sign in to comment.