From 179b50a1632fec068cceb2010bee33df454381c9 Mon Sep 17 00:00:00 2001 From: Leandro Ishi Date: Wed, 13 Jan 2021 20:33:05 -0400 Subject: [PATCH 01/37] WIP: refactoring asserts --- .gitignore | 1 + include/utils.h | 19 ++++ src/compare_main.cpp | 9 +- src/estimate_parameters.cpp | 10 +- src/fastaq.cpp | 4 - src/interval.cpp | 2 - src/kmergraph.cpp | 174 ++++++++++++++++++++----------- src/kmergraphwithcoverage.cpp | 188 +++++++++++++++++++++++++--------- src/prg/path.cpp | 2 +- src/utils.cpp | 15 +-- 10 files changed, 287 insertions(+), 137 deletions(-) diff --git a/.gitignore b/.gitignore index 9ebf5817..16258254 100644 --- a/.gitignore +++ b/.gitignore @@ -117,3 +117,4 @@ example/pandora_workflow build_portable_executable pandora-linux-precompiled +/cmake-build-release/ diff --git a/include/utils.h b/include/utils.h index 9096a285..77d1c133 100644 --- a/include/utils.h +++ b/include/utils.h @@ -11,6 +11,7 @@ #include #include "minihits.h" #include "pangenome/ns.cpp" +#include namespace fs = boost::filesystem; @@ -101,6 +102,24 @@ uint32_t pangraph_from_read_file(const std::string&, std::shared_ptr>&, PanNode*, uint32_t, float); +class FatalError { +public: + FatalError(){ + BOOST_LOG_TRIVIAL(error) << std::endl << std::endl << "[FATAL ERROR] "; + } + + virtual ~FatalError() { + BOOST_LOG_TRIVIAL(error) << std::endl << std::endl; + std::exit(EXIT_FAILURE); + } + + template + FatalError& operator<<(const T &element) { + BOOST_LOG_TRIVIAL(error) << element; + return *this; + } +}; + void fatal_error(const std::string& message); // TODO : refactor all file open and closing to use these functions diff --git a/src/compare_main.cpp b/src/compare_main.cpp index ce8c56d2..33eec1b4 100644 --- a/src/compare_main.cpp +++ b/src/compare_main.cpp @@ -407,7 +407,13 @@ int pandora_compare(CompareOptions& opt) pangenome::Node& pangraph_node = *pangraph_node_entry.second; const auto& prg_id = pangraph_node.prg_id; - assert(prgs.size() > prg_id); + + bool valid_prg_id = prgs.size() > 
prg_id; + if (!valid_prg_id) { + FatalError() << "[Error on PanRG] A PRG has an invalid ID (" << prg_id + << "), >= than the number of PRGs (" << prgs.size() + << ") in the PanRG"; + } const auto& prg_ptr = prgs[prg_id]; const auto vcf_reference_path @@ -425,7 +431,6 @@ int pandora_compare(CompareOptions& opt) // add all samples to the vcf vcf.add_samples(sample_names); - assert(vcf.samples.size() == samples.size()); // build the vcf pangraph_node.construct_multisample_vcf( diff --git a/src/estimate_parameters.cpp b/src/estimate_parameters.cpp index b5145a5e..32f88a3c 100644 --- a/src/estimate_parameters.cpp +++ b/src/estimate_parameters.cpp @@ -9,8 +9,6 @@ #include "pangenome/pannode.h" #include "estimate_parameters.h" -#define assert_msg(x) !(std::cerr << "Assertion failed: " << x << std::endl) - double fit_mean_covg( const std::vector& kmer_covg_dist, const uint8_t zero_thresh) { @@ -48,8 +46,12 @@ double fit_variance_covg(const std::vector& kmer_covg_dist, double& me void fit_negative_binomial(double& mean, double& variance, float& p, float& r) { - assert(mean > 0 and variance > 0); - assert(mean < variance); + + bool negative_binomial_parameters_are_ok = mean > 0 and variance > 0 and mean < variance; + if (!negative_binomial_parameters_are_ok) { + FatalError() << "In fit_negative_binomial(): parameters are invalid " + << "(mean is " << mean << ", variance is " << variance << ")"; + } p = mean / variance; r = (mean * p / (1 - p) + variance * p * p / (1 - p)) / 2; BOOST_LOG_TRIVIAL(debug) << "Negative binomial parameters p: " << p diff --git a/src/fastaq.cpp b/src/fastaq.cpp index e9a2e1fa..493778d9 100644 --- a/src/fastaq.cpp +++ b/src/fastaq.cpp @@ -1,7 +1,5 @@ #include "fastaq.h" -#define assert_msg(x) !(std::cerr << "Assertion failed: " << x << std::endl) - Fastaq::Fastaq(bool gz, bool fq) : gzipped(gz) , fastq(fq) @@ -51,7 +49,6 @@ void Fastaq::add_entry(const std::string& name, const std::string& sequence, const std::vector& covgs, const uint_least16_t 
global_covg, const std::string header) { - assert(name != ""); assert(covgs.size() == sequence.length()); auto mod_global_covg = global_covg; @@ -77,7 +74,6 @@ void Fastaq::add_entry(const std::string& name, const std::string& sequence, void Fastaq::add_entry( const std::string& name, const std::string& sequence, const std::string header) { - assert(name != ""); names.push_back(name); diff --git a/src/interval.cpp b/src/interval.cpp index 3297baa0..2eed4dbf 100644 --- a/src/interval.cpp +++ b/src/interval.cpp @@ -1,7 +1,5 @@ #include "interval.h" -#define assert_msg(x) !(std::cerr << "Assertion failed: " << x << std::endl) - Interval::Interval(uint32_t s, uint32_t e) : start(s) { diff --git a/src/kmergraph.cpp b/src/kmergraph.cpp index 1a754c41..d8b76e02 100644 --- a/src/kmergraph.cpp +++ b/src/kmergraph.cpp @@ -10,8 +10,6 @@ #include "kmergraph.h" #include "localPRG.h" -#define assert_msg(x) !(std::cerr << "Assertion failed: " << x << std::endl) - using namespace prg; KmerGraph::KmerGraph() @@ -34,7 +32,6 @@ KmerGraph::KmerGraph(const KmerGraph& other) // create deep copies of the nodes, minus the edges for (const auto& node : other.nodes) { n = std::make_shared(*node); - assert(nodes.size() == n->id); nodes.push_back(n); sorted_nodes.insert(n); } @@ -66,7 +63,6 @@ KmerGraph& KmerGraph::operator=(const KmerGraph& other) // create deep copies of the nodes, minus the edges for (const auto& node : other.nodes) { n = std::make_shared(*node); - assert(nodes.size() == n->id); nodes.push_back(n); sorted_nodes.insert(n); } @@ -84,11 +80,7 @@ KmerGraph& KmerGraph::operator=(const KmerGraph& other) void KmerGraph::clear() { nodes.clear(); - assert(nodes.empty()); - sorted_nodes.clear(); - assert(sorted_nodes.empty()); - shortest_path_length = 0; k = 0; } @@ -105,12 +97,15 @@ KmerNodePtr KmerGraph::add_node(const prg::Path& p) KmerNodePtr n(std::make_shared(nodes.size(), p)); // create the node nodes.push_back(n); // add it to nodes sorted_nodes.insert(n); - assert(k == 0 or 
p.length() == 0 or p.length() == k); + + bool path_is_valid = k == 0 or p.length() == 0 or p.length() == k; + if (!path_is_valid) { + FatalError() << "In KmerGraph::add_node(), the node path is not valid (k is " << k + << ", p.length() is " << p.length(); + } if (k == 0 and p.length() > 0) { k = p.length(); } - assert(nodes.size() < std::numeric_limits::max() - || assert_msg("WARNING, reached max kmer graph node size")); if (nodes.size() == reserved_size) { reserved_size *= 2; nodes.reserve(reserved_size); @@ -124,7 +119,6 @@ KmerNodePtr KmerGraph::add_node_with_kh( KmerNodePtr n = add_node(p); n->khash = kh; n->num_AT = num; - assert(n->khash < std::numeric_limits::max()); return n; } @@ -135,12 +129,22 @@ bool condition::operator()(const KmerNodePtr kn) const { return kn->path == q; } void KmerGraph::add_edge(KmerNodePtr from, KmerNodePtr to) { - assert(from->id < nodes.size() and nodes[from->id] == from); - assert(to->id < nodes.size() and nodes[to->id] == to); - assert(from->path < to->path - or assert_msg("Cannot add edge from " << from->id << " to " << to->id - << " because " << from->path - << " is not less than " << to->path)); + bool from_node_is_valid = from->id < nodes.size() and nodes[from->id] == from; + if (!from_node_is_valid) { + FatalError() << "In KmerGraph::add_edge(), from node is invalid"; + } + + bool to_node_is_valid = to->id < nodes.size() and nodes[to->id] == to; + if (!to_node_is_valid) { + FatalError() << "In KmerGraph::add_edge(), to node is invalid"; + } + + bool path_order_is_valid = from->path < to->path; + if (!path_order_is_valid) { + FatalError() << "In KmerGraph::add_edge(), cannot add edge from " << from->id + << " to " << to->id << " because " << from->path + << " is not less than " << to->path << " (path order is invalid)"; + } if (from->find_node_ptr_in_out_nodes(to) == from->out_nodes.end()) { from->out_nodes.emplace_back(to); @@ -199,22 +203,34 @@ void KmerGraph::check() const { // should not have any leaves, only nodes with 
degree 0 are start and end for (auto c = sorted_nodes.begin(); c != sorted_nodes.end(); ++c) { - assert(!(*c)->in_nodes.empty() or (*c) == (*sorted_nodes.begin()) - || assert_msg( - "node" << **c << " has inNodes size " << (*c)->in_nodes.size())); - assert(!(*c)->out_nodes.empty() or (*c) == *(sorted_nodes.rbegin()) - || assert_msg("node" - << **c << " has outNodes size " << (*c)->out_nodes.size() - << " and isn't equal to back node " << **(sorted_nodes.rbegin()))); + bool is_start_node = (*c) == (*sorted_nodes.begin()); + bool is_end_node = (*c) == *(sorted_nodes.rbegin()); + bool indegree_zero = (*c)->in_nodes.empty(); + bool outdegree_zero = (*c)->out_nodes.empty(); + + if (indegree_zero and !is_start_node) { + FatalError() << "In KmerGraph::check(), node " << **c << " has indegree 0 and is not a start node"; + } + if (outdegree_zero and !is_end_node) { + FatalError() << "In KmerGraph::check(), node " << **c << " has outdegree 0 and is not an end node"; + } for (const auto& d : (*c)->out_nodes) { auto dAsSharedPtr = d.lock(); - assert((*c)->path < dAsSharedPtr->path - || assert_msg( - (*c)->path << " is not less than " << dAsSharedPtr->path)); - assert(find(c, sorted_nodes.end(), dAsSharedPtr) != sorted_nodes.end() - || assert_msg(dAsSharedPtr->id - << " does not occur later in sorted list than " << (*c)->id)); - } + bool c_path_is_less_than_neighbours_path = (*c)->path < dAsSharedPtr->path; + if (!c_path_is_less_than_neighbours_path) { + FatalError() << "In KmerGraph::check(), path " << (*c)->path + << " is not less than path " << dAsSharedPtr->path + << " (invalid neighbour path order)"; + } + + bool neighbour_is_later_in_topological_order = + find(c, sorted_nodes.end(), dAsSharedPtr) != sorted_nodes.end(); + if (!neighbour_is_later_in_topological_order) { + FatalError() << "In KmerGraph::check(), node " << dAsSharedPtr->id + << " does not occur later in sorted list than node " << (*c)->id + << ", but it should due to the topological order"; + } + } } } @@ 
-285,7 +301,13 @@ void KmerGraph::load(const fs::path& filepath) while (getline(myfile, line).good()) { if (line[0] == 'S') { split_line = split(line, "\t"); - assert(split_line.size() >= 4); + + bool line_is_consistent = split_line.size() >= 4; + if (!line_is_consistent) { + FatalError() << "In KmerGraph::load(), line \"" << line << "\" " + << "is inconsistent"; + } + id = std::stoi(split_line[1]); num_nodes = std::max(num_nodes, id); } @@ -299,22 +321,36 @@ void KmerGraph::load(const fs::path& filepath) while (getline(myfile, line).good()) { if (line[0] == 'S') { split_line = split(line, "\t"); - assert(split_line.size() >= 4); + + bool line_is_consistent = split_line.size() >= 4; + if (!line_is_consistent) { + FatalError() << "In KmerGraph::load(), line \"" << line << "\" " + << "is inconsistent"; + } + id = stoi(split_line[1]); ss << split_line[2]; char c = ss.peek(); - assert(isdigit(c) - or assert_msg("Cannot read in this sort of kmergraph GFA as it " - "does not label nodes " - "with their PRG path")); + + if (!isdigit(c)) { + FatalError() << "In KmerGraph::load(), line \"" << line << "\": " + << "Cannot read in this sort of kmergraph GFA as it " + << "does not label nodes with their PRG path"; + } + ss >> p; ss.clear(); - // add_node(p); + KmerNodePtr kmer_node = std::make_shared(id, p); - assert(kmer_node != nullptr); - assert(id == nodes.size() or num_nodes - id == nodes.size() - or assert_msg("id " << id << " != " << nodes.size() - << " nodes.size() for kmergraph ")); + + bool id_is_consistent = (id == nodes.size() or num_nodes - id == nodes.size()); + if (!id_is_consistent) { + FatalError() << "In KmerGraph::load(), id is inconsistent." 
+ << "id = " << id << ", " + << "nodes.size() = " << nodes.size() << ", " + << "num_nodes = " << num_nodes; + } + nodes.push_back(kmer_node); sorted_nodes.insert(kmer_node); if (k == 0 and p.length() > 0) { @@ -325,12 +361,29 @@ void KmerGraph::load(const fs::path& filepath) } } else if (line[0] == 'L') { split_line = split(line, "\t"); - assert(split_line.size() >= 5); - assert(stoi(split_line[1]) < (int)outnode_counts.size() - or assert_msg( - stoi(split_line[1]) << ">=" << outnode_counts.size())); - assert(stoi(split_line[3]) < (int)innode_counts.size() - or assert_msg(stoi(split_line[3]) << ">=" << innode_counts.size())); + + bool line_is_consistent = split_line.size() >= 5; + if (!line_is_consistent) { + FatalError() << "In KmerGraph::load(), line \"" << line << "\" " + << "is inconsistent"; + } + + int from_node = stoi(split_line[1]); + int to_node = stoi(split_line[3]); + + bool from_node_in_range = from_node < (int)outnode_counts.size(); + bool to_node_in_range = to_node < (int)innode_counts.size(); + if (!from_node_in_range) { + FatalError() << "In KmerGraph::load(), line \"" << line << "\": " + << "from_node out of range: " + << from_node << ">=" << outnode_counts.size(); + } + if (!to_node_in_range) { + FatalError() << "In KmerGraph::load(), line \"" << line << "\": " + << "to_node out of range: " + << to_node << ">=" << innode_counts.size(); + } + outnode_counts[stoi(split_line[1])] += 1; innode_counts[stoi(split_line[3])] += 1; } @@ -342,12 +395,11 @@ void KmerGraph::load(const fs::path& filepath) id = 0; for (const auto& n : nodes) { - assert(nodes[id]->id == id); + bool id_is_consistent = (nodes[id]->id == id) && (n->id < outnode_counts.size()) && (n->id < innode_counts.size()); + if (!id_is_consistent) { + FatalError() << "In KmerGraph::load(), Node: " << n << " has inconsistent id, should be " << id; + } id++; - assert(n->id < outnode_counts.size() - or assert_msg(n->id << ">=" << outnode_counts.size())); - assert(n->id < innode_counts.size() - or 
assert_msg(n->id << ">=" << innode_counts.size())); n->out_nodes.reserve(outnode_counts[n->id]); n->in_nodes.reserve(innode_counts[n->id]); } @@ -358,7 +410,13 @@ void KmerGraph::load(const fs::path& filepath) while (getline(myfile, line).good()) { if (line[0] == 'L') { split_line = split(line, "\t"); - assert(split_line.size() >= 5); + + bool line_is_consistent = split_line.size() >= 5; + if (!line_is_consistent) { + FatalError() << "In KmerGraph::load(), line \"" << line << "\" " + << "is inconsistent"; + } + if (split_line[2] == split_line[4]) { from = std::stoi(split_line[1]); to = std::stoi(split_line[3]); @@ -386,12 +444,6 @@ uint32_t KmerGraph::min_path_length() return shortest_path_length; } -#ifndef NDEBUG - // TODO: check if tests must be updated or not due to this (I think not - - // sorted_nodes is always sorted) if this is added, some tests bug, since it was not - // executed before... check(); -#endif - std::vector len( sorted_nodes.size(), 0); // length of shortest path from node i to end of graph for (uint32_t j = sorted_nodes.size() - 1; j != 0; --j) { diff --git a/src/kmergraphwithcoverage.cpp b/src/kmergraphwithcoverage.cpp index a0117b30..8dfdc0be 100644 --- a/src/kmergraphwithcoverage.cpp +++ b/src/kmergraphwithcoverage.cpp @@ -15,22 +15,35 @@ using namespace prg; void KmerGraphWithCoverage::set_exp_depth_covg(const uint32_t edp) { - assert(edp > 0); + bool exp_depth_covg_parameter_is_valid = edp > 0; + if (!exp_depth_covg_parameter_is_valid) { + FatalError() << "In KmerGraphWithCoverage::set_exp_depth_covg(): exp_depth_covg is invalid, must be > 0, is " << edp; + } exp_depth_covg = edp; } void KmerGraphWithCoverage::set_binomial_parameter_p(const float e_rate) { BOOST_LOG_TRIVIAL(debug) << "Set p in kmergraph"; - assert(kmer_prg->k != 0); - assert(0 < e_rate and e_rate < 1); + + bool valid_parameters_to_set_p = (kmer_prg->k != 0) && (0 < e_rate and e_rate < 1); + if (!valid_parameters_to_set_p) { + FatalError() << "In 
KmerGraphWithCoverage::set_binomial_parameter_p(): " + << "Parameters to set p are not valid " + << "(kmer_prg->k = " << kmer_prg->k << ", e_rate = " << e_rate << ")"; + } + binomial_parameter_p = 1 / exp(e_rate * kmer_prg->k); } void KmerGraphWithCoverage::increment_covg( uint32_t node_id, pandora::Strand strand, uint32_t sample_id) { - assert(this->node_index_to_sample_coverage[node_id].size() > sample_id); + bool sample_is_valid = this->node_index_to_sample_coverage[node_id].size() > sample_id; + if (!sample_is_valid) { + FatalError() << "In KmerGraphWithCoverage::increment_covg(), sample_id is invalid (" + << sample_id << ")"; + } // get a pointer to the value we want to increment uint16_t* coverage_ptr = nullptr; @@ -66,7 +79,12 @@ uint32_t KmerGraphWithCoverage::get_covg( void KmerGraphWithCoverage::set_covg( uint32_t node_id, uint16_t value, pandora::Strand strand, uint32_t sample_id) { - assert(this->node_index_to_sample_coverage[node_id].size() > sample_id); + bool sample_is_valid = this->node_index_to_sample_coverage[node_id].size() > sample_id; + if (!sample_is_valid) { + FatalError() << "In KmerGraphWithCoverage::set_covg(), sample_id is invalid (" + << sample_id << ")"; + } + if (strand == pandora::Strand::Forward) { this->node_index_to_sample_coverage[node_id][sample_id].first = value; } else { @@ -79,11 +97,17 @@ void KmerGraphWithCoverage::set_negative_binomial_parameters( { if (nbin_prob == 0 and nb_fail == 0) return; - assert((negative_binomial_parameter_p > 0 and negative_binomial_parameter_p < 1) - || assert_msg( - "nb_p " << negative_binomial_parameter_p << " was not set in kmergraph")); - assert(negative_binomial_parameter_r > 0 - || assert_msg("nb_r was not set in kmergraph")); + + bool negative_binomial_parameters_were_previously_set = + (negative_binomial_parameter_p > 0 and negative_binomial_parameter_p < 1) && + (negative_binomial_parameter_r > 0); + if (!(negative_binomial_parameters_were_previously_set)) { + FatalError() << "In 
KmerGraphWithCoverage::set_negative_binomial_parameters(): " + << "negative_binomial_parameter_p (" << negative_binomial_parameter_p << ")" + << " or negative_binomial_parameter_r (" << negative_binomial_parameter_r << ") " + << "were not correctly set"; + } + negative_binomial_parameter_p += nbin_prob; negative_binomial_parameter_r += nb_fail; } @@ -102,7 +126,11 @@ float KmerGraphWithCoverage::nbin_prob(uint32_t node_id, const uint32_t& sample_ float KmerGraphWithCoverage::lin_prob(uint32_t node_id, const uint32_t& sample_id) { - assert(num_reads != 0); + bool reads_were_mapped_to_this_kmer_graph = num_reads != 0; + if (!reads_were_mapped_to_this_kmer_graph) { + FatalError() << "In KmerGraphWithCoverage::lin_prob(): impossible to compute " + << "lin_prob, no reads were mapped to this kmer graph"; + } auto k = this->get_forward_covg(node_id, sample_id) + this->get_reverse_covg(node_id, sample_id); return log(float(k) / num_reads); @@ -110,20 +138,29 @@ float KmerGraphWithCoverage::lin_prob(uint32_t node_id, const uint32_t& sample_i float KmerGraphWithCoverage::bin_prob(uint32_t node_id, const uint32_t& sample_id) { - assert(num_reads != 0); + bool reads_were_mapped_to_this_kmer_graph = num_reads != 0; + if (!reads_were_mapped_to_this_kmer_graph) { + FatalError() << "In KmerGraphWithCoverage::bin_prob(): impossible to compute " + << "bin_prob, no reads were mapped to this kmer graph"; + } return bin_prob(node_id, num_reads, sample_id); } float KmerGraphWithCoverage::bin_prob( const uint32_t& node_id, const uint32_t& num, const uint32_t& sample_id) { - assert(binomial_parameter_p != 1); - assert(node_id < kmer_prg->nodes.size()); -#ifndef NDEBUG - // TODO: check if tests must be updated or not due to this (I think not - - // sorted_nodes is always sorted) if this is added, some tests bug, since it was not - // executed before... 
check(); -#endif + bool binomial_parameter_p_is_set_correctly = binomial_parameter_p != 1; + if (!binomial_parameter_p_is_set_correctly) { + FatalError() << "In KmerGraphWithCoverage::bin_prob(): " + << "binomial_parameter_p (" << binomial_parameter_p << ")" + << " is not correctly set"; + } + + bool node_exists = node_id < kmer_prg->nodes.size(); + if (!node_exists) { + FatalError() << "In KmerGraphWithCoverage::bin_prob(): " + << "attempt to access nonexistent node " << node_id; + } uint32_t sum_coverages = this->get_forward_covg(node_id, sample_id) + this->get_reverse_covg(node_id, sample_id); @@ -152,18 +189,22 @@ float KmerGraphWithCoverage::get_prob( const std::string& prob_model, const uint32_t& node_id, const uint32_t& sample_id) { if (prob_model == "nbin") { + // is there no parameter check here? return nbin_prob(node_id, sample_id); } else if (prob_model == "bin") { - assert(binomial_parameter_p < 1 - || assert_msg("binomial_parameter_p was not set in kmergraph")); - assert(num_reads > 0 || assert_msg("num_reads was not set in kmergraph")); + bool binomial_parameters_are_ok = (binomial_parameter_p < 1) && (num_reads > 0); + if (!binomial_parameters_are_ok) { + FatalError() << "In KmerGraphWithCoverage::get_prob(): binomial parameters " + << "are not ok (binomial_parameter_p = " << binomial_parameter_p << ", " + << "num_reads = " << num_reads << ")"; + } return bin_prob(node_id, sample_id); } else if (prob_model == "lin") { + // is there no parameter check here? 
return lin_prob(node_id, sample_id); } else { - BOOST_LOG_TRIVIAL(warning) << "Invalid probability model for kmer coverage " - "distribution: should be nbin, bin or lin"; - exit(1); + FatalError() << "Invalid probability model for kmer coverage distribution: " + << "should be nbin, bin or lin"; } } @@ -246,6 +287,9 @@ float KmerGraphWithCoverage::find_max_path(std::vector& maxpath, max_sum_of_log_probs_from_node[current_node->id] -= get_prob(prob_model, sorted_nodes[prev_node]->id, sample_id); length_of_maxpath_from_node[current_node->id] -= 1; + + // this remains as an assert, as it is a code check + // Note: I think we might even be able to remove this assert(length_of_maxpath_from_node[current_node->id] == max_num_kmers_to_average); } @@ -274,8 +318,11 @@ float KmerGraphWithCoverage::find_max_path(std::vector& maxpath, } } - assert(length_of_maxpath_from_node[0] > 0 - || assert_msg("found no path through kmer prg")); + bool path_was_found_through_the_kmer_PRG = length_of_maxpath_from_node[0] > 0; + if (!path_was_found_through_the_kmer_PRG) { + FatalError() << "In KmerGraphWithCoverage::find_max_path(), found no path through kmer prg"; + } + return prob_path(maxpath, sample_id, prob_model); } @@ -410,7 +457,13 @@ void KmerGraphWithCoverage::load(const std::string& filepath) while (getline(myfile, line).good()) { if (line[0] == 'S') { split_line = split(line, "\t"); - assert(split_line.size() >= 4); + + bool line_is_consistent = split_line.size() >= 4; + if (!line_is_consistent) { + FatalError() << "In KmerGraphWithCoverage::load(), line \"" << line << "\" " + << "is inconsistent"; + } + id = std::stoi(split_line[1]); num_nodes = std::max(num_nodes, id); } @@ -424,23 +477,36 @@ void KmerGraphWithCoverage::load(const std::string& filepath) while (getline(myfile, line).good()) { if (line[0] == 'S') { split_line = split(line, "\t"); - assert(split_line.size() >= 4); + + bool line_is_consistent = split_line.size() >= 4; + if (!line_is_consistent) { + FatalError() << 
"In KmerGraphWithCoverage::load(), line \"" << line << "\" " + << "is inconsistent"; + } + id = stoi(split_line[1]); ss << split_line[2]; char c = ss.peek(); - assert(isdigit(c) - or assert_msg("Cannot read in this sort of kmergraph GFA as it " - "does not label nodes " - "with their PRG path")); + + if (!isdigit(c)) { + FatalError() << "In KmerGraphWithCoverage::load(), line \"" << line << "\": " + << "Cannot read in this sort of kmergraph GFA as it " + << "does not label nodes with their PRG path"; + } + ss >> p; ss.clear(); // add_node(p); KmerNodePtr n = std::make_shared(id, p); - assert(n != nullptr); - assert(id == kmer_prg->nodes.size() - or num_nodes - id == kmer_prg->nodes.size() - or assert_msg("id " << id << " != " << kmer_prg->nodes.size() - << " nodes.size() for kmergraph ")); + + bool id_is_consistent = (id == kmer_prg->nodes.size() or num_nodes - id == kmer_prg->nodes.size()); + if (!id_is_consistent) { + FatalError() << "In KmerGraphWithCoverage::load(), id is inconsistent." 
+ << "id = " << id << ", " + << "nodes.size() = " << kmer_prg->nodes.size() << ", " + << "num_nodes = " << num_nodes; + } + kmer_prg->nodes.push_back(n); kmer_prg->sorted_nodes.insert(n); if (kmer_prg->k == 0 and p.length() > 0) { @@ -455,12 +521,29 @@ void KmerGraphWithCoverage::load(const std::string& filepath) } } else if (line[0] == 'L') { split_line = split(line, "\t"); - assert(split_line.size() >= 5); - assert(stoi(split_line[1]) < (int)outnode_counts.size() - or assert_msg( - stoi(split_line[1]) << ">=" << outnode_counts.size())); - assert(stoi(split_line[3]) < (int)innode_counts.size() - or assert_msg(stoi(split_line[3]) << ">=" << innode_counts.size())); + + bool line_is_consistent = split_line.size() >= 5; + if (!line_is_consistent) { + FatalError() << "In KmerGraphWithCoverage::load(), line \"" << line << "\" " + << "is inconsistent"; + } + + int from_node = stoi(split_line[1]); + int to_node = stoi(split_line[3]); + + bool from_node_in_range = from_node < (int)outnode_counts.size(); + bool to_node_in_range = to_node < (int)innode_counts.size(); + if (!from_node_in_range) { + FatalError() << "In KmerGraphWithCoverage::load(), line \"" << line << "\": " + << "from_node out of range: " + << from_node << ">=" << outnode_counts.size(); + } + if (!to_node_in_range) { + FatalError() << "In KmerGraphWithCoverage::load(), line \"" << line << "\": " + << "to_node out of range: " + << to_node << ">=" << innode_counts.size(); + } + outnode_counts[stoi(split_line[1])] += 1; innode_counts[stoi(split_line[3])] += 1; } @@ -472,12 +555,11 @@ void KmerGraphWithCoverage::load(const std::string& filepath) id = 0; for (const auto& n : kmer_prg->nodes) { - assert(kmer_prg->nodes[id]->id == id); + bool id_is_consistent = (kmer_prg->nodes[id]->id == id) && (n->id < outnode_counts.size()) && (n->id < innode_counts.size()); + if (!id_is_consistent) { + FatalError() << "In KmerGraphWithCoverage::load(), Node: " << n << " has inconsistent id, should be " << id; + } id++; - 
assert(n->id < outnode_counts.size() - or assert_msg(n->id << ">=" << outnode_counts.size())); - assert(n->id < innode_counts.size() - or assert_msg(n->id << ">=" << innode_counts.size())); n->out_nodes.reserve(outnode_counts[n->id]); n->in_nodes.reserve(innode_counts[n->id]); } @@ -488,7 +570,13 @@ void KmerGraphWithCoverage::load(const std::string& filepath) while (getline(myfile, line).good()) { if (line[0] == 'L') { split_line = split(line, "\t"); - assert(split_line.size() >= 5); + + bool line_is_consistent = split_line.size() >= 5; + if (!line_is_consistent) { + FatalError() << "In KmerGraphWithCoverage::load(), line \"" << line << "\" " + << "is inconsistent"; + } + if (split_line[2] == split_line[4]) { from = std::stoi(split_line[1]); to = std::stoi(split_line[3]); diff --git a/src/prg/path.cpp b/src/prg/path.cpp index 39f82bb1..bedadaa5 100644 --- a/src/prg/path.cpp +++ b/src/prg/path.cpp @@ -62,7 +62,7 @@ std::vector prg::Path::nodes_along_path(const LocalPRG& localPrg) // redudant call, return the memoized local node path return memoizedLocalNodePath; } else { - fatal_error("Bug on prg::Path::nodes_along_path()"); + FatalError() << "Bug on prg::Path::nodes_along_path()"; } } diff --git a/src/utils.cpp b/src/utils.cpp index 0efe9a4a..1c698ad6 100644 --- a/src/utils.cpp +++ b/src/utils.cpp @@ -531,20 +531,11 @@ uint32_t pangraph_from_read_file(const std::string& filepath, return covg; } -void fatal_error(const string& message) -{ - cerr << endl << endl << "[FATAL ERROR] " << message << endl << endl; - cerr.flush(); - exit(1); -} - void open_file_for_reading(const std::string& file_path, std::ifstream& stream) { stream.open(file_path); if (!stream.is_open()) { - std::stringstream ss; - ss << "Error opening file " << file_path; - fatal_error(ss.str()); + FatalError() << "Error opening file " << file_path; } } @@ -552,9 +543,7 @@ void open_file_for_writing(const std::string& file_path, std::ofstream& stream) { stream.open(file_path); if (!stream.is_open()) { 
- std::stringstream ss; - ss << "Error opening file " << file_path; - fatal_error(ss.str()); + FatalError() << "Error opening file " << file_path; } } From fda36fcd8d09ce183a30534f6b6e3fe4fc892d0f Mon Sep 17 00:00:00 2001 From: Leandro Ishi Date: Mon, 18 Jan 2021 14:44:47 -0400 Subject: [PATCH 02/37] Fixing error message https://github.com/rmcolq/pandora/pull/257#discussion_r557789633 --- include/utils.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/utils.h b/include/utils.h index 77d1c133..516e96d5 100644 --- a/include/utils.h +++ b/include/utils.h @@ -105,7 +105,7 @@ void infer_most_likely_prg_path_for_pannode( class FatalError { public: FatalError(){ - BOOST_LOG_TRIVIAL(error) << std::endl << std::endl << "[FATAL ERROR] "; + BOOST_LOG_TRIVIAL(error) << "[FATAL ERROR] "; } virtual ~FatalError() { From 8035567a4b052a3a7849dc4577d35bec0a0b22f3 Mon Sep 17 00:00:00 2001 From: leoisl Date: Mon, 18 Jan 2021 14:46:02 -0400 Subject: [PATCH 03/37] Update src/estimate_parameters.cpp Co-authored-by: Michael Hall --- src/estimate_parameters.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/estimate_parameters.cpp b/src/estimate_parameters.cpp index 32f88a3c..d04c84eb 100644 --- a/src/estimate_parameters.cpp +++ b/src/estimate_parameters.cpp @@ -49,7 +49,7 @@ void fit_negative_binomial(double& mean, double& variance, float& p, float& r) bool negative_binomial_parameters_are_ok = mean > 0 and variance > 0 and mean < variance; if (!negative_binomial_parameters_are_ok) { - FatalError() << "In fit_negative_binomial(): parameters are invalid " + FatalError() << "Negative binomial parameters are invalid " << "(mean is " << mean << ", variance is " << variance << ")"; } p = mean / variance; From 460849c6e2a4d058d96afd327702544508b7ed21 Mon Sep 17 00:00:00 2001 From: leoisl Date: Mon, 18 Jan 2021 14:50:06 -0400 Subject: [PATCH 04/37] Update src/kmergraph.cpp Co-authored-by: Michael Hall --- src/kmergraph.cpp | 4 ++-- 1 file changed, 
2 insertions(+), 2 deletions(-) diff --git a/src/kmergraph.cpp b/src/kmergraph.cpp index d8b76e02..270bb0d4 100644 --- a/src/kmergraph.cpp +++ b/src/kmergraph.cpp @@ -98,7 +98,7 @@ KmerNodePtr KmerGraph::add_node(const prg::Path& p) nodes.push_back(n); // add it to nodes sorted_nodes.insert(n); - bool path_is_valid = k == 0 or p.length() == 0 or p.length() == k; + const bool path_is_valid = k == 0 or p.length() == 0 or p.length() == k; if (!path_is_valid) { FatalError() << "In KmerGraph::add_node(), the node path is not valid (k is " << k << ", p.length() is " << p.length(); @@ -494,4 +494,4 @@ bool KmerGraph::operator==(const KmerGraph& other_graph) const bool pCompKmerNode::operator()(KmerNodePtr lhs, KmerNodePtr rhs) { return (lhs->path) < (rhs->path); -} \ No newline at end of file +} From 106c061544703494abb2876927b35325d3f831f2 Mon Sep 17 00:00:00 2001 From: leoisl Date: Mon, 18 Jan 2021 15:05:02 -0400 Subject: [PATCH 05/37] Update src/kmergraph.cpp Co-authored-by: Michael Hall --- src/kmergraph.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/kmergraph.cpp b/src/kmergraph.cpp index 270bb0d4..d8bb3480 100644 --- a/src/kmergraph.cpp +++ b/src/kmergraph.cpp @@ -129,7 +129,7 @@ bool condition::operator()(const KmerNodePtr kn) const { return kn->path == q; } void KmerGraph::add_edge(KmerNodePtr from, KmerNodePtr to) { - bool from_node_is_valid = from->id < nodes.size() and nodes[from->id] == from; + const bool from_node_is_valid = from->id < nodes.size() and nodes[from->id] == from; if (!from_node_is_valid) { FatalError() << "In KmerGraph::add_edge(), from node is invalid"; } From efb6be578c3ad5658af1b07933d3df793db502a9 Mon Sep 17 00:00:00 2001 From: leoisl Date: Mon, 18 Jan 2021 15:05:34 -0400 Subject: [PATCH 06/37] Update src/kmergraph.cpp Co-authored-by: Michael Hall --- src/kmergraph.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/kmergraph.cpp b/src/kmergraph.cpp index d8bb3480..b13aabdf 100644 --- 
a/src/kmergraph.cpp +++ b/src/kmergraph.cpp @@ -134,7 +134,7 @@ void KmerGraph::add_edge(KmerNodePtr from, KmerNodePtr to) FatalError() << "In KmerGraph::add_edge(), from node is invalid"; } - bool to_node_is_valid = to->id < nodes.size() and nodes[to->id] == to; + const bool to_node_is_valid = to->id < nodes.size() and nodes[to->id] == to; if (!to_node_is_valid) { FatalError() << "In KmerGraph::add_edge(), to node is invalid"; } From 84dd5f0000b4e17e0f817a306fdf2c2d8a4648da Mon Sep 17 00:00:00 2001 From: Leandro Ishi Date: Tue, 19 Jan 2021 21:59:29 -0400 Subject: [PATCH 07/37] Adding backward to produce meaningful stacktraces --- CMakeLists.txt | 10 + README.md | 5 + cmake/Hunter/config.cmake | 14 +- include/backward.hpp | 4406 +++++++++++++++++ .../build_portable_binary_core.sh | 6 +- src/backward.cpp | 42 + test/CMakeLists.txt | 1 + 7 files changed, 4479 insertions(+), 5 deletions(-) create mode 100644 include/backward.hpp create mode 100644 src/backward.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index ab9ca2f5..6e879bde 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -14,6 +14,15 @@ project(${PROJECT_NAME_STR} VERSION "0.7.0" LANGUAGES C CXX) configure_file( include/version.h.in ${CMAKE_BINARY_DIR}/include/version.h ) +if(PRINT_STACKTRACE) + message(STATUS "Printing meaningful stacktrace enabled, please have binutils-dev installed") + add_compile_definitions(BACKWARD_HAS_BFD=1) + set(BACKWARD_LIBRARIES "-lbfd") +else() + set(BACKWARD_LIBRARIES "") +endif() + + # add a RELEASE_WITH_ASSERTS build type - TODO: FIX THIS set(CMAKE_CXX_FLAGS_RELEASE_WITH_ASSERTS "-O3") @@ -122,6 +131,7 @@ target_link_libraries(${PROJECT_NAME} ${ZLIB_LIBRARY} ${CMAKE_DL_LIBS} ${STATIC_C_CXX} + ${BACKWARD_LIBRARIES} ) enable_testing() diff --git a/README.md b/README.md index d8387805..d4ddb82f 100644 --- a/README.md +++ b/README.md @@ -143,6 +143,11 @@ make -j4 ctest -VV ``` +* If you want to produce meaningful stack traces in case `pandora` errors out, `binutils-dev` must be 
installed and the + `cmake` command above must be changed to: + +`cmake -DPRINT_STACKTRACE=True ..` + ## Usage See [Usage](https://github.com/rmcolq/pandora/wiki/Usage). diff --git a/cmake/Hunter/config.cmake b/cmake/Hunter/config.cmake index 92df9d0e..f661c65a 100644 --- a/cmake/Hunter/config.cmake +++ b/cmake/Hunter/config.cmake @@ -1,5 +1,15 @@ hunter_config( - Boost + Boost + VERSION + "1.62.0" + CONFIGURATION_TYPES + Debug +) + +hunter_config( + GTest VERSION - "1.62.0" + "1.10.0" + CONFIGURATION_TYPES + Debug ) diff --git a/include/backward.hpp b/include/backward.hpp new file mode 100644 index 00000000..24500a34 --- /dev/null +++ b/include/backward.hpp @@ -0,0 +1,4406 @@ +/* + * backward.hpp + * Copyright 2013 Google Inc. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#ifndef H_6B9572DA_A64B_49E6_B234_051480991C89 +#define H_6B9572DA_A64B_49E6_B234_051480991C89 + +#ifndef __cplusplus +#error "It's not going to compile without a C++ compiler..." +#endif + +#if defined(BACKWARD_CXX11) +#elif defined(BACKWARD_CXX98) +#else +#if __cplusplus >= 201103L || (defined(_MSC_VER) && _MSC_VER >= 1800) +#define BACKWARD_CXX11 +#define BACKWARD_ATLEAST_CXX11 +#define BACKWARD_ATLEAST_CXX98 +#else +#define BACKWARD_CXX98 +#define BACKWARD_ATLEAST_CXX98 +#endif +#endif + +// You can define one of the following (or leave it to the auto-detection): +// +// #define BACKWARD_SYSTEM_LINUX +// - specialization for linux +// +// #define BACKWARD_SYSTEM_DARWIN +// - specialization for Mac OS X 10.5 and later. +// +// #define BACKWARD_SYSTEM_WINDOWS +// - specialization for Windows (Clang 9 and MSVC2017) +// +// #define BACKWARD_SYSTEM_UNKNOWN +// - placebo implementation, does nothing. +// +#if defined(BACKWARD_SYSTEM_LINUX) +#elif defined(BACKWARD_SYSTEM_DARWIN) +#elif defined(BACKWARD_SYSTEM_UNKNOWN) +#elif defined(BACKWARD_SYSTEM_WINDOWS) +#else +#if defined(__linux) || defined(__linux__) +#define BACKWARD_SYSTEM_LINUX +#elif defined(__APPLE__) +#define BACKWARD_SYSTEM_DARWIN +#elif defined(_WIN32) +#define BACKWARD_SYSTEM_WINDOWS +#else +#define BACKWARD_SYSTEM_UNKNOWN +#endif +#endif + +#define NOINLINE __attribute__((noinline)) + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#if defined(BACKWARD_SYSTEM_LINUX) + +// On linux, backtrace can back-trace or "walk" the stack using the following +// libraries: +// +// #define BACKWARD_HAS_UNWIND 1 +// - unwind comes from libgcc, but I saw an equivalent inside clang itself. +// - with unwind, the stacktrace is as accurate as it can possibly be, since +// this is used by the C++ runtine in gcc/clang for stack unwinding on +// exception. 
+// - normally libgcc is already linked to your program by default. +// +// #define BACKWARD_HAS_LIBUNWIND 1 +// - libunwind provides, in some cases, a more accurate stacktrace as it knows +// to decode signal handler frames and lets us edit the context registers when +// unwinding, allowing stack traces over bad function references. +// +// #define BACKWARD_HAS_BACKTRACE == 1 +// - backtrace seems to be a little bit more portable than libunwind, but on +// linux, it uses unwind anyway, but abstract away a tiny information that is +// sadly really important in order to get perfectly accurate stack traces. +// - backtrace is part of the (e)glib library. +// +// The default is: +// #define BACKWARD_HAS_UNWIND == 1 +// +// Note that only one of the define should be set to 1 at a time. +// +#if BACKWARD_HAS_UNWIND == 1 +#elif BACKWARD_HAS_LIBUNWIND == 1 +#elif BACKWARD_HAS_BACKTRACE == 1 +#else +#undef BACKWARD_HAS_UNWIND +#define BACKWARD_HAS_UNWIND 1 +#undef BACKWARD_HAS_LIBUNWIND +#define BACKWARD_HAS_LIBUNWIND 0 +#undef BACKWARD_HAS_BACKTRACE +#define BACKWARD_HAS_BACKTRACE 0 +#endif + +// On linux, backward can extract detailed information about a stack trace +// using one of the following libraries: +// +// #define BACKWARD_HAS_DW 1 +// - libdw gives you the most juicy details out of your stack traces: +// - object filename +// - function name +// - source filename +// - line and column numbers +// - source code snippet (assuming the file is accessible) +// - variables name and values (if not optimized out) +// - You need to link with the lib "dw": +// - apt-get install libdw-dev +// - g++/clang++ -ldw ... +// +// #define BACKWARD_HAS_BFD 1 +// - With libbfd, you get a fair amount of details: +// - object filename +// - function name +// - source filename +// - line numbers +// - source code snippet (assuming the file is accessible) +// - You need to link with the lib "bfd": +// - apt-get install binutils-dev +// - g++/clang++ -lbfd ... 
+// +// #define BACKWARD_HAS_DWARF 1 +// - libdwarf gives you the most juicy details out of your stack traces: +// - object filename +// - function name +// - source filename +// - line and column numbers +// - source code snippet (assuming the file is accessible) +// - variables name and values (if not optimized out) +// - You need to link with the lib "dwarf": +// - apt-get install libdwarf-dev +// - g++/clang++ -ldwarf ... +// +// #define BACKWARD_HAS_BACKTRACE_SYMBOL 1 +// - backtrace provides minimal details for a stack trace: +// - object filename +// - function name +// - backtrace is part of the (e)glib library. +// +// The default is: +// #define BACKWARD_HAS_BACKTRACE_SYMBOL == 1 +// +// Note that only one of the define should be set to 1 at a time. +// +#if BACKWARD_HAS_DW == 1 +#elif BACKWARD_HAS_BFD == 1 +#elif BACKWARD_HAS_DWARF == 1 +#elif BACKWARD_HAS_BACKTRACE_SYMBOL == 1 +#else +#undef BACKWARD_HAS_DW +#define BACKWARD_HAS_DW 0 +#undef BACKWARD_HAS_BFD +#define BACKWARD_HAS_BFD 0 +#undef BACKWARD_HAS_DWARF +#define BACKWARD_HAS_DWARF 0 +#undef BACKWARD_HAS_BACKTRACE_SYMBOL +#define BACKWARD_HAS_BACKTRACE_SYMBOL 1 +#endif + +#include +#include +#ifdef __ANDROID__ +// Old Android API levels define _Unwind_Ptr in both link.h and +// unwind.h Rename the one in link.h as we are not going to be using +// it +#define _Unwind_Ptr _Unwind_Ptr_Custom +#include +#undef _Unwind_Ptr +#else +#include +#endif +#include +#include +#include +#include + +#if BACKWARD_HAS_BFD == 1 +// NOTE: defining PACKAGE{,_VERSION} is required before including +// bfd.h on some platforms, see also: +// https://sourceware.org/bugzilla/show_bug.cgi?id=14243 +#ifndef PACKAGE +#define PACKAGE +#endif +#ifndef PACKAGE_VERSION +#define PACKAGE_VERSION +#endif +#include +#ifndef _GNU_SOURCE +#define _GNU_SOURCE +#include +#undef _GNU_SOURCE +#else +#include +#endif +#endif + +#if BACKWARD_HAS_DW == 1 +#include +#include +#include +#endif + +#if BACKWARD_HAS_DWARF == 1 +#include 
+#include +#include +#include +#include +#ifndef _GNU_SOURCE +#define _GNU_SOURCE +#include +#undef _GNU_SOURCE +#else +#include +#endif +#endif + +#if (BACKWARD_HAS_BACKTRACE == 1) || (BACKWARD_HAS_BACKTRACE_SYMBOL == 1) +// then we shall rely on backtrace +#include +#endif + +#endif // defined(BACKWARD_SYSTEM_LINUX) + +#if defined(BACKWARD_SYSTEM_DARWIN) +// On Darwin, backtrace can back-trace or "walk" the stack using the following +// libraries: +// +// #define BACKWARD_HAS_UNWIND 1 +// - unwind comes from libgcc, but I saw an equivalent inside clang itself. +// - with unwind, the stacktrace is as accurate as it can possibly be, since +// this is used by the C++ runtine in gcc/clang for stack unwinding on +// exception. +// - normally libgcc is already linked to your program by default. +// +// #define BACKWARD_HAS_LIBUNWIND 1 +// - libunwind comes from clang, which implements an API compatible version. +// - libunwind provides, in some cases, a more accurate stacktrace as it knows +// to decode signal handler frames and lets us edit the context registers when +// unwinding, allowing stack traces over bad function references. +// +// #define BACKWARD_HAS_BACKTRACE == 1 +// - backtrace is available by default, though it does not produce as much +// information as another library might. +// +// The default is: +// #define BACKWARD_HAS_UNWIND == 1 +// +// Note that only one of the define should be set to 1 at a time. 
+// +#if BACKWARD_HAS_UNWIND == 1 +#elif BACKWARD_HAS_BACKTRACE == 1 +#elif BACKWARD_HAS_LIBUNWIND == 1 +#else +#undef BACKWARD_HAS_UNWIND +#define BACKWARD_HAS_UNWIND 1 +#undef BACKWARD_HAS_BACKTRACE +#define BACKWARD_HAS_BACKTRACE 0 +#undef BACKWARD_HAS_LIBUNWIND +#define BACKWARD_HAS_LIBUNWIND 0 +#endif + +// On Darwin, backward can extract detailed information about a stack trace +// using one of the following libraries: +// +// #define BACKWARD_HAS_BACKTRACE_SYMBOL 1 +// - backtrace provides minimal details for a stack trace: +// - object filename +// - function name +// +// The default is: +// #define BACKWARD_HAS_BACKTRACE_SYMBOL == 1 +// +#if BACKWARD_HAS_BACKTRACE_SYMBOL == 1 +#else +#undef BACKWARD_HAS_BACKTRACE_SYMBOL +#define BACKWARD_HAS_BACKTRACE_SYMBOL 1 +#endif + +#include +#include +#include +#include +#include +#include + +#if (BACKWARD_HAS_BACKTRACE == 1) || (BACKWARD_HAS_BACKTRACE_SYMBOL == 1) +#include +#endif +#endif // defined(BACKWARD_SYSTEM_DARWIN) + +#if defined(BACKWARD_SYSTEM_WINDOWS) + +#include +#include +#include + +#include +typedef SSIZE_T ssize_t; + +#define NOMINMAX +#include +#include + +#include +#include + +#ifndef __clang__ +#undef NOINLINE +#define NOINLINE __declspec(noinline) +#endif + +#pragma comment(lib, "psapi.lib") +#pragma comment(lib, "dbghelp.lib") + +// Comment / packing is from stackoverflow: +// https://stackoverflow.com/questions/6205981/windows-c-stack-trace-from-a-running-app/28276227#28276227 +// Some versions of imagehlp.dll lack the proper packing directives themselves +// so we need to do it. +#pragma pack(push, before_imagehlp, 8) +#include +#pragma pack(pop, before_imagehlp) + +// TODO maybe these should be undefined somewhere else? 
+#undef BACKWARD_HAS_UNWIND +#undef BACKWARD_HAS_BACKTRACE +#if BACKWARD_HAS_PDB_SYMBOL == 1 +#else +#undef BACKWARD_HAS_PDB_SYMBOL +#define BACKWARD_HAS_PDB_SYMBOL 1 +#endif + +#endif + +#if BACKWARD_HAS_UNWIND == 1 + +#include +// while gcc's unwind.h defines something like that: +// extern _Unwind_Ptr _Unwind_GetIP (struct _Unwind_Context *); +// extern _Unwind_Ptr _Unwind_GetIPInfo (struct _Unwind_Context *, int *); +// +// clang's unwind.h defines something like this: +// uintptr_t _Unwind_GetIP(struct _Unwind_Context* __context); +// +// Even if the _Unwind_GetIPInfo can be linked to, it is not declared, worse we +// cannot just redeclare it because clang's unwind.h doesn't define _Unwind_Ptr +// anyway. +// +// Luckily we can play on the fact that the guard macros have a different name: +#ifdef __CLANG_UNWIND_H +// In fact, this function still comes from libgcc (on my different linux boxes, +// clang links against libgcc). +#include +extern "C" uintptr_t _Unwind_GetIPInfo(_Unwind_Context *, int *); +#endif + +#endif // BACKWARD_HAS_UNWIND == 1 + +#if BACKWARD_HAS_LIBUNWIND == 1 +#define UNW_LOCAL_ONLY +#include +#endif // BACKWARD_HAS_LIBUNWIND == 1 + +#ifdef BACKWARD_ATLEAST_CXX11 +#include +#include // for std::swap +namespace backward { +namespace details { +template struct hashtable { + typedef std::unordered_map type; +}; +using std::move; +} // namespace details +} // namespace backward +#else // NOT BACKWARD_ATLEAST_CXX11 +#define nullptr NULL +#define override +#include +namespace backward { +namespace details { +template struct hashtable { + typedef std::map type; +}; +template const T &move(const T &v) { return v; } +template T &move(T &v) { return v; } +} // namespace details +} // namespace backward +#endif // BACKWARD_ATLEAST_CXX11 + +namespace backward { +namespace details { +#if defined(BACKWARD_SYSTEM_WINDOWS) +const char kBackwardPathDelimiter[] = ";"; +#else +const char kBackwardPathDelimiter[] = ":"; +#endif +} // namespace details +} // 
namespace backward + +namespace backward { + +namespace system_tag { +struct linux_tag; // seems that I cannot call that "linux" because the name +// is already defined... so I am adding _tag everywhere. +struct darwin_tag; +struct windows_tag; +struct unknown_tag; + +#if defined(BACKWARD_SYSTEM_LINUX) +typedef linux_tag current_tag; +#elif defined(BACKWARD_SYSTEM_DARWIN) +typedef darwin_tag current_tag; +#elif defined(BACKWARD_SYSTEM_WINDOWS) +typedef windows_tag current_tag; +#elif defined(BACKWARD_SYSTEM_UNKNOWN) +typedef unknown_tag current_tag; +#else +#error "May I please get my system defines?" +#endif +} // namespace system_tag + +namespace trace_resolver_tag { +#if defined(BACKWARD_SYSTEM_LINUX) +struct libdw; +struct libbfd; +struct libdwarf; +struct backtrace_symbol; + +#if BACKWARD_HAS_DW == 1 +typedef libdw current; +#elif BACKWARD_HAS_BFD == 1 +typedef libbfd current; +#elif BACKWARD_HAS_DWARF == 1 +typedef libdwarf current; +#elif BACKWARD_HAS_BACKTRACE_SYMBOL == 1 +typedef backtrace_symbol current; +#else +#error "You shall not pass, until you know what you want." +#endif +#elif defined(BACKWARD_SYSTEM_DARWIN) +struct backtrace_symbol; + +#if BACKWARD_HAS_BACKTRACE_SYMBOL == 1 +typedef backtrace_symbol current; +#else +#error "You shall not pass, until you know what you want." +#endif +#elif defined(BACKWARD_SYSTEM_WINDOWS) +struct pdb_symbol; +#if BACKWARD_HAS_PDB_SYMBOL == 1 +typedef pdb_symbol current; +#else +#error "You shall not pass, until you know what you want." 
+#endif +#endif +} // namespace trace_resolver_tag + +namespace details { + +template struct rm_ptr { typedef T type; }; + +template struct rm_ptr { typedef T type; }; + +template struct rm_ptr { typedef const T type; }; + +template struct deleter { + template void operator()(U &ptr) const { (*F)(ptr); } +}; + +template struct default_delete { + void operator()(T &ptr) const { delete ptr; } +}; + +template > +class handle { + struct dummy; + T _val; + bool _empty; + +#ifdef BACKWARD_ATLEAST_CXX11 + handle(const handle &) = delete; + handle &operator=(const handle &) = delete; +#endif + +public: + ~handle() { + if (!_empty) { + Deleter()(_val); + } + } + + explicit handle() : _val(), _empty(true) {} + explicit handle(T val) : _val(val), _empty(false) { + if (!_val) + _empty = true; + } + +#ifdef BACKWARD_ATLEAST_CXX11 + handle(handle &&from) : _empty(true) { swap(from); } + handle &operator=(handle &&from) { + swap(from); + return *this; + } +#else + explicit handle(const handle &from) : _empty(true) { + // some sort of poor man's move semantic. + swap(const_cast(from)); + } + handle &operator=(const handle &from) { + // some sort of poor man's move semantic. + swap(const_cast(from)); + return *this; + } +#endif + + void reset(T new_val) { + handle tmp(new_val); + swap(tmp); + } + + void update(T new_val) { + _val = new_val; + _empty = !static_cast(new_val); + } + + operator const dummy *() const { + if (_empty) { + return nullptr; + } + return reinterpret_cast(_val); + } + T get() { return _val; } + T release() { + _empty = true; + return _val; + } + void swap(handle &b) { + using std::swap; + swap(b._val, _val); // can throw, we are safe here. + swap(b._empty, _empty); // should not throw: if you cannot swap two + // bools without throwing... It's a lost cause anyway! 
+ } + + T &operator->() { return _val; } + const T &operator->() const { return _val; } + + typedef typename rm_ptr::type &ref_t; + typedef const typename rm_ptr::type &const_ref_t; + ref_t operator*() { return *_val; } + const_ref_t operator*() const { return *_val; } + ref_t operator[](size_t idx) { return _val[idx]; } + + // Watch out, we've got a badass over here + T *operator&() { + _empty = false; + return &_val; + } +}; + +// Default demangler implementation (do nothing). +template struct demangler_impl { + static std::string demangle(const char *funcname) { return funcname; } +}; + +#if defined(BACKWARD_SYSTEM_LINUX) || defined(BACKWARD_SYSTEM_DARWIN) + +template <> struct demangler_impl { + demangler_impl() : _demangle_buffer_length(0) {} + + std::string demangle(const char *funcname) { + using namespace details; + char *result = abi::__cxa_demangle(funcname, _demangle_buffer.get(), + &_demangle_buffer_length, nullptr); + if (result) { + _demangle_buffer.update(result); + return result; + } + return funcname; + } + +private: + details::handle _demangle_buffer; + size_t _demangle_buffer_length; +}; + +#endif // BACKWARD_SYSTEM_LINUX || BACKWARD_SYSTEM_DARWIN + +struct demangler : public demangler_impl {}; + +// Split a string on the platform's PATH delimiter. Example: if delimiter +// is ":" then: +// "" --> [] +// ":" --> ["",""] +// "::" --> ["","",""] +// "/a/b/c" --> ["/a/b/c"] +// "/a/b/c:/d/e/f" --> ["/a/b/c","/d/e/f"] +// etc. 
+inline std::vector split_source_prefixes(const std::string &s) { + std::vector out; + size_t last = 0; + size_t next = 0; + size_t delimiter_size = sizeof(kBackwardPathDelimiter) - 1; + while ((next = s.find(kBackwardPathDelimiter, last)) != std::string::npos) { + out.push_back(s.substr(last, next - last)); + last = next + delimiter_size; + } + if (last <= s.length()) { + out.push_back(s.substr(last)); + } + return out; +} + +} // namespace details + +/*************** A TRACE ***************/ + +struct Trace { + void *addr; + size_t idx; + + Trace() : addr(nullptr), idx(0) {} + + explicit Trace(void *_addr, size_t _idx) : addr(_addr), idx(_idx) {} +}; + +struct ResolvedTrace : public Trace { + + struct SourceLoc { + std::string function; + std::string filename; + unsigned line; + unsigned col; + + SourceLoc() : line(0), col(0) {} + + bool operator==(const SourceLoc &b) const { + return function == b.function && filename == b.filename && + line == b.line && col == b.col; + } + + bool operator!=(const SourceLoc &b) const { return !(*this == b); } + }; + + // In which binary object this trace is located. + std::string object_filename; + + // The function in the object that contain the trace. This is not the same + // as source.function which can be an function inlined in object_function. + std::string object_function; + + // The source location of this trace. It is possible for filename to be + // empty and for line/col to be invalid (value 0) if this information + // couldn't be deduced, for example if there is no debug information in the + // binary object. + SourceLoc source; + + // An optionals list of "inliners". All the successive sources location + // from where the source location of the trace (the attribute right above) + // is inlined. It is especially useful when you compiled with optimization. 
+ typedef std::vector source_locs_t; + source_locs_t inliners; + + ResolvedTrace() : Trace() {} + ResolvedTrace(const Trace &mini_trace) : Trace(mini_trace) {} +}; + +/*************** STACK TRACE ***************/ + +// default implemention. +template class StackTraceImpl { +public: + size_t size() const { return 0; } + Trace operator[](size_t) const { return Trace(); } + size_t load_here(size_t = 0) { return 0; } + size_t load_from(void *, size_t = 0, void * = nullptr, void * = nullptr) { + return 0; + } + size_t thread_id() const { return 0; } + void skip_n_firsts(size_t) {} +}; + +class StackTraceImplBase { +public: + StackTraceImplBase() + : _thread_id(0), _skip(0), _context(nullptr), _error_addr(nullptr) {} + + size_t thread_id() const { return _thread_id; } + + void skip_n_firsts(size_t n) { _skip = n; } + +protected: + void load_thread_info() { +#ifdef BACKWARD_SYSTEM_LINUX +#ifndef __ANDROID__ + _thread_id = static_cast(syscall(SYS_gettid)); +#else + _thread_id = static_cast(gettid()); +#endif + if (_thread_id == static_cast(getpid())) { + // If the thread is the main one, let's hide that. + // I like to keep little secret sometimes. + _thread_id = 0; + } +#elif defined(BACKWARD_SYSTEM_DARWIN) + _thread_id = reinterpret_cast(pthread_self()); + if (pthread_main_np() == 1) { + // If the thread is the main one, let's hide that. + _thread_id = 0; + } +#endif + } + + void set_context(void *context) { _context = context; } + void *context() const { return _context; } + + void set_error_addr(void *error_addr) { _error_addr = error_addr; } + void *error_addr() const { return _error_addr; } + + size_t skip_n_firsts() const { return _skip; } + +private: + size_t _thread_id; + size_t _skip; + void *_context; + void *_error_addr; +}; + +class StackTraceImplHolder : public StackTraceImplBase { +public: + size_t size() const { + return (_stacktrace.size() >= skip_n_firsts()) + ? 
_stacktrace.size() - skip_n_firsts() + : 0; + } + Trace operator[](size_t idx) const { + if (idx >= size()) { + return Trace(); + } + return Trace(_stacktrace[idx + skip_n_firsts()], idx); + } + void *const *begin() const { + if (size()) { + return &_stacktrace[skip_n_firsts()]; + } + return nullptr; + } + +protected: + std::vector _stacktrace; +}; + +#if BACKWARD_HAS_UNWIND == 1 + +namespace details { + +template class Unwinder { +public: + size_t operator()(F &f, size_t depth) { + _f = &f; + _index = -1; + _depth = depth; + _Unwind_Backtrace(&this->backtrace_trampoline, this); + return static_cast(_index); + } + +private: + F *_f; + ssize_t _index; + size_t _depth; + + static _Unwind_Reason_Code backtrace_trampoline(_Unwind_Context *ctx, + void *self) { + return (static_cast(self))->backtrace(ctx); + } + + _Unwind_Reason_Code backtrace(_Unwind_Context *ctx) { + if (_index >= 0 && static_cast(_index) >= _depth) + return _URC_END_OF_STACK; + + int ip_before_instruction = 0; + uintptr_t ip = _Unwind_GetIPInfo(ctx, &ip_before_instruction); + + if (!ip_before_instruction) { + // calculating 0-1 for unsigned, looks like a possible bug to sanitiziers, + // so let's do it explicitly: + if (ip == 0) { + ip = std::numeric_limits::max(); // set it to 0xffff... (as + // from casting 0-1) + } else { + ip -= 1; // else just normally decrement it (no overflow/underflow will + // happen) + } + } + + if (_index >= 0) { // ignore first frame. 
+ (*_f)(static_cast(_index), reinterpret_cast(ip)); + } + _index += 1; + return _URC_NO_REASON; + } +}; + +template size_t unwind(F f, size_t depth) { + Unwinder unwinder; + return unwinder(f, depth); +} + +} // namespace details + +template <> +class StackTraceImpl : public StackTraceImplHolder { +public: + NOINLINE + size_t load_here(size_t depth = 32, void *context = nullptr, + void *error_addr = nullptr) { + load_thread_info(); + set_context(context); + set_error_addr(error_addr); + if (depth == 0) { + return 0; + } + _stacktrace.resize(depth); + size_t trace_cnt = details::unwind(callback(*this), depth); + _stacktrace.resize(trace_cnt); + skip_n_firsts(0); + return size(); + } + size_t load_from(void *addr, size_t depth = 32, void *context = nullptr, + void *error_addr = nullptr) { + load_here(depth + 8, context, error_addr); + + for (size_t i = 0; i < _stacktrace.size(); ++i) { + if (_stacktrace[i] == addr) { + skip_n_firsts(i); + break; + } + } + + _stacktrace.resize(std::min(_stacktrace.size(), skip_n_firsts() + depth)); + return size(); + } + +private: + struct callback { + StackTraceImpl &self; + callback(StackTraceImpl &_self) : self(_self) {} + + void operator()(size_t idx, void *addr) { self._stacktrace[idx] = addr; } + }; +}; + +#elif BACKWARD_HAS_LIBUNWIND == 1 + +template <> +class StackTraceImpl : public StackTraceImplHolder { +public: + __attribute__((noinline)) size_t load_here(size_t depth = 32, + void *_context = nullptr, + void *_error_addr = nullptr) { + set_context(_context); + set_error_addr(_error_addr); + load_thread_info(); + if (depth == 0) { + return 0; + } + _stacktrace.resize(depth + 1); + + int result = 0; + + unw_context_t ctx; + size_t index = 0; + + // Add the tail call. 
If the Instruction Pointer is the crash address it + // means we got a bad function pointer dereference, so we "unwind" the + // bad pointer manually by using the return address pointed to by the + // Stack Pointer as the Instruction Pointer and letting libunwind do + // the rest + + if (context()) { + ucontext_t *uctx = reinterpret_cast(context()); +#ifdef REG_RIP // x86_64 + if (uctx->uc_mcontext.gregs[REG_RIP] == + reinterpret_cast(error_addr())) { + uctx->uc_mcontext.gregs[REG_RIP] = + *reinterpret_cast(uctx->uc_mcontext.gregs[REG_RSP]); + } + _stacktrace[index] = + reinterpret_cast(uctx->uc_mcontext.gregs[REG_RIP]); + ++index; + ctx = *reinterpret_cast(uctx); +#elif defined(REG_EIP) // x86_32 + if (uctx->uc_mcontext.gregs[REG_EIP] == + reinterpret_cast(error_addr())) { + uctx->uc_mcontext.gregs[REG_EIP] = + *reinterpret_cast(uctx->uc_mcontext.gregs[REG_ESP]); + } + _stacktrace[index] = + reinterpret_cast(uctx->uc_mcontext.gregs[REG_EIP]); + ++index; + ctx = *reinterpret_cast(uctx); +#elif defined(__arm__) + // libunwind uses its own context type for ARM unwinding. 
+ // Copy the registers from the signal handler's context so we can + // unwind + unw_getcontext(&ctx); + ctx.regs[UNW_ARM_R0] = uctx->uc_mcontext.arm_r0; + ctx.regs[UNW_ARM_R1] = uctx->uc_mcontext.arm_r1; + ctx.regs[UNW_ARM_R2] = uctx->uc_mcontext.arm_r2; + ctx.regs[UNW_ARM_R3] = uctx->uc_mcontext.arm_r3; + ctx.regs[UNW_ARM_R4] = uctx->uc_mcontext.arm_r4; + ctx.regs[UNW_ARM_R5] = uctx->uc_mcontext.arm_r5; + ctx.regs[UNW_ARM_R6] = uctx->uc_mcontext.arm_r6; + ctx.regs[UNW_ARM_R7] = uctx->uc_mcontext.arm_r7; + ctx.regs[UNW_ARM_R8] = uctx->uc_mcontext.arm_r8; + ctx.regs[UNW_ARM_R9] = uctx->uc_mcontext.arm_r9; + ctx.regs[UNW_ARM_R10] = uctx->uc_mcontext.arm_r10; + ctx.regs[UNW_ARM_R11] = uctx->uc_mcontext.arm_fp; + ctx.regs[UNW_ARM_R12] = uctx->uc_mcontext.arm_ip; + ctx.regs[UNW_ARM_R13] = uctx->uc_mcontext.arm_sp; + ctx.regs[UNW_ARM_R14] = uctx->uc_mcontext.arm_lr; + ctx.regs[UNW_ARM_R15] = uctx->uc_mcontext.arm_pc; + + // If we have crashed in the PC use the LR instead, as this was + // a bad function dereference + if (reinterpret_cast(error_addr()) == + uctx->uc_mcontext.arm_pc) { + ctx.regs[UNW_ARM_R15] = + uctx->uc_mcontext.arm_lr - sizeof(unsigned long); + } + _stacktrace[index] = reinterpret_cast(ctx.regs[UNW_ARM_R15]); + ++index; +#elif defined(__APPLE__) && defined(__x86_64__) + unw_getcontext(&ctx); + // OS X's implementation of libunwind uses its own context object + // so we need to convert the passed context to libunwind's format + // (information about the data layout taken from unw_getcontext.s + // in Apple's libunwind source + ctx.data[0] = uctx->uc_mcontext->__ss.__rax; + ctx.data[1] = uctx->uc_mcontext->__ss.__rbx; + ctx.data[2] = uctx->uc_mcontext->__ss.__rcx; + ctx.data[3] = uctx->uc_mcontext->__ss.__rdx; + ctx.data[4] = uctx->uc_mcontext->__ss.__rdi; + ctx.data[5] = uctx->uc_mcontext->__ss.__rsi; + ctx.data[6] = uctx->uc_mcontext->__ss.__rbp; + ctx.data[7] = uctx->uc_mcontext->__ss.__rsp; + ctx.data[8] = uctx->uc_mcontext->__ss.__r8; + ctx.data[9] 
= uctx->uc_mcontext->__ss.__r9; + ctx.data[10] = uctx->uc_mcontext->__ss.__r10; + ctx.data[11] = uctx->uc_mcontext->__ss.__r11; + ctx.data[12] = uctx->uc_mcontext->__ss.__r12; + ctx.data[13] = uctx->uc_mcontext->__ss.__r13; + ctx.data[14] = uctx->uc_mcontext->__ss.__r14; + ctx.data[15] = uctx->uc_mcontext->__ss.__r15; + ctx.data[16] = uctx->uc_mcontext->__ss.__rip; + + // If the IP is the same as the crash address we have a bad function + // dereference The caller's address is pointed to by %rsp, so we + // dereference that value and set it to be the next frame's IP. + if (uctx->uc_mcontext->__ss.__rip == + reinterpret_cast<__uint64_t>(error_addr())) { + ctx.data[16] = + *reinterpret_cast<__uint64_t *>(uctx->uc_mcontext->__ss.__rsp); + } + _stacktrace[index] = reinterpret_cast(ctx.data[16]); + ++index; +#elif defined(__APPLE__) + unw_getcontext(&ctx) + // TODO: Convert the ucontext_t to libunwind's unw_context_t like + // we do in 64 bits + if (ctx.uc_mcontext->__ss.__eip == + reinterpret_cast(error_addr())) { + ctx.uc_mcontext->__ss.__eip = ctx.uc_mcontext->__ss.__esp; + } + _stacktrace[index] = + reinterpret_cast(ctx.uc_mcontext->__ss.__eip); + ++index; +#endif + } + + unw_cursor_t cursor; + if (context()) { +#if defined(UNW_INIT_SIGNAL_FRAME) + result = unw_init_local2(&cursor, &ctx, UNW_INIT_SIGNAL_FRAME); +#else + result = unw_init_local(&cursor, &ctx); +#endif + } else { + unw_getcontext(&ctx); + ; + result = unw_init_local(&cursor, &ctx); + } + + if (result != 0) + return 1; + + unw_word_t ip = 0; + + while (index <= depth && unw_step(&cursor) > 0) { + result = unw_get_reg(&cursor, UNW_REG_IP, &ip); + if (result == 0) { + _stacktrace[index] = reinterpret_cast(--ip); + ++index; + } + } + --index; + + _stacktrace.resize(index + 1); + skip_n_firsts(0); + return size(); + } + + size_t load_from(void *addr, size_t depth = 32, void *context = nullptr, + void *error_addr = nullptr) { + load_here(depth + 8, context, error_addr); + + for (size_t i = 0; i < 
_stacktrace.size(); ++i) { + if (_stacktrace[i] == addr) { + skip_n_firsts(i); + _stacktrace[i] = (void *)((uintptr_t)_stacktrace[i]); + break; + } + } + + _stacktrace.resize(std::min(_stacktrace.size(), skip_n_firsts() + depth)); + return size(); + } +}; + +#elif defined(BACKWARD_HAS_BACKTRACE) + +template <> +class StackTraceImpl : public StackTraceImplHolder { +public: + NOINLINE + size_t load_here(size_t depth = 32, void *context = nullptr, + void *error_addr = nullptr) { + set_context(context); + set_error_addr(error_addr); + load_thread_info(); + if (depth == 0) { + return 0; + } + _stacktrace.resize(depth + 1); + size_t trace_cnt = backtrace(&_stacktrace[0], _stacktrace.size()); + _stacktrace.resize(trace_cnt); + skip_n_firsts(1); + return size(); + } + + size_t load_from(void *addr, size_t depth = 32, void *context = nullptr, + void *error_addr = nullptr) { + load_here(depth + 8, context, error_addr); + + for (size_t i = 0; i < _stacktrace.size(); ++i) { + if (_stacktrace[i] == addr) { + skip_n_firsts(i); + _stacktrace[i] = (void *)((uintptr_t)_stacktrace[i] + 1); + break; + } + } + + _stacktrace.resize(std::min(_stacktrace.size(), skip_n_firsts() + depth)); + return size(); + } +}; + +#elif defined(BACKWARD_SYSTEM_WINDOWS) + +template <> +class StackTraceImpl : public StackTraceImplHolder { +public: + // We have to load the machine type from the image info + // So we first initialize the resolver, and it tells us this info + void set_machine_type(DWORD machine_type) { machine_type_ = machine_type; } + void set_context(CONTEXT *ctx) { ctx_ = ctx; } + void set_thread_handle(HANDLE handle) { thd_ = handle; } + + NOINLINE + size_t load_here(size_t depth = 32, void *context = nullptr, + void *error_addr = nullptr) { + set_context(static_cast(context)); + set_error_addr(error_addr); + CONTEXT localCtx; // used when no context is provided + + if (depth == 0) { + return 0; + } + + if (!ctx_) { + ctx_ = &localCtx; + RtlCaptureContext(ctx_); + } + + if (!thd_) { 
thd_ = GetCurrentThread(); + } + + HANDLE process = GetCurrentProcess(); + + STACKFRAME64 s; + memset(&s, 0, sizeof(STACKFRAME64)); + + // TODO: 32 bit context capture + s.AddrStack.Mode = AddrModeFlat; + s.AddrFrame.Mode = AddrModeFlat; + s.AddrPC.Mode = AddrModeFlat; +#ifdef _M_X64 + s.AddrPC.Offset = ctx_->Rip; + s.AddrStack.Offset = ctx_->Rsp; + s.AddrFrame.Offset = ctx_->Rbp; +#else + s.AddrPC.Offset = ctx_->Eip; + s.AddrStack.Offset = ctx_->Esp; + s.AddrFrame.Offset = ctx_->Ebp; +#endif + + if (!machine_type_) { +#ifdef _M_X64 + machine_type_ = IMAGE_FILE_MACHINE_AMD64; +#else + machine_type_ = IMAGE_FILE_MACHINE_I386; +#endif + } + + for (;;) { + // NOTE: this only works if PDBs are already loaded! + SetLastError(0); + if (!StackWalk64(machine_type_, process, thd_, &s, ctx_, NULL, + SymFunctionTableAccess64, SymGetModuleBase64, NULL)) + break; + + if (s.AddrReturn.Offset == 0) + break; + + _stacktrace.push_back(reinterpret_cast(s.AddrPC.Offset)); + + if (size() >= depth) + break; + } + + return size(); + } + + size_t load_from(void *addr, size_t depth = 32, void *context = nullptr, + void *error_addr = nullptr) { + load_here(depth + 8, context, error_addr); + + for (size_t i = 0; i < _stacktrace.size(); ++i) { + if (_stacktrace[i] == addr) { + skip_n_firsts(i); + break; + } + } + + _stacktrace.resize(std::min(_stacktrace.size(), skip_n_firsts() + depth)); + return size(); + } + +private: + DWORD machine_type_ = 0; + HANDLE thd_ = 0; + CONTEXT *ctx_ = nullptr; +}; + +#endif + +class StackTrace : public StackTraceImpl {}; + +/*************** TRACE RESOLVER ***************/ + +template class TraceResolverImpl; + +#ifdef BACKWARD_SYSTEM_UNKNOWN + +template <> class TraceResolverImpl { +public: + template void load_stacktrace(ST &) {} + ResolvedTrace resolve(ResolvedTrace t) { return t; } +}; + +#endif + +class TraceResolverImplBase { +protected: + std::string demangle(const char *funcname) { + return _demangler.demangle(funcname); + } + +private: + 
details::demangler _demangler; +}; + +#ifdef BACKWARD_SYSTEM_LINUX + +class TraceResolverLinuxBase : public TraceResolverImplBase { +public: + TraceResolverLinuxBase() + : argv0_(get_argv0()), exec_path_(read_symlink("/proc/self/exe")) {} + std::string resolve_exec_path(Dl_info &symbol_info) const { + // mutates symbol_info.dli_fname to be filename to open and returns filename + // to display + if (symbol_info.dli_fname == argv0_) { + // dladdr returns argv[0] in dli_fname for symbols contained in + // the main executable, which is not a valid path if the + // executable was found by a search of the PATH environment + // variable; In that case, we actually open /proc/self/exe, which + // is always the actual executable (even if it was deleted/replaced!) + // but display the path that /proc/self/exe links to. + symbol_info.dli_fname = "/proc/self/exe"; + return exec_path_; + } else { + return symbol_info.dli_fname; + } + } + +private: + std::string argv0_; + std::string exec_path_; + + static std::string get_argv0() { + std::string argv0; + std::ifstream ifs("/proc/self/cmdline"); + std::getline(ifs, argv0, '\0'); + return argv0; + } + + static std::string read_symlink(std::string const &symlink_path) { + std::string path; + path.resize(100); + + while (true) { + ssize_t len = + ::readlink(symlink_path.c_str(), &*path.begin(), path.size()); + if (len < 0) { + return ""; + } + if (static_cast(len) == path.size()) { + path.resize(path.size() * 2); + } else { + path.resize(static_cast(len)); + break; + } + } + + return path; + } +}; + +template class TraceResolverLinuxImpl; + +#if BACKWARD_HAS_BACKTRACE_SYMBOL == 1 + +template <> +class TraceResolverLinuxImpl + : public TraceResolverLinuxBase { +public: + template void load_stacktrace(ST &st) { + using namespace details; + if (st.size() == 0) { + return; + } + _symbols.reset(backtrace_symbols(st.begin(), (int)st.size())); + } + + ResolvedTrace resolve(ResolvedTrace trace) { + char *filename = _symbols[trace.idx]; + 
char *funcname = filename; + while (*funcname && *funcname != '(') { + funcname += 1; + } + trace.object_filename.assign(filename, + funcname); // ok even if funcname is the ending + // \0 (then we assign entire string) + + if (*funcname) { // if it's not end of string (e.g. from last frame ip==0) + funcname += 1; + char *funcname_end = funcname; + while (*funcname_end && *funcname_end != ')' && *funcname_end != '+') { + funcname_end += 1; + } + *funcname_end = '\0'; + trace.object_function = this->demangle(funcname); + trace.source.function = trace.object_function; // we cannot do better. + } + return trace; + } + +private: + details::handle _symbols; +}; + +#endif // BACKWARD_HAS_BACKTRACE_SYMBOL == 1 + +#if BACKWARD_HAS_BFD == 1 + +template <> +class TraceResolverLinuxImpl + : public TraceResolverLinuxBase { +public: + TraceResolverLinuxImpl() : _bfd_loaded(false) {} + + template void load_stacktrace(ST &) {} + + ResolvedTrace resolve(ResolvedTrace trace) { + Dl_info symbol_info; + + // trace.addr is a virtual address in memory pointing to some code. + // Let's try to find from which loaded object it comes from. + // The loaded object can be yourself btw. + if (!dladdr(trace.addr, &symbol_info)) { + return trace; // dat broken trace... + } + + // Now we get in symbol_info: + // .dli_fname: + // pathname of the shared object that contains the address. + // .dli_fbase: + // where the object is loaded in memory. + // .dli_sname: + // the name of the nearest symbol to trace.addr, we expect a + // function name. + // .dli_saddr: + // the exact address corresponding to .dli_sname. 
+ + if (symbol_info.dli_sname) { + trace.object_function = demangle(symbol_info.dli_sname); + } + + if (!symbol_info.dli_fname) { + return trace; + } + + trace.object_filename = resolve_exec_path(symbol_info); + bfd_fileobject &fobj = load_object_with_bfd(symbol_info.dli_fname); + if (!fobj.handle) { + return trace; // sad, we couldn't load the object :( + } + + find_sym_result *details_selected; // to be filled. + + // trace.addr is the next instruction to be executed after returning + // from the nested stack frame. In C++ this usually relate to the next + // statement right after the function call that leaded to a new stack + // frame. This is not usually what you want to see when printing out a + // stacktrace... + find_sym_result details_call_site = + find_symbol_details(fobj, trace.addr, symbol_info.dli_fbase); + details_selected = &details_call_site; + +#if BACKWARD_HAS_UNWIND == 0 + // ...this is why we also try to resolve the symbol that is right + // before the return address. If we are lucky enough, we will get the + // line of the function that was called. But if the code is optimized, + // we might get something absolutely not related since the compiler + // can reschedule the return address with inline functions and + // tail-call optimisation (among other things that I don't even know + // or cannot even dream about with my tiny limited brain). + find_sym_result details_adjusted_call_site = find_symbol_details( + fobj, (void *)(uintptr_t(trace.addr) - 1), symbol_info.dli_fbase); + + // In debug mode, we should always get the right thing(TM). + if (details_call_site.found && details_adjusted_call_site.found) { + // Ok, we assume that details_adjusted_call_site is a better estimation. 
+ details_selected = &details_adjusted_call_site; + trace.addr = (void *)(uintptr_t(trace.addr) - 1); + } + + if (details_selected == &details_call_site && details_call_site.found) { + // we have to re-resolve the symbol in order to reset some + // internal state in BFD... so we can call backtrace_inliners + // thereafter... + details_call_site = + find_symbol_details(fobj, trace.addr, symbol_info.dli_fbase); + } +#endif // BACKWARD_HAS_UNWIND + + if (details_selected->found) { + if (details_selected->filename) { + trace.source.filename = details_selected->filename; + } + trace.source.line = details_selected->line; + + if (details_selected->funcname) { + // this time we get the name of the function where the code is + // located, instead of the function were the address is + // located. In short, if the code was inlined, we get the + // function correspoding to the code. Else we already got in + // trace.function. + trace.source.function = demangle(details_selected->funcname); + + if (!symbol_info.dli_sname) { + // for the case dladdr failed to find the symbol name of + // the function, we might as well try to put something + // here. + trace.object_function = trace.source.function; + } + } + + // Maybe the source of the trace got inlined inside the function + // (trace.source.function). Let's see if we can get all the inlined + // calls along the way up to the initial call site. + trace.inliners = backtrace_inliners(fobj, *details_selected); + +#if 0 + if (trace.inliners.size() == 0) { + // Maybe the trace was not inlined... or maybe it was and we + // are lacking the debug information. Let's try to make the + // world better and see if we can get the line number of the + // function (trace.source.function) now. + // + // We will get the location of where the function start (to be + // exact: the first instruction that really start the + // function), not where the name of the function is defined. 
+ // This can be quite far away from the name of the function + // btw. + // + // If the source of the function is the same as the source of + // the trace, we cannot say if the trace was really inlined or + // not. However, if the filename of the source is different + // between the function and the trace... we can declare it as + // an inliner. This is not 100% accurate, but better than + // nothing. + + if (symbol_info.dli_saddr) { + find_sym_result details = find_symbol_details(fobj, + symbol_info.dli_saddr, + symbol_info.dli_fbase); + + if (details.found) { + ResolvedTrace::SourceLoc diy_inliner; + diy_inliner.line = details.line; + if (details.filename) { + diy_inliner.filename = details.filename; + } + if (details.funcname) { + diy_inliner.function = demangle(details.funcname); + } else { + diy_inliner.function = trace.source.function; + } + if (diy_inliner != trace.source) { + trace.inliners.push_back(diy_inliner); + } + } + } + } +#endif + } + + return trace; + } + +private: + bool _bfd_loaded; + + typedef details::handle> + bfd_handle_t; + + typedef details::handle bfd_symtab_t; + + struct bfd_fileobject { + bfd_handle_t handle; + bfd_vma base_addr; + bfd_symtab_t symtab; + bfd_symtab_t dynamic_symtab; + }; + + typedef details::hashtable::type fobj_bfd_map_t; + fobj_bfd_map_t _fobj_bfd_map; + + bfd_fileobject &load_object_with_bfd(const std::string &filename_object) { + using namespace details; + + if (!_bfd_loaded) { + using namespace details; + bfd_init(); + _bfd_loaded = true; + } + + fobj_bfd_map_t::iterator it = _fobj_bfd_map.find(filename_object); + if (it != _fobj_bfd_map.end()) { + return it->second; + } + + // this new object is empty for now. 
+ bfd_fileobject &r = _fobj_bfd_map[filename_object]; + + // we do the work temporary in this one; + bfd_handle_t bfd_handle; + + int fd = open(filename_object.c_str(), O_RDONLY); + bfd_handle.reset(bfd_fdopenr(filename_object.c_str(), "default", fd)); + if (!bfd_handle) { + close(fd); + return r; + } + + if (!bfd_check_format(bfd_handle.get(), bfd_object)) { + return r; // not an object? You lose. + } + + if ((bfd_get_file_flags(bfd_handle.get()) & HAS_SYMS) == 0) { + return r; // that's what happen when you forget to compile in debug. + } + + ssize_t symtab_storage_size = bfd_get_symtab_upper_bound(bfd_handle.get()); + + ssize_t dyn_symtab_storage_size = + bfd_get_dynamic_symtab_upper_bound(bfd_handle.get()); + + if (symtab_storage_size <= 0 && dyn_symtab_storage_size <= 0) { + return r; // weird, is the file is corrupted? + } + + bfd_symtab_t symtab, dynamic_symtab; + ssize_t symcount = 0, dyn_symcount = 0; + + if (symtab_storage_size > 0) { + symtab.reset(static_cast( + malloc(static_cast(symtab_storage_size)))); + symcount = bfd_canonicalize_symtab(bfd_handle.get(), symtab.get()); + } + + if (dyn_symtab_storage_size > 0) { + dynamic_symtab.reset(static_cast( + malloc(static_cast(dyn_symtab_storage_size)))); + dyn_symcount = bfd_canonicalize_dynamic_symtab(bfd_handle.get(), + dynamic_symtab.get()); + } + + if (symcount <= 0 && dyn_symcount <= 0) { + return r; // damned, that's a stripped file that you got there! 
+ } + + r.handle = move(bfd_handle); + r.symtab = move(symtab); + r.dynamic_symtab = move(dynamic_symtab); + return r; + } + + struct find_sym_result { + bool found; + const char *filename; + const char *funcname; + unsigned int line; + }; + + struct find_sym_context { + TraceResolverLinuxImpl *self; + bfd_fileobject *fobj; + void *addr; + void *base_addr; + find_sym_result result; + }; + + find_sym_result find_symbol_details(bfd_fileobject &fobj, void *addr, + void *base_addr) { + find_sym_context context; + context.self = this; + context.fobj = &fobj; + context.addr = addr; + context.base_addr = base_addr; + context.result.found = false; + bfd_map_over_sections(fobj.handle.get(), &find_in_section_trampoline, + static_cast(&context)); + return context.result; + } + + static void find_in_section_trampoline(bfd *, asection *section, void *data) { + find_sym_context *context = static_cast(data); + context->self->find_in_section( + reinterpret_cast(context->addr), + reinterpret_cast(context->base_addr), *context->fobj, section, + context->result); + } + + void find_in_section(bfd_vma addr, bfd_vma base_addr, bfd_fileobject &fobj, + asection *section, find_sym_result &result) { + if (result.found) + return; + +#ifdef bfd_get_section_flags + if ((bfd_get_section_flags(fobj.handle.get(), section) & SEC_ALLOC) == 0) +#else + if ((bfd_section_flags(section) & SEC_ALLOC) == 0) +#endif + return; // a debug section is never loaded automatically. + +#ifdef bfd_get_section_vma + bfd_vma sec_addr = bfd_get_section_vma(fobj.handle.get(), section); +#else + bfd_vma sec_addr = bfd_section_vma(section); +#endif +#ifdef bfd_get_section_size + bfd_size_type size = bfd_get_section_size(section); +#else + bfd_size_type size = bfd_section_size(section); +#endif + + // are we in the boundaries of the section? + if (addr < sec_addr || addr >= sec_addr + size) { + addr -= base_addr; // oups, a relocated object, lets try again... 
+ if (addr < sec_addr || addr >= sec_addr + size) { + return; + } + } + +#if defined(__clang__) +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wzero-as-null-pointer-constant" +#endif + if (!result.found && fobj.symtab) { + result.found = bfd_find_nearest_line( + fobj.handle.get(), section, fobj.symtab.get(), addr - sec_addr, + &result.filename, &result.funcname, &result.line); + } + + if (!result.found && fobj.dynamic_symtab) { + result.found = bfd_find_nearest_line( + fobj.handle.get(), section, fobj.dynamic_symtab.get(), + addr - sec_addr, &result.filename, &result.funcname, &result.line); + } +#if defined(__clang__) +#pragma clang diagnostic pop +#endif + } + + ResolvedTrace::source_locs_t + backtrace_inliners(bfd_fileobject &fobj, find_sym_result previous_result) { + // This function can be called ONLY after a SUCCESSFUL call to + // find_symbol_details. The state is global to the bfd_handle. + ResolvedTrace::source_locs_t results; + while (previous_result.found) { + find_sym_result result; + result.found = bfd_find_inliner_info(fobj.handle.get(), &result.filename, + &result.funcname, &result.line); + + if (result + .found) /* and not ( + cstrings_eq(previous_result.filename, + result.filename) and + cstrings_eq(previous_result.funcname, result.funcname) + and result.line == previous_result.line + )) */ + { + ResolvedTrace::SourceLoc src_loc; + src_loc.line = result.line; + if (result.filename) { + src_loc.filename = result.filename; + } + if (result.funcname) { + src_loc.function = demangle(result.funcname); + } + results.push_back(src_loc); + } + previous_result = result; + } + return results; + } + + bool cstrings_eq(const char *a, const char *b) { + if (!a || !b) { + return false; + } + return strcmp(a, b) == 0; + } +}; +#endif // BACKWARD_HAS_BFD == 1 + +#if BACKWARD_HAS_DW == 1 + +template <> +class TraceResolverLinuxImpl + : public TraceResolverLinuxBase { +public: + TraceResolverLinuxImpl() : _dwfl_handle_initialized(false) {} + + 
template void load_stacktrace(ST &) {} + + ResolvedTrace resolve(ResolvedTrace trace) { + using namespace details; + + Dwarf_Addr trace_addr = (Dwarf_Addr)trace.addr; + + if (!_dwfl_handle_initialized) { + // initialize dwfl... + _dwfl_cb.reset(new Dwfl_Callbacks); + _dwfl_cb->find_elf = &dwfl_linux_proc_find_elf; + _dwfl_cb->find_debuginfo = &dwfl_standard_find_debuginfo; + _dwfl_cb->debuginfo_path = 0; + + _dwfl_handle.reset(dwfl_begin(_dwfl_cb.get())); + _dwfl_handle_initialized = true; + + if (!_dwfl_handle) { + return trace; + } + + // ...from the current process. + dwfl_report_begin(_dwfl_handle.get()); + int r = dwfl_linux_proc_report(_dwfl_handle.get(), getpid()); + dwfl_report_end(_dwfl_handle.get(), NULL, NULL); + if (r < 0) { + return trace; + } + } + + if (!_dwfl_handle) { + return trace; + } + + // find the module (binary object) that contains the trace's address. + // This is not using any debug information, but the addresses ranges of + // all the currently loaded binary object. + Dwfl_Module *mod = dwfl_addrmodule(_dwfl_handle.get(), trace_addr); + if (mod) { + // now that we found it, lets get the name of it, this will be the + // full path to the running binary or one of the loaded library. + const char *module_name = dwfl_module_info(mod, 0, 0, 0, 0, 0, 0, 0); + if (module_name) { + trace.object_filename = module_name; + } + // We also look after the name of the symbol, equal or before this + // address. This is found by walking the symtab. We should get the + // symbol corresponding to the function (mangled) containing the + // address. If the code corresponding to the address was inlined, + // this is the name of the out-most inliner function. + const char *sym_name = dwfl_module_addrname(mod, trace_addr); + if (sym_name) { + trace.object_function = demangle(sym_name); + } + } + + // now let's get serious, and find out the source location (file and + // line number) of the address. 
+ + // This function will look in .debug_aranges for the address and map it + // to the location of the compilation unit DIE in .debug_info and + // return it. + Dwarf_Addr mod_bias = 0; + Dwarf_Die *cudie = dwfl_module_addrdie(mod, trace_addr, &mod_bias); + +#if 1 + if (!cudie) { + // Sadly clang does not generate the section .debug_aranges, thus + // dwfl_module_addrdie will fail early. Clang doesn't either set + // the lowpc/highpc/range info for every compilation unit. + // + // So in order to save the world: + // for every compilation unit, we will iterate over every single + // DIEs. Normally functions should have a lowpc/highpc/range, which + // we will use to infer the compilation unit. + + // note that this is probably badly inefficient. + while ((cudie = dwfl_module_nextcu(mod, cudie, &mod_bias))) { + Dwarf_Die die_mem; + Dwarf_Die *fundie = + find_fundie_by_pc(cudie, trace_addr - mod_bias, &die_mem); + if (fundie) { + break; + } + } + } +#endif + +//#define BACKWARD_I_DO_NOT_RECOMMEND_TO_ENABLE_THIS_HORRIBLE_PIECE_OF_CODE +#ifdef BACKWARD_I_DO_NOT_RECOMMEND_TO_ENABLE_THIS_HORRIBLE_PIECE_OF_CODE + if (!cudie) { + // If it's still not enough, lets dive deeper in the shit, and try + // to save the world again: for every compilation unit, we will + // load the corresponding .debug_line section, and see if we can + // find our address in it. + + Dwarf_Addr cfi_bias; + Dwarf_CFI *cfi_cache = dwfl_module_eh_cfi(mod, &cfi_bias); + + Dwarf_Addr bias; + while ((cudie = dwfl_module_nextcu(mod, cudie, &bias))) { + if (dwarf_getsrc_die(cudie, trace_addr - bias)) { + + // ...but if we get a match, it might be a false positive + // because our (address - bias) might as well be valid in a + // different compilation unit. So we throw our last card on + // the table and lookup for the address into the .eh_frame + // section. 
+ + handle frame; + dwarf_cfi_addrframe(cfi_cache, trace_addr - cfi_bias, &frame); + if (frame) { + break; + } + } + } + } +#endif + + if (!cudie) { + return trace; // this time we lost the game :/ + } + + // Now that we have a compilation unit DIE, this function will be able + // to load the corresponding section in .debug_line (if not already + // loaded) and hopefully find the source location mapped to our + // address. + Dwarf_Line *srcloc = dwarf_getsrc_die(cudie, trace_addr - mod_bias); + + if (srcloc) { + const char *srcfile = dwarf_linesrc(srcloc, 0, 0); + if (srcfile) { + trace.source.filename = srcfile; + } + int line = 0, col = 0; + dwarf_lineno(srcloc, &line); + dwarf_linecol(srcloc, &col); + trace.source.line = line; + trace.source.col = col; + } + + deep_first_search_by_pc(cudie, trace_addr - mod_bias, + inliners_search_cb(trace)); + if (trace.source.function.size() == 0) { + // fallback. + trace.source.function = trace.object_function; + } + + return trace; + } + +private: + typedef details::handle> + dwfl_handle_t; + details::handle> + _dwfl_cb; + dwfl_handle_t _dwfl_handle; + bool _dwfl_handle_initialized; + + // defined here because in C++98, template function cannot take locally + // defined types... grrr. 
+ struct inliners_search_cb { + void operator()(Dwarf_Die *die) { + switch (dwarf_tag(die)) { + const char *name; + case DW_TAG_subprogram: + if ((name = dwarf_diename(die))) { + trace.source.function = name; + } + break; + + case DW_TAG_inlined_subroutine: + ResolvedTrace::SourceLoc sloc; + Dwarf_Attribute attr_mem; + + if ((name = dwarf_diename(die))) { + sloc.function = name; + } + if ((name = die_call_file(die))) { + sloc.filename = name; + } + + Dwarf_Word line = 0, col = 0; + dwarf_formudata(dwarf_attr(die, DW_AT_call_line, &attr_mem), &line); + dwarf_formudata(dwarf_attr(die, DW_AT_call_column, &attr_mem), &col); + sloc.line = (unsigned)line; + sloc.col = (unsigned)col; + + trace.inliners.push_back(sloc); + break; + }; + } + ResolvedTrace &trace; + inliners_search_cb(ResolvedTrace &t) : trace(t) {} + }; + + static bool die_has_pc(Dwarf_Die *die, Dwarf_Addr pc) { + Dwarf_Addr low, high; + + // continuous range + if (dwarf_hasattr(die, DW_AT_low_pc) && dwarf_hasattr(die, DW_AT_high_pc)) { + if (dwarf_lowpc(die, &low) != 0) { + return false; + } + if (dwarf_highpc(die, &high) != 0) { + Dwarf_Attribute attr_mem; + Dwarf_Attribute *attr = dwarf_attr(die, DW_AT_high_pc, &attr_mem); + Dwarf_Word value; + if (dwarf_formudata(attr, &value) != 0) { + return false; + } + high = low + value; + } + return pc >= low && pc < high; + } + + // non-continuous range. 
+ Dwarf_Addr base; + ptrdiff_t offset = 0; + while ((offset = dwarf_ranges(die, offset, &base, &low, &high)) > 0) { + if (pc >= low && pc < high) { + return true; + } + } + return false; + } + + static Dwarf_Die *find_fundie_by_pc(Dwarf_Die *parent_die, Dwarf_Addr pc, + Dwarf_Die *result) { + if (dwarf_child(parent_die, result) != 0) { + return 0; + } + + Dwarf_Die *die = result; + do { + switch (dwarf_tag(die)) { + case DW_TAG_subprogram: + case DW_TAG_inlined_subroutine: + if (die_has_pc(die, pc)) { + return result; + } + }; + bool declaration = false; + Dwarf_Attribute attr_mem; + dwarf_formflag(dwarf_attr(die, DW_AT_declaration, &attr_mem), + &declaration); + if (!declaration) { + // let's be curious and look deeper in the tree, + // function are not necessarily at the first level, but + // might be nested inside a namespace, structure etc. + Dwarf_Die die_mem; + Dwarf_Die *indie = find_fundie_by_pc(die, pc, &die_mem); + if (indie) { + *result = die_mem; + return result; + } + } + } while (dwarf_siblingof(die, result) == 0); + return 0; + } + + template + static bool deep_first_search_by_pc(Dwarf_Die *parent_die, Dwarf_Addr pc, + CB cb) { + Dwarf_Die die_mem; + if (dwarf_child(parent_die, &die_mem) != 0) { + return false; + } + + bool branch_has_pc = false; + Dwarf_Die *die = &die_mem; + do { + bool declaration = false; + Dwarf_Attribute attr_mem; + dwarf_formflag(dwarf_attr(die, DW_AT_declaration, &attr_mem), + &declaration); + if (!declaration) { + // let's be curious and look deeper in the tree, function are + // not necessarily at the first level, but might be nested + // inside a namespace, structure, a function, an inlined + // function etc. 
+ branch_has_pc = deep_first_search_by_pc(die, pc, cb); + } + if (!branch_has_pc) { + branch_has_pc = die_has_pc(die, pc); + } + if (branch_has_pc) { + cb(die); + } + } while (dwarf_siblingof(die, &die_mem) == 0); + return branch_has_pc; + } + + static const char *die_call_file(Dwarf_Die *die) { + Dwarf_Attribute attr_mem; + Dwarf_Word file_idx = 0; + + dwarf_formudata(dwarf_attr(die, DW_AT_call_file, &attr_mem), &file_idx); + + if (file_idx == 0) { + return 0; + } + + Dwarf_Die die_mem; + Dwarf_Die *cudie = dwarf_diecu(die, &die_mem, 0, 0); + if (!cudie) { + return 0; + } + + Dwarf_Files *files = 0; + size_t nfiles; + dwarf_getsrcfiles(cudie, &files, &nfiles); + if (!files) { + return 0; + } + + return dwarf_filesrc(files, file_idx, 0, 0); + } +}; +#endif // BACKWARD_HAS_DW == 1 + +#if BACKWARD_HAS_DWARF == 1 + +template <> +class TraceResolverLinuxImpl + : public TraceResolverLinuxBase { +public: + TraceResolverLinuxImpl() : _dwarf_loaded(false) {} + + template void load_stacktrace(ST &) {} + + ResolvedTrace resolve(ResolvedTrace trace) { + // trace.addr is a virtual address in memory pointing to some code. + // Let's try to find from which loaded object it comes from. + // The loaded object can be yourself btw. + + Dl_info symbol_info; + int dladdr_result = 0; +#if defined(__GLIBC__) + link_map *link_map; + // We request the link map so we can get information about offsets + dladdr_result = + dladdr1(trace.addr, &symbol_info, reinterpret_cast(&link_map), + RTLD_DL_LINKMAP); +#else + // Android doesn't have dladdr1. Don't use the linker map. + dladdr_result = dladdr(trace.addr, &symbol_info); +#endif + if (!dladdr_result) { + return trace; // dat broken trace... + } + + // Now we get in symbol_info: + // .dli_fname: + // pathname of the shared object that contains the address. + // .dli_fbase: + // where the object is loaded in memory. + // .dli_sname: + // the name of the nearest symbol to trace.addr, we expect a + // function name. 
+ // .dli_saddr: + // the exact address corresponding to .dli_sname. + // + // And in link_map: + // .l_addr: + // difference between the address in the ELF file and the address + // in memory + // l_name: + // absolute pathname where the object was found + + if (symbol_info.dli_sname) { + trace.object_function = demangle(symbol_info.dli_sname); + } + + if (!symbol_info.dli_fname) { + return trace; + } + + trace.object_filename = resolve_exec_path(symbol_info); + dwarf_fileobject &fobj = load_object_with_dwarf(symbol_info.dli_fname); + if (!fobj.dwarf_handle) { + return trace; // sad, we couldn't load the object :( + } + +#if defined(__GLIBC__) + // Convert the address to a module relative one by looking at + // the module's loading address in the link map + Dwarf_Addr address = reinterpret_cast(trace.addr) - + reinterpret_cast(link_map->l_addr); +#else + Dwarf_Addr address = reinterpret_cast(trace.addr); +#endif + + if (trace.object_function.empty()) { + symbol_cache_t::iterator it = fobj.symbol_cache.lower_bound(address); + + if (it != fobj.symbol_cache.end()) { + if (it->first != address) { + if (it != fobj.symbol_cache.begin()) { + --it; + } + } + trace.object_function = demangle(it->second.c_str()); + } + } + + // Get the Compilation Unit DIE for the address + Dwarf_Die die = find_die(fobj, address); + + if (!die) { + return trace; // this time we lost the game :/ + } + + // libdwarf doesn't give us direct access to its objects, it always + // allocates a copy for the caller. We keep that copy alive in a cache + // and we deallocate it later when it's no longer required. 
+ die_cache_entry &die_object = get_die_cache(fobj, die); + if (die_object.isEmpty()) + return trace; // We have no line section for this DIE + + die_linemap_t::iterator it = die_object.line_section.lower_bound(address); + + if (it != die_object.line_section.end()) { + if (it->first != address) { + if (it == die_object.line_section.begin()) { + // If we are on the first item of the line section + // but the address does not match it means that + // the address is below the range of the DIE. Give up. + return trace; + } else { + --it; + } + } + } else { + return trace; // We didn't find the address. + } + + // Get the Dwarf_Line that the address points to and call libdwarf + // to get source file, line and column info. + Dwarf_Line line = die_object.line_buffer[it->second]; + Dwarf_Error error = DW_DLE_NE; + + char *filename; + if (dwarf_linesrc(line, &filename, &error) == DW_DLV_OK) { + trace.source.filename = std::string(filename); + dwarf_dealloc(fobj.dwarf_handle.get(), filename, DW_DLA_STRING); + } + + Dwarf_Unsigned number = 0; + if (dwarf_lineno(line, &number, &error) == DW_DLV_OK) { + trace.source.line = number; + } else { + trace.source.line = 0; + } + + if (dwarf_lineoff_b(line, &number, &error) == DW_DLV_OK) { + trace.source.col = number; + } else { + trace.source.col = 0; + } + + std::vector namespace_stack; + deep_first_search_by_pc(fobj, die, address, namespace_stack, + inliners_search_cb(trace, fobj, die)); + + dwarf_dealloc(fobj.dwarf_handle.get(), die, DW_DLA_DIE); + + return trace; + } + +public: + static int close_dwarf(Dwarf_Debug dwarf) { + return dwarf_finish(dwarf, NULL); + } + +private: + bool _dwarf_loaded; + + typedef details::handle> + dwarf_file_t; + + typedef details::handle> + dwarf_elf_t; + + typedef details::handle> + dwarf_handle_t; + + typedef std::map die_linemap_t; + + typedef std::map die_specmap_t; + + struct die_cache_entry { + die_specmap_t spec_section; + die_linemap_t line_section; + Dwarf_Line *line_buffer; + Dwarf_Signed 
line_count; + Dwarf_Line_Context line_context; + + inline bool isEmpty() { + return line_buffer == NULL || line_count == 0 || line_context == NULL || + line_section.empty(); + } + + die_cache_entry() : line_buffer(0), line_count(0), line_context(0) {} + + ~die_cache_entry() { + if (line_context) { + dwarf_srclines_dealloc_b(line_context); + } + } + }; + + typedef std::map die_cache_t; + + typedef std::map symbol_cache_t; + + struct dwarf_fileobject { + dwarf_file_t file_handle; + dwarf_elf_t elf_handle; + dwarf_handle_t dwarf_handle; + symbol_cache_t symbol_cache; + + // Die cache + die_cache_t die_cache; + die_cache_entry *current_cu; + }; + + typedef details::hashtable::type + fobj_dwarf_map_t; + fobj_dwarf_map_t _fobj_dwarf_map; + + static bool cstrings_eq(const char *a, const char *b) { + if (!a || !b) { + return false; + } + return strcmp(a, b) == 0; + } + + dwarf_fileobject &load_object_with_dwarf(const std::string &filename_object) { + + if (!_dwarf_loaded) { + // Set the ELF library operating version + // If that fails there's nothing we can do + _dwarf_loaded = elf_version(EV_CURRENT) != EV_NONE; + } + + fobj_dwarf_map_t::iterator it = _fobj_dwarf_map.find(filename_object); + if (it != _fobj_dwarf_map.end()) { + return it->second; + } + + // this new object is empty for now + dwarf_fileobject &r = _fobj_dwarf_map[filename_object]; + + dwarf_file_t file_handle; + file_handle.reset(open(filename_object.c_str(), O_RDONLY)); + if (file_handle.get() < 0) { + return r; + } + + // Try to get an ELF handle. 
We need to read the ELF sections + // because we want to see if there is a .gnu_debuglink section + // that points to a split debug file + dwarf_elf_t elf_handle; + elf_handle.reset(elf_begin(file_handle.get(), ELF_C_READ, NULL)); + if (!elf_handle) { + return r; + } + + const char *e_ident = elf_getident(elf_handle.get(), 0); + if (!e_ident) { + return r; + } + + // Get the number of sections + // We use the new APIs as elf_getshnum is deprecated + size_t shdrnum = 0; + if (elf_getshdrnum(elf_handle.get(), &shdrnum) == -1) { + return r; + } + + // Get the index to the string section + size_t shdrstrndx = 0; + if (elf_getshdrstrndx(elf_handle.get(), &shdrstrndx) == -1) { + return r; + } + + std::string debuglink; + // Iterate through the ELF sections to try to get a gnu_debuglink + // note and also to cache the symbol table. + // We go the preprocessor way to avoid having to create templated + // classes or using gelf (which might throw a compiler error if 64 bit + // is not supported +#define ELF_GET_DATA(ARCH) \ + Elf_Scn *elf_section = 0; \ + Elf_Data *elf_data = 0; \ + Elf##ARCH##_Shdr *section_header = 0; \ + Elf_Scn *symbol_section = 0; \ + size_t symbol_count = 0; \ + size_t symbol_strings = 0; \ + Elf##ARCH##_Sym *symbol = 0; \ + const char *section_name = 0; \ + \ + while ((elf_section = elf_nextscn(elf_handle.get(), elf_section)) != NULL) { \ + section_header = elf##ARCH##_getshdr(elf_section); \ + if (section_header == NULL) { \ + return r; \ + } \ + \ + if ((section_name = elf_strptr(elf_handle.get(), shdrstrndx, \ + section_header->sh_name)) == NULL) { \ + return r; \ + } \ + \ + if (cstrings_eq(section_name, ".gnu_debuglink")) { \ + elf_data = elf_getdata(elf_section, NULL); \ + if (elf_data && elf_data->d_size > 0) { \ + debuglink = \ + std::string(reinterpret_cast(elf_data->d_buf)); \ + } \ + } \ + \ + switch (section_header->sh_type) { \ + case SHT_SYMTAB: \ + symbol_section = elf_section; \ + symbol_count = section_header->sh_size / 
section_header->sh_entsize; \ + symbol_strings = section_header->sh_link; \ + break; \ + \ + /* We use .dynsyms as a last resort, we prefer .symtab */ \ + case SHT_DYNSYM: \ + if (!symbol_section) { \ + symbol_section = elf_section; \ + symbol_count = section_header->sh_size / section_header->sh_entsize; \ + symbol_strings = section_header->sh_link; \ + } \ + break; \ + } \ + } \ + \ + if (symbol_section && symbol_count && symbol_strings) { \ + elf_data = elf_getdata(symbol_section, NULL); \ + symbol = reinterpret_cast(elf_data->d_buf); \ + for (size_t i = 0; i < symbol_count; ++i) { \ + int type = ELF##ARCH##_ST_TYPE(symbol->st_info); \ + if (type == STT_FUNC && symbol->st_value > 0) { \ + r.symbol_cache[symbol->st_value] = std::string( \ + elf_strptr(elf_handle.get(), symbol_strings, symbol->st_name)); \ + } \ + ++symbol; \ + } \ + } + + if (e_ident[EI_CLASS] == ELFCLASS32) { + ELF_GET_DATA(32) + } else if (e_ident[EI_CLASS] == ELFCLASS64) { + // libelf might have been built without 64 bit support +#if __LIBELF64 + ELF_GET_DATA(64) +#endif + } + + if (!debuglink.empty()) { + // We have a debuglink section! Open an elf instance on that + // file instead. If we can't open the file, then return + // the elf handle we had already opened. 
+ dwarf_file_t debuglink_file; + debuglink_file.reset(open(debuglink.c_str(), O_RDONLY)); + if (debuglink_file.get() > 0) { + dwarf_elf_t debuglink_elf; + debuglink_elf.reset(elf_begin(debuglink_file.get(), ELF_C_READ, NULL)); + + // If we have a valid elf handle, return the new elf handle + // and file handle and discard the original ones + if (debuglink_elf) { + elf_handle = move(debuglink_elf); + file_handle = move(debuglink_file); + } + } + } + + // Ok, we have a valid ELF handle, let's try to get debug symbols + Dwarf_Debug dwarf_debug; + Dwarf_Error error = DW_DLE_NE; + dwarf_handle_t dwarf_handle; + + int dwarf_result = dwarf_elf_init(elf_handle.get(), DW_DLC_READ, NULL, NULL, + &dwarf_debug, &error); + + // We don't do any special handling for DW_DLV_NO_ENTRY specially. + // If we get an error, or the file doesn't have debug information + // we just return. + if (dwarf_result != DW_DLV_OK) { + return r; + } + + dwarf_handle.reset(dwarf_debug); + + r.file_handle = move(file_handle); + r.elf_handle = move(elf_handle); + r.dwarf_handle = move(dwarf_handle); + + return r; + } + + die_cache_entry &get_die_cache(dwarf_fileobject &fobj, Dwarf_Die die) { + Dwarf_Error error = DW_DLE_NE; + + // Get the die offset, we use it as the cache key + Dwarf_Off die_offset; + if (dwarf_dieoffset(die, &die_offset, &error) != DW_DLV_OK) { + die_offset = 0; + } + + die_cache_t::iterator it = fobj.die_cache.find(die_offset); + + if (it != fobj.die_cache.end()) { + fobj.current_cu = &it->second; + return it->second; + } + + die_cache_entry &de = fobj.die_cache[die_offset]; + fobj.current_cu = &de; + + Dwarf_Addr line_addr; + Dwarf_Small table_count; + + // The addresses in the line section are not fully sorted (they might + // be sorted by block of code belonging to the same file), which makes + // it necessary to do so before searching is possible. + // + // As libdwarf allocates a copy of everything, let's get the contents + // of the line section and keep it around. 
We also create a map of + // program counter to line table indices so we can search by address + // and get the line buffer index. + // + // To make things more difficult, the same address can span more than + // one line, so we need to keep the index pointing to the first line + // by using insert instead of the map's [ operator. + + // Get the line context for the DIE + if (dwarf_srclines_b(die, 0, &table_count, &de.line_context, &error) == + DW_DLV_OK) { + // Get the source lines for this line context, to be deallocated + // later + if (dwarf_srclines_from_linecontext(de.line_context, &de.line_buffer, + &de.line_count, + &error) == DW_DLV_OK) { + + // Add all the addresses to our map + for (int i = 0; i < de.line_count; i++) { + if (dwarf_lineaddr(de.line_buffer[i], &line_addr, &error) != + DW_DLV_OK) { + line_addr = 0; + } + de.line_section.insert(std::pair(line_addr, i)); + } + } + } + + // For each CU, cache the function DIEs that contain the + // DW_AT_specification attribute. When building with -g3 the function + // DIEs are separated in declaration and specification, with the + // declaration containing only the name and parameters and the + // specification the low/high pc and other compiler attributes. + // + // We cache those specifications so we don't skip over the declarations, + // because they have no pc, and we can do namespace resolution for + // DWARF function names. 
+ Dwarf_Debug dwarf = fobj.dwarf_handle.get(); + Dwarf_Die current_die = 0; + if (dwarf_child(die, ¤t_die, &error) == DW_DLV_OK) { + for (;;) { + Dwarf_Die sibling_die = 0; + + Dwarf_Half tag_value; + dwarf_tag(current_die, &tag_value, &error); + + if (tag_value == DW_TAG_subprogram || + tag_value == DW_TAG_inlined_subroutine) { + + Dwarf_Bool has_attr = 0; + if (dwarf_hasattr(current_die, DW_AT_specification, &has_attr, + &error) == DW_DLV_OK) { + if (has_attr) { + Dwarf_Attribute attr_mem; + if (dwarf_attr(current_die, DW_AT_specification, &attr_mem, + &error) == DW_DLV_OK) { + Dwarf_Off spec_offset = 0; + if (dwarf_formref(attr_mem, &spec_offset, &error) == + DW_DLV_OK) { + Dwarf_Off spec_die_offset; + if (dwarf_dieoffset(current_die, &spec_die_offset, &error) == + DW_DLV_OK) { + de.spec_section[spec_offset] = spec_die_offset; + } + } + } + dwarf_dealloc(dwarf, attr_mem, DW_DLA_ATTR); + } + } + } + + int result = dwarf_siblingof(dwarf, current_die, &sibling_die, &error); + if (result == DW_DLV_ERROR) { + break; + } else if (result == DW_DLV_NO_ENTRY) { + break; + } + + if (current_die != die) { + dwarf_dealloc(dwarf, current_die, DW_DLA_DIE); + current_die = 0; + } + + current_die = sibling_die; + } + } + return de; + } + + static Dwarf_Die get_referenced_die(Dwarf_Debug dwarf, Dwarf_Die die, + Dwarf_Half attr, bool global) { + Dwarf_Error error = DW_DLE_NE; + Dwarf_Attribute attr_mem; + + Dwarf_Die found_die = NULL; + if (dwarf_attr(die, attr, &attr_mem, &error) == DW_DLV_OK) { + Dwarf_Off offset; + int result = 0; + if (global) { + result = dwarf_global_formref(attr_mem, &offset, &error); + } else { + result = dwarf_formref(attr_mem, &offset, &error); + } + + if (result == DW_DLV_OK) { + if (dwarf_offdie(dwarf, offset, &found_die, &error) != DW_DLV_OK) { + found_die = NULL; + } + } + dwarf_dealloc(dwarf, attr_mem, DW_DLA_ATTR); + } + return found_die; + } + + static std::string get_referenced_die_name(Dwarf_Debug dwarf, Dwarf_Die die, + Dwarf_Half attr, bool 
global) { + Dwarf_Error error = DW_DLE_NE; + std::string value; + + Dwarf_Die found_die = get_referenced_die(dwarf, die, attr, global); + + if (found_die) { + char *name; + if (dwarf_diename(found_die, &name, &error) == DW_DLV_OK) { + if (name) { + value = std::string(name); + } + dwarf_dealloc(dwarf, name, DW_DLA_STRING); + } + dwarf_dealloc(dwarf, found_die, DW_DLA_DIE); + } + + return value; + } + + // Returns a spec DIE linked to the passed one. The caller should + // deallocate the DIE + static Dwarf_Die get_spec_die(dwarf_fileobject &fobj, Dwarf_Die die) { + Dwarf_Debug dwarf = fobj.dwarf_handle.get(); + Dwarf_Error error = DW_DLE_NE; + Dwarf_Off die_offset; + if (fobj.current_cu && + dwarf_die_CU_offset(die, &die_offset, &error) == DW_DLV_OK) { + die_specmap_t::iterator it = + fobj.current_cu->spec_section.find(die_offset); + + // If we have a DIE that completes the current one, check if + // that one has the pc we are looking for + if (it != fobj.current_cu->spec_section.end()) { + Dwarf_Die spec_die = 0; + if (dwarf_offdie(dwarf, it->second, &spec_die, &error) == DW_DLV_OK) { + return spec_die; + } + } + } + + // Maybe we have an abstract origin DIE with the function information? + return get_referenced_die(fobj.dwarf_handle.get(), die, + DW_AT_abstract_origin, true); + } + + static bool die_has_pc(dwarf_fileobject &fobj, Dwarf_Die die, Dwarf_Addr pc) { + Dwarf_Addr low_pc = 0, high_pc = 0; + Dwarf_Half high_pc_form = 0; + Dwarf_Form_Class return_class; + Dwarf_Error error = DW_DLE_NE; + Dwarf_Debug dwarf = fobj.dwarf_handle.get(); + bool has_lowpc = false; + bool has_highpc = false; + bool has_ranges = false; + + if (dwarf_lowpc(die, &low_pc, &error) == DW_DLV_OK) { + // If we have a low_pc check if there is a high pc. + // If we don't have a high pc this might mean we have a base + // address for the ranges list or just an address. 
+ has_lowpc = true; + + if (dwarf_highpc_b(die, &high_pc, &high_pc_form, &return_class, &error) == + DW_DLV_OK) { + // We do have a high pc. In DWARF 4+ this is an offset from the + // low pc, but in earlier versions it's an absolute address. + + has_highpc = true; + // In DWARF 2/3 this would be a DW_FORM_CLASS_ADDRESS + if (return_class == DW_FORM_CLASS_CONSTANT) { + high_pc = low_pc + high_pc; + } + + // We have low and high pc, check if our address + // is in that range + return pc >= low_pc && pc < high_pc; + } + } else { + // Reset the low_pc, in case dwarf_lowpc failing set it to some + // undefined value. + low_pc = 0; + } + + // Check if DW_AT_ranges is present and search for the PC in the + // returned ranges list. We always add the low_pc, as it not set it will + // be 0, in case we had a DW_AT_low_pc and DW_AT_ranges pair + bool result = false; + + Dwarf_Attribute attr; + if (dwarf_attr(die, DW_AT_ranges, &attr, &error) == DW_DLV_OK) { + + Dwarf_Off offset; + if (dwarf_global_formref(attr, &offset, &error) == DW_DLV_OK) { + Dwarf_Ranges *ranges; + Dwarf_Signed ranges_count = 0; + Dwarf_Unsigned byte_count = 0; + + if (dwarf_get_ranges_a(dwarf, offset, die, &ranges, &ranges_count, + &byte_count, &error) == DW_DLV_OK) { + has_ranges = ranges_count != 0; + for (int i = 0; i < ranges_count; i++) { + if (ranges[i].dwr_addr1 != 0 && + pc >= ranges[i].dwr_addr1 + low_pc && + pc < ranges[i].dwr_addr2 + low_pc) { + result = true; + break; + } + } + dwarf_ranges_dealloc(dwarf, ranges, ranges_count); + } + } + } + + // Last attempt. We might have a single address set as low_pc. + if (!result && low_pc != 0 && pc == low_pc) { + result = true; + } + + // If we don't have lowpc, highpc and ranges maybe this DIE is a + // declaration that relies on a DW_AT_specification DIE that happens + // later. Use the specification cache we filled when we loaded this CU. 
+ if (!result && (!has_lowpc && !has_highpc && !has_ranges)) { + Dwarf_Die spec_die = get_spec_die(fobj, die); + if (spec_die) { + result = die_has_pc(fobj, spec_die, pc); + dwarf_dealloc(dwarf, spec_die, DW_DLA_DIE); + } + } + + return result; + } + + static void get_type(Dwarf_Debug dwarf, Dwarf_Die die, std::string &type) { + Dwarf_Error error = DW_DLE_NE; + + Dwarf_Die child = 0; + if (dwarf_child(die, &child, &error) == DW_DLV_OK) { + get_type(dwarf, child, type); + } + + if (child) { + type.insert(0, "::"); + dwarf_dealloc(dwarf, child, DW_DLA_DIE); + } + + char *name; + if (dwarf_diename(die, &name, &error) == DW_DLV_OK) { + type.insert(0, std::string(name)); + dwarf_dealloc(dwarf, name, DW_DLA_STRING); + } else { + type.insert(0, ""); + } + } + + static std::string get_type_by_signature(Dwarf_Debug dwarf, Dwarf_Die die) { + Dwarf_Error error = DW_DLE_NE; + + Dwarf_Sig8 signature; + Dwarf_Bool has_attr = 0; + if (dwarf_hasattr(die, DW_AT_signature, &has_attr, &error) == DW_DLV_OK) { + if (has_attr) { + Dwarf_Attribute attr_mem; + if (dwarf_attr(die, DW_AT_signature, &attr_mem, &error) == DW_DLV_OK) { + if (dwarf_formsig8(attr_mem, &signature, &error) != DW_DLV_OK) { + return std::string(""); + } + } + dwarf_dealloc(dwarf, attr_mem, DW_DLA_ATTR); + } + } + + Dwarf_Unsigned next_cu_header; + Dwarf_Sig8 tu_signature; + std::string result; + bool found = false; + + while (dwarf_next_cu_header_d(dwarf, 0, 0, 0, 0, 0, 0, 0, &tu_signature, 0, + &next_cu_header, 0, &error) == DW_DLV_OK) { + + if (strncmp(signature.signature, tu_signature.signature, 8) == 0) { + Dwarf_Die type_cu_die = 0; + if (dwarf_siblingof_b(dwarf, 0, 0, &type_cu_die, &error) == DW_DLV_OK) { + Dwarf_Die child_die = 0; + if (dwarf_child(type_cu_die, &child_die, &error) == DW_DLV_OK) { + get_type(dwarf, child_die, result); + found = !result.empty(); + dwarf_dealloc(dwarf, child_die, DW_DLA_DIE); + } + dwarf_dealloc(dwarf, type_cu_die, DW_DLA_DIE); + } + } + } + + if (found) { + while 
(dwarf_next_cu_header_d(dwarf, 0, 0, 0, 0, 0, 0, 0, 0, 0, + &next_cu_header, 0, &error) == DW_DLV_OK) { + // Reset the cu header state. Unfortunately, libdwarf's + // next_cu_header API keeps its own iterator per Dwarf_Debug + // that can't be reset. We need to keep fetching elements until + // the end. + } + } else { + // If we couldn't resolve the type just print out the signature + std::ostringstream string_stream; + string_stream << "<0x" << std::hex << std::setfill('0'); + for (int i = 0; i < 8; ++i) { + string_stream << std::setw(2) << std::hex + << (int)(unsigned char)(signature.signature[i]); + } + string_stream << ">"; + result = string_stream.str(); + } + return result; + } + + struct type_context_t { + bool is_const; + bool is_typedef; + bool has_type; + bool has_name; + std::string text; + + type_context_t() + : is_const(false), is_typedef(false), has_type(false), has_name(false) { + } + }; + + // Types are resolved from right to left: we get the variable name first + // and then all specifiers (like const or pointer) in a chain of DW_AT_type + // DIEs. Call this function recursively until we get a complete type + // string. 
+ static void set_parameter_string(dwarf_fileobject &fobj, Dwarf_Die die, + type_context_t &context) { + char *name; + Dwarf_Error error = DW_DLE_NE; + + // typedefs contain also the base type, so we skip it and only + // print the typedef name + if (!context.is_typedef) { + if (dwarf_diename(die, &name, &error) == DW_DLV_OK) { + if (!context.text.empty()) { + context.text.insert(0, " "); + } + context.text.insert(0, std::string(name)); + dwarf_dealloc(fobj.dwarf_handle.get(), name, DW_DLA_STRING); + } + } else { + context.is_typedef = false; + context.has_type = true; + if (context.is_const) { + context.text.insert(0, "const "); + context.is_const = false; + } + } + + bool next_type_is_const = false; + bool is_keyword = true; + + Dwarf_Half tag = 0; + Dwarf_Bool has_attr = 0; + if (dwarf_tag(die, &tag, &error) == DW_DLV_OK) { + switch (tag) { + case DW_TAG_structure_type: + case DW_TAG_union_type: + case DW_TAG_class_type: + case DW_TAG_enumeration_type: + context.has_type = true; + if (dwarf_hasattr(die, DW_AT_signature, &has_attr, &error) == + DW_DLV_OK) { + // If we have a signature it means the type is defined + // in .debug_types, so we need to load the DIE pointed + // at by the signature and resolve it + if (has_attr) { + std::string type = + get_type_by_signature(fobj.dwarf_handle.get(), die); + if (context.is_const) + type.insert(0, "const "); + + if (!context.text.empty()) + context.text.insert(0, " "); + context.text.insert(0, type); + } + + // Treat enums like typedefs, and skip printing its + // base type + context.is_typedef = (tag == DW_TAG_enumeration_type); + } + break; + case DW_TAG_const_type: + next_type_is_const = true; + break; + case DW_TAG_pointer_type: + context.text.insert(0, "*"); + break; + case DW_TAG_reference_type: + context.text.insert(0, "&"); + break; + case DW_TAG_restrict_type: + context.text.insert(0, "restrict "); + break; + case DW_TAG_rvalue_reference_type: + context.text.insert(0, "&&"); + break; + case 
DW_TAG_volatile_type: + context.text.insert(0, "volatile "); + break; + case DW_TAG_typedef: + // Propagate the const-ness to the next type + // as typedefs are linked to its base type + next_type_is_const = context.is_const; + context.is_typedef = true; + context.has_type = true; + break; + case DW_TAG_base_type: + context.has_type = true; + break; + case DW_TAG_formal_parameter: + context.has_name = true; + break; + default: + is_keyword = false; + break; + } + } + + if (!is_keyword && context.is_const) { + context.text.insert(0, "const "); + } + + context.is_const = next_type_is_const; + + Dwarf_Die ref = + get_referenced_die(fobj.dwarf_handle.get(), die, DW_AT_type, true); + if (ref) { + set_parameter_string(fobj, ref, context); + dwarf_dealloc(fobj.dwarf_handle.get(), ref, DW_DLA_DIE); + } + + if (!context.has_type && context.has_name) { + context.text.insert(0, "void "); + context.has_type = true; + } + } + + // Resolve the function return type and parameters + static void set_function_parameters(std::string &function_name, + std::vector &ns, + dwarf_fileobject &fobj, Dwarf_Die die) { + Dwarf_Debug dwarf = fobj.dwarf_handle.get(); + Dwarf_Error error = DW_DLE_NE; + Dwarf_Die current_die = 0; + std::string parameters; + bool has_spec = true; + // Check if we have a spec DIE. If we do we use it as it contains + // more information, like parameter names. + Dwarf_Die spec_die = get_spec_die(fobj, die); + if (!spec_die) { + has_spec = false; + spec_die = die; + } + + std::vector::const_iterator it = ns.begin(); + std::string ns_name; + for (it = ns.begin(); it < ns.end(); ++it) { + ns_name.append(*it).append("::"); + } + + if (!ns_name.empty()) { + function_name.insert(0, ns_name); + } + + // See if we have a function return type. 
It can be either on the + // current die or in its spec one (usually true for inlined functions) + std::string return_type = + get_referenced_die_name(dwarf, die, DW_AT_type, true); + if (return_type.empty()) { + return_type = get_referenced_die_name(dwarf, spec_die, DW_AT_type, true); + } + if (!return_type.empty()) { + return_type.append(" "); + function_name.insert(0, return_type); + } + + if (dwarf_child(spec_die, ¤t_die, &error) == DW_DLV_OK) { + for (;;) { + Dwarf_Die sibling_die = 0; + + Dwarf_Half tag_value; + dwarf_tag(current_die, &tag_value, &error); + + if (tag_value == DW_TAG_formal_parameter) { + // Ignore artificial (ie, compiler generated) parameters + bool is_artificial = false; + Dwarf_Attribute attr_mem; + if (dwarf_attr(current_die, DW_AT_artificial, &attr_mem, &error) == + DW_DLV_OK) { + Dwarf_Bool flag = 0; + if (dwarf_formflag(attr_mem, &flag, &error) == DW_DLV_OK) { + is_artificial = flag != 0; + } + dwarf_dealloc(dwarf, attr_mem, DW_DLA_ATTR); + } + + if (!is_artificial) { + type_context_t context; + set_parameter_string(fobj, current_die, context); + + if (parameters.empty()) { + parameters.append("("); + } else { + parameters.append(", "); + } + parameters.append(context.text); + } + } + + int result = dwarf_siblingof(dwarf, current_die, &sibling_die, &error); + if (result == DW_DLV_ERROR) { + break; + } else if (result == DW_DLV_NO_ENTRY) { + break; + } + + if (current_die != die) { + dwarf_dealloc(dwarf, current_die, DW_DLA_DIE); + current_die = 0; + } + + current_die = sibling_die; + } + } + if (parameters.empty()) + parameters = "("; + parameters.append(")"); + + // If we got a spec DIE we need to deallocate it + if (has_spec) + dwarf_dealloc(dwarf, spec_die, DW_DLA_DIE); + + function_name.append(parameters); + } + + // defined here because in C++98, template function cannot take locally + // defined types... grrr. 
+ struct inliners_search_cb { + void operator()(Dwarf_Die die, std::vector &ns) { + Dwarf_Error error = DW_DLE_NE; + Dwarf_Half tag_value; + Dwarf_Attribute attr_mem; + Dwarf_Debug dwarf = fobj.dwarf_handle.get(); + + dwarf_tag(die, &tag_value, &error); + + switch (tag_value) { + char *name; + case DW_TAG_subprogram: + if (!trace.source.function.empty()) + break; + if (dwarf_diename(die, &name, &error) == DW_DLV_OK) { + trace.source.function = std::string(name); + dwarf_dealloc(dwarf, name, DW_DLA_STRING); + } else { + // We don't have a function name in this DIE. + // Check if there is a referenced non-defining + // declaration. + trace.source.function = + get_referenced_die_name(dwarf, die, DW_AT_abstract_origin, true); + if (trace.source.function.empty()) { + trace.source.function = + get_referenced_die_name(dwarf, die, DW_AT_specification, true); + } + } + + // Append the function parameters, if available + set_function_parameters(trace.source.function, ns, fobj, die); + + // If the object function name is empty, it's possible that + // there is no dynamic symbol table (maybe the executable + // was stripped or not built with -rdynamic). See if we have + // a DWARF linkage name to use instead. We try both + // linkage_name and MIPS_linkage_name because the MIPS tag + // was the unofficial one until it was adopted in DWARF4. 
+ // Old gcc versions generate MIPS_linkage_name + if (trace.object_function.empty()) { + details::demangler demangler; + + if (dwarf_attr(die, DW_AT_linkage_name, &attr_mem, &error) != + DW_DLV_OK) { + if (dwarf_attr(die, DW_AT_MIPS_linkage_name, &attr_mem, &error) != + DW_DLV_OK) { + break; + } + } + + char *linkage; + if (dwarf_formstring(attr_mem, &linkage, &error) == DW_DLV_OK) { + trace.object_function = demangler.demangle(linkage); + dwarf_dealloc(dwarf, linkage, DW_DLA_STRING); + } + dwarf_dealloc(dwarf, attr_mem, DW_DLA_ATTR); + } + break; + + case DW_TAG_inlined_subroutine: + ResolvedTrace::SourceLoc sloc; + + if (dwarf_diename(die, &name, &error) == DW_DLV_OK) { + sloc.function = std::string(name); + dwarf_dealloc(dwarf, name, DW_DLA_STRING); + } else { + // We don't have a name for this inlined DIE, it could + // be that there is an abstract origin instead. + // Get the DW_AT_abstract_origin value, which is a + // reference to the source DIE and try to get its name + sloc.function = + get_referenced_die_name(dwarf, die, DW_AT_abstract_origin, true); + } + + set_function_parameters(sloc.function, ns, fobj, die); + + std::string file = die_call_file(dwarf, die, cu_die); + if (!file.empty()) + sloc.filename = file; + + Dwarf_Unsigned number = 0; + if (dwarf_attr(die, DW_AT_call_line, &attr_mem, &error) == DW_DLV_OK) { + if (dwarf_formudata(attr_mem, &number, &error) == DW_DLV_OK) { + sloc.line = number; + } + dwarf_dealloc(dwarf, attr_mem, DW_DLA_ATTR); + } + + if (dwarf_attr(die, DW_AT_call_column, &attr_mem, &error) == + DW_DLV_OK) { + if (dwarf_formudata(attr_mem, &number, &error) == DW_DLV_OK) { + sloc.col = number; + } + dwarf_dealloc(dwarf, attr_mem, DW_DLA_ATTR); + } + + trace.inliners.push_back(sloc); + break; + }; + } + ResolvedTrace &trace; + dwarf_fileobject &fobj; + Dwarf_Die cu_die; + inliners_search_cb(ResolvedTrace &t, dwarf_fileobject &f, Dwarf_Die c) + : trace(t), fobj(f), cu_die(c) {} + }; + + static Dwarf_Die 
find_fundie_by_pc(dwarf_fileobject &fobj, + Dwarf_Die parent_die, Dwarf_Addr pc, + Dwarf_Die result) { + Dwarf_Die current_die = 0; + Dwarf_Error error = DW_DLE_NE; + Dwarf_Debug dwarf = fobj.dwarf_handle.get(); + + if (dwarf_child(parent_die, ¤t_die, &error) != DW_DLV_OK) { + return NULL; + } + + for (;;) { + Dwarf_Die sibling_die = 0; + Dwarf_Half tag_value; + dwarf_tag(current_die, &tag_value, &error); + + switch (tag_value) { + case DW_TAG_subprogram: + case DW_TAG_inlined_subroutine: + if (die_has_pc(fobj, current_die, pc)) { + return current_die; + } + }; + bool declaration = false; + Dwarf_Attribute attr_mem; + if (dwarf_attr(current_die, DW_AT_declaration, &attr_mem, &error) == + DW_DLV_OK) { + Dwarf_Bool flag = 0; + if (dwarf_formflag(attr_mem, &flag, &error) == DW_DLV_OK) { + declaration = flag != 0; + } + dwarf_dealloc(dwarf, attr_mem, DW_DLA_ATTR); + } + + if (!declaration) { + // let's be curious and look deeper in the tree, functions are + // not necessarily at the first level, but might be nested + // inside a namespace, structure, a function, an inlined + // function etc. 
+ Dwarf_Die die_mem = 0; + Dwarf_Die indie = find_fundie_by_pc(fobj, current_die, pc, die_mem); + if (indie) { + result = die_mem; + return result; + } + } + + int res = dwarf_siblingof(dwarf, current_die, &sibling_die, &error); + if (res == DW_DLV_ERROR) { + return NULL; + } else if (res == DW_DLV_NO_ENTRY) { + break; + } + + if (current_die != parent_die) { + dwarf_dealloc(dwarf, current_die, DW_DLA_DIE); + current_die = 0; + } + + current_die = sibling_die; + } + return NULL; + } + + template + static bool deep_first_search_by_pc(dwarf_fileobject &fobj, + Dwarf_Die parent_die, Dwarf_Addr pc, + std::vector &ns, CB cb) { + Dwarf_Die current_die = 0; + Dwarf_Debug dwarf = fobj.dwarf_handle.get(); + Dwarf_Error error = DW_DLE_NE; + + if (dwarf_child(parent_die, ¤t_die, &error) != DW_DLV_OK) { + return false; + } + + bool branch_has_pc = false; + bool has_namespace = false; + for (;;) { + Dwarf_Die sibling_die = 0; + + Dwarf_Half tag; + if (dwarf_tag(current_die, &tag, &error) == DW_DLV_OK) { + if (tag == DW_TAG_namespace || tag == DW_TAG_class_type) { + char *ns_name = NULL; + if (dwarf_diename(current_die, &ns_name, &error) == DW_DLV_OK) { + if (ns_name) { + ns.push_back(std::string(ns_name)); + } else { + ns.push_back(""); + } + dwarf_dealloc(dwarf, ns_name, DW_DLA_STRING); + } else { + ns.push_back(""); + } + has_namespace = true; + } + } + + bool declaration = false; + Dwarf_Attribute attr_mem; + if (tag != DW_TAG_class_type && + dwarf_attr(current_die, DW_AT_declaration, &attr_mem, &error) == + DW_DLV_OK) { + Dwarf_Bool flag = 0; + if (dwarf_formflag(attr_mem, &flag, &error) == DW_DLV_OK) { + declaration = flag != 0; + } + dwarf_dealloc(dwarf, attr_mem, DW_DLA_ATTR); + } + + if (!declaration) { + // let's be curious and look deeper in the tree, function are + // not necessarily at the first level, but might be nested + // inside a namespace, structure, a function, an inlined + // function etc. 
+ branch_has_pc = deep_first_search_by_pc(fobj, current_die, pc, ns, cb); + } + + if (!branch_has_pc) { + branch_has_pc = die_has_pc(fobj, current_die, pc); + } + + if (branch_has_pc) { + cb(current_die, ns); + } + + int result = dwarf_siblingof(dwarf, current_die, &sibling_die, &error); + if (result == DW_DLV_ERROR) { + return false; + } else if (result == DW_DLV_NO_ENTRY) { + break; + } + + if (current_die != parent_die) { + dwarf_dealloc(dwarf, current_die, DW_DLA_DIE); + current_die = 0; + } + + if (has_namespace) { + has_namespace = false; + ns.pop_back(); + } + current_die = sibling_die; + } + + if (has_namespace) { + ns.pop_back(); + } + return branch_has_pc; + } + + static std::string die_call_file(Dwarf_Debug dwarf, Dwarf_Die die, + Dwarf_Die cu_die) { + Dwarf_Attribute attr_mem; + Dwarf_Error error = DW_DLE_NE; + Dwarf_Unsigned file_index; + + std::string file; + + if (dwarf_attr(die, DW_AT_call_file, &attr_mem, &error) == DW_DLV_OK) { + if (dwarf_formudata(attr_mem, &file_index, &error) != DW_DLV_OK) { + file_index = 0; + } + dwarf_dealloc(dwarf, attr_mem, DW_DLA_ATTR); + + if (file_index == 0) { + return file; + } + + char **srcfiles = 0; + Dwarf_Signed file_count = 0; + if (dwarf_srcfiles(cu_die, &srcfiles, &file_count, &error) == DW_DLV_OK) { + if (file_count > 0 && file_index <= static_cast(file_count)) { + file = std::string(srcfiles[file_index - 1]); + } + + // Deallocate all strings! + for (int i = 0; i < file_count; ++i) { + dwarf_dealloc(dwarf, srcfiles[i], DW_DLA_STRING); + } + dwarf_dealloc(dwarf, srcfiles, DW_DLA_LIST); + } + } + return file; + } + + Dwarf_Die find_die(dwarf_fileobject &fobj, Dwarf_Addr addr) { + // Let's get to work! 
First see if we have a debug_aranges section so + // we can speed up the search + + Dwarf_Debug dwarf = fobj.dwarf_handle.get(); + Dwarf_Error error = DW_DLE_NE; + Dwarf_Arange *aranges; + Dwarf_Signed arange_count; + + Dwarf_Die returnDie; + bool found = false; + if (dwarf_get_aranges(dwarf, &aranges, &arange_count, &error) != + DW_DLV_OK) { + aranges = NULL; + } + + if (aranges) { + // We have aranges. Get the one where our address is. + Dwarf_Arange arange; + if (dwarf_get_arange(aranges, arange_count, addr, &arange, &error) == + DW_DLV_OK) { + + // We found our address. Get the compilation-unit DIE offset + // represented by the given address range. + Dwarf_Off cu_die_offset; + if (dwarf_get_cu_die_offset(arange, &cu_die_offset, &error) == + DW_DLV_OK) { + // Get the DIE at the offset returned by the aranges search. + // We set is_info to 1 to specify that the offset is from + // the .debug_info section (and not .debug_types) + int dwarf_result = + dwarf_offdie_b(dwarf, cu_die_offset, 1, &returnDie, &error); + + found = dwarf_result == DW_DLV_OK; + } + dwarf_dealloc(dwarf, arange, DW_DLA_ARANGE); + } + } + + if (found) + return returnDie; // The caller is responsible for freeing the die + + // The search for aranges failed. Try to find our address by scanning + // all compilation units. + Dwarf_Unsigned next_cu_header; + Dwarf_Half tag = 0; + returnDie = 0; + + while (!found && + dwarf_next_cu_header_d(dwarf, 1, 0, 0, 0, 0, 0, 0, 0, 0, + &next_cu_header, 0, &error) == DW_DLV_OK) { + + if (returnDie) + dwarf_dealloc(dwarf, returnDie, DW_DLA_DIE); + + if (dwarf_siblingof(dwarf, 0, &returnDie, &error) == DW_DLV_OK) { + if ((dwarf_tag(returnDie, &tag, &error) == DW_DLV_OK) && + tag == DW_TAG_compile_unit) { + if (die_has_pc(fobj, returnDie, addr)) { + found = true; + } + } + } + } + + if (found) { + while (dwarf_next_cu_header_d(dwarf, 1, 0, 0, 0, 0, 0, 0, 0, 0, + &next_cu_header, 0, &error) == DW_DLV_OK) { + // Reset the cu header state. 
Libdwarf's next_cu_header API + // keeps its own iterator per Dwarf_Debug that can't be reset. + // We need to keep fetching elements until the end. + } + } + + if (found) + return returnDie; + + // We couldn't find any compilation units with ranges or a high/low pc. + // Try again by looking at all DIEs in all compilation units. + Dwarf_Die cudie; + while (dwarf_next_cu_header_d(dwarf, 1, 0, 0, 0, 0, 0, 0, 0, 0, + &next_cu_header, 0, &error) == DW_DLV_OK) { + if (dwarf_siblingof(dwarf, 0, &cudie, &error) == DW_DLV_OK) { + Dwarf_Die die_mem = 0; + Dwarf_Die resultDie = find_fundie_by_pc(fobj, cudie, addr, die_mem); + + if (resultDie) { + found = true; + break; + } + } + } + + if (found) { + while (dwarf_next_cu_header_d(dwarf, 1, 0, 0, 0, 0, 0, 0, 0, 0, + &next_cu_header, 0, &error) == DW_DLV_OK) { + // Reset the cu header state. Libdwarf's next_cu_header API + // keeps its own iterator per Dwarf_Debug that can't be reset. + // We need to keep fetching elements until the end. + } + } + + if (found) + return cudie; + + // We failed. 
+ return NULL; + } +}; +#endif // BACKWARD_HAS_DWARF == 1 + +template <> +class TraceResolverImpl + : public TraceResolverLinuxImpl {}; + +#endif // BACKWARD_SYSTEM_LINUX + +#ifdef BACKWARD_SYSTEM_DARWIN + +template class TraceResolverDarwinImpl; + +template <> +class TraceResolverDarwinImpl + : public TraceResolverImplBase { +public: + template void load_stacktrace(ST &st) { + using namespace details; + if (st.size() == 0) { + return; + } + _symbols.reset(backtrace_symbols(st.begin(), st.size())); + } + + ResolvedTrace resolve(ResolvedTrace trace) { + // parse: + // + + char *filename = _symbols[trace.idx]; + + // skip " " + while (*filename && *filename != ' ') + filename++; + while (*filename == ' ') + filename++; + + // find start of from end ( may contain a space) + char *p = filename + strlen(filename) - 1; + // skip to start of " + " + while (p > filename && *p != ' ') + p--; + while (p > filename && *p == ' ') + p--; + while (p > filename && *p != ' ') + p--; + while (p > filename && *p == ' ') + p--; + char *funcname_end = p + 1; + + // skip to start of "" + while (p > filename && *p != ' ') + p--; + char *funcname = p + 1; + + // skip to start of " " + while (p > filename && *p == ' ') + p--; + while (p > filename && *p != ' ') + p--; + while (p > filename && *p == ' ') + p--; + + // skip "", handling the case where it contains a + char *filename_end = p + 1; + if (p == filename) { + // something went wrong, give up + filename_end = filename + strlen(filename); + funcname = filename_end; + } + trace.object_filename.assign( + filename, filename_end); // ok even if filename_end is the ending \0 + // (then we assign entire string) + + if (*funcname) { // if it's not end of string + *funcname_end = '\0'; + + trace.object_function = this->demangle(funcname); + trace.object_function += " "; + trace.object_function += (funcname_end + 1); + trace.source.function = trace.object_function; // we cannot do better. 
+ } + return trace; + } + +private: + details::handle _symbols; +}; + +template <> +class TraceResolverImpl + : public TraceResolverDarwinImpl {}; + +#endif // BACKWARD_SYSTEM_DARWIN + +#ifdef BACKWARD_SYSTEM_WINDOWS + +// Load all symbol info +// Based on: +// https://stackoverflow.com/questions/6205981/windows-c-stack-trace-from-a-running-app/28276227#28276227 + +struct module_data { + std::string image_name; + std::string module_name; + void *base_address; + DWORD load_size; +}; + +class get_mod_info { + HANDLE process; + static const int buffer_length = 4096; + +public: + get_mod_info(HANDLE h) : process(h) {} + + module_data operator()(HMODULE module) { + module_data ret; + char temp[buffer_length]; + MODULEINFO mi; + + GetModuleInformation(process, module, &mi, sizeof(mi)); + ret.base_address = mi.lpBaseOfDll; + ret.load_size = mi.SizeOfImage; + + GetModuleFileNameExA(process, module, temp, sizeof(temp)); + ret.image_name = temp; + GetModuleBaseNameA(process, module, temp, sizeof(temp)); + ret.module_name = temp; + std::vector img(ret.image_name.begin(), ret.image_name.end()); + std::vector mod(ret.module_name.begin(), ret.module_name.end()); + SymLoadModule64(process, 0, &img[0], &mod[0], (DWORD64)ret.base_address, + ret.load_size); + return ret; + } +}; + +template <> class TraceResolverImpl { +public: + TraceResolverImpl() { + + HANDLE process = GetCurrentProcess(); + + std::vector modules; + DWORD cbNeeded; + std::vector module_handles(1); + SymInitialize(process, NULL, false); + DWORD symOptions = SymGetOptions(); + symOptions |= SYMOPT_LOAD_LINES | SYMOPT_UNDNAME; + SymSetOptions(symOptions); + EnumProcessModules(process, &module_handles[0], + module_handles.size() * sizeof(HMODULE), &cbNeeded); + module_handles.resize(cbNeeded / sizeof(HMODULE)); + EnumProcessModules(process, &module_handles[0], + module_handles.size() * sizeof(HMODULE), &cbNeeded); + std::transform(module_handles.begin(), module_handles.end(), + std::back_inserter(modules), 
get_mod_info(process)); + void *base = modules[0].base_address; + IMAGE_NT_HEADERS *h = ImageNtHeader(base); + image_type = h->FileHeader.Machine; + } + + template void load_stacktrace(ST &) {} + + static const int max_sym_len = 255; + struct symbol_t { + SYMBOL_INFO sym; + char buffer[max_sym_len]; + } sym; + + DWORD64 displacement; + + ResolvedTrace resolve(ResolvedTrace t) { + HANDLE process = GetCurrentProcess(); + + char name[256]; + + memset(&sym, 0, sizeof(sym)); + sym.sym.SizeOfStruct = sizeof(SYMBOL_INFO); + sym.sym.MaxNameLen = max_sym_len; + + if (!SymFromAddr(process, (ULONG64)t.addr, &displacement, &sym.sym)) { + // TODO: error handling everywhere + char* lpMsgBuf; + DWORD dw = GetLastError(); + + FormatMessageA(FORMAT_MESSAGE_ALLOCATE_BUFFER | + FORMAT_MESSAGE_FROM_SYSTEM | + FORMAT_MESSAGE_IGNORE_INSERTS, + NULL, dw, MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT), + (char*)&lpMsgBuf, 0, NULL); + + printf(lpMsgBuf); + + // abort(); + } + UnDecorateSymbolName(sym.sym.Name, (PSTR)name, 256, UNDNAME_COMPLETE); + + DWORD offset = 0; + IMAGEHLP_LINE line; + if (SymGetLineFromAddr(process, (ULONG64)t.addr, &offset, &line)) { + t.object_filename = line.FileName; + t.source.filename = line.FileName; + t.source.line = line.LineNumber; + t.source.col = offset; + } + + t.source.function = name; + t.object_filename = ""; + t.object_function = name; + + return t; + } + + DWORD machine_type() const { return image_type; } + +private: + DWORD image_type; +}; + +#endif + +class TraceResolver : public TraceResolverImpl {}; + +/*************** CODE SNIPPET ***************/ + +class SourceFile { +public: + typedef std::vector> lines_t; + + SourceFile() {} + SourceFile(const std::string &path) { + // 1. If BACKWARD_CXX_SOURCE_PREFIXES is set then assume it contains + // a colon-separated list of path prefixes. Try prepending each + // to the given path until a valid file is found. 
+ const std::vector &prefixes = get_paths_from_env_variable(); + for (size_t i = 0; i < prefixes.size(); ++i) { + // Double slashes (//) should not be a problem. + std::string new_path = prefixes[i] + '/' + path; + _file.reset(new std::ifstream(new_path.c_str())); + if (is_open()) + break; + } + // 2. If no valid file found then fallback to opening the path as-is. + if (!_file || !is_open()) { + _file.reset(new std::ifstream(path.c_str())); + } + } + bool is_open() const { return _file->is_open(); } + + lines_t &get_lines(unsigned line_start, unsigned line_count, lines_t &lines) { + using namespace std; + // This function make uses of the dumbest algo ever: + // 1) seek(0) + // 2) read lines one by one and discard until line_start + // 3) read line one by one until line_start + line_count + // + // If you are getting snippets many time from the same file, it is + // somewhat a waste of CPU, feel free to benchmark and propose a + // better solution ;) + + _file->clear(); + _file->seekg(0); + string line; + unsigned line_idx; + + for (line_idx = 1; line_idx < line_start; ++line_idx) { + std::getline(*_file, line); + if (!*_file) { + return lines; + } + } + + // think of it like a lambda in C++98 ;) + // but look, I will reuse it two times! + // What a good boy am I. 
+ struct isspace { + bool operator()(char c) { return std::isspace(c); } + }; + + bool started = false; + for (; line_idx < line_start + line_count; ++line_idx) { + getline(*_file, line); + if (!*_file) { + return lines; + } + if (!started) { + if (std::find_if(line.begin(), line.end(), not_isspace()) == line.end()) + continue; + started = true; + } + lines.push_back(make_pair(line_idx, line)); + } + + lines.erase( + std::find_if(lines.rbegin(), lines.rend(), not_isempty()).base(), + lines.end()); + return lines; + } + + lines_t get_lines(unsigned line_start, unsigned line_count) { + lines_t lines; + return get_lines(line_start, line_count, lines); + } + + // there is no find_if_not in C++98, lets do something crappy to + // workaround. + struct not_isspace { + bool operator()(char c) { return !std::isspace(c); } + }; + // and define this one here because C++98 is not happy with local defined + // struct passed to template functions, fuuuu. + struct not_isempty { + bool operator()(const lines_t::value_type &p) { + return !(std::find_if(p.second.begin(), p.second.end(), not_isspace()) == + p.second.end()); + } + }; + + void swap(SourceFile &b) { _file.swap(b._file); } + +#ifdef BACKWARD_ATLEAST_CXX11 + SourceFile(SourceFile &&from) : _file(nullptr) { swap(from); } + SourceFile &operator=(SourceFile &&from) { + swap(from); + return *this; + } +#else + explicit SourceFile(const SourceFile &from) { + // some sort of poor man's move semantic. + swap(const_cast(from)); + } + SourceFile &operator=(const SourceFile &from) { + // some sort of poor man's move semantic. 
+ swap(const_cast(from)); + return *this; + } +#endif + +private: + details::handle> + _file; + + std::vector get_paths_from_env_variable_impl() { + std::vector paths; + const char *prefixes_str = std::getenv("BACKWARD_CXX_SOURCE_PREFIXES"); + if (prefixes_str && prefixes_str[0]) { + paths = details::split_source_prefixes(prefixes_str); + } + return paths; + } + + const std::vector &get_paths_from_env_variable() { + static std::vector paths = get_paths_from_env_variable_impl(); + return paths; + } + +#ifdef BACKWARD_ATLEAST_CXX11 + SourceFile(const SourceFile &) = delete; + SourceFile &operator=(const SourceFile &) = delete; +#endif +}; + +class SnippetFactory { +public: + typedef SourceFile::lines_t lines_t; + + lines_t get_snippet(const std::string &filename, unsigned line_start, + unsigned context_size) { + + SourceFile &src_file = get_src_file(filename); + unsigned start = line_start - context_size / 2; + return src_file.get_lines(start, context_size); + } + + lines_t get_combined_snippet(const std::string &filename_a, unsigned line_a, + const std::string &filename_b, unsigned line_b, + unsigned context_size) { + SourceFile &src_file_a = get_src_file(filename_a); + SourceFile &src_file_b = get_src_file(filename_b); + + lines_t lines = + src_file_a.get_lines(line_a - context_size / 4, context_size / 2); + src_file_b.get_lines(line_b - context_size / 4, context_size / 2, lines); + return lines; + } + + lines_t get_coalesced_snippet(const std::string &filename, unsigned line_a, + unsigned line_b, unsigned context_size) { + SourceFile &src_file = get_src_file(filename); + + using std::max; + using std::min; + unsigned a = min(line_a, line_b); + unsigned b = max(line_a, line_b); + + if ((b - a) < (context_size / 3)) { + return src_file.get_lines((a + b - context_size + 1) / 2, context_size); + } + + lines_t lines = src_file.get_lines(a - context_size / 4, context_size / 2); + src_file.get_lines(b - context_size / 4, context_size / 2, lines); + return lines; + } + 
+private: + typedef details::hashtable::type src_files_t; + src_files_t _src_files; + + SourceFile &get_src_file(const std::string &filename) { + src_files_t::iterator it = _src_files.find(filename); + if (it != _src_files.end()) { + return it->second; + } + SourceFile &new_src_file = _src_files[filename]; + new_src_file = SourceFile(filename); + return new_src_file; + } +}; + +/*************** PRINTER ***************/ + +namespace ColorMode { +enum type { automatic, never, always }; +} + +class cfile_streambuf : public std::streambuf { +public: + cfile_streambuf(FILE *_sink) : sink(_sink) {} + int_type underflow() override { return traits_type::eof(); } + int_type overflow(int_type ch) override { + if (traits_type::not_eof(ch) && fwrite(&ch, sizeof ch, 1, sink) == 1) { + return ch; + } + return traits_type::eof(); + } + + std::streamsize xsputn(const char_type *s, std::streamsize count) override { + return static_cast( + fwrite(s, sizeof *s, static_cast(count), sink)); + } + +#ifdef BACKWARD_ATLEAST_CXX11 +public: + cfile_streambuf(const cfile_streambuf &) = delete; + cfile_streambuf &operator=(const cfile_streambuf &) = delete; +#else + private: + cfile_streambuf(const cfile_streambuf &); + cfile_streambuf &operator=(const cfile_streambuf &); +#endif + +private: + FILE *sink; + std::vector buffer; +}; + +#ifdef BACKWARD_SYSTEM_LINUX + +namespace Color { +enum type { yellow = 33, purple = 35, reset = 39 }; +} // namespace Color + +class Colorize { +public: + Colorize(std::ostream &os) : _os(os), _reset(false), _enabled(false) {} + + void activate(ColorMode::type mode) { _enabled = mode == ColorMode::always; } + + void activate(ColorMode::type mode, FILE *fp) { activate(mode, fileno(fp)); } + + void set_color(Color::type ccode) { + if (!_enabled) + return; + + // I assume that the terminal can handle basic colors. Seriously I + // don't want to deal with all the termcap shit. 
+ _os << "\033[" << static_cast(ccode) << "m"; + _reset = (ccode != Color::reset); + } + + ~Colorize() { + if (_reset) { + set_color(Color::reset); + } + } + +private: + void activate(ColorMode::type mode, int fd) { + activate(mode == ColorMode::automatic && isatty(fd) ? ColorMode::always + : mode); + } + + std::ostream &_os; + bool _reset; + bool _enabled; +}; + +#else // ndef BACKWARD_SYSTEM_LINUX + +namespace Color { +enum type { yellow = 0, purple = 0, reset = 0 }; +} // namespace Color + +class Colorize { +public: + Colorize(std::ostream &) {} + void activate(ColorMode::type) {} + void activate(ColorMode::type, FILE *) {} + void set_color(Color::type) {} +}; + +#endif // BACKWARD_SYSTEM_LINUX + +class Printer { +public: + bool snippet; + ColorMode::type color_mode; + bool address; + bool object; + int inliner_context_size; + int trace_context_size; + + Printer() + : snippet(true), color_mode(ColorMode::automatic), address(false), + object(false), inliner_context_size(5), trace_context_size(7) {} + + template FILE *print(ST &st, FILE *fp = stderr) { + cfile_streambuf obuf(fp); + std::ostream os(&obuf); + Colorize colorize(os); + colorize.activate(color_mode, fp); + print_stacktrace(st, os, colorize); + return fp; + } + + template std::ostream &print(ST &st, std::ostream &os) { + Colorize colorize(os); + colorize.activate(color_mode); + print_stacktrace(st, os, colorize); + return os; + } + + template + FILE *print(IT begin, IT end, FILE *fp = stderr, size_t thread_id = 0) { + cfile_streambuf obuf(fp); + std::ostream os(&obuf); + Colorize colorize(os); + colorize.activate(color_mode, fp); + print_stacktrace(begin, end, os, thread_id, colorize); + return fp; + } + + template + std::ostream &print(IT begin, IT end, std::ostream &os, + size_t thread_id = 0) { + Colorize colorize(os); + colorize.activate(color_mode); + print_stacktrace(begin, end, os, thread_id, colorize); + return os; + } + + TraceResolver const &resolver() const { return _resolver; } + +private: + 
TraceResolver _resolver; + SnippetFactory _snippets; + + template + void print_stacktrace(ST &st, std::ostream &os, Colorize &colorize) { + print_header(os, st.thread_id()); + _resolver.load_stacktrace(st); + for (size_t trace_idx = st.size(); trace_idx > 0; --trace_idx) { + print_trace(os, _resolver.resolve(st[trace_idx - 1]), colorize); + } + } + + template + void print_stacktrace(IT begin, IT end, std::ostream &os, size_t thread_id, + Colorize &colorize) { + print_header(os, thread_id); + for (; begin != end; ++begin) { + print_trace(os, *begin, colorize); + } + } + + void print_header(std::ostream &os, size_t thread_id) { + os << "Stack trace (most recent call last)"; + if (thread_id) { + os << " in thread " << thread_id; + } + os << ":\n"; + } + + void print_trace(std::ostream &os, const ResolvedTrace &trace, + Colorize &colorize) { + os << "#" << std::left << std::setw(2) << trace.idx << std::right; + bool already_indented = true; + + if (!trace.source.filename.size() || object) { + os << " Object \"" << trace.object_filename << "\", at " << trace.addr + << ", in " << trace.object_function << "\n"; + already_indented = false; + } + + for (size_t inliner_idx = trace.inliners.size(); inliner_idx > 0; + --inliner_idx) { + if (!already_indented) { + os << " "; + } + const ResolvedTrace::SourceLoc &inliner_loc = + trace.inliners[inliner_idx - 1]; + print_source_loc(os, " | ", inliner_loc); + if (snippet) { + print_snippet(os, " | ", inliner_loc, colorize, Color::purple, + inliner_context_size); + } + already_indented = false; + } + + if (trace.source.filename.size()) { + if (!already_indented) { + os << " "; + } + print_source_loc(os, " ", trace.source, trace.addr); + if (snippet) { + print_snippet(os, " ", trace.source, colorize, Color::yellow, + trace_context_size); + } + } + } + + void print_snippet(std::ostream &os, const char *indent, + const ResolvedTrace::SourceLoc &source_loc, + Colorize &colorize, Color::type color_code, + int context_size) { + using 
namespace std; + typedef SnippetFactory::lines_t lines_t; + + lines_t lines = _snippets.get_snippet(source_loc.filename, source_loc.line, + static_cast(context_size)); + + for (lines_t::const_iterator it = lines.begin(); it != lines.end(); ++it) { + if (it->first == source_loc.line) { + colorize.set_color(color_code); + os << indent << ">"; + } else { + os << indent << " "; + } + os << std::setw(4) << it->first << ": " << it->second << "\n"; + if (it->first == source_loc.line) { + colorize.set_color(Color::reset); + } + } + } + + void print_source_loc(std::ostream &os, const char *indent, + const ResolvedTrace::SourceLoc &source_loc, + void *addr = nullptr) { + os << indent << "Source \"" << source_loc.filename << "\", line " + << source_loc.line << ", in " << source_loc.function; + + if (address && addr != nullptr) { + os << " [" << addr << "]"; + } + os << "\n"; + } +}; + +/*************** SIGNALS HANDLING ***************/ + +#if defined(BACKWARD_SYSTEM_LINUX) || defined(BACKWARD_SYSTEM_DARWIN) + +class SignalHandling { +public: + static std::vector make_default_signals() { + const int posix_signals[] = { + // Signals for which the default action is "Core". + SIGABRT, // Abort signal from abort(3) + SIGBUS, // Bus error (bad memory access) + SIGFPE, // Floating point exception + SIGILL, // Illegal Instruction + SIGIOT, // IOT trap. 
A synonym for SIGABRT + SIGQUIT, // Quit from keyboard + SIGSEGV, // Invalid memory reference + SIGSYS, // Bad argument to routine (SVr4) + SIGTRAP, // Trace/breakpoint trap + SIGXCPU, // CPU time limit exceeded (4.2BSD) + SIGXFSZ, // File size limit exceeded (4.2BSD) +#if defined(BACKWARD_SYSTEM_DARWIN) + SIGEMT, // emulation instruction executed +#endif + }; + return std::vector(posix_signals, + posix_signals + + sizeof posix_signals / sizeof posix_signals[0]); + } + + SignalHandling(const std::vector &posix_signals = make_default_signals()) + : _loaded(false) { + bool success = true; + + const size_t stack_size = 1024 * 1024 * 8; + _stack_content.reset(static_cast(malloc(stack_size))); + if (_stack_content) { + stack_t ss; + ss.ss_sp = _stack_content.get(); + ss.ss_size = stack_size; + ss.ss_flags = 0; + if (sigaltstack(&ss, nullptr) < 0) { + success = false; + } + } else { + success = false; + } + + for (size_t i = 0; i < posix_signals.size(); ++i) { + struct sigaction action; + memset(&action, 0, sizeof action); + action.sa_flags = + static_cast(SA_SIGINFO | SA_ONSTACK | SA_NODEFER | SA_RESETHAND); + sigfillset(&action.sa_mask); + sigdelset(&action.sa_mask, posix_signals[i]); +#if defined(__clang__) + #pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wdisabled-macro-expansion" +#endif + action.sa_sigaction = &sig_handler; +#if defined(__clang__) +#pragma clang diagnostic pop +#endif + + int r = sigaction(posix_signals[i], &action, nullptr); + if (r < 0) + success = false; + } + + _loaded = success; + } + + bool loaded() const { return _loaded; } + + static void handleSignal(int, siginfo_t *info, void *_ctx) { + ucontext_t *uctx = static_cast(_ctx); + + StackTrace st; + void *error_addr = nullptr; +#ifdef REG_RIP // x86_64 + error_addr = reinterpret_cast(uctx->uc_mcontext.gregs[REG_RIP]); +#elif defined(REG_EIP) // x86_32 + error_addr = reinterpret_cast(uctx->uc_mcontext.gregs[REG_EIP]); +#elif defined(__arm__) + error_addr = 
reinterpret_cast(uctx->uc_mcontext.arm_pc); +#elif defined(__aarch64__) + #if defined(__APPLE__) + error_addr = reinterpret_cast(uctx->uc_mcontext->__ss.__pc); + #else + error_addr = reinterpret_cast(uctx->uc_mcontext.pc); + #endif +#elif defined(__mips__) + error_addr = reinterpret_cast( + reinterpret_cast(&uctx->uc_mcontext)->sc_pc); +#elif defined(__ppc__) || defined(__powerpc) || defined(__powerpc__) || \ + defined(__POWERPC__) + error_addr = reinterpret_cast(uctx->uc_mcontext.regs->nip); +#elif defined(__s390x__) + error_addr = reinterpret_cast(uctx->uc_mcontext.psw.addr); +#elif defined(__APPLE__) && defined(__x86_64__) + error_addr = reinterpret_cast(uctx->uc_mcontext->__ss.__rip); +#elif defined(__APPLE__) + error_addr = reinterpret_cast(uctx->uc_mcontext->__ss.__eip); +#else +#warning ":/ sorry, ain't know no nothing none not of your architecture!" +#endif + if (error_addr) { + st.load_from(error_addr, 32, reinterpret_cast(uctx), + info->si_addr); + } else { + st.load_here(32, reinterpret_cast(uctx), info->si_addr); + } + + Printer printer; + printer.address = true; + printer.print(st, stderr); + +#if _XOPEN_SOURCE >= 700 || _POSIX_C_SOURCE >= 200809L + psiginfo(info, nullptr); +#else + (void)info; +#endif + } + +private: + details::handle _stack_content; + bool _loaded; + +#ifdef __GNUC__ + __attribute__((noreturn)) +#endif + static void + sig_handler(int signo, siginfo_t *info, void *_ctx) { + handleSignal(signo, info, _ctx); + + // try to forward the signal. + raise(info->si_signo); + + // terminate the process immediately. + puts("watf? 
exit"); + _exit(EXIT_FAILURE); + } +}; + +#endif // BACKWARD_SYSTEM_LINUX || BACKWARD_SYSTEM_DARWIN + +#ifdef BACKWARD_SYSTEM_WINDOWS + +class SignalHandling { +public: + SignalHandling(const std::vector & = std::vector()) + : reporter_thread_([]() { + /* We handle crashes in a utility thread: + backward structures and some Windows functions called here + need stack space, which we do not have when we encounter a + stack overflow. + To support reporting stack traces during a stack overflow, + we create a utility thread at startup, which waits until a + crash happens or the program exits normally. */ + + { + std::unique_lock lk(mtx()); + cv().wait(lk, [] { return crashed() != crash_status::running; }); + } + if (crashed() == crash_status::crashed) { + handle_stacktrace(skip_recs()); + } + { + std::unique_lock lk(mtx()); + crashed() = crash_status::ending; + } + cv().notify_one(); + }) { + SetUnhandledExceptionFilter(crash_handler); + + signal(SIGABRT, signal_handler); + _set_abort_behavior(0, _WRITE_ABORT_MSG | _CALL_REPORTFAULT); + + std::set_terminate(&terminator); + std::set_unexpected(&terminator); + _set_purecall_handler(&terminator); + _set_invalid_parameter_handler(&invalid_parameter_handler); + } + bool loaded() const { return true; } + + ~SignalHandling() { + { + std::unique_lock lk(mtx()); + crashed() = crash_status::normal_exit; + } + + cv().notify_one(); + + reporter_thread_.join(); + } + +private: + static CONTEXT *ctx() { + static CONTEXT data; + return &data; + } + + enum class crash_status { running, crashed, normal_exit, ending }; + + static crash_status &crashed() { + static crash_status data; + return data; + } + + static std::mutex &mtx() { + static std::mutex data; + return data; + } + + static std::condition_variable &cv() { + static std::condition_variable data; + return data; + } + + static HANDLE &thread_handle() { + static HANDLE handle; + return handle; + } + + std::thread reporter_thread_; + + // TODO: how not to hardcode these? 
+ static const constexpr int signal_skip_recs = +#ifdef __clang__ + // With clang, RtlCaptureContext also captures the stack frame of the + // current function Below that, there ar 3 internal Windows functions + 4 +#else + // With MSVC cl, RtlCaptureContext misses the stack frame of the current + // function The first entries during StackWalk are the 3 internal Windows + // functions + 3 +#endif + ; + + static int &skip_recs() { + static int data; + return data; + } + + static inline void terminator() { + crash_handler(signal_skip_recs); + abort(); + } + + static inline void signal_handler(int) { + crash_handler(signal_skip_recs); + abort(); + } + + static inline void __cdecl invalid_parameter_handler(const wchar_t *, + const wchar_t *, + const wchar_t *, + unsigned int, + uintptr_t) { + crash_handler(signal_skip_recs); + abort(); + } + + NOINLINE static LONG WINAPI crash_handler(EXCEPTION_POINTERS *info) { + // The exception info supplies a trace from exactly where the issue was, + // no need to skip records + crash_handler(0, info->ContextRecord); + return EXCEPTION_CONTINUE_SEARCH; + } + + NOINLINE static void crash_handler(int skip, CONTEXT *ct = nullptr) { + + if (ct == nullptr) { + RtlCaptureContext(ctx()); + } else { + memcpy(ctx(), ct, sizeof(CONTEXT)); + } + DuplicateHandle(GetCurrentProcess(), GetCurrentThread(), + GetCurrentProcess(), &thread_handle(), 0, FALSE, + DUPLICATE_SAME_ACCESS); + + skip_recs() = skip; + + { + std::unique_lock lk(mtx()); + crashed() = crash_status::crashed; + } + + cv().notify_one(); + + { + std::unique_lock lk(mtx()); + cv().wait(lk, [] { return crashed() != crash_status::crashed; }); + } + } + + static void handle_stacktrace(int skip_frames = 0) { + // printer creates the TraceResolver, which can supply us a machine type + // for stack walking. Without this, StackTrace can only guess using some + // macros. 
+ // StackTrace also requires that the PDBs are already loaded, which is done + // in the constructor of TraceResolver + Printer printer; + + StackTrace st; + st.set_machine_type(printer.resolver().machine_type()); + st.set_context(ctx()); + st.set_thread_handle(thread_handle()); + st.load_here(32 + skip_frames); + st.skip_n_firsts(skip_frames); + + printer.address = true; + printer.print(st, std::cerr); + } +}; + +#endif // BACKWARD_SYSTEM_WINDOWS + +#ifdef BACKWARD_SYSTEM_UNKNOWN + +class SignalHandling { +public: + SignalHandling(const std::vector & = std::vector()) {} + bool init() { return false; } + bool loaded() { return false; } +}; + +#endif // BACKWARD_SYSTEM_UNKNOWN + +} // namespace backward + +#endif /* H_GUARD */ \ No newline at end of file diff --git a/scripts/portable_binary_builder/build_portable_binary_core.sh b/scripts/portable_binary_builder/build_portable_binary_core.sh index 85a25b98..26eca935 100644 --- a/scripts/portable_binary_builder/build_portable_binary_core.sh +++ b/scripts/portable_binary_builder/build_portable_binary_core.sh @@ -8,14 +8,14 @@ source /hbb_exe/activate set -eux # install packages needed to compile -yum install wget git -y +yum install wget git binutils-devel -y # compile pandora cd io mkdir build_portable_executable cd build_portable_executable -cmake .. -make VERBOSE=1 -j 8 +cmake -DPRINT_STACKTRACE=True .. +make VERBOSE=1 -j 4 ctest -VV # verify if the binary is portable diff --git a/src/backward.cpp b/src/backward.cpp new file mode 100644 index 00000000..36616fc6 --- /dev/null +++ b/src/backward.cpp @@ -0,0 +1,42 @@ +// Pick your poison. +// +// On GNU/Linux, you have few choices to get the most out of your stack trace. 
+// +// By default you get: +// - object filename +// - function name +// +// In order to add: +// - source filename +// - line and column numbers +// - source code snippet (assuming the file is accessible) + +// Install one of the following libraries then uncomment one of the macro (or +// better, add the detection of the lib and the macro definition in your build +// system) + +// - apt-get install libdw-dev ... +// - g++/clang++ -ldw ... +// #define BACKWARD_HAS_DW 1 + +// - apt-get install binutils-dev ... +// - g++/clang++ -lbfd ... +// #define BACKWARD_HAS_BFD 1 + +// - apt-get install libdwarf-dev ... +// - g++/clang++ -ldwarf ... +// #define BACKWARD_HAS_DWARF 1 + +// Regardless of the library you choose to read the debug information, +// for potentially more detailed stack traces you can use libunwind +// - apt-get install libunwind-dev +// - g++/clang++ -lunwind +// #define BACKWARD_HAS_LIBUNWIND 1 + +#include "backward.hpp" + +namespace backward { + +backward::SignalHandling sh; + +} // namespace backward \ No newline at end of file diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 8d224d89..f3833537 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -21,6 +21,7 @@ target_link_libraries(${PROJECT_NAME}_test ${ZLIB_LIBRARY} ${CMAKE_DL_LIBS} ${STATIC_C_CXX} + ${BACKWARD_LIBRARIES} ) add_test(NAME ${PROJECT_NAME}_test COMMAND ${PROJECT_NAME}_test) From 93abb1ff86bc31e644416f0f1f48fb3535374931 Mon Sep 17 00:00:00 2001 From: Leandro Ishi Date: Tue, 19 Jan 2021 22:35:58 -0400 Subject: [PATCH 08/37] Refactoring backward, CLI11 and inthash into thirdparty --- CMakeLists.txt | 7 ++++++- {include => src}/OptionsAggregator.cpp | 0 test/CMakeLists.txt | 3 +-- {include => thirdparty/include}/CLI11.hpp | 0 {include => thirdparty/include}/backward.hpp | 0 {include => thirdparty/include}/inthash.h | 0 {src => thirdparty/src}/backward.cpp | 0 {src => thirdparty/src}/inthash.cpp | 0 8 files changed, 7 insertions(+), 3 deletions(-) rename {include => 
src}/OptionsAggregator.cpp (100%) rename {include => thirdparty/include}/CLI11.hpp (100%) rename {include => thirdparty/include}/backward.hpp (100%) rename {include => thirdparty/include}/inthash.h (100%) rename {src => thirdparty/src}/backward.cpp (100%) rename {src => thirdparty/src}/inthash.cpp (100%) diff --git a/CMakeLists.txt b/CMakeLists.txt index 6e879bde..19b45bd9 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -22,7 +22,6 @@ else() set(BACKWARD_LIBRARIES "") endif() - # add a RELEASE_WITH_ASSERTS build type - TODO: FIX THIS set(CMAKE_CXX_FLAGS_RELEASE_WITH_ASSERTS "-O3") @@ -110,6 +109,8 @@ include_directories(SYSTEM include_directories( ${PROJECT_SOURCE_DIR}/include ${PROJECT_SOURCE_DIR}/src + ${PROJECT_SOURCE_DIR}/thirdparty/include + ${PROJECT_SOURCE_DIR}/thirdparty/src ) file(GLOB_RECURSE SRC_FILES @@ -120,6 +121,10 @@ file(GLOB_RECURSE SRC_FILES ${PROJECT_SOURCE_DIR}/include/*.h ${PROJECT_SOURCE_DIR}/include/*/*.hpp ${PROJECT_SOURCE_DIR}/include/*/*.h + + ${PROJECT_SOURCE_DIR}/thirdparty/src/*.cpp + ${PROJECT_SOURCE_DIR}/thirdparty/include/*.hpp + ${PROJECT_SOURCE_DIR}/thirdparty/include/*.h ) add_executable(${PROJECT_NAME} ${SRC_FILES}) diff --git a/include/OptionsAggregator.cpp b/src/OptionsAggregator.cpp similarity index 100% rename from include/OptionsAggregator.cpp rename to src/OptionsAggregator.cpp diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index f3833537..e4dabd8c 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -1,6 +1,5 @@ file(GLOB_RECURSE TEST_SRC_FILES - ${PROJECT_SOURCE_DIR}/src/*.cpp - ${PROJECT_SOURCE_DIR}/src/*/*.cpp + ${SRC_FILES} ${PROJECT_SOURCE_DIR}/test/*.cpp) list(REMOVE_ITEM TEST_SRC_FILES ${PROJECT_SOURCE_DIR}/src/main.cpp) add_executable(${PROJECT_NAME}_test ${TEST_SRC_FILES}) diff --git a/include/CLI11.hpp b/thirdparty/include/CLI11.hpp similarity index 100% rename from include/CLI11.hpp rename to thirdparty/include/CLI11.hpp diff --git a/include/backward.hpp b/thirdparty/include/backward.hpp 
similarity index 100% rename from include/backward.hpp rename to thirdparty/include/backward.hpp diff --git a/include/inthash.h b/thirdparty/include/inthash.h similarity index 100% rename from include/inthash.h rename to thirdparty/include/inthash.h diff --git a/src/backward.cpp b/thirdparty/src/backward.cpp similarity index 100% rename from src/backward.cpp rename to thirdparty/src/backward.cpp diff --git a/src/inthash.cpp b/thirdparty/src/inthash.cpp similarity index 100% rename from src/inthash.cpp rename to thirdparty/src/inthash.cpp From 5cc162e779510160e0159eda114543588f1ac203 Mon Sep 17 00:00:00 2001 From: Leandro Ishi Date: Tue, 19 Jan 2021 23:34:05 -0400 Subject: [PATCH 09/37] Refactoring fatal_errors() as specified by the first batch of Michael's reviews --- include/OptionsAggregator.h | 3 + include/compare_main.h | 1 + include/estimate_parameters.h | 1 + include/fastaq.h | 3 +- include/fatal_error.h | 45 ++++++++++ include/kmergraph.h | 1 + include/kmergraphwithcoverage.h | 1 + include/localgraph.h | 7 ++ include/utils.h | 22 +---- src/compare_main.cpp | 7 +- src/estimate_parameters.cpp | 7 +- src/fastaq.cpp | 16 +++- src/kmergraph.cpp | 105 ++++++++++------------ src/kmergraphwithcoverage.cpp | 130 ++++++++++++---------------- src/localgraph.cpp | 21 +++-- src/prg/path.cpp | 2 +- src/utils.cpp | 4 +- test/estimate_parameters_test.cpp | 22 +++-- test/fastaq_test.cpp | 13 +-- test/kmergraph_test.cpp | 7 +- test/kmergraphwithcoverage_test.cpp | 28 +++--- test/localgraph_test.cpp | 7 +- test/test_helpers.h | 26 ++++++ 23 files changed, 273 insertions(+), 206 deletions(-) create mode 100644 include/fatal_error.h diff --git a/include/OptionsAggregator.h b/include/OptionsAggregator.h index ec835b8f..c7cd1e22 100644 --- a/include/OptionsAggregator.h +++ b/include/OptionsAggregator.h @@ -1,6 +1,9 @@ #ifndef PANDORA_OPTIONSAGGREGATOR_H #define PANDORA_OPTIONSAGGREGATOR_H +#include +#include + class GenotypingOptions { private: std::vector 
sample_index_to_exp_depth_covg; diff --git a/include/compare_main.h b/include/compare_main.h index 3998af14..3d85cf3d 100644 --- a/include/compare_main.h +++ b/include/compare_main.h @@ -24,6 +24,7 @@ #include "estimate_parameters.h" #include "OptionsAggregator.h" #include "CLI11.hpp" +#include "fatal_error.h" using std::set; using std::vector; diff --git a/include/estimate_parameters.h b/include/estimate_parameters.h index 4a3259c5..fb58fdd1 100644 --- a/include/estimate_parameters.h +++ b/include/estimate_parameters.h @@ -5,6 +5,7 @@ #include #include #include +#include "fatal_error.h" namespace fs = boost::filesystem; diff --git a/include/fastaq.h b/include/fastaq.h index 761915a0..181577e9 100644 --- a/include/fastaq.h +++ b/include/fastaq.h @@ -5,8 +5,6 @@ #include #include #include -#include - #include #include #include @@ -14,6 +12,7 @@ #include #include #include +#include "fatal_error.h" namespace fs = boost::filesystem; diff --git a/include/fatal_error.h b/include/fatal_error.h new file mode 100644 index 00000000..b9e51670 --- /dev/null +++ b/include/fatal_error.h @@ -0,0 +1,45 @@ +#ifndef PANDORA_FATAL_ERROR_H +#define PANDORA_FATAL_ERROR_H + +#include "backward.hpp" +#include "utils.h" +#include + +template +std::string to_string(const T& element) +{ + std::stringstream ss; + ss << element; + return ss.str(); +} + +// From https://stackoverflow.com/a/21806609 +template +std::string stringer(Args const&... args) +{ + std::string result; + using ::to_string; + using std::to_string; + int unpack[]{0, (result += to_string(args), 0)...}; + static_cast(unpack); + return result; +} + + +class FatalRuntimeError : public std::runtime_error { + using std::runtime_error::runtime_error; +}; + +template +void fatal_error(Args const&... 
args) { + backward::StackTrace stacktrace; + stacktrace.load_here(32); + backward::Printer stacktrace_printer; + stacktrace_printer.address = true; + stacktrace_printer.object = true; + std::stringstream ss; + stacktrace_printer.print(stacktrace, ss); + throw FatalRuntimeError(stringer("[FATAL ERROR]: ", args..., "\nAborting...\n\nDEBUG info (for developers, provide this if opening an issue):\n", ss.str())); +} + +#endif // PANDORA_FATAL_ERROR_H diff --git a/include/kmergraph.h b/include/kmergraph.h index fc9b85fc..710b4248 100644 --- a/include/kmergraph.h +++ b/include/kmergraph.h @@ -11,6 +11,7 @@ class LocalPRG; #include "prg/path.h" #include "kmernode.h" #include "pangenome/ns.cpp" +#include "fatal_error.h" namespace fs = boost::filesystem; diff --git a/include/kmergraphwithcoverage.h b/include/kmergraphwithcoverage.h index d56b80a5..39a8fc7d 100644 --- a/include/kmergraphwithcoverage.h +++ b/include/kmergraphwithcoverage.h @@ -13,6 +13,7 @@ class LocalPRG; #include "kmergraph.h" #include "pangenome/ns.cpp" #include "utils.h" +#include "fatal_error.h" /** * Represents an annotated KmerGraph, where the annotation is only the coverage on the diff --git a/include/localgraph.h b/include/localgraph.h index 50d11721..3982fb35 100644 --- a/include/localgraph.h +++ b/include/localgraph.h @@ -10,6 +10,13 @@ #include "prg/path.h" #include "localnode.h" #include "IITree.h" +#include +#include +#include +#include +#include "utils.h" +#include "fatal_error.h" + class LocalGraph { public: diff --git a/include/utils.h b/include/utils.h index 516e96d5..d79ef621 100644 --- a/include/utils.h +++ b/include/utils.h @@ -12,6 +12,8 @@ #include "minihits.h" #include "pangenome/ns.cpp" #include +#include +#include "fatal_error.h" namespace fs = boost::filesystem; @@ -102,26 +104,6 @@ uint32_t pangraph_from_read_file(const std::string&, std::shared_ptr>&, PanNode*, uint32_t, float); -class FatalError { -public: - FatalError(){ - BOOST_LOG_TRIVIAL(error) << "[FATAL ERROR] "; - } - - 
virtual ~FatalError() { - BOOST_LOG_TRIVIAL(error) << std::endl << std::endl; - std::exit(EXIT_FAILURE); - } - - template - FatalError& operator<<(const T &element) { - BOOST_LOG_TRIVIAL(error) << element; - return *this; - } -}; - -void fatal_error(const std::string& message); - // TODO : refactor all file open and closing to use these functions void open_file_for_reading(const std::string& file_path, std::ifstream& stream); void open_file_for_writing(const std::string& file_path, std::ofstream& stream); diff --git a/src/compare_main.cpp b/src/compare_main.cpp index 33eec1b4..2f66692a 100644 --- a/src/compare_main.cpp +++ b/src/compare_main.cpp @@ -408,11 +408,10 @@ int pandora_compare(CompareOptions& opt) const auto& prg_id = pangraph_node.prg_id; - bool valid_prg_id = prgs.size() > prg_id; + const bool valid_prg_id = prgs.size() > prg_id; if (!valid_prg_id) { - FatalError() << "[Error on PanRG] A PRG has an invalid ID (" << prg_id - << "), >= than the number of PRGs (" << prgs.size() - << ") in the PanRG"; + fatal_error("Error reading PanRG: a PRG has an invalid ID (", prg_id, "), >= than the number of PRGs (", + prgs.size(), ") in the PanRG"); } const auto& prg_ptr = prgs[prg_id]; diff --git a/src/estimate_parameters.cpp b/src/estimate_parameters.cpp index d04c84eb..72fc194e 100644 --- a/src/estimate_parameters.cpp +++ b/src/estimate_parameters.cpp @@ -46,11 +46,10 @@ double fit_variance_covg(const std::vector& kmer_covg_dist, double& me void fit_negative_binomial(double& mean, double& variance, float& p, float& r) { - - bool negative_binomial_parameters_are_ok = mean > 0 and variance > 0 and mean < variance; + const bool negative_binomial_parameters_are_ok = mean > 0 and variance > 0 and mean < variance; if (!negative_binomial_parameters_are_ok) { - FatalError() << "Negative binomial parameters are invalid " - << "(mean is " << mean << ", variance is " << variance << ")"; + fatal_error("Negative binomial parameters are invalid: mean is ", mean, + ", variance 
is ", variance); } p = mean / variance; r = (mean * p / (1 - p) + variance * p * p / (1 - p)) / 2; diff --git a/src/fastaq.cpp b/src/fastaq.cpp index 493778d9..4eab3183 100644 --- a/src/fastaq.cpp +++ b/src/fastaq.cpp @@ -49,8 +49,15 @@ void Fastaq::add_entry(const std::string& name, const std::string& sequence, const std::vector& covgs, const uint_least16_t global_covg, const std::string header) { - assert(name != ""); - assert(covgs.size() == sequence.length()); + bool fasta_entry_has_a_name = name.length() > 0; + if (!fasta_entry_has_a_name) { + fatal_error("Error adding entry to Fasta/q file: empty names are invalid"); + } + bool quality_string_has_same_length_as_sequence = covgs.size() == sequence.length(); + if (!quality_string_has_same_length_as_sequence) { + fatal_error("Error adding entry to Fasta/q file: sequence and quality string have different lengths"); + } + auto mod_global_covg = global_covg; if (global_covg < 1) { mod_global_covg = 1; @@ -74,7 +81,10 @@ void Fastaq::add_entry(const std::string& name, const std::string& sequence, void Fastaq::add_entry( const std::string& name, const std::string& sequence, const std::string header) { - assert(name != ""); + bool fasta_entry_has_a_name = name.length() > 0; + if (!fasta_entry_has_a_name) { + fatal_error("Error adding entry to Fasta/q file: empty names are invalid"); + } names.push_back(name); headers[name] = header; diff --git a/src/kmergraph.cpp b/src/kmergraph.cpp index b13aabdf..bc3d632e 100644 --- a/src/kmergraph.cpp +++ b/src/kmergraph.cpp @@ -100,8 +100,7 @@ KmerNodePtr KmerGraph::add_node(const prg::Path& p) const bool path_is_valid = k == 0 or p.length() == 0 or p.length() == k; if (!path_is_valid) { - FatalError() << "In KmerGraph::add_node(), the node path is not valid (k is " << k - << ", p.length() is " << p.length(); + fatal_error("Error adding node to Kmer Graph: the node path is not valid (k is ", k, ", p.length() is ", p.length()); } if (k == 0 and p.length() > 0) { k = p.length(); @@ 
-131,19 +130,19 @@ void KmerGraph::add_edge(KmerNodePtr from, KmerNodePtr to) { const bool from_node_is_valid = from->id < nodes.size() and nodes[from->id] == from; if (!from_node_is_valid) { - FatalError() << "In KmerGraph::add_edge(), from node is invalid"; + fatal_error("Error adding edge to Kmer Graph: from node is invalid: ", from->id); } const bool to_node_is_valid = to->id < nodes.size() and nodes[to->id] == to; if (!to_node_is_valid) { - FatalError() << "In KmerGraph::add_edge(), to node is invalid"; + fatal_error("Error adding edge to Kmer Graph: to node is invalid: ", to->id); } - bool path_order_is_valid = from->path < to->path; + const bool path_order_is_valid = from->path < to->path; if (!path_order_is_valid) { - FatalError() << "In KmerGraph::add_edge(), cannot add edge from " << from->id - << " to " << to->id << " because " << from->path - << " is not less than " << to->path << " (path order is invalid)"; + fatal_error("Error adding edge to Kmer Graph: cannot add edge from ", from->id, + " to ", to->id, " because ", from->path, + " is not less than ", to->path, " (path order is invalid)"); } if (from->find_node_ptr_in_out_nodes(to) == from->out_nodes.end()) { @@ -203,32 +202,31 @@ void KmerGraph::check() const { // should not have any leaves, only nodes with degree 0 are start and end for (auto c = sorted_nodes.begin(); c != sorted_nodes.end(); ++c) { - bool is_start_node = (*c) == (*sorted_nodes.begin()); - bool is_end_node = (*c) == *(sorted_nodes.rbegin()); - bool indegree_zero = (*c)->in_nodes.empty(); - bool outdegree_zero = (*c)->out_nodes.empty(); + const bool is_start_node = (*c) == (*sorted_nodes.begin()); + const bool is_end_node = (*c) == *(sorted_nodes.rbegin()); + const bool indegree_zero = (*c)->in_nodes.empty(); + const bool outdegree_zero = (*c)->out_nodes.empty(); if (indegree_zero and !is_start_node) { - FatalError() << "In KmerGraph::check(), node " << **c << "has indegree 0 and is not a start node"; + fatal_error("Error checking 
Kmer Graph: node ", **c, "has indegree 0 and is not a start node"); } if (outdegree_zero and !is_end_node) { - FatalError() << "In KmerGraph::check(), node " << **c << "has outdegree 0 and is not an end node"; + fatal_error("Error checking Kmer Graph: node ", **c, "has outdegree 0 and is not an end node"); } for (const auto& d : (*c)->out_nodes) { auto dAsSharedPtr = d.lock(); - bool c_path_is_less_than_neighbours_path = (*c)->path < dAsSharedPtr->path; + const bool c_path_is_less_than_neighbours_path = (*c)->path < dAsSharedPtr->path; if (!c_path_is_less_than_neighbours_path) { - FatalError() << "In KmerGraph::check(), path " << (*c)->path - << " is not less than path " << dAsSharedPtr->path - << " (invalid neighbour path order)"; + fatal_error("Error checking Kmer Graph: path ", (*c)->path, + " is not less than path ", dAsSharedPtr->path, " (invalid neighbour path order)"); } - bool neighbour_is_later_in_topological_order = + const bool neighbour_is_later_in_topological_order = find(c, sorted_nodes.end(), dAsSharedPtr) != sorted_nodes.end(); if (!neighbour_is_later_in_topological_order) { - FatalError() << "In KmerGraph::check(), node " << dAsSharedPtr->id - << " does not occur later in sorted list than node " << (*c)->id - << ", but it should due to the topological order"; + fatal_error("Error checking Kmer Graph: node ", dAsSharedPtr->id, + " does not occur later in sorted list than node ", (*c)->id, + ", but it should due to the topological order"); } } } @@ -302,10 +300,9 @@ void KmerGraph::load(const fs::path& filepath) if (line[0] == 'S') { split_line = split(line, "\t"); - bool line_is_consistent = split_line.size() >= 4; + const bool line_is_consistent = split_line.size() >= 4; if (!line_is_consistent) { - FatalError() << "In KmerGraph::load(), line \"" << line << "\" " - << "is inconsistent"; + fatal_error("Error reading GFA. 
Offending line: ", line); } id = std::stoi(split_line[1]); @@ -322,10 +319,9 @@ void KmerGraph::load(const fs::path& filepath) if (line[0] == 'S') { split_line = split(line, "\t"); - bool line_is_consistent = split_line.size() >= 4; + const bool line_is_consistent = split_line.size() >= 4; if (!line_is_consistent) { - FatalError() << "In KmerGraph::load(), line \"" << line << "\" " - << "is inconsistent"; + fatal_error("Error reading GFA. Offending line: ", line); } id = stoi(split_line[1]); @@ -333,9 +329,9 @@ void KmerGraph::load(const fs::path& filepath) char c = ss.peek(); if (!isdigit(c)) { - FatalError() << "In KmerGraph::load(), line \"" << line << "\": " - << "Cannot read in this sort of kmergraph GFA as it " - << "does not label nodes with their PRG path"; + fatal_error("Error reading GFA: cannot read in this sort of kmergraph GFA as it ", + "does not label nodes with their PRG path. ", + "Offending line: ", line); } ss >> p; @@ -343,12 +339,11 @@ void KmerGraph::load(const fs::path& filepath) KmerNodePtr kmer_node = std::make_shared(id, p); - bool id_is_consistent = (id == nodes.size() or num_nodes - id == nodes.size()); + const bool id_is_consistent = (id == nodes.size() or num_nodes - id == nodes.size()); if (!id_is_consistent) { - FatalError() << "In KmerGraph::load(), id is inconsistent." 
- << "id = " << id << ", " - << "nodes.size() = " << nodes.size() << ", " - << "num_nodes = " << num_nodes; + fatal_error("Error reading GFA: node ID is inconsistent.", + "id = ", id, ", ", "nodes.size() = ", nodes.size(), ", ", + "num_nodes = ", num_nodes); } nodes.push_back(kmer_node); @@ -362,26 +357,24 @@ void KmerGraph::load(const fs::path& filepath) } else if (line[0] == 'L') { split_line = split(line, "\t"); - bool line_is_consistent = split_line.size() >= 5; + const bool line_is_consistent = split_line.size() >= 5; if (!line_is_consistent) { - FatalError() << "In KmerGraph::load(), line \"" << line << "\" " - << "is inconsistent"; + fatal_error("Error reading GFA. Offending line: ", line); } - int from_node = stoi(split_line[1]); - int to_node = stoi(split_line[3]); - - bool from_node_in_range = from_node < (int)outnode_counts.size(); - bool to_node_in_range = to_node < (int)innode_counts.size(); + const int from_node = stoi(split_line[1]); + const int to_node = stoi(split_line[3]); + const bool from_node_in_range = from_node < (int)outnode_counts.size(); + const bool to_node_in_range = to_node < (int)innode_counts.size(); if (!from_node_in_range) { - FatalError() << "In KmerGraph::load(), line \"" << line << "\": " - << "from_node out of range: " - << from_node << ">=" << outnode_counts.size(); + fatal_error("Error reading GFA: from_node out of range: " + , from_node, ">=", outnode_counts.size(), + ". Offending line: ", line); } if (!to_node_in_range) { - FatalError() << "In KmerGraph::load(), line \"" << line << "\": " - << "to_node out of range: " - << to_node << ">=" << innode_counts.size(); + fatal_error("Error reading GFA: to_node out of range: " + , to_node, ">=", innode_counts.size(), + ". 
Offending line: ", line); } outnode_counts[stoi(split_line[1])] += 1; @@ -395,9 +388,9 @@ void KmerGraph::load(const fs::path& filepath) id = 0; for (const auto& n : nodes) { - bool id_is_consistent = (nodes[id]->id == id) && (n->id < outnode_counts.size()) && (n->id < innode_counts.size()); + const bool id_is_consistent = (nodes[id]->id == id) && (n->id < outnode_counts.size()) && (n->id < innode_counts.size()); if (!id_is_consistent) { - FatalError() << "In KmerGraph::load(), Node: " << n << " has inconsistent id, should be " << id; + fatal_error("Error reading GFA: node: ", n, " has inconsistent id, should be ", id); } id++; n->out_nodes.reserve(outnode_counts[n->id]); @@ -411,10 +404,9 @@ void KmerGraph::load(const fs::path& filepath) if (line[0] == 'L') { split_line = split(line, "\t"); - bool line_is_consistent = split_line.size() >= 5; + const bool line_is_consistent = split_line.size() >= 5; if (!line_is_consistent) { - FatalError() << "In KmerGraph::load(), line \"" << line << "\" " - << "is inconsistent"; + fatal_error("Error reading GFA. 
Offending line: ", line); } if (split_line[2] == split_line[4]) { @@ -429,8 +421,7 @@ void KmerGraph::load(const fs::path& filepath) } } } else { - BOOST_LOG_TRIVIAL(error) << "Unable to open kmergraph file " << filepath; - exit(1); + fatal_error("Error reading GFA: unable to open kmergraph file", filepath); } } diff --git a/src/kmergraphwithcoverage.cpp b/src/kmergraphwithcoverage.cpp index 8dfdc0be..2cb1a02b 100644 --- a/src/kmergraphwithcoverage.cpp +++ b/src/kmergraphwithcoverage.cpp @@ -9,15 +9,13 @@ #include "kmergraphwithcoverage.h" #include "localPRG.h" -#define assert_msg(x) !(std::cerr << "Assertion failed: " << x << std::endl) - using namespace prg; void KmerGraphWithCoverage::set_exp_depth_covg(const uint32_t edp) { - bool exp_depth_covg_parameter_is_valid = edp > 0; + const bool exp_depth_covg_parameter_is_valid = edp > 0; if (!exp_depth_covg_parameter_is_valid) { - FatalError() << "In KmerGraphWithCoverage::set_exp_depth_covg(): exp_depth_covg is invalid, must be > 0, is " << edp; + fatal_error("Error setting exp_depth_covg: exp_depth_covg is invalid, must be > 0, is ", edp); } exp_depth_covg = edp; } @@ -26,11 +24,10 @@ void KmerGraphWithCoverage::set_binomial_parameter_p(const float e_rate) { BOOST_LOG_TRIVIAL(debug) << "Set p in kmergraph"; - bool valid_parameters_to_set_p = (kmer_prg->k != 0) && (0 < e_rate and e_rate < 1); + const bool valid_parameters_to_set_p = (kmer_prg->k != 0) && (0 < e_rate and e_rate < 1); if (!valid_parameters_to_set_p) { - FatalError() << "In KmerGraphWithCoverage::set_binomial_parameter_p(): " - << "Parameters to set p are not valid " - << "(kmer_prg->k = " << kmer_prg->k << ", e_rate = " << e_rate << ")"; + fatal_error("Error setting binomial parameter p, invalid parameters: " + , "kmer_prg->k = ", kmer_prg->k, ", e_rate = ", e_rate); } binomial_parameter_p = 1 / exp(e_rate * kmer_prg->k); @@ -39,10 +36,9 @@ void KmerGraphWithCoverage::set_binomial_parameter_p(const float e_rate) void 
KmerGraphWithCoverage::increment_covg( uint32_t node_id, pandora::Strand strand, uint32_t sample_id) { - bool sample_is_valid = this->node_index_to_sample_coverage[node_id].size() > sample_id; + const bool sample_is_valid = this->node_index_to_sample_coverage[node_id].size() > sample_id; if (!sample_is_valid) { - FatalError() << "In KmerGraphWithCoverage::increment_covg(), sample_id is invalid (" - << sample_id << ")"; + fatal_error("Error incrementing coverage: sample_id is invalid (", sample_id, ")"); } // get a pointer to the value we want to increment @@ -79,10 +75,9 @@ uint32_t KmerGraphWithCoverage::get_covg( void KmerGraphWithCoverage::set_covg( uint32_t node_id, uint16_t value, pandora::Strand strand, uint32_t sample_id) { - bool sample_is_valid = this->node_index_to_sample_coverage[node_id].size() > sample_id; + const bool sample_is_valid = this->node_index_to_sample_coverage[node_id].size() > sample_id; if (!sample_is_valid) { - FatalError() << "In KmerGraphWithCoverage::set_covg(), sample_id is invalid (" - << sample_id << ")"; + fatal_error("Error setting coverage: sample_id is invalid (", sample_id, ")"); } if (strand == pandora::Strand::Forward) { @@ -98,14 +93,13 @@ void KmerGraphWithCoverage::set_negative_binomial_parameters( if (nbin_prob == 0 and nb_fail == 0) return; - bool negative_binomial_parameters_were_previously_set = + const bool negative_binomial_parameters_were_previously_set = (negative_binomial_parameter_p > 0 and negative_binomial_parameter_p < 1) && (negative_binomial_parameter_r > 0); if (!(negative_binomial_parameters_were_previously_set)) { - FatalError() << "In KmerGraphWithCoverage::set_negative_binomial_parameters(): " - << "negative_binomial_parameter_p (" << negative_binomial_parameter_p << ")" - << " or negative_binomial_parameter_r (" << negative_binomial_parameter_r << ") " - << "were not correctly set"; + fatal_error("Error setting negative_binomial_parameters: negative_binomial_parameter_p (", 
negative_binomial_parameter_p, ")" + , " or negative_binomial_parameter_r (", negative_binomial_parameter_r, ") " + , "were not correctly set"); } negative_binomial_parameter_p += nbin_prob; @@ -126,10 +120,9 @@ float KmerGraphWithCoverage::nbin_prob(uint32_t node_id, const uint32_t& sample_ float KmerGraphWithCoverage::lin_prob(uint32_t node_id, const uint32_t& sample_id) { - bool reads_were_mapped_to_this_kmer_graph = num_reads != 0; + const bool reads_were_mapped_to_this_kmer_graph = num_reads != 0; if (!reads_were_mapped_to_this_kmer_graph) { - FatalError() << "In KmerGraphWithCoverage::lin_prob(): impossible to compute " - << "lin_prob, no reads were mapped to this kmer graph"; + fatal_error("Impossible to compute lin_prob, no reads were mapped to this kmer graph"); } auto k = this->get_forward_covg(node_id, sample_id) + this->get_reverse_covg(node_id, sample_id); @@ -138,10 +131,9 @@ float KmerGraphWithCoverage::lin_prob(uint32_t node_id, const uint32_t& sample_i float KmerGraphWithCoverage::bin_prob(uint32_t node_id, const uint32_t& sample_id) { - bool reads_were_mapped_to_this_kmer_graph = num_reads != 0; + const bool reads_were_mapped_to_this_kmer_graph = num_reads != 0; if (!reads_were_mapped_to_this_kmer_graph) { - FatalError() << "In KmerGraphWithCoverage::bin_prob(): impossible to compute " - << "bin_prob, no reads were mapped to this kmer graph"; + fatal_error("Impossible to compute bin_prob, no reads were mapped to this kmer graph"); } return bin_prob(node_id, num_reads, sample_id); } @@ -149,17 +141,14 @@ float KmerGraphWithCoverage::bin_prob(uint32_t node_id, const uint32_t& sample_i float KmerGraphWithCoverage::bin_prob( const uint32_t& node_id, const uint32_t& num, const uint32_t& sample_id) { - bool binomial_parameter_p_is_set_correctly = binomial_parameter_p != 1; + const bool binomial_parameter_p_is_set_correctly = binomial_parameter_p != 1; if (!binomial_parameter_p_is_set_correctly) { - FatalError() << "In KmerGraphWithCoverage::bin_prob(): 
" - << "binomial_parameter_p (" << binomial_parameter_p << ")" - << " is not correctly set"; + fatal_error("Error when computing bin_prob: binomial_parameter_p (", binomial_parameter_p, ") is not correctly set"); } - bool node_exists = node_id < kmer_prg->nodes.size(); + const bool node_exists = node_id < kmer_prg->nodes.size(); if (!node_exists) { - FatalError() << "In KmerGraphWithCoverage::bin_prob(): " - << "attempt to access inexistent node" << node_id; + fatal_error("Error when computing bin_prob: attempt to access inexistent node ", node_id); } uint32_t sum_coverages = this->get_forward_covg(node_id, sample_id) @@ -192,19 +181,18 @@ float KmerGraphWithCoverage::get_prob( // is there no parameter check here? return nbin_prob(node_id, sample_id); } else if (prob_model == "bin") { - bool binomial_parameters_are_ok = (binomial_parameter_p < 1) && (num_reads > 0); + const bool binomial_parameters_are_ok = (binomial_parameter_p < 1) && (num_reads > 0); if (!binomial_parameters_are_ok) { - FatalError() << "In KmerGraphWithCoverage::get_prob(): binomial parameters " - << "are not ok (binomial_parameter_p = " << binomial_parameter_p << ", " - << "num_reads = " << num_reads; + fatal_error("Error when computing kmer prob: binomial parameters are not ok (binomial_parameter_p = ", binomial_parameter_p, ", " + , "num_reads = ", num_reads); } return bin_prob(node_id, sample_id); } else if (prob_model == "lin") { // is there no parameter check here? return lin_prob(node_id, sample_id); } else { - FatalError() << "Invalid probability model for kmer coverage distribution: " - << "should be nbin, bin or lin"; + fatal_error("Invalid probability model for kmer coverage distribution: ", prob_model, + ". 
Should be nbin, bin or lin"); } } @@ -318,9 +306,9 @@ float KmerGraphWithCoverage::find_max_path(std::vector& maxpath, } } - bool path_was_found_through_the_kmer_PRG = length_of_maxpath_from_node[0] > 0; + const bool path_was_found_through_the_kmer_PRG = length_of_maxpath_from_node[0] > 0; if (!path_was_found_through_the_kmer_PRG) { - FatalError() << "In KmerGraphWithCoverage::find_max_path(), found no path through kmer prg"; + fatal_error("Error when finding max path: found no path through kmer prg"); } return prob_path(maxpath, sample_id, prob_model); @@ -458,10 +446,9 @@ void KmerGraphWithCoverage::load(const std::string& filepath) if (line[0] == 'S') { split_line = split(line, "\t"); - bool line_is_consistent = split_line.size() >= 4; + const bool line_is_consistent = split_line.size() >= 4; if (!line_is_consistent) { - FatalError() << "In KmerGraphWithCoverage::load(), line \"" << line << "\" " - << "is inconsistent"; + fatal_error("Error reading GFA. Offending line: ", line); } id = std::stoi(split_line[1]); @@ -478,10 +465,9 @@ void KmerGraphWithCoverage::load(const std::string& filepath) if (line[0] == 'S') { split_line = split(line, "\t"); - bool line_is_consistent = split_line.size() >= 4; + const bool line_is_consistent = split_line.size() >= 4; if (!line_is_consistent) { - FatalError() << "In KmerGraphWithCoverage::load(), line \"" << line << "\" " - << "is inconsistent"; + fatal_error("Error reading GFA. Offending line: ", line); } id = stoi(split_line[1]); @@ -489,9 +475,9 @@ void KmerGraphWithCoverage::load(const std::string& filepath) char c = ss.peek(); if (!isdigit(c)) { - FatalError() << "In KmerGraphWithCoverage::load(), line \"" << line << "\": " - << "Cannot read in this sort of kmergraph GFA as it " - << "does not label nodes with their PRG path"; + fatal_error("Error reading GFA: cannot read in this sort of kmergraph GFA as it ", + "does not label nodes with their PRG path. 
", + "Offending line: ", line); } ss >> p; @@ -499,12 +485,11 @@ void KmerGraphWithCoverage::load(const std::string& filepath) // add_node(p); KmerNodePtr n = std::make_shared(id, p); - bool id_is_consistent = (id == kmer_prg->nodes.size() or num_nodes - id == kmer_prg->nodes.size()); + const bool id_is_consistent = (id == kmer_prg->nodes.size() or num_nodes - id == kmer_prg->nodes.size()); if (!id_is_consistent) { - FatalError() << "In KmerGraphWithCoverage::load(), id is inconsistent." - << "id = " << id << ", " - << "nodes.size() = " << kmer_prg->nodes.size() << ", " - << "num_nodes = " << num_nodes; + fatal_error("Error reading GFA: node ID is inconsistent.", + "id = ", id, ", ", "nodes.size() = ", kmer_prg->nodes.size(), ", ", + "num_nodes = ", num_nodes); } kmer_prg->nodes.push_back(n); @@ -522,26 +507,24 @@ void KmerGraphWithCoverage::load(const std::string& filepath) } else if (line[0] == 'L') { split_line = split(line, "\t"); - bool line_is_consistent = split_line.size() >= 5; + const bool line_is_consistent = split_line.size() >= 5; if (!line_is_consistent) { - FatalError() << "In KmerGraphWithCoverage::load(), line \"" << line << "\" " - << "is inconsistent"; + fatal_error("Error reading GFA. Offending line: ", line); } - int from_node = stoi(split_line[1]); - int to_node = stoi(split_line[3]); - - bool from_node_in_range = from_node < (int)outnode_counts.size(); - bool to_node_in_range = to_node < (int)innode_counts.size(); + const int from_node = stoi(split_line[1]); + const int to_node = stoi(split_line[3]); + const bool from_node_in_range = from_node < (int)outnode_counts.size(); + const bool to_node_in_range = to_node < (int)innode_counts.size(); if (!from_node_in_range) { - FatalError() << "In KmerGraphWithCoverage::load(), line \"" << line << "\": " - << "from_node out of range: " - << from_node << ">=" << outnode_counts.size(); + fatal_error("Error reading GFA: from_node out of range: " + , from_node, ">=", outnode_counts.size(), + ". 
Offending line: ", line); } if (!to_node_in_range) { - FatalError() << "In KmerGraphWithCoverage::load(), line \"" << line << "\": " - << "to_node out of range: " - << to_node << ">=" << innode_counts.size(); + fatal_error("Error reading GFA: to_node out of range: " + , to_node, ">=", innode_counts.size(), + ". Offending line: ", line); } outnode_counts[stoi(split_line[1])] += 1; @@ -555,9 +538,9 @@ void KmerGraphWithCoverage::load(const std::string& filepath) id = 0; for (const auto& n : kmer_prg->nodes) { - bool id_is_consistent = (kmer_prg->nodes[id]->id == id) && (n->id < outnode_counts.size()) && (n->id < innode_counts.size()); + const bool id_is_consistent = (kmer_prg->nodes[id]->id == id) && (n->id < outnode_counts.size()) && (n->id < innode_counts.size()); if (!id_is_consistent) { - FatalError() << "In KmerGraphWithCoverage::load(), Node: " << n << " has inconsistent id, should be " << id; + fatal_error("Error reading GFA: node: ", n, " has inconsistent id, should be ", id); } id++; n->out_nodes.reserve(outnode_counts[n->id]); @@ -571,10 +554,9 @@ void KmerGraphWithCoverage::load(const std::string& filepath) if (line[0] == 'L') { split_line = split(line, "\t"); - bool line_is_consistent = split_line.size() >= 5; + const bool line_is_consistent = split_line.size() >= 5; if (!line_is_consistent) { - FatalError() << "In KmerGraphWithCoverage::load(), line \"" << line << "\" " - << "is inconsistent"; + fatal_error("Error reading GFA. 
Offending line: ", line); } if (split_line[2] == split_line[4]) { @@ -589,7 +571,7 @@ void KmerGraphWithCoverage::load(const std::string& filepath) } } } else { - BOOST_LOG_TRIVIAL(error) << "Unable to open kmergraph file " << filepath; + fatal_error("Error reading GFA: unable to open kmergraph file", filepath); exit(1); } } \ No newline at end of file diff --git a/src/localgraph.cpp b/src/localgraph.cpp index 2ce6de6f..3a895fc1 100644 --- a/src/localgraph.cpp +++ b/src/localgraph.cpp @@ -1,11 +1,4 @@ -#include -#include -#include - -#include - #include "localgraph.h" -#include "utils.h" #define assert_msg(x) !(std::cerr << "Assertion failed: " << x << std::endl) @@ -16,9 +9,11 @@ LocalGraph::~LocalGraph() { nodes.clear(); } void LocalGraph::add_node( const uint32_t& id, const std::string& seq, const Interval& pos) { - assert(seq.length() == pos.length); - assert(id < std::numeric_limits::max() - || assert_msg("WARNING, reached max local graph node size")); + const bool sequence_and_interval_length_match = seq.length() == pos.length; + if (!sequence_and_interval_length_match) { + fatal_error("Error adding node to Local Graph: sequence and interval length do not match"); + } + auto it = nodes.find(id); if (it == nodes.end()) { LocalNodePtr n(std::make_shared(seq, pos, id)); @@ -30,7 +25,11 @@ void LocalGraph::add_node( intervalTree.add(pos.start, pos.get_end(), n); startIndexOfAllIntervals[pos.start] = n; } else { - assert((it->second->seq == seq) && (it->second->pos == pos)); + bool node_with_same_id_seq_and_pos_already_added = (it->second->seq == seq) && (it->second->pos == pos); + if (!node_with_same_id_seq_and_pos_already_added) { + fatal_error("Error adding node to Local Graph: node with ID ", id, + " already exists in graph, but with different sequence or pos"); + } } } diff --git a/src/prg/path.cpp b/src/prg/path.cpp index bedadaa5..39f82bb1 100644 --- a/src/prg/path.cpp +++ b/src/prg/path.cpp @@ -62,7 +62,7 @@ std::vector prg::Path::nodes_along_path(const 
LocalPRG& localPrg) // redudant call, return the memoized local node path return memoizedLocalNodePath; } else { - FatalError() << "Bug on prg::Path::nodes_along_path()"; + fatal_error("Bug on prg::Path::nodes_along_path()"); } } diff --git a/src/utils.cpp b/src/utils.cpp index 1c698ad6..9ef569ed 100644 --- a/src/utils.cpp +++ b/src/utils.cpp @@ -535,7 +535,7 @@ void open_file_for_reading(const std::string& file_path, std::ifstream& stream) { stream.open(file_path); if (!stream.is_open()) { - FatalError() << "Error opening file " << file_path; + fatal_error("Error opening file ", file_path); } } @@ -543,7 +543,7 @@ void open_file_for_writing(const std::string& file_path, std::ofstream& stream) { stream.open(file_path); if (!stream.is_open()) { - FatalError() << "Error opening file " << file_path; + fatal_error("Error opening file ", file_path); } } diff --git a/test/estimate_parameters_test.cpp b/test/estimate_parameters_test.cpp index 57235846..e2c4ddd0 100644 --- a/test/estimate_parameters_test.cpp +++ b/test/estimate_parameters_test.cpp @@ -1,9 +1,9 @@ #include -#include #include #include "gtest/gtest.h" #include "pangenome/pangraph.h" #include "estimate_parameters.h" +#include "test_helpers.h" using namespace std; @@ -61,32 +61,36 @@ TEST(EstimateParameters_FitVarianceCovg, SimpleVar_ZeroThreshTwo) EXPECT_NEAR(5.529475392, fit_variance_covg(v, mean, 2), 0.000001); } -TEST(EstimateParameters_FitNegativeBinomial, MeanZero_Death) +TEST(EstimateParameters_FitNegativeBinomial, MeanZero_FatalRuntimeError) { double mean = 0, variance = 1; float p, r; - EXPECT_DEATH(fit_negative_binomial(mean, variance, p, r), ""); + ASSERT_EXCEPTION(fit_negative_binomial(mean, variance, p, r), FatalRuntimeError, + "Negative binomial parameters are invalid"); } -TEST(EstimateParameters_FitNegativeBinomial, VarianceZero_Death) +TEST(EstimateParameters_FitNegativeBinomial, VarianceZero_FatalRuntimeError) { double mean = 1, variance = 0; float p, r; - 
EXPECT_DEATH(fit_negative_binomial(mean, variance, p, r), ""); + ASSERT_EXCEPTION(fit_negative_binomial(mean, variance, p, r), FatalRuntimeError, + "Negative binomial parameters are invalid"); } -TEST(EstimateParameters_FitNegativeBinomial, MeanVarianceEqual_Death) +TEST(EstimateParameters_FitNegativeBinomial, MeanVarianceEqual_FatalRuntimeError) { double mean = 1, variance = 1; float p, r; - EXPECT_DEATH(fit_negative_binomial(mean, variance, p, r), ""); + ASSERT_EXCEPTION(fit_negative_binomial(mean, variance, p, r), FatalRuntimeError, + "Negative binomial parameters are invalid"); } -TEST(EstimateParameters_FitNegativeBinomial, MeanGreaterThanVariance_Death) +TEST(EstimateParameters_FitNegativeBinomial, MeanGreaterThanVariance_FatalRuntimeError) { double mean = 2, variance = 1; float p, r; - EXPECT_DEATH(fit_negative_binomial(mean, variance, p, r), ""); + ASSERT_EXCEPTION(fit_negative_binomial(mean, variance, p, r), FatalRuntimeError, + "Negative binomial parameters are invalid"); } TEST(EstimateParameters_FitNegativeBinomial, SimpleFit) diff --git a/test/fastaq_test.cpp b/test/fastaq_test.cpp index c69a9dd1..bec09357 100644 --- a/test/fastaq_test.cpp +++ b/test/fastaq_test.cpp @@ -1,6 +1,7 @@ #include "gtest/gtest.h" #include "fastaq.h" #include +#include "test_helpers.h" using namespace std; @@ -137,13 +138,15 @@ TEST(AltCovgToScore, CrazyHighCovg_ReturnLastPrintableAscii) EXPECT_EQ(result, expected); } -TEST(FastaqTest, add_entry_catch_asserts) +TEST(FastaqTest, add_entry_FatalRuntimeError) { Fastaq f; - EXPECT_DEATH(f.add_entry("", "ACGT", { 0, 1, 2, 3 }, 40), ""); - EXPECT_DEATH(f.add_entry("dummy", "ACGT", { 0, 1, 2 }, 40), ""); - EXPECT_DEATH(f.add_entry("dummy", "ACG", { 0, 1, 2, 3 }, 40), ""); - // EXPECT_DEATH(f.add_entry("dummy", "ACGT", {0, 1, 2, 3}, 0), ""); + ASSERT_EXCEPTION(f.add_entry("", "ACGT", { 0, 1, 2, 3 }, 40), FatalRuntimeError, + "Error adding entry to Fasta/q file"); + ASSERT_EXCEPTION(f.add_entry("dummy", "ACGT", { 0, 1, 2 }, 40), 
FatalRuntimeError, + "Error adding entry to Fasta/q file"); + ASSERT_EXCEPTION(f.add_entry("dummy", "ACG", { 0, 1, 2, 3 }, 40), FatalRuntimeError, + "Error adding entry to Fasta/q file"); } TEST(FastaqTest, add_entry_works) diff --git a/test/kmergraph_test.cpp b/test/kmergraph_test.cpp index 746d1731..3b31f520 100644 --- a/test/kmergraph_test.cpp +++ b/test/kmergraph_test.cpp @@ -9,6 +9,8 @@ #include #include #include +#include "fatal_error.h" +#include "test_helpers.h" using namespace prg; @@ -442,8 +444,9 @@ TEST(KmerGraphTest, load) EXPECT_EQ(kg, read_kg); } -TEST(KmerGraphTest, load_prg) +TEST(KmerGraphTest, load_prg_FatalRuntimeError) { KmerGraph read_kg; - EXPECT_DEATH(read_kg.load("kmergraph_test.gfa"), ""); + ASSERT_EXCEPTION(read_kg.load("kmergraph_test.gfa"), FatalRuntimeError, + "Error reading GFA"); } diff --git a/test/kmergraphwithcoverage_test.cpp b/test/kmergraphwithcoverage_test.cpp index 4bef2ee5..1d7d544b 100644 --- a/test/kmergraphwithcoverage_test.cpp +++ b/test/kmergraphwithcoverage_test.cpp @@ -8,6 +8,7 @@ #include #include #include +#include "test_helpers.h" using namespace prg; @@ -240,10 +241,13 @@ TEST(KmerGraphWithCoverageTest, set_p) { KmerGraph kmergraph; KmerGraphWithCoverage kmergraph_with_coverage(&kmergraph); - EXPECT_DEATH(kmergraph_with_coverage.set_binomial_parameter_p(0.4), ""); + ASSERT_EXCEPTION(kmergraph_with_coverage.set_binomial_parameter_p(0.4), FatalRuntimeError, + "Error setting binomial parameter p, invalid parameters"); kmergraph_with_coverage.kmer_prg->k = 3; - EXPECT_DEATH(kmergraph_with_coverage.set_binomial_parameter_p(0), ""); - EXPECT_DEATH(kmergraph_with_coverage.set_binomial_parameter_p(1), ""); + ASSERT_EXCEPTION(kmergraph_with_coverage.set_binomial_parameter_p(0), FatalRuntimeError, + "Error setting binomial parameter p, invalid parameters"); + ASSERT_EXCEPTION(kmergraph_with_coverage.set_binomial_parameter_p(1), FatalRuntimeError, + "Error setting binomial parameter p, invalid parameters"); 
kmergraph_with_coverage.set_binomial_parameter_p(0.5); EXPECT_EQ(1 / exp(1.5) - 0.00001 <= kmergraph_with_coverage.binomial_parameter_p and 1 / exp(1.5) + 0.00001 >= kmergraph_with_coverage.binomial_parameter_p, @@ -264,7 +268,8 @@ TEST(KmerGraphWithCoverageTest, prob_failNoNodes) uint32_t sample_id = 0; KmerGraph kmergraph; KmerGraphWithCoverage kmergraph_with_coverage(&kmergraph); - EXPECT_DEATH(kmergraph_with_coverage.bin_prob(0, sample_id), ""); + ASSERT_EXCEPTION(kmergraph_with_coverage.bin_prob(0, sample_id), + FatalRuntimeError, "Impossible to compute bin_prob, no reads were mapped to this kmer graph"); } TEST(KmerGraphWithCoverageTest, prob_failNoP) @@ -278,7 +283,8 @@ TEST(KmerGraphWithCoverageTest, prob_failNoP) kmergraph.add_node(p); KmerGraphWithCoverage kmergraph_with_coverage(&kmergraph); - EXPECT_DEATH(kmergraph_with_coverage.bin_prob(0, sample_id), ""); + ASSERT_EXCEPTION(kmergraph_with_coverage.bin_prob(0, sample_id), + FatalRuntimeError, "Impossible to compute bin_prob, no reads were mapped to this kmer graph"); } TEST(KmerGraphWithCoverageTest, prob_failNoNumReads) @@ -295,7 +301,8 @@ TEST(KmerGraphWithCoverageTest, prob_failNoNumReads) kmergraph_with_coverage.kmer_prg->k = 3; kmergraph_with_coverage.set_binomial_parameter_p(0.5); - EXPECT_DEATH(kmergraph_with_coverage.bin_prob(0, sample_id), ""); + ASSERT_EXCEPTION(kmergraph_with_coverage.bin_prob(0, sample_id), + FatalRuntimeError, "Impossible to compute bin_prob, no reads were mapped to this kmer graph"); } TEST(KmerGraphWithCoverageTest, prob_simple) @@ -400,9 +407,9 @@ TEST(KmerGraphWithCoverageTest, findMaxPath_InvalidProbModel) vector mp; kmergraph_with_coverage.set_binomial_parameter_p(0.01); - EXPECT_DEATH(kmergraph_with_coverage.find_max_path( - mp, "exp", max_num_kmers_to_average, sample_id), - ""); + ASSERT_EXCEPTION( + kmergraph_with_coverage.find_max_path(mp, "exp", max_num_kmers_to_average, sample_id), + FatalRuntimeError, "Invalid probability model for kmer coverage 
distribution"); } TEST(KmerGraphWithCoverageTest, findMaxPathSimple) @@ -809,5 +816,6 @@ TEST(KmerGraphWithCoverageTest, load_prg) { KmerGraph kmergraph; KmerGraphWithCoverage read_kmergraph_with_coverage(&kmergraph); - EXPECT_DEATH(read_kmergraph_with_coverage.load("kmergraph_test.gfa"), ""); + ASSERT_EXCEPTION(read_kmergraph_with_coverage.load("kmergraph_test.gfa"), + FatalRuntimeError, "Error reading GFA"); } diff --git a/test/localgraph_test.cpp b/test/localgraph_test.cpp index f74f344b..393cfbd5 100644 --- a/test/localgraph_test.cpp +++ b/test/localgraph_test.cpp @@ -7,6 +7,7 @@ #include "localnode.h" #include #include +#include "test_helpers.h" using namespace std; @@ -23,8 +24,10 @@ TEST(LocalGraphTest, add_node) EXPECT_EQ(ln1, *lg1.nodes[0]); // add impossible nodes and expect and error - EXPECT_DEATH(lg1.add_node(0, "AGGT", Interval(0, 4)), ""); - EXPECT_DEATH(lg1.add_node(1, "AGG", Interval(0, 4)), ""); + ASSERT_EXCEPTION(lg1.add_node(0, "AGGT", Interval(0, 4)), FatalRuntimeError, + "Error adding node to Local Graph"); + ASSERT_EXCEPTION(lg1.add_node(1, "AGG", Interval(0, 4)), FatalRuntimeError, + "Error adding node to Local Graph"); } TEST(LocalGraphTest, add_edge) diff --git a/test/test_helpers.h b/test/test_helpers.h index 18511256..3a86bb44 100644 --- a/test/test_helpers.h +++ b/test/test_helpers.h @@ -2,8 +2,12 @@ #define PANDORA_TEST_HELPERS_H #include +#include +#include #include "vcf.h" +using ::testing::HasSubstr; + template > class CONT_TYPE> bool equal_containers(const CONT_TYPE& lhs, const CONT_TYPE& rhs) @@ -25,4 +29,26 @@ extern GenotypingOptions default_genotyping_options; VCF create_VCF_with_default_parameters(size_t nb_of_samples = 1); + +// Adapted from https://stackoverflow.com/a/39578934 +#define ASSERT_EXCEPTION( TRY_BLOCK, EXCEPTION_TYPE, MESSAGE ) \ +try \ +{ \ + { TRY_BLOCK; } \ + FAIL() << "exception '" << MESSAGE << "' not thrown at all!"; \ +} \ +catch( const EXCEPTION_TYPE& e ) \ +{ \ + EXPECT_THAT(e.what(), HasSubstr(MESSAGE)) 
\ + << " exception message is incorrect. Expected the following " \ + "message:\n\n" \ + << MESSAGE << "\n"; \ +} \ +catch( ... ) \ +{ \ + FAIL() << "exception '" << MESSAGE \ + << "' not thrown with expected type '" << #EXCEPTION_TYPE \ + << "'!"; \ +} + #endif // PANDORA_TEST_HELPERS_H From a894a7d4fca08caf8fb9fc7ad526ed8a92fd28cb Mon Sep 17 00:00:00 2001 From: Leandro Ishi Date: Wed, 3 Feb 2021 01:37:06 -0400 Subject: [PATCH 10/37] WIP: refactoring asserts --- include/fatal_error.h | 1 - include/interval.h | 3 + include/kmergraphwithcoverage.h | 5 +- include/localPRG.h | 4 + include/minihit.h | 1 + include/minimizer.h | 1 + include/noise_filtering.h | 2 - include/sampleinfo.h | 15 +- include/utils.h | 1 + src/compare_main.cpp | 7 +- src/de_bruijn/graph.cpp | 27 +-- src/denovo_discovery/denovo_utils.cpp | 2 - src/denovo_discovery/discover_main.cpp | 6 + src/index_main.cpp | 6 + src/interval.cpp | 12 +- src/localPRG.cpp | 177 ++++++++++++------ src/localgraph.cpp | 82 +++++--- src/map_main.cpp | 6 + src/minihit.cpp | 17 +- src/minihits.cpp | 2 - src/minimizer.cpp | 10 +- src/noise_filtering.cpp | 20 +- src/pangenome/pangraph.cpp | 68 ++++--- src/sampleinfo.cpp | 21 ++- src/seq.cpp | 12 +- src/utils.cpp | 26 ++- src/vcf.cpp | 66 ++++--- src/vcfrecord.cpp | 18 +- test/de_bruijn_graph_test.cpp | 4 +- test/interval_test.cpp | 7 +- test/localPRG_test.cpp | 10 +- test/localgraph_test.cpp | 3 +- test/minihit_test.cpp | 4 +- test/minimizer_test.cpp | 9 +- test/noise_filtering_test.cpp | 6 +- test/pangraph_test.cpp | 7 +- test/sampleinfo_test.cpp | 64 ++++--- .../noisefiltering_test.pangraph.gfa | 16 +- test/vcf_test.cpp | 4 +- test/vcfrecord_test.cpp | 38 ++-- 40 files changed, 521 insertions(+), 269 deletions(-) diff --git a/include/fatal_error.h b/include/fatal_error.h index b9e51670..3b8648a3 100644 --- a/include/fatal_error.h +++ b/include/fatal_error.h @@ -2,7 +2,6 @@ #define PANDORA_FATAL_ERROR_H #include "backward.hpp" -#include "utils.h" #include template diff 
--git a/include/interval.h b/include/interval.h index 92ceef79..e239e47a 100644 --- a/include/interval.h +++ b/include/interval.h @@ -7,6 +7,7 @@ #include #include #include +#include "fatal_error.h" struct Interval { uint32_t start; @@ -30,6 +31,8 @@ struct Interval { bool empty() const; bool is_close(const Interval& other, uint32_t dist = 0) const; + + static bool sorted_interval_vector_has_overlapping_intervals (const std::vector &intervals); }; // Merge intervals within dist of each other. Changes the vector inplace. diff --git a/include/kmergraphwithcoverage.h b/include/kmergraphwithcoverage.h index 39a8fc7d..cf12b022 100644 --- a/include/kmergraphwithcoverage.h +++ b/include/kmergraphwithcoverage.h @@ -60,7 +60,10 @@ class KmerGraphWithCoverage { , num_reads { 0 } , kmer_prg { kmer_prg } { - assert(kmer_prg != nullptr); + const bool kmer_prg_is_invalid = kmer_prg == nullptr; + if(kmer_prg_is_invalid) { + fatal_error("Error building Kmer Graph With Coverage: kmer PRG is invalid"); + } zeroCoverages(); } KmerGraphWithCoverage(const KmerGraphWithCoverage& other) diff --git a/include/localPRG.h b/include/localPRG.h index d233b12a..33dbc8a0 100644 --- a/include/localPRG.h +++ b/include/localPRG.h @@ -31,6 +31,10 @@ class LocalPRG { // works only in a method, not an object variable std::vector nodes_along_path_core(const prg::Path&) const; + static void check_if_vector_of_subintervals_is_consistent_with_envelopping_interval( + const std::vector &subintervals, const Interval& envelopping_interval + ); + public: uint32_t next_site; // denotes the id of the next variant site to be processed - // TODO: maybe this should not be an object variable diff --git a/include/minihit.h b/include/minihit.h index 3027a20e..18ef5c30 100644 --- a/include/minihit.h +++ b/include/minihit.h @@ -5,6 +5,7 @@ #include #include "minimizer.h" #include "minirecord.h" +#include "fatal_error.h" /** * Describes a hit between a read an a minimizer from the PRG diff --git a/include/minimizer.h 
b/include/minimizer.h index e8eef85f..1ef32885 100644 --- a/include/minimizer.h +++ b/include/minimizer.h @@ -4,6 +4,7 @@ #include #include #include "interval.h" +#include "fatal_error.h" /** * Represents a minimizer from a read or sequence (not from a graph, as MiniRecord) diff --git a/include/noise_filtering.h b/include/noise_filtering.h index 4141c679..11c43883 100644 --- a/include/noise_filtering.h +++ b/include/noise_filtering.h @@ -7,8 +7,6 @@ #include "pangenome/pangraph.h" #include "de_bruijn/graph.h" -#define assert_msg(x) !(std::cerr << "Assertion failed: " << x << std::endl) - uint_least32_t node_plus_orientation_to_num(const uint_least32_t, const bool); void num_to_node_plus_orientation(uint_least32_t&, bool&, const uint_least32_t); diff --git a/include/sampleinfo.h b/include/sampleinfo.h index aa44683b..f51391b1 100644 --- a/include/sampleinfo.h +++ b/include/sampleinfo.h @@ -10,6 +10,7 @@ #include "Maths.h" #include "OptionsAggregator.h" #include +#include "fatal_error.h" // TODO: use memoization to speed up everything here // TODO: this class is doing too much. 
There is the concept of an allele info which can @@ -29,7 +30,9 @@ class SampleInfo { genotyping_options->get_sample_index_to_exp_depth_covg()[sample_index]) { bool at_least_one_allele = number_of_alleles >= 1; - assert(at_least_one_allele); + if (!at_least_one_allele) { + fatal_error("Error on creating VCF Sample INFOs: the VCF record has no alleles"); + } resize_to_the_number_of_alleles(); } @@ -413,7 +416,10 @@ template class SampleIndexToSampleInfoTemplate { SampleIndexToSampleInfoTemplate& other) { bool same_number_of_samples = this->size() == other.size(); - assert(same_number_of_samples); + if(!same_number_of_samples) { + fatal_error("Error solving genotype conflicts between two records: " + "number of samples is not consistent between both records"); + } for (size_t sample_index = 0; sample_index < this->size(); ++sample_index) { (*this)[sample_index].solve_incompatible_gt_conflict_with( @@ -458,7 +464,10 @@ template class SampleIndexToSampleInfoTemplate { const SampleIndexToSampleInfoTemplate& other) { bool same_number_of_samples = this->size() == other.size(); - assert(same_number_of_samples); + if(!same_number_of_samples) { + fatal_error("Error merging two records: " + "number of samples is not consistent between both records"); + } for (size_t sample_index = 0; sample_index < this->size(); ++sample_index) { (*this)[sample_index].merge_other_sample_info_into_this( diff --git a/include/utils.h b/include/utils.h index d79ef621..1866725c 100644 --- a/include/utils.h +++ b/include/utils.h @@ -107,6 +107,7 @@ void infer_most_likely_prg_path_for_pannode( // TODO : refactor all file open and closing to use these functions void open_file_for_reading(const std::string& file_path, std::ifstream& stream); void open_file_for_writing(const std::string& file_path, std::ofstream& stream); +void open_file_for_appending(const std::string& file_path, std::ofstream& stream); std::vector get_vector_of_strings_from_file(const std::string& file_path); diff --git 
a/src/compare_main.cpp b/src/compare_main.cpp index 2f66692a..4f82945c 100644 --- a/src/compare_main.cpp +++ b/src/compare_main.cpp @@ -233,10 +233,15 @@ int pandora_compare(CompareOptions& opt) opt.max_diff = 2 * opt.kmer_size + 1; } // ========== - if (opt.window_size > opt.kmer_size) { throw std::logic_error("W must NOT be greater than K"); } + if (opt.window_size <= 0) { + throw std::logic_error("W must be a positive integer"); + } + if (opt.kmer_size <= 0) { + throw std::logic_error("K must be a positive integer"); + } if (opt.genotype) { opt.output_vcf = true; diff --git a/src/de_bruijn/graph.cpp b/src/de_bruijn/graph.cpp index 9ca2bd2f..3d7041f8 100644 --- a/src/de_bruijn/graph.cpp +++ b/src/de_bruijn/graph.cpp @@ -11,8 +11,6 @@ #include "de_bruijn/graph.h" #include "noise_filtering.h" -#define assert_msg(x) !(std::cerr << "Assertion failed: " << x << std::endl) - using namespace debruijn; // Define a debruijn graph with s-mers of genes as nodes @@ -30,7 +28,11 @@ debruijn::Graph::~Graph() { nodes.clear(); } OrientedNodePtr debruijn::Graph::add_node( const std::deque& node_ids, uint32_t read_id) { - assert(node_ids.size() == size); + const bool correct_number_of_nodes_to_add = node_ids.size() == size; + if(!correct_number_of_nodes_to_add) { + fatal_error("Error adding node to de Bruijn Graph: expected node of size ", size, + ", received node of size ", node_ids.size()); + } if (node_hash.find(node_ids) != node_hash.end()) { nodes[node_hash[node_ids]]->read_ids.insert(read_id); @@ -43,7 +45,6 @@ OrientedNodePtr debruijn::Graph::add_node( NodePtr n; n = std::make_shared(next_id, node_ids, read_id); - assert(n != nullptr); nodes[next_id] = n; node_hash[node_ids] = next_id; @@ -52,8 +53,6 @@ OrientedNodePtr debruijn::Graph::add_node( } next_id++; - assert(next_id < std::numeric_limits::max() - || assert_msg("WARNING, reached max de bruijn graph node size")); return make_pair(n, true); } @@ -80,10 +79,15 @@ bool edge_is_valid(OrientedNodePtr from, OrientedNodePtr 
to) // Add directed edge between from and to void debruijn::Graph::add_edge(OrientedNodePtr from, OrientedNodePtr to) { - assert(from.first != nullptr and to.first != nullptr); - assert(edge_is_valid(from, to) - or assert_msg( - "edge from " << *from.first << " to " << *to.first << " is invalid")); + bool nodes_are_valid = from.first != nullptr and to.first != nullptr; + if(!nodes_are_valid) { + fatal_error("Error adding edge to de Bruijn Graph: from or to node is invalid"); + } + + if (!edge_is_valid(from, to)) { + fatal_error("Error adding edge to de Bruijn Graph: edge from ", *from.first, + " to ", *to.first, " is invalid"); + } if (from.second and from.first->out_nodes.find(to.first->id) == from.first->out_nodes.end()) { @@ -204,10 +208,7 @@ std::set> debruijn::Graph::get_unitigs() for (const auto& node_entry : nodes) { const auto& id = node_entry.first; - assert(id <= next_id); - const auto& node_ptr = node_entry.second; - assert(node_ptr != nullptr); bool node_seen = seen.find(id) != seen.end(); bool at_branch diff --git a/src/denovo_discovery/denovo_utils.cpp b/src/denovo_discovery/denovo_utils.cpp index ecea4258..bc754f6f 100644 --- a/src/denovo_discovery/denovo_utils.cpp +++ b/src/denovo_discovery/denovo_utils.cpp @@ -1,7 +1,5 @@ #include "denovo_discovery/denovo_utils.h" -#define assert_msg(x) !(std::cerr << "Assertion failed: " << x << std::endl) - PathComponents find_interval_and_flanks_in_localpath(const Interval& interval, const std::vector& local_node_max_likelihood_path) { diff --git a/src/denovo_discovery/discover_main.cpp b/src/denovo_discovery/discover_main.cpp index c23a05b9..0cfce26a 100644 --- a/src/denovo_discovery/discover_main.cpp +++ b/src/denovo_discovery/discover_main.cpp @@ -214,6 +214,12 @@ int pandora_discover(DiscoverOptions& opt) if (opt.window_size > opt.kmer_size) { throw std::logic_error("W must NOT be greater than K"); } + if (opt.window_size <= 0) { + throw std::logic_error("W must be a positive integer"); + } + if 
(opt.kmer_size <= 0) { + throw std::logic_error("K must be a positive integer"); + } fs::create_directories(opt.outdir); diff --git a/src/index_main.cpp b/src/index_main.cpp index 21e00d91..5bfa9963 100644 --- a/src/index_main.cpp +++ b/src/index_main.cpp @@ -51,6 +51,12 @@ int pandora_index(IndexOptions const& opt) if (opt.window_size > opt.kmer_size) { throw std::logic_error("W must NOT be greater than K"); } + if (opt.window_size <= 0) { + throw std::logic_error("W must be a positive integer"); + } + if (opt.kmer_size <= 0) { + throw std::logic_error("K must be a positive integer"); + } LocalPRG::do_path_memoization_in_nodes_along_path_method = true; diff --git a/src/interval.cpp b/src/interval.cpp index 2eed4dbf..7fff067e 100644 --- a/src/interval.cpp +++ b/src/interval.cpp @@ -4,7 +4,7 @@ Interval::Interval(uint32_t s, uint32_t e) : start(s) { if (e < start) { - throw std::logic_error("Interval end cannot be less than the interval start"); + fatal_error("Error when building interval: interval end cannot be less than the interval start"); } // intervals need to be exclusive of end so that empty strings can be represented length = e - start; @@ -94,3 +94,13 @@ void merge_intervals_within(std::vector& intervals, const uint32_t dis intervals.resize(prev_idx + 1); } + +bool Interval::sorted_interval_vector_has_overlapping_intervals (const std::vector &intervals) { + for (uint32_t index = 1; index < intervals.size(); ++index) { + bool there_is_overlap = intervals[index - 1].get_end() > intervals[index].start; + if (there_is_overlap) { + return true; + } + } + return false; +} \ No newline at end of file diff --git a/src/localPRG.cpp b/src/localPRG.cpp index aacfa3c9..2603c2d8 100644 --- a/src/localPRG.cpp +++ b/src/localPRG.cpp @@ -52,15 +52,21 @@ bool LocalPRG::isalpha_string(const std::string& s) const std::string LocalPRG::string_along_path(const prg::Path& p) const { - assert(p.get_start() <= seq.length()); - assert(p.get_end() <= seq.length()); + const bool 
path_is_inside_the_PRG = (p.get_start() <= seq.length()) && + (p.get_end() <= seq.length()); + if(!path_is_inside_the_PRG) { + fatal_error("Error getting sequence along PRG path: path is out of range"); + } std::string s; for (const auto& it : p) { s += seq.substr(it.start, it.length); } - assert(s.length() == p.length() - || assert_msg("sequence length " - << s.length() << " is not equal to path length " << p.length())); + + bool sequence_and_path_have_the_same_length = s.length() == p.length(); + if(!sequence_and_path_have_the_same_length) { + fatal_error("Error getting sequence along PRG path: the sequence generated ", + "has a length different from the path"); + } return s; } @@ -129,6 +135,28 @@ std::vector LocalPRG::nodes_along_path_core(const prg::Path& p) co return path_nodes; } +void LocalPRG::check_if_vector_of_subintervals_is_consistent_with_envelopping_interval( + const std::vector &subintervals, const Interval& envelopping_interval +) { + bool invariant_region_starts_at_or_after_given_interval = subintervals[0].start >= envelopping_interval.start; + if (!invariant_region_starts_at_or_after_given_interval) { + fatal_error("When splitting PRG by site, invariant region starts before (", + subintervals[0].start, ") the given interval (", envelopping_interval.start, ")"); + } + + + bool there_is_overlap = Interval::sorted_interval_vector_has_overlapping_intervals(subintervals); + if (there_is_overlap) { + fatal_error("When splitting PRG by site, there are overlapping intervals"); + } + + bool site_ends_before_given_interval = subintervals.back().get_end() <= envelopping_interval.get_end(); + if (!site_ends_before_given_interval) { + fatal_error("When splitting PRG by site, site ends (", + subintervals.back().get_end(), ") after given interval (", envelopping_interval.get_end(), ")"); + } +} + /* Split the interval first into the invariant region coming before it, all its alleles and then the rest of the PRG. 
* Example: @@ -197,14 +225,7 @@ std::vector LocalPRG::split_by_site(const Interval& i) const v.emplace_back(Interval(k, i.get_end())); } - assert(v[0].start >= i.start); - for (uint32_t l = 1; l != v.size(); ++l) { - assert(v[l - 1].get_end() <= v[l].start - || assert_msg(v[l - 1].get_end() - << ">" << v[l].start << " giving overlapping intervals " << v[l - 1] - << " and " << v[l])); - } - assert(v.back().get_end() <= i.get_end()); + check_if_vector_of_subintervals_is_consistent_with_envelopping_interval(v, i); // then split by var site + 1, I.E. SPLITTING BY THE INTERVALS OF THE ALLELES - THIS // IS WHAT IS RETURNED @@ -251,15 +272,7 @@ std::vector LocalPRG::split_by_site(const Interval& i) const w = x; } - assert(w[0].start >= i.start); - for (uint32_t l = 1; l != w.size(); ++l) { - assert(w[l - 1].get_end() <= w[l].start - || assert_msg(w[l - 1].get_end() - << ">" << w[l].start << " giving overlapping intervals " << w[l - 1] - << " and " << w[l] - << " when splitting seq :" << seq.substr(i.start, i.length))); - } - assert(w.back().get_end() <= i.get_end()); + check_if_vector_of_subintervals_is_consistent_with_envelopping_interval(w, i); return w; } @@ -356,7 +369,10 @@ LocalPRG::build_graph( end_ids = build_graph(v.back(), end_ids, current_level); } if (start_id == 0) { - assert(end_ids.size() == 1); + bool graph_has_a_sink_node = end_ids.size() == 1; + if (!graph_has_a_sink_node) { + fatal_error("Error building local PRG graph from interval: built graph has no sink node"); + } } return end_ids; } @@ -574,7 +590,6 @@ void LocalPRG::minimizer_sketch(const std::shared_ptr& index, const uint3 // from each previous walk kn = current_leaves.front(); current_leaves.pop_front(); - assert(kn->khash < std::numeric_limits::max()); // find all paths which are this kmer-minimizer shifted by one place along the // graph @@ -594,7 +609,12 @@ void LocalPRG::minimizer_sketch(const std::shared_ptr& index, const uint3 while (!shifts.empty()) { // goes through all shifted paths v 
= shifts.front(); // get the first shifted path shifts.pop_front(); - assert(v.back()->length() == k); + + bool shifted_path_has_k_bases = v.back()->length() == k; + if(!shifted_path_has_k_bases) { + fatal_error("Error when minimizing a local PRG: shifted path does not have k (", + k, ") bases"); + } kmer = string_along_path(*(v.back())); kh = hash.kmerhash(kmer, k); if (std::min(kh.first, kh.second) <= kn->khash) { @@ -714,7 +734,11 @@ void LocalPRG::minimizer_sketch(const std::shared_ptr& index, const uint3 } // create a null end node, and for each end leaf add an edge to this terminus - assert(!end_leaves.empty()); + bool kmer_graph_has_leaves = !end_leaves.empty(); + if (!kmer_graph_has_leaves) { + fatal_error("Error when minimizing a local PRG: kmer graph does not have any leaves"); + } + d = { Interval((--(prg.nodes.end()))->second->pos.get_end(), (--(prg.nodes.end()))->second->pos.get_end()) }; kmer_path.initialize(d); @@ -725,9 +749,11 @@ void LocalPRG::minimizer_sketch(const std::shared_ptr& index, const uint3 } // print, check and return - assert(num_kmers_added == 0 or kmer_prg.nodes.size() == num_kmers_added - || assert_msg("nodes.size(): " << kmer_prg.nodes.size() - << " and num minikmers: " << num_kmers_added)); + bool number_of_kmers_added_is_consistent = (num_kmers_added == 0) or + (kmer_prg.nodes.size() == num_kmers_added); + if (!number_of_kmers_added_is_consistent) { + fatal_error("Error when minimizing a local PRG: incorrect number of kmers added"); + } kmer_prg.remove_shortcut_edges(); kmer_prg.check(); } @@ -772,7 +798,11 @@ std::vector LocalPRG::kmernode_path_from_localnode_path( } } - assert(!kmernode_path.empty()); + bool kmernode_path_is_empty = kmernode_path.empty(); + if (kmernode_path_is_empty) { + fatal_error("Error when converting local node path to kmer node path: received " + "non-empty local node path and returned an empty kmer node path"); + } return kmernode_path; } @@ -925,18 +955,27 @@ std::vector 
get_covgs_along_localnode_path(const PanNodePtr pan_node, k = j; for (const auto& interval : kmernode_ptr->path) { - assert(localnode_path[k]->pos.start <= interval.start - and localnode_path[k]->pos.get_end() >= interval.get_end()); + const LocalNodePtr &localnode = localnode_path[k]; + bool local_node_is_inside_kmer_path_interval = + (localnode->pos.start <= interval.start) and + (localnode->pos.get_end() >= interval.get_end()); + + if (!local_node_is_inside_kmer_path_interval) { + fatal_error("Error when getting coverages along local node path: " + "local node path and kmer node path are not consistent"); + } - start = interval.start - localnode_path[k]->pos.start; - end = std::min(start + interval.length, localnode_path[k]->pos.get_end()); + start = interval.start - localnode->pos.start; + end = std::min(start + interval.length, localnode->pos.get_end()); for (uint32_t l = start; l < end; ++l) { - assert(kmernode_ptr->id - < pan_node->kmer_prg_with_coverage.kmer_prg->nodes.size() - and pan_node->kmer_prg_with_coverage.kmer_prg - ->nodes[kmernode_ptr->id] - != nullptr); + bool kmernode_is_valid = + (kmernode_ptr->id < pan_node->kmer_prg_with_coverage.kmer_prg->nodes.size()) and + (pan_node->kmer_prg_with_coverage.kmer_prg->nodes[kmernode_ptr->id] != nullptr); + if (!kmernode_is_valid) { + fatal_error("Error when getting coverages along local node path: " + "kmer node is not valid"); + } coverages_for_each_base_in_localnode_path[k][l] = std::max(coverages_for_each_base_in_localnode_path[k][l], @@ -962,8 +1001,7 @@ void LocalPRG::write_covgs_to_file( const boost::filesystem::path& filepath, const std::vector& covgs) const { std::ofstream handle; - handle.open(filepath.string()); - assert(!handle.fail() or assert_msg("Could not open file " << filepath.string())); + open_file_for_writing(filepath.string(), handle); handle << ">" << name << std::endl; for (const auto& i : covgs) { @@ -978,8 +1016,7 @@ void LocalPRG::write_path_to_fasta(const boost::filesystem::path& 
filepath, const std::vector& lmp, const float& ppath) const { std::ofstream handle; - handle.open(filepath.string()); - assert(!handle.fail() or assert_msg("Could not open file " << filepath.string())); + open_file_for_writing(filepath.string(), handle); handle << ">" << name << "\tlog P(data|sequence)=" << ppath << std::endl; for (uint32_t j = 0; j != lmp.size(); ++j) { @@ -994,8 +1031,7 @@ void LocalPRG::append_path_to_fasta(const boost::filesystem::path& filepath, const std::vector& lmp, const float& ppath) const { std::ofstream handle; - handle.open(filepath.string(), std::ios::app); - assert(!handle.fail() or assert_msg("Could not open file " << filepath.string())); + open_file_for_appending(filepath.string(), handle); handle << ">" << name << "\tlog P(data|sequence)=" << ppath << std::endl; for (uint32_t j = 0; j != lmp.size(); ++j) { @@ -1010,8 +1046,7 @@ void LocalPRG::write_aligned_path_to_fasta(const boost::filesystem::path& filepa const std::vector& lmp, const float& ppath) const { std::ofstream handle; - handle.open(filepath.string()); - assert(!handle.fail() or assert_msg("Could not open file " << filepath.string())); + open_file_for_writing(filepath.string(), handle); handle << ">" << name << "\tlog P(data|sequence)=" << ppath << std::endl; @@ -1037,7 +1072,11 @@ void LocalPRG::build_vcf_from_reference_path( VCF& vcf, const std::vector& ref) const { BOOST_LOG_TRIVIAL(debug) << "Build VCF for prg " << name; - assert(!prg.nodes.empty()); // otherwise empty nodes -> segfault + + bool prg_is_empty = prg.nodes.empty(); + if (prg_is_empty) { + fatal_error("Error when building VCF from reference path: PRG is empty"); + } std::vector varpath; varpath.reserve(100); @@ -1077,8 +1116,17 @@ void LocalPRG::build_vcf_from_reference_path( } else { // we have come down a level, add the alts compared to this region level -= 1; - assert(level >= 0); - assert(!level_start.empty()); + + bool level_is_valid = level >= 0; + if (!level_is_valid) { + fatal_error("Error when 
building VCF from reference path: PRG level is negative"); + } + + bool previous_levels_are_empty = level_start.empty(); + if (previous_levels_are_empty) { + fatal_error("Error when building VCF from reference path: PRG or path is inconsistent (a site was closed without opening it)"); + } + // define ref and pos pos = 0; ref_seq = ""; @@ -1139,7 +1187,11 @@ void LocalPRG::build_vcf_from_reference_path( } // add sites to vcf - assert(pos + ref_seq.length() <= ref_length); + bool record_sequence_is_valid = pos + ref_seq.length() <= ref_length; + if (!record_sequence_is_valid) { + fatal_error("Error when building VCF from reference path: record sequence end (", pos + ref_seq.length(), + ") overflows reference length (", ref_length, ")"); + } for (auto& alt : alts) { for (auto& j : alt) { alt_seq += j->seq; @@ -1153,7 +1205,11 @@ void LocalPRG::build_vcf_from_reference_path( level_start.pop_back(); if (level == 0) { - assert(level_start.empty()); + bool all_sites_were_closed = level_start.empty(); + if (!all_sites_were_closed) { + fatal_error("Error when building VCF from reference path: PRG or path is inconsistent (reached level 0 without closing all sites)"); + } + vartype = "GRAPHTYPE=SIMPLE"; } } @@ -1168,7 +1224,20 @@ void LocalPRG:: const std::string& sample_name) const { BOOST_LOG_TRIVIAL(debug) << "Update VCF with sample path"; - assert(!prg.nodes.empty()); // otherwise empty nodes -> segfault + + bool prg_is_empty = prg.nodes.empty(); + if (prg_is_empty) { + fatal_error("Error when genotyping using max likelihood path: PRG is empty"); + } + bool reference_path_is_empty = rpath.empty(); + if (reference_path_is_empty) { + fatal_error("Error when genotyping using max likelihood path: reference path is empty"); + } + bool sample_path_is_empty = sample_path.empty(); + if (sample_path_is_empty) { + fatal_error("Error when genotyping using max likelihood path: sample path is empty"); + } + // if prg has only one node, simple case if (prg.nodes.size() == 1) { @@ 
-1177,10 +1246,8 @@ void LocalPRG:: std::vector refpath, samplepath; refpath.reserve(100); - assert(not rpath.empty()); refpath.push_back(rpath[0]); samplepath.reserve(100); - assert(not sample_path.empty()); samplepath.push_back(sample_path[0]); uint32_t ref_i = 1, sample_id = 1, pos = 0, pos_to = 0; std::vector sample_covg(6, 0); diff --git a/src/localgraph.cpp b/src/localgraph.cpp index 3a895fc1..6574cc68 100644 --- a/src/localgraph.cpp +++ b/src/localgraph.cpp @@ -1,7 +1,5 @@ #include "localgraph.h" -#define assert_msg(x) !(std::cerr << "Assertion failed: " << x << std::endl) - LocalGraph::LocalGraph() { } LocalGraph::~LocalGraph() { nodes.clear(); } @@ -37,16 +35,21 @@ void LocalGraph::add_edge(const uint32_t& from, const uint32_t& to) { auto from_it = nodes.find(from); auto to_it = nodes.find(to); - assert((from_it != nodes.end()) && (to_it != nodes.end())); - if ((from_it != nodes.end()) && (to_it != nodes.end())) { - LocalNodePtr f = (nodes.find(from)->second); - LocalNodePtr t = (nodes.find(to)->second); - assert(f->pos.get_end() <= t->pos.start - || assert_msg(f->pos.get_end() - << ">" << t->pos.start << " so cannot add edge from node " << *f - << " to node " << *t)); - f->outNodes.push_back(t); + + const bool both_nodes_exist = (from_it != nodes.end()) && (to_it != nodes.end()); + if(!both_nodes_exist) { + fatal_error("Cannot add edge to Local Graph: source (", from, ") or target (", + to, ") node does not exist"); + } + + LocalNodePtr f = (nodes.find(from)->second); + LocalNodePtr t = (nodes.find(to)->second); + + const bool nodes_do_not_overlap = f->pos.get_end() > t->pos.start; + if (nodes_do_not_overlap) { + fatal_error("Cannot add edge to Local Graph: source and target nodes do not overlap"); } + f->outNodes.push_back(t); } void LocalGraph::write_gfa(const std::string& filepath) const @@ -82,7 +85,12 @@ void LocalGraph::read_gfa(const std::string& filepath) while (getline(myfile, line).good()) { if (line[0] == 'S') { split_line = split(line, "\t"); 
- assert(split_line.size() >= 3); + + const bool line_is_consistent = split_line.size() >= 3; + if (!line_is_consistent) { + fatal_error("Error reading GFA. Offending line: ", line); + } + if (split_line[2] == "*") { split_line[2] = ""; } @@ -98,7 +106,12 @@ void LocalGraph::read_gfa(const std::string& filepath) while (getline(myfile, line).good()) { if (line[0] == 'L') { split_line = split(line, "\t"); - assert(split_line.size() >= 5); + + const bool line_is_consistent = split_line.size() >= 5; + if (!line_is_consistent) { + fatal_error("Error reading GFA. Offending line: ", line); + } + if (split_line[2] == split_line[4]) { from = stoi(split_line[1]); to = stoi(split_line[3]); @@ -120,11 +133,12 @@ std::vector LocalGraph::walk( { // node_id: where to start the walk, pos: the position in the node_id, len = k+w-1 -> // the length that the walk has to go through - we are sketching kmers in a graph // walks from position pos in node node for length len bases - assert( - (nodes.at(node_id)->pos.start <= pos && nodes.at(node_id)->pos.get_end() >= pos) - || assert_msg(nodes.at(node_id)->pos.start - << "<=" << pos << " and " << nodes.at(node_id)->pos.get_end() - << ">=" << pos)); // if this fails, pos given lies on a different node + const bool pos_exists_in_node = (nodes.at(node_id)->pos.start <= pos) && + (pos <= nodes.at(node_id)->pos.get_end()); + if(!pos_exists_in_node) { + fatal_error("Error walking Local Graph: pos ", pos, " does not exist in node ", node_id); + } + std::vector return_paths, walk_paths; return_paths.reserve(20); walk_paths.reserve(20); @@ -163,11 +177,12 @@ std::vector LocalGraph::walk_back( const uint32_t& node_id, const uint32_t& pos, const uint32_t& len) const { // walks from position pos in node back through prg for length len bases - assert( - (nodes.at(node_id)->pos.start <= pos && nodes.at(node_id)->pos.get_end() >= pos) - || assert_msg(nodes.at(node_id)->pos.start - << "<=" << pos << " and " << nodes.at(node_id)->pos.get_end() - << ">=" << 
pos)); // if this fails, pos given lies on a different node + const bool pos_exists_in_node = (nodes.at(node_id)->pos.start <= pos) && + (pos <= nodes.at(node_id)->pos.get_end()); + if(!pos_exists_in_node) { + fatal_error("Error walking Local Graph: pos ", pos, " does not exist in node ", node_id); + } + std::vector return_paths, walk_paths; return_paths.reserve(20); walk_paths.reserve(20); @@ -227,6 +242,11 @@ std::vector LocalGraph::nodes_along_string( { // Note expects the query string to start at the start of the PRG - can change this // later + const bool graph_is_empty = nodes.empty(); + if (graph_is_empty) { + fatal_error("Error getting nodes along a sequence: graph is empty"); + } + std::vector> u, v, w; // u <=> v -> w // ie reject paths in u, or extend and add to v // then set u=v and continue @@ -238,8 +258,6 @@ std::vector LocalGraph::nodes_along_string( std::string candidate_string = ""; bool extended = true; - assert(!nodes.empty()); // otherwise empty nodes -> segfault - // if there is only one node in PRG, simple case, do simple string compare if (nodes.size() == 1 and strcasecmp(query_string.c_str(), nodes.at(0)->seq.c_str()) == 0) { @@ -326,9 +344,12 @@ std::vector LocalGraph::nodes_along_string( std::vector LocalGraph::top_path() const { - std::vector npath; + const bool graph_is_empty = nodes.empty(); + if (graph_is_empty) { + fatal_error("Error getting top path in the graph: graph is empty"); + } - assert(!nodes.empty()); // otherwise empty nodes -> segfault + std::vector npath; npath.push_back(nodes.at(0)); while (not npath.back()->outNodes.empty()) { @@ -340,9 +361,12 @@ std::vector LocalGraph::top_path() const std::vector LocalGraph::bottom_path() const { - std::vector npath; + const bool graph_is_empty = nodes.empty(); + if (graph_is_empty) { + fatal_error("Error getting bottom path in the graph: graph is empty"); + } - assert(!nodes.empty()); // otherwise empty nodes -> segfault + std::vector npath; npath.push_back(nodes.at(0)); while 
(!npath.back()->outNodes.empty()) { diff --git a/src/map_main.cpp b/src/map_main.cpp index 004b722d..e1d77764 100644 --- a/src/map_main.cpp +++ b/src/map_main.cpp @@ -225,6 +225,12 @@ int pandora_map(MapOptions& opt) if (opt.window_size > opt.kmer_size) { throw std::logic_error("W must NOT be greater than K"); } + if (opt.window_size <= 0) { + throw std::logic_error("W must be a positive integer"); + } + if (opt.kmer_size <= 0) { + throw std::logic_error("K must be a positive integer"); + } if (opt.genotype) { opt.output_vcf = true; diff --git a/src/minihit.cpp b/src/minihit.cpp index cd8f4f79..fe2baa1b 100644 --- a/src/minihit.cpp +++ b/src/minihit.cpp @@ -7,8 +7,6 @@ #include "minihit.h" #include "prg/path.h" -#define assert_msg(x) !(std::cerr << "Assertion failed: " << x << std::endl) - MinimizerHit::MinimizerHit(const uint32_t i, const Minimizer& minimizer_from_read, const MiniRecord& minimizer_from_PRG) : read_id { i } @@ -16,15 +14,12 @@ MinimizerHit::MinimizerHit(const uint32_t i, const Minimizer& minimizer_from_rea , read_strand { minimizer_from_read.is_forward_strand } , minimizer_from_PRG { minimizer_from_PRG } { - - assert(minimizer_from_read.pos_of_kmer_in_read.length - == minimizer_from_PRG.path.length()); - assert(read_id < std::numeric_limits::max() - || assert_msg("Variable sizes too small to handle this number of reads")); - assert(minimizer_from_PRG.prg_id < std::numeric_limits::max() - || assert_msg("Variable sizes too small to handle this number of prgs")); - assert(minimizer_from_read.pos_of_kmer_in_read.length - == minimizer_from_PRG.path.length()); + bool both_minimizers_have_same_length = minimizer_from_read.pos_of_kmer_in_read.length + == minimizer_from_PRG.path.length(); + if(!both_minimizers_have_same_length) { + fatal_error("Error when storing minimizers: minimizer from read/sequence " + "and from PRG have different lengths"); + } } bool MinimizerHit::operator==(const MinimizerHit& y) const diff --git a/src/minihits.cpp 
b/src/minihits.cpp index 75daac52..5d3579a2 100644 --- a/src/minihits.cpp +++ b/src/minihits.cpp @@ -9,8 +9,6 @@ #include "minimizer.h" #include "utils.h" // for pointer_values_equal -#define assert_msg(x) !(std::cerr << "Assertion failed: " << x << std::endl) - void MinimizerHits::add_hit(const uint32_t i, const Minimizer& minimizer_from_read, const MiniRecord& minimizer_from_PRG) { diff --git a/src/minimizer.cpp b/src/minimizer.cpp index 91f83b60..1373391e 100644 --- a/src/minimizer.cpp +++ b/src/minimizer.cpp @@ -10,10 +10,12 @@ Minimizer::Minimizer(uint64_t s, uint32_t a, uint32_t b, bool c) , pos_of_kmer_in_read(Interval(a, b)) , is_forward_strand(c) { - assert(s <= pow(4, pos_of_kmer_in_read.length)); // used to check kmer length same - // as interval length. - // Can't any more but at least know if s is too big for a kmer of interval size to - // have generated it. + bool hash_value_is_consistend_with_kmer_interval_size + = s <= pow(4, pos_of_kmer_in_read.length); + if (!hash_value_is_consistend_with_kmer_interval_size) { + fatal_error("Error when building minimizer: hash value (", s, + ") is too big for kmer ", "of interval size ", pos_of_kmer_in_read.length); + } } Minimizer::~Minimizer() diff --git a/src/noise_filtering.cpp b/src/noise_filtering.cpp index 41698b89..7bd57f79 100644 --- a/src/noise_filtering.cpp +++ b/src/noise_filtering.cpp @@ -14,7 +14,11 @@ uint_least32_t node_plus_orientation_to_num( const uint_least32_t node_id, const bool orientation) { - assert(node_id < UINT_LEAST32_MAX / 2); + const bool node_id_is_consistent = node_id < UINT_LEAST32_MAX / 2; + if(!node_id_is_consistent) { + fatal_error("Error on converting node id and orientation to id only: " + "node_id (", node_id, ") should be < than ", UINT_LEAST32_MAX / 2); + } uint_least32_t r = 2 * node_id; if (orientation) { r += 1; @@ -59,7 +63,11 @@ bool overlap_forwards( const std::deque& node1, const std::deque& node2) { // second deque should extend first by 1 - assert(node1.size() >= 
node2.size()); + bool first_node_is_larger_or_same_size = node1.size() >= node2.size(); + if(!first_node_is_larger_or_same_size) { + fatal_error("Error on checking for overlaps in noise filtering: first node must be larger or have the same size as the second"); + } + uint32_t i = node1.size() - node2.size() + 1; uint32_t j = 0; while (i < node1.size() and j < node2.size()) { @@ -159,7 +167,13 @@ void dbg_node_ids_to_ids_and_orientations(const debruijn::Graph& dbg, if (hashed_pg_node_ids.empty()) { hashed_pg_node_ids = extend_hashed_pg_node_ids_forwards(dbg, dbg_node_ids); } - assert(!hashed_pg_node_ids.empty()); + + // TODO: give a better name to this bool once we understand what id does + bool hashed_pg_node_ids_is_empty = hashed_pg_node_ids.empty(); + if(hashed_pg_node_ids_is_empty) { + // TODO: improve this message + fatal_error("Error when noise filtering: hashed_pg_node_ids is empty"); + } hashed_node_ids_to_ids_and_orientations(hashed_pg_node_ids, node_ids, node_orients); } diff --git a/src/pangenome/pangraph.cpp b/src/pangenome/pangraph.cpp index 53c4512f..5f6e527f 100644 --- a/src/pangenome/pangraph.cpp +++ b/src/pangenome/pangraph.cpp @@ -14,8 +14,7 @@ #include "pangenome/panread.h" #include "pangenome/pansample.h" #include "fastaq_handler.h" - -#define assert_msg(x) !(std::cerr << "Assertion failed: " << x << std::endl) +#include "fatal_error.h" using namespace pangenome; @@ -38,7 +37,6 @@ void pangenome::Graph::add_read(const uint32_t& read_id) bool found = it != reads.end(); if (not found) { auto read_ptr = std::make_shared(read_id); - assert(read_ptr != nullptr); reads[read_id] = read_ptr; } } @@ -52,11 +50,8 @@ void pangenome::Graph::add_node(const std::shared_ptr& prg, uint32_t n node_ptr = std::make_shared( prg, node_id, samples.size()); // TODO: refactor this - holding the // reference to PRG is enough - assert(node_ptr != nullptr); nodes[node_id] = node_ptr; } - assert(node_id < std::numeric_limits::max() - or assert_msg("WARNING, node_id reached 
max pangraph node size")); } /** @@ -70,7 +65,13 @@ void update_node_info_with_this_read(const NodePtr& node_ptr, const ReadPtr& rea { node_ptr->covg += 1; node_ptr->reads.insert(read_ptr); - assert(node_ptr->covg == node_ptr->reads.size()); + + bool coverage_information_is_consistent_with_read_information = + node_ptr->covg == node_ptr->reads.size(); + if (!coverage_information_is_consistent_with_read_information) { + fatal_error("Error updating Pangraph node with read: coverage information " + "is not consistent with read information"); + } } // Checks that all hits in the cluster are from the given prg and read @@ -79,10 +80,16 @@ void check_correct_hits(const uint32_t prg_id, const uint32_t read_id, { for (const auto& hit_ptr : cluster) { bool hits_correspond_to_correct_read = read_id == hit_ptr->get_read_id(); - assert(hits_correspond_to_correct_read); + if (!hits_correspond_to_correct_read) { + fatal_error("Minimizer hits error: hit should be on read id ", read_id, + ", but it is on read id ", hit_ptr->get_read_id()); + } bool hits_correspond_to_correct_prg = prg_id == hit_ptr->get_prg_id(); - assert(hits_correspond_to_correct_prg); + if (!hits_correspond_to_correct_prg) { + fatal_error("Minimizer hits error: hit should be on PRG id ", prg_id, + ", but it is on PRG id ", hit_ptr->get_prg_id()); + } } } @@ -111,7 +118,6 @@ void pangenome::Graph::add_hits_between_PRG_and_read( // add and get the new read add_read(read_id); auto read_ptr = get_read(read_id); - assert(read_ptr != nullptr); // add and get the new node add_node(prg); @@ -228,8 +234,6 @@ void pangenome::Graph::split_node_by_reads(std::unordered_set& reads_al // (in the context of node_ids) with a new node while (nodes.find(next_id) != nodes.end()) { next_id++; - assert(next_id < std::numeric_limits::max() - || assert_msg("WARNING, next_id reached max pangraph node size")); } // define new node @@ -281,9 +285,13 @@ void pangenome::Graph::add_hits_to_kmergraphs( { for (const auto& node_entries : 
nodes) { Node& pangraph_node = *node_entries.second; - assert(pangraph_node.kmer_prg_with_coverage.kmer_prg != nullptr - and not pangraph_node.kmer_prg_with_coverage.kmer_prg->nodes.empty()); - + bool pangraph_node_has_a_valid_kmer_prg_with_coverage = + (pangraph_node.kmer_prg_with_coverage.kmer_prg != nullptr) and + (not pangraph_node.kmer_prg_with_coverage.kmer_prg->nodes.empty()); + if (!pangraph_node_has_a_valid_kmer_prg_with_coverage) { + fatal_error("Error adding hits to kmer graph: pangraph node does not have a " + "valid Kmer PRG with coverage"); + } uint32_t num_hits[2] = { 0, 0 }; // add hits @@ -294,11 +302,13 @@ void pangenome::Graph::add_hits_to_kmergraphs( for (const auto& minimizer_hit_ptr : hits.at(pangraph_node.prg_id)) { const auto& minimizer_hit = *minimizer_hit_ptr; - assert(minimizer_hit.get_kmer_node_id() - < pangraph_node.kmer_prg_with_coverage.kmer_prg->nodes.size()); - assert(pangraph_node.kmer_prg_with_coverage.kmer_prg - ->nodes[minimizer_hit.get_kmer_node_id()] - != nullptr); + bool minimizer_hit_kmer_node_id_is_valid = + (minimizer_hit.get_kmer_node_id() < pangraph_node.kmer_prg_with_coverage.kmer_prg->nodes.size()) && + (pangraph_node.kmer_prg_with_coverage.kmer_prg->nodes[minimizer_hit.get_kmer_node_id()] != nullptr); + if (!minimizer_hit_kmer_node_id_is_valid) { + fatal_error("Error adding hits to kmer graph: minimizer hit " + "kmer node is invalid"); + } if (minimizer_hit.is_forward()) { pangraph_node.kmer_prg_with_coverage.increment_forward_covg( @@ -339,13 +349,25 @@ void pangenome::Graph::copy_coverages_to_kmergraphs( const uint32_t ref_sample_id = 0; for (const auto& ref_node_entry : ref_pangraph.nodes) { const Node& ref_node = *ref_node_entry.second; - assert(nodes.find(ref_node.node_id) != nodes.end()); - Node& pangraph_node = *nodes[ref_node.node_id]; + bool ref_node_is_in_this_pangraph = nodes.find(ref_node.node_id) != nodes.end(); + if (!ref_node_is_in_this_pangraph) { + fatal_error("Error copying coverages to kmer graphs: 
reference node does not " + "exist in pangraph"); + } + + Node& pangraph_node = *nodes[ref_node.node_id]; for (auto& kmergraph_node_ptr : pangraph_node.kmer_prg_with_coverage.kmer_prg->nodes) { const auto& knode_id = kmergraph_node_ptr->id; - assert(knode_id < ref_node.kmer_prg_with_coverage.kmer_prg->nodes.size()); + + bool kmer_graph_node_id_is_valid = + knode_id < ref_node.kmer_prg_with_coverage.kmer_prg->nodes.size(); + if (!kmer_graph_node_id_is_valid) { + fatal_error("Error copying coverages to kmer graphs: kmer graph node " + "id is not valid"); + } + pangraph_node.kmer_prg_with_coverage.set_reverse_covg(knode_id, (uint16_t)(ref_node.kmer_prg_with_coverage.get_reverse_covg( knode_id, ref_sample_id)), diff --git a/src/sampleinfo.cpp b/src/sampleinfo.cpp index fe714e41..8047d6bf 100644 --- a/src/sampleinfo.cpp +++ b/src/sampleinfo.cpp @@ -6,12 +6,18 @@ void SampleInfo::set_coverage_information( { this->allele_to_forward_coverages = allele_to_forward_coverages; this->allele_to_reverse_coverages = allele_to_reverse_coverages; - assert(check_if_coverage_information_is_correct()); + + if(!check_if_coverage_information_is_correct()) { + fatal_error("Error when setting coverage information for sample: " + "coverage information left inconsistent"); + } } void SampleInfo::genotype_from_coverage() { - assert(check_if_coverage_information_is_correct()); + if(!check_if_coverage_information_is_correct()) { + fatal_error("Error when genotyping: coverage information is inconsistent"); + } auto genotype_and_max_likelihood_optional = get_genotype_from_coverage(); if (genotype_and_max_likelihood_optional) { @@ -32,7 +38,9 @@ void SampleInfo::genotype_from_coverage_using_maximum_likelihood_path_as_referen uint32_t valid_GT_from_maximum_likelihood_path = this->get_gt_from_max_likelihood_path(); - assert(check_if_coverage_information_is_correct()); + if(!check_if_coverage_information_is_correct()) { + fatal_error("Error when genotyping: coverage information is inconsistent"); + } 
auto genotype_and_max_likelihood_optional = get_genotype_from_coverage(); if (genotype_and_max_likelihood_optional) { std::tie(GT_from_coverages, likelihood_of_GT_from_coverages) @@ -266,6 +274,8 @@ std::string SampleInfo::to_string(bool genotyping_from_maximum_likelihood, bool only_one_flag_is_set = ((int)(genotyping_from_maximum_likelihood) + (int)(genotyping_from_compatible_coverage)) == 1; + // this will still remain an assert as it is responsibility of the dev to ensure + // this method is not called with the two flags set assert(only_one_flag_is_set); std::vector likelihoods_for_all_alleles = get_likelihoods_for_all_alleles(); @@ -396,5 +406,8 @@ void SampleInfo::set_number_of_alleles_and_resize_coverage_information( { this->number_of_alleles = number_of_alleles; resize_to_the_number_of_alleles(); - assert(check_if_coverage_information_is_correct()); + if(!check_if_coverage_information_is_correct()) { + fatal_error("Error when setting number of alleles for sample: " + "coverage information left inconsistent"); + } } \ No newline at end of file diff --git a/src/seq.cpp b/src/seq.cpp index 646a24e1..84abbc32 100644 --- a/src/seq.cpp +++ b/src/seq.cpp @@ -10,8 +10,6 @@ #include "seq.h" #include "utils.h" -#define assert_msg(x) !(std::cerr << "Assertion failed: " << x << std::endl) - using std::vector; Seq::Seq(uint32_t i, const std::string& n, const std::string& p, uint32_t w, uint32_t k) @@ -132,10 +130,12 @@ void Seq::minimizer_sketch(const uint32_t w, const uint32_t k) smallest); // add the last element of the window (a Minimizer) to the // sketch, update the smallest and clear the window } - assert(window.size() < w - || assert_msg("we can't have added a smallest kmer correctly as window " - "still has size " - << window.size())); + + const bool window_has_shortened = window.size() < w; + if(!window_has_shortened) { + fatal_error("Error when sketching sequence: a minimizer should have been added " + "and windows should have size < ", w, " (is ", 
window.size(), ")"); + } } } diff --git a/src/utils.cpp b/src/utils.cpp index 9ef569ed..93e2133b 100644 --- a/src/utils.cpp +++ b/src/utils.cpp @@ -19,8 +19,6 @@ #include "minihit.h" #include "fastaq_handler.h" -#define assert_msg(x) !(std::cerr << "Assertion failed: " << x << std::endl) - std::string now() { time_t now; @@ -84,13 +82,14 @@ std::string rev_complement(std::string s) float lognchoosek2(uint32_t n, uint32_t k1, uint32_t k2) { - assert(n >= k1 + k2 - || assert_msg( - "Currently the model assumes that the most a given kmer (defined by " - "position) can occur is once per read, i.e. an error somewhere else in the " - "read cannot result in this kmer. If you are getting this message, then " - "you have evidence of violation of this assumption. Either try using a " - "bigger k, or come up with a better model")); + const bool parameters_are_valid = n >= (k1 + k2); + if (!parameters_are_valid) { + fatal_error("Currently the model assumes that the most a given kmer (defined by " + "position) can occur is once per read, i.e. an error somewhere else in the " + "read cannot result in this kmer. If you are getting this message, then " + "you have evidence of violation of this assumption. 
Either try using a " + "bigger k, or come up with a better model"); + } float total = 0; for (uint32_t m = n; m != n - k1 - k2; --m) { @@ -492,7 +491,6 @@ uint32_t pangraph_from_read_file(const std::string& filepath, if (illumina and expected_number_kmers_in_short_read_sketch == std::numeric_limits::max()) { - assert(w != 0); expected_number_kmers_in_short_read_sketch = sequence.seq.length() * 2 / w; } @@ -547,6 +545,14 @@ void open_file_for_writing(const std::string& file_path, std::ofstream& stream) } } +void open_file_for_appending(const std::string& file_path, std::ofstream& stream) +{ + stream.open(file_path, std::ios::app); + if (!stream.is_open()) { + fatal_error("Error opening file ", file_path); + } +} + // read all strings in the readsFile file and return them as a vector of strings std::vector get_vector_of_strings_from_file(const std::string& file_path) { diff --git a/src/vcf.cpp b/src/vcf.cpp index 97f88792..4be52aa5 100644 --- a/src/vcf.cpp +++ b/src/vcf.cpp @@ -1,7 +1,5 @@ #include "vcf.h" -#define assert_msg(x) !(std::cerr << "Assertion failed: " << x << std::endl) - void VCF::add_record_core(const VCFRecord& vr) { records.push_back(std::make_shared(vr)); @@ -29,9 +27,12 @@ VCFRecord& VCF::add_or_update_record_restricted_to_the_given_samples( VCFRecord& vr, const std::vector& sample_names) { // TODO: refactor this, this function does too much - - assert(vr.sampleIndex_to_sampleInfo.size() == sample_names.size() - or sample_names.size() == 0); + const bool record_and_samples_are_consistent = + vr.sampleIndex_to_sampleInfo.size() == sample_names.size() or sample_names.size() == 0; + if (!record_and_samples_are_consistent) { + fatal_error("Error updating record to a subset of samples: record and subset " + "of samples given are inconsistent"); + } auto record_it = find_record_in_records( vr); // TODO: improve this search to log(n) using a map or sth @@ -70,7 +71,12 @@ ptrdiff_t VCF::get_sample_index(const std::string& name) samples.push_back(name); for 
(auto& record_ptr : records) { record_ptr->add_new_samples(1); - assert(samples.size() == record_ptr->sampleIndex_to_sampleInfo.size()); + + const bool record_samples_match_VCF_samples = samples.size() == record_ptr->sampleIndex_to_sampleInfo.size(); + if(!record_samples_match_VCF_samples) { + fatal_error("Error on adding a sample to VCF record: VCF record samples " + "do no match global VCF samples"); + } } return samples.size() - 1; } else { @@ -127,8 +133,10 @@ void VCF::add_a_new_record_discovered_in_a_sample_and_genotype_it( vcf_record_was_processed = true; } - // check not mistake - assert(vcf_record_was_processed); + // check if there was a mistake + if(!vcf_record_was_processed) { + fatal_error("Error when adding a new VCF record discovered in a sample"); + } } update_other_samples_of_this_record(vcf_record_pointer); @@ -199,8 +207,6 @@ void VCF::append_vcf(const VCF& other_vcf) BOOST_LOG_TRIVIAL(debug) << "for all existing " << original_size << " records, add null entries for the " << num_samples_added << " new samples"; - assert(original_size < std::numeric_limits::max() - || assert_msg("VCF size has got too big to use the append feature")); for (uint_least64_t i = 0; i < original_size; ++i) { records[i]->add_new_samples(num_samples_added); } @@ -267,7 +273,9 @@ void VCF::merge_multi_allelic_core(VCF& merged_VCF, uint32_t max_allele_length) { VCF empty_vcf = VCF(merged_VCF.genotyping_options); bool merged_VCF_passed_as_parameter_is_initially_empty = merged_VCF == empty_vcf; - assert(merged_VCF_passed_as_parameter_is_initially_empty); + if(!merged_VCF_passed_as_parameter_is_initially_empty) { + fatal_error("Error on merging VCFs: initial VCF is not empty"); + } size_t vcf_size = this->get_VCF_size(); bool no_need_for_merging = vcf_size <= 1; @@ -287,13 +295,6 @@ void VCF::merge_multi_allelic_core(VCF& merged_VCF, uint32_t max_allele_length) *vcf_record_to_be_merged_in_pointer, max_allele_length); if (vcf_record_should_be_merged_in) { - // TODO: this code 
is not covered by tests, IDK what it is supposed to - // do - commenting it out and asserting out if we reach it - if (vcf_record_to_be_merged_in_pointer->sampleIndex_to_sampleInfo - .empty()) { - assert_msg("VCF::merge_multi_allelic: vcf_record_to_be_merged_in " - "has no samples"); - } vcf_record_merged->merge_record_into_this( *vcf_record_to_be_merged_in_pointer); } else { @@ -306,7 +307,11 @@ void VCF::merge_multi_allelic_core(VCF& merged_VCF, uint32_t max_allele_length) merged_VCF.sort_records(); - assert(merged_VCF.get_VCF_size() <= vcf_size); + bool merging_did_not_create_any_record = merged_VCF.get_VCF_size() <= vcf_size; + if(!merging_did_not_create_any_record) { + fatal_error("Error on merging VCFs: new VCF records were created, whereas " + "this should not be the case"); + } } VCF VCF::correct_dot_alleles(const std::string& vcf_ref, const std::string& chrom) const @@ -329,12 +334,12 @@ VCF VCF::correct_dot_alleles(const std::string& vcf_ref, const std::string& chro continue; } - assert(vcf_ref.length() >= record.get_pos() - || assert_msg("vcf_ref.length() = " << vcf_ref.length() - << "!>= record.get_pos() " - << record.get_pos() << "\n" - << record.to_string(true, false) << "\n" - << vcf_ref)); + const bool record_pos_refers_to_an_existing_pos_in_vcf_ref = + vcf_ref.length() >= record.get_pos(); + if(!record_pos_refers_to_an_existing_pos_in_vcf_ref) { + fatal_error("When correcting dot alleles, a VCF record has an inexistent " + "position (", record.get_pos(), ") in VCF ref with length ", vcf_ref.length()); + } bool record_contains_dot_allele = record.contains_dot_allele(); bool record_did_not_contain_dot_allele_or_was_corrected = true; bool there_is_a_previous_letter = record.get_pos() > 0; @@ -360,9 +365,14 @@ VCF VCF::correct_dot_alleles(const std::string& vcf_ref, const std::string& chro } } - assert(vcf_with_dot_alleles_corrected.get_VCF_size() <= this->get_VCF_size()); - vcf_with_dot_alleles_corrected.sort_records(); + + bool 
correcting_dot_alleles_did_not_create_any_record = vcf_with_dot_alleles_corrected.get_VCF_size() <= this->get_VCF_size(); + if(!correcting_dot_alleles_did_not_create_any_record) { + fatal_error("Error on correcting dot alleles: new VCF records were created, whereas " + "this should not be the case"); + } + return vcf_with_dot_alleles_corrected; } @@ -510,6 +520,8 @@ std::string VCF::to_string(bool genotyping_from_maximum_likelihood, bool only_one_flag_is_set = ((int)(genotyping_from_maximum_likelihood) + (int)(genotyping_from_coverage)) == 1; + // this will still remain an assert as it is responsibility of the dev to ensure + // this method is not called with the two flags set assert(only_one_flag_is_set); std::stringstream out; diff --git a/src/vcfrecord.cpp b/src/vcfrecord.cpp index 7cc26ded..b8cf15d3 100644 --- a/src/vcfrecord.cpp +++ b/src/vcfrecord.cpp @@ -5,8 +5,6 @@ #include #include -#define assert_msg(x) !(std::cerr << "Assertion failed: " << x << std::endl) - VCFRecord::VCFRecord(VCF const* parent_vcf, const std::string& chrom, uint32_t pos, const std::string& ref, const std::string& alt, const std::string& info, const std::string& graph_type_info) @@ -73,6 +71,8 @@ std::string VCFRecord::get_format( bool only_one_flag_is_set = ((int)(genotyping_from_maximum_likelihood) + (int)(genotyping_from_coverage)) == 1; + // this will still remain an assert as it is responsibility of the dev to ensure + // this method is not called with the two flags set assert(only_one_flag_is_set); static std::vector format_for_genotyping_from_maximum_likelihood @@ -212,7 +212,9 @@ void VCFRecord::merge_record_into_this(const VCFRecord& other) if (other_record_has_no_alt) return; - assert(there_are_no_common_alt_alleles_between_this_and_other(other)); + if(!there_are_no_common_alt_alleles_between_this_and_other(other)) { + fatal_error("When merging two VCF records, they have common ALTs, this should not happen"); + } 
this->sampleIndex_to_sampleInfo.merge_other_samples_infos_into_this( other.sampleIndex_to_sampleInfo); @@ -227,9 +229,11 @@ bool VCFRecord::can_biallelic_record_be_merged_into_this( // TODO : maybe fix this? // bool ensure_we_are_merging_only_biallelic_records = // vcf_record_to_be_merged_in.alts.size() == 1; - bool ensure_we_are_merging_only_biallelic_records + bool we_are_merging_only_biallelic_records = vcf_record_to_be_merged_in.alts.size() <= 1; - assert(ensure_we_are_merging_only_biallelic_records); + if(!we_are_merging_only_biallelic_records) { + fatal_error("When merging two biallelic records, one of them is not biallelic"); + } bool both_records_have_the_same_ref = this->ref == vcf_record_to_be_merged_in.ref; @@ -296,7 +300,9 @@ void VCFRecord::add_new_alt(std::string alt) } bool alt_already_present = std::find(alts.begin(), alts.end(), alt) != alts.end(); - assert(not alt_already_present); + if (alt_already_present) { + fatal_error("Error adding new ALT to a VCF record: ALT already exists"); + } alts.push_back(alt); set_number_of_alleles_and_resize_coverage_information_for_all_samples( diff --git a/test/de_bruijn_graph_test.cpp b/test/de_bruijn_graph_test.cpp index a5602727..52104087 100644 --- a/test/de_bruijn_graph_test.cpp +++ b/test/de_bruijn_graph_test.cpp @@ -6,6 +6,7 @@ #include "gtest/gtest.h" #include "test_macro.cpp" #include "de_bruijn_graph_class.h" +#include "test_helpers.h" using namespace debruijn; @@ -232,7 +233,8 @@ TEST(DeBruijnGraphAddEdge, AddEdgeNoOverlap_Death) std::deque v2 = { 6, 0, 9 }; OrientedNodePtr n1 = g.add_node(v1, 0); OrientedNodePtr n2 = g.add_node(v2, 0); - EXPECT_DEATH(g.add_edge(n1, n2), ""); + + ASSERT_EXCEPTION(g.add_edge(n1, n2), FatalRuntimeError, "Error adding edge to de Bruijn Graph"); } TEST(DeBruijnGraphTest, remove_node) diff --git a/test/interval_test.cpp b/test/interval_test.cpp index b5b118f3..829fe8e3 100644 --- a/test/interval_test.cpp +++ b/test/interval_test.cpp @@ -2,6 +2,7 @@ #include #include 
#include "interval.h" +#include "test_helpers.h" TEST(IntervalTest, create) { @@ -19,8 +20,10 @@ TEST(IntervalTest, create) j = 8; EXPECT_EQ(i.length, j); - EXPECT_THROW(Interval(9, 1), std::logic_error); - EXPECT_THROW(Interval(-1, 10), std::logic_error); + ASSERT_EXCEPTION(Interval(9, 1), FatalRuntimeError, + "Error when building interval: interval end cannot be less than the interval start"); + ASSERT_EXCEPTION(Interval(-1, 10), FatalRuntimeError, + "Error when building interval: interval end cannot be less than the interval start"); } TEST(IntervalTest, write) diff --git a/test/localPRG_test.cpp b/test/localPRG_test.cpp index 2fc13ffb..873d0ee8 100644 --- a/test/localPRG_test.cpp +++ b/test/localPRG_test.cpp @@ -148,10 +148,12 @@ TEST(LocalPRGTest, string_along_path) // forbidden paths d = { Interval(2, 3), Interval(13, 25) }; p.initialize(d); - EXPECT_DEATH(l1.string_along_path(p), ""); - EXPECT_DEATH(l1.string_along_path(p), ""); - EXPECT_DEATH(l2.string_along_path(p), ""); - EXPECT_DEATH(l3.string_along_path(p), ""); + ASSERT_EXCEPTION(l1.string_along_path(p), FatalRuntimeError, + "Error getting sequence along PRG path"); + ASSERT_EXCEPTION(l2.string_along_path(p), FatalRuntimeError, + "Error getting sequence along PRG path"); + ASSERT_EXCEPTION(l3.string_along_path(p), FatalRuntimeError, + "Error getting sequence along PRG path"); } TEST(LocalPRGTest, string_along_localpath) diff --git a/test/localgraph_test.cpp b/test/localgraph_test.cpp index 393cfbd5..f73abe39 100644 --- a/test/localgraph_test.cpp +++ b/test/localgraph_test.cpp @@ -44,7 +44,8 @@ TEST(LocalGraphTest, add_edge) lg2.add_edge(2, 3); // expect failure if a node doesn't exist in the graph - EXPECT_DEATH(lg2.add_edge(0, 4), ""); + ASSERT_EXCEPTION(lg2.add_edge(0, 4), FatalRuntimeError, + "Cannot add edge to Local Graph"); } TEST(LocalGraphTest, equals) diff --git a/test/minihit_test.cpp b/test/minihit_test.cpp index 3dd9c77d..aa67a483 100644 --- a/test/minihit_test.cpp +++ 
b/test/minihit_test.cpp @@ -8,6 +8,7 @@ #include #include #include +#include "test_helpers.h" using namespace std; @@ -32,7 +33,8 @@ TEST(MinimizerHitTest, create) kh = hash.kmerhash("hell", 4); m = Minimizer(min(kh.first, kh.second), 1, 5, 0); - EXPECT_DEATH(MinimizerHit(1, m, mr), ""); + ASSERT_EXCEPTION(MinimizerHit(1, m, mr), FatalRuntimeError, + "Error when storing minimizers: minimizer from read/sequence and from PRG have different lengths"); // TEST SECOND CONSTRUCTOR!! } diff --git a/test/minimizer_test.cpp b/test/minimizer_test.cpp index a4b19847..ed044e24 100644 --- a/test/minimizer_test.cpp +++ b/test/minimizer_test.cpp @@ -8,6 +8,7 @@ #include #include #include +#include "test_helpers.h" using std::set; using namespace std; @@ -47,10 +48,12 @@ TEST(MinimizerTest, create) EXPECT_EQ(m3.pos_of_kmer_in_read.get_end(), j); // interval too short to be valid - EXPECT_DEATH(Minimizer(kh.first, 0, 2, 0), ""); + ASSERT_EXCEPTION(Minimizer(kh.first, 0, 2, 0), FatalRuntimeError, + "Error when building minimizer"); // doesn't generate an interval as 2>0 - EXPECT_THROW( - Minimizer(kh.first, 2, 0, 0), std::logic_error); + ASSERT_EXCEPTION( + Minimizer(kh.first, 2, 0, 0), FatalRuntimeError, + "Error when building interval: interval end cannot be less than the interval start"); } TEST(MinimizerTest, less_than) diff --git a/test/noise_filtering_test.cpp b/test/noise_filtering_test.cpp index f275ef75..e081096a 100644 --- a/test/noise_filtering_test.cpp +++ b/test/noise_filtering_test.cpp @@ -5,6 +5,7 @@ #include "pangenome/panread.h" #include "pangenome/pannode.h" #include "minihit.h" +#include "test_helpers.h" using namespace std; @@ -101,11 +102,12 @@ TEST(NoiseFilteringOverlapForwards, OverlapShiftedMoreThanOne_False) EXPECT_FALSE(result); } -TEST(NoiseFilteringOverlapForwards, SecondLongerThanFirst_Death) +TEST(NoiseFilteringOverlapForwards, SecondLongerThanFirst_FatalRuntimeError) { std::deque d1 = { 0, 4, 6, 2, 5, 4, 0, 1, 2 }; std::deque d2 = { 0, 4, 6, 2, 5, 4, 
0, 1, 2, 3 }; - EXPECT_DEATH(overlap_forwards(d1, d2), ""); + ASSERT_EXCEPTION(overlap_forwards(d1, d2), FatalRuntimeError, + "Error on checking for overlaps in noise filtering: first node must be larger or have the same size as the second"); } TEST(NoiseFilteringTest, overlap_backwards) diff --git a/test/pangraph_test.cpp b/test/pangraph_test.cpp index 8c3c83eb..d7831bb9 100644 --- a/test/pangraph_test.cpp +++ b/test/pangraph_test.cpp @@ -9,6 +9,7 @@ #include "localPRG.h" #include #include +#include "test_helpers.h" using namespace pangenome; @@ -475,7 +476,8 @@ TEST(PangenomeGraphAddNode, AddClusterWrongReadId_AssertCatches) PGraphTester pg; auto prg_pointer = std::make_shared(prg_id, "", ""); - EXPECT_DEATH(pg.add_hits_between_PRG_and_read(prg_pointer, read_id, cluster), ""); + ASSERT_EXCEPTION(pg.add_hits_between_PRG_and_read(prg_pointer, read_id, cluster), + FatalRuntimeError, "Minimizer hits error: hit should be on read id"); } TEST(PangenomeGraphAddNode, AddClusterWrongPrgId_AssertCatches) @@ -498,7 +500,8 @@ TEST(PangenomeGraphAddNode, AddClusterWrongPrgId_AssertCatches) PGraphTester pg; auto prg_pointer = std::make_shared(prg_id, "", ""); - EXPECT_DEATH(pg.add_hits_between_PRG_and_read(prg_pointer, read_id, cluster), ""); + ASSERT_EXCEPTION(pg.add_hits_between_PRG_and_read(prg_pointer, read_id, cluster), + FatalRuntimeError, "Minimizer hits error: hit should be on PRG id"); } /* this test is now comprised in TEST(PangenomeGraphNode, add_node_and_get_node) diff --git a/test/sampleinfo_test.cpp b/test/sampleinfo_test.cpp index c1bd2b88..f546df05 100644 --- a/test/sampleinfo_test.cpp +++ b/test/sampleinfo_test.cpp @@ -16,7 +16,8 @@ using ::testing::Return; TEST(SampleInfoTest, constructor___zero_alleles___expects_death) { - EXPECT_DEATH(SampleInfo(0, 0, &default_genotyping_options), ""); + ASSERT_EXCEPTION(SampleInfo(0, 0, &default_genotyping_options), FatalRuntimeError, + "Error on creating VCF Sample INFOs: the VCF record has no alleles"); } 
TEST(SampleInfoTest, constructor___one_allele) @@ -204,41 +205,46 @@ TEST_F(SampleInfoTest___Fixture, get_allele_to_reverse_coverages___default_sampl TEST_F(SampleInfoTest___Fixture, set_coverage_information___forward_coverage_has_no_alleles___expects_death) { - EXPECT_DEATH(default_sample_info.set_coverage_information( + ASSERT_EXCEPTION(default_sample_info.set_coverage_information( allele_to_coverage_empty, allele_to_coverage_three_alleles), - ""); + FatalRuntimeError, + "Error when setting coverage information for sample: coverage information left inconsistent"); } TEST_F(SampleInfoTest___Fixture, set_coverage_information___forward_coverage_has_one_allele___expects_death) { - EXPECT_DEATH(default_sample_info.set_coverage_information( + ASSERT_EXCEPTION(default_sample_info.set_coverage_information( allele_to_coverage_one_allele, allele_to_coverage_three_alleles), - ""); + FatalRuntimeError, + "Error when setting coverage information for sample: coverage information left inconsistent"); } TEST_F(SampleInfoTest___Fixture, set_coverage_information___reverse_coverage_has_no_alleles___expects_death) { - EXPECT_DEATH(default_sample_info.set_coverage_information( + ASSERT_EXCEPTION(default_sample_info.set_coverage_information( allele_to_coverage_three_alleles, allele_to_coverage_empty), - ""); + FatalRuntimeError, + "Error when setting coverage information for sample: coverage information left inconsistent"); } TEST_F(SampleInfoTest___Fixture, set_coverage_information___reverse_coverage_has_one_allele___expects_death) { - EXPECT_DEATH(default_sample_info.set_coverage_information( + ASSERT_EXCEPTION(default_sample_info.set_coverage_information( allele_to_coverage_three_alleles, allele_to_coverage_one_allele), - ""); + FatalRuntimeError, + "Error when setting coverage information for sample: coverage information left inconsistent"); } TEST_F(SampleInfoTest___Fixture, set_coverage_information___both_coverages_have_two_alleles___different_number_of_bases___expects_death) { 
- EXPECT_DEATH(default_sample_info.set_coverage_information( + ASSERT_EXCEPTION(default_sample_info.set_coverage_information( allele_to_coverage_two_alleles, { { 1, 2 }, { 3 } }), - ""); + FatalRuntimeError, + "Error when setting coverage information for sample: coverage information left inconsistent"); } TEST_F(SampleInfoTest___Fixture, @@ -259,17 +265,19 @@ TEST_F(SampleInfoTest___Fixture, TEST_F(SampleInfoTest___Fixture, set_coverage_information___fwd_coverage_has_two_alleles___rev_coverage_has_three_alleles___sample_info_expects_three_alleles___expects_death) { - EXPECT_DEATH(default_sample_info_three_alleles.set_coverage_information( + ASSERT_EXCEPTION(default_sample_info_three_alleles.set_coverage_information( allele_to_coverage_two_alleles, allele_to_coverage_three_alleles), - ""); + FatalRuntimeError, + "Error when setting coverage information for sample: coverage information left inconsistent"); } TEST_F(SampleInfoTest___Fixture, set_coverage_information___fwd_coverage_has_three_alleles___rev_coverage_has_two_alleles___sample_info_expects_three_alleles___expects_death) { - EXPECT_DEATH(default_sample_info_three_alleles.set_coverage_information( + ASSERT_EXCEPTION(default_sample_info_three_alleles.set_coverage_information( allele_to_coverage_three_alleles, allele_to_coverage_two_alleles), - ""); + FatalRuntimeError, + "Error when setting coverage information for sample: coverage information left inconsistent"); } TEST_F(SampleInfoTest___Fixture, @@ -289,9 +297,10 @@ TEST_F(SampleInfoTest___Fixture, TEST_F(SampleInfoTest___Fixture, set_coverage_information___forward_covg_has_two_alleles___reverse_covg_has_three_alleles___expects_death) { - EXPECT_DEATH(default_sample_info.set_coverage_information( + ASSERT_EXCEPTION(default_sample_info.set_coverage_information( allele_to_coverage_two_alleles, allele_to_coverage_three_alleles), - ""); + FatalRuntimeError, + "Error when setting coverage information for sample: coverage information left inconsistent"); } 
TEST_F(SampleInfoTest___Fixture, @@ -302,10 +311,11 @@ TEST_F(SampleInfoTest___Fixture, allele_to_coverage_two_alleles); allele_to_coverage_two_alleles_first_allele_has_only_one_base[0] = { 1 }; - EXPECT_DEATH( + ASSERT_EXCEPTION( default_sample_info.set_coverage_information(allele_to_coverage_two_alleles, allele_to_coverage_two_alleles_first_allele_has_only_one_base), - ""); + FatalRuntimeError, + "Error when setting coverage information for sample: coverage information left inconsistent"); } TEST_F(SampleInfoTest___Fixture, @@ -316,10 +326,11 @@ TEST_F(SampleInfoTest___Fixture, allele_to_coverage_two_alleles); allele_to_coverage_two_alleles_second_allele_has_only_one_base[1] = { 3 }; - EXPECT_DEATH( + ASSERT_EXCEPTION( default_sample_info.set_coverage_information(allele_to_coverage_two_alleles, allele_to_coverage_two_alleles_second_allele_has_only_one_base), - ""); + FatalRuntimeError, + "Error when setting coverage information for sample: coverage information left inconsistent"); } TEST_F(SampleInfoTest___Fixture, @@ -327,9 +338,10 @@ TEST_F(SampleInfoTest___Fixture, { default_sample_info_three_alleles.set_coverage_information( allele_to_coverage_three_alleles, allele_to_coverage_three_alleles); - EXPECT_DEATH(default_sample_info_three_alleles + ASSERT_EXCEPTION(default_sample_info_three_alleles .set_number_of_alleles_and_resize_coverage_information(0), - ""); + FatalRuntimeError, + "Error when setting number of alleles for sample: coverage information left inconsistent"); } TEST_F(SampleInfoTest___Fixture, @@ -1287,9 +1299,11 @@ TEST_F(SampleIndexToSampleInfoTemplate___Fixture, another_sample_index_to_sample_info.emplace_back_several_empty_sample_infos( 5, 2, &default_genotyping_options); - EXPECT_DEATH(sample_index_to_sample_info.merge_other_samples_infos_into_this( + ASSERT_EXCEPTION(sample_index_to_sample_info.merge_other_samples_infos_into_this( another_sample_index_to_sample_info), - ""); + FatalRuntimeError, + "Error merging two records: " + "number of 
samples is not consistent between both records"); } TEST_F(SampleIndexToSampleInfoTemplate___Fixture, diff --git a/test/test_cases/noisefiltering_test.pangraph.gfa b/test/test_cases/noisefiltering_test.pangraph.gfa index bf77fa6c..f9b39b81 100644 --- a/test/test_cases/noisefiltering_test.pangraph.gfa +++ b/test/test_cases/noisefiltering_test.pangraph.gfa @@ -7,13 +7,13 @@ S 3 N FC:i:5 S 2 N FC:i:4 S 1 N FC:i:4 S 0 N FC:i:5 -L 7 + 3 + 0M +L 6 + 3 + 0M L 5 + 3 + 0M L 5 + 0 + 0M -L 5 + 4 + 0M -L 4 + 3 + 0M -L 3 + 2 + 0M -L 3 + 6 + 0M -L 6 + 2 + 0M -L 2 + 1 + 0M -L 1 + 0 + 0M +L 4 + 5 + 0M +L 3 + 7 + 0M +L 3 + 4 + 0M +L 2 + 6 + 0M +L 2 + 3 + 0M +L 1 + 2 + 0M +L 0 + 1 + 0M diff --git a/test/vcf_test.cpp b/test/vcf_test.cpp index 42050780..9ab2a352 100644 --- a/test/vcf_test.cpp +++ b/test/vcf_test.cpp @@ -1220,7 +1220,9 @@ TEST_F(VCFTest___merge_multi_allelic_core___Fixture, merged_VCF_is_not_initially VCF merged_vcf(vcf.genotyping_options); merged_vcf.add_record("1", 1, "A", "T"); - EXPECT_DEATH(vcf.merge_multi_allelic_core(merged_vcf, 10000), ""); + ASSERT_EXCEPTION(vcf.merge_multi_allelic_core(merged_vcf, 10000), + FatalRuntimeError, + "Error on merging VCFs: initial VCF is not empty"); } TEST_F(VCFTest___merge_multi_allelic_core___Fixture, one_sized_VCF) diff --git a/test/vcfrecord_test.cpp b/test/vcfrecord_test.cpp index 089b5c13..31707bcf 100644 --- a/test/vcfrecord_test.cpp +++ b/test/vcfrecord_test.cpp @@ -189,11 +189,13 @@ TEST_F(VCFRecordTest___default_VCF_Record___Fixture, add_new_alt___add_two_valid } TEST_F(VCFRecordTest___default_VCF_Record___Fixture, - add_new_alt___add_two_valid_alts_and_several_repeated_alts___expects_death) + add_new_alt___add_two_valid_alts_and_several_repeated_alts___expects_FatalRuntimeError) { vcf_record.add_new_alt("AC"); vcf_record.add_new_alt("AG"); - EXPECT_DEATH(vcf_record.add_new_alt("AC"), ""); + ASSERT_EXCEPTION(vcf_record.add_new_alt("AC"), + FatalRuntimeError, + "Error adding new ALT to a VCF record: ALT already exists"); 
} TEST_F(VCFRecordTest___default_VCF_Record___Fixture, @@ -210,11 +212,13 @@ TEST_F(VCFRecordTest___default_VCF_Record___Fixture, } TEST_F(VCFRecordTest___default_VCF_Record___Fixture, - add_new_alts___add_two_valid_alts_and_a_repeated_alt___expects_death) + add_new_alts___add_two_valid_alts_and_a_repeated_alt___expects_FatalRuntimeError) { std::vector alts { "AC", "AG", "", "." }; // NB: "" and "." are repeated because "" is translated to "." - EXPECT_DEATH(vcf_record.add_new_alts(alts.begin(), alts.end()), ""); + ASSERT_EXCEPTION(vcf_record.add_new_alts(alts.begin(), alts.end()), + FatalRuntimeError, + "Error adding new ALT to a VCF record: ALT already exists"); } TEST(VCFRecordTest, clear_simple) @@ -567,27 +571,30 @@ TEST_F(VCFRecordTest___merge_record_into_this______Fixture, merge_T_dot_into_TTT } TEST_F(VCFRecordTest___merge_record_into_this______Fixture, - merge_first_alt_is_common___expects_death) + merge_first_alt_is_common___expects_FatalRuntimeError) { - EXPECT_DEATH( + ASSERT_EXCEPTION( vcf_record_ref_A_alt_T_TT_TTT.merge_record_into_this(vcf_record_ref_A_alt_T), - ""); + FatalRuntimeError, + "When merging two VCF records, they have common ALTs, this should not happen"); } TEST_F(VCFRecordTest___merge_record_into_this______Fixture, merge_last_alt_is_common___expects_death) { - EXPECT_DEATH( + ASSERT_EXCEPTION( vcf_record_ref_A_alt_T_TT_TTT.merge_record_into_this(vcf_record_ref_A_alt_TTT), - ""); + FatalRuntimeError, + "When merging two VCF records, they have common ALTs, this should not happen"); } TEST_F(VCFRecordTest___merge_record_into_this______Fixture, - merge_both_have_dot_alleles___expects_death) + merge_both_have_dot_alleles___expects_FatalRuntimeError) { - EXPECT_DEATH( + ASSERT_EXCEPTION( vcf_record_ref_A_alt_dot.merge_record_into_this(vcf_record_ref_A_alt_T_dot), - ""); + FatalRuntimeError, + "When merging two VCF records, they have common ALTs, this should not happen"); } class 
VCFRecordTest___can_biallelic_record_be_merged_into_this______Fixture @@ -632,11 +639,12 @@ TEST_F(VCFRecordTest___can_biallelic_record_be_merged_into_this______Fixture, } TEST_F(VCFRecordTest___can_biallelic_record_be_merged_into_this______Fixture, - merge_triallelic___expects_death) + merge_triallelic___expects_FatalRuntimeError) { - EXPECT_DEATH(vcf_record_ref_A.can_biallelic_record_be_merged_into_this( + ASSERT_EXCEPTION(vcf_record_ref_A.can_biallelic_record_be_merged_into_this( vcf_record_tri_allelic), - ""); + FatalRuntimeError, + "When merging two biallelic records, one of them is not biallelic"); } TEST_F(VCFRecordTest___can_biallelic_record_be_merged_into_this______Fixture, From b62be1872157471e5081288bae6abcae8a80645b Mon Sep 17 00:00:00 2001 From: Leandro Ishi Date: Thu, 4 Feb 2021 12:35:05 -0400 Subject: [PATCH 11/37] Changing hunter dir to be inside CMAKE_BINARY_DIR instead of default path --- CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/CMakeLists.txt b/CMakeLists.txt index 19b45bd9..e750160a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,6 +1,7 @@ cmake_minimum_required(VERSION 3.12) # required by hunter ZLIB installation # include hunter +set(HUNTER_ROOT ${CMAKE_BINARY_DIR}/hunter) include("cmake/HunterGate.cmake") HunterGate( URL "https://github.com/cpp-pm/hunter/archive/v0.23.289.tar.gz" From 16c1d706777b1427645d117c64da31f19530928d Mon Sep 17 00:00:00 2001 From: Leandro Ishi Date: Thu, 4 Feb 2021 12:40:02 -0400 Subject: [PATCH 12/37] WIP: refactoring asserts --- include/compare_main.h | 1 - include/denovo_discovery/denovo_utils.h | 1 - include/fastaq.h | 1 - include/get_vcf_ref_main.h | 1 - include/interval.h | 1 - include/kmergraphwithcoverage.h | 1 - include/localgraph.h | 1 - include/map_main.h | 1 - include/sampleinfo.h | 1 - include/seq2path_main.h | 1 - include/vcf.h | 1 - src/de_bruijn/graph.cpp | 1 - src/estimate_parameters.cpp | 1 - src/kmergraph.cpp | 1 - src/kmernode.cpp | 3 -- src/localPRG.cpp | 17 +++++--- 
src/minihit.cpp | 3 -- src/minihits.cpp | 4 -- src/minimizer.cpp | 2 - src/noise_filtering.cpp | 33 +++++++++++---- src/pangenome/pangraph.cpp | 37 +++++++++++------ src/pangenome/pannode.cpp | 32 +++++++++------ src/pangenome/panread.cpp | 18 ++++++--- src/pangenome/pansample.cpp | 3 -- src/prg/path.cpp | 54 +++++++++++++++++-------- src/seq.cpp | 1 - src/utils.cpp | 1 - src/vcfrecord.cpp | 1 - test/pangraph_test.cpp | 5 ++- test/panread_test.cpp | 3 +- test/path_test.cpp | 7 +++- 31 files changed, 139 insertions(+), 99 deletions(-) diff --git a/include/compare_main.h b/include/compare_main.h index 3d85cf3d..4cd793a8 100644 --- a/include/compare_main.h +++ b/include/compare_main.h @@ -10,7 +10,6 @@ #include #include #include -#include #include #include diff --git a/include/denovo_discovery/denovo_utils.h b/include/denovo_discovery/denovo_utils.h index c5788e5c..f91a8439 100644 --- a/include/denovo_discovery/denovo_utils.h +++ b/include/denovo_discovery/denovo_utils.h @@ -4,7 +4,6 @@ #include #include #include -#include #include #include #include "localnode.h" diff --git a/include/fastaq.h b/include/fastaq.h index 181577e9..3a989ef5 100644 --- a/include/fastaq.h +++ b/include/fastaq.h @@ -3,7 +3,6 @@ #include #include -#include #include #include #include diff --git a/include/get_vcf_ref_main.h b/include/get_vcf_ref_main.h index 2950a1b5..12086f7a 100644 --- a/include/get_vcf_ref_main.h +++ b/include/get_vcf_ref_main.h @@ -1,6 +1,5 @@ #ifndef PANDORA_GET_VCF_REF_MAIN_H #define PANDORA_GET_VCF_REF_MAIN_H -#include #include #include #include "localPRG.h" diff --git a/include/interval.h b/include/interval.h index e239e47a..a9d67a7f 100644 --- a/include/interval.h +++ b/include/interval.h @@ -5,7 +5,6 @@ #include #include #include -#include #include #include "fatal_error.h" diff --git a/include/kmergraphwithcoverage.h b/include/kmergraphwithcoverage.h index cf12b022..a40a9a9b 100644 --- a/include/kmergraphwithcoverage.h +++ b/include/kmergraphwithcoverage.h @@ -7,7 +7,6 
@@ class LocalPRG; #include #include #include -#include #include "prg/path.h" #include "kmernode.h" #include "kmergraph.h" diff --git a/include/localgraph.h b/include/localgraph.h index 3982fb35..7143679c 100644 --- a/include/localgraph.h +++ b/include/localgraph.h @@ -11,7 +11,6 @@ #include "localnode.h" #include "IITree.h" #include -#include #include #include #include "utils.h" diff --git a/include/map_main.h b/include/map_main.h index 3ea0b297..9bf2bbe0 100644 --- a/include/map_main.h +++ b/include/map_main.h @@ -7,7 +7,6 @@ #include #include #include -#include #include #include "utils.h" diff --git a/include/sampleinfo.h b/include/sampleinfo.h index f51391b1..32e528e7 100644 --- a/include/sampleinfo.h +++ b/include/sampleinfo.h @@ -4,7 +4,6 @@ #include #include #include -#include #include #include #include "Maths.h" diff --git a/include/seq2path_main.h b/include/seq2path_main.h index 0ac367d0..855ec9d9 100644 --- a/include/seq2path_main.h +++ b/include/seq2path_main.h @@ -1,7 +1,6 @@ #ifndef PANDORA_SEQ2PATH_MAIN_H #define PANDORA_SEQ2PATH_MAIN_H #include -#include #include #include diff --git a/include/vcf.h b/include/vcf.h index dd9827f4..bac377ae 100644 --- a/include/vcf.h +++ b/include/vcf.h @@ -7,7 +7,6 @@ #include #include #include -#include #include #include #include diff --git a/src/de_bruijn/graph.cpp b/src/de_bruijn/graph.cpp index 3d7041f8..1a2adbe5 100644 --- a/src/de_bruijn/graph.cpp +++ b/src/de_bruijn/graph.cpp @@ -3,7 +3,6 @@ #include #include #include -#include #include #include diff --git a/src/estimate_parameters.cpp b/src/estimate_parameters.cpp index 72fc194e..977a6cd9 100644 --- a/src/estimate_parameters.cpp +++ b/src/estimate_parameters.cpp @@ -1,7 +1,6 @@ #include #include #include -#include #include #include #include "utils.h" diff --git a/src/kmergraph.cpp b/src/kmergraph.cpp index bc3d632e..cf2fcc33 100644 --- a/src/kmergraph.cpp +++ b/src/kmergraph.cpp @@ -1,5 +1,4 @@ #include -#include #include #include /* srand, rand */ diff --git 
a/src/kmernode.cpp b/src/kmernode.cpp index fb9fca75..19df4408 100644 --- a/src/kmernode.cpp +++ b/src/kmernode.cpp @@ -1,8 +1,5 @@ #include -#include - #include - #include "kmernode.h" #include "utils.h" diff --git a/src/localPRG.cpp b/src/localPRG.cpp index 2603c2d8..bb7ee093 100644 --- a/src/localPRG.cpp +++ b/src/localPRG.cpp @@ -1,6 +1,5 @@ #include #include -#include #include #include #include @@ -595,8 +594,6 @@ void LocalPRG::minimizer_sketch(const std::shared_ptr& index, const uint3 // graph shift_paths = shift(kn->path); if (shift_paths.empty()) { - // assert(kn->path.get_start() == 0); not true for a too short test, would - // be true if all paths long enough to have at least 2 minikmers on... end_leaves.push_back(kn); } for (uint32_t i = 0; i != shift_paths.size(); @@ -1257,12 +1254,22 @@ void LocalPRG:: while (!refpath.back()->outNodes.empty() or refpath.size() > 1) { if (refpath.back()->id < samplepath.back()->id) { - assert(rpath.size() > ref_i); + bool ref_index_is_valid = rpath.size() > ref_i; + if (!ref_index_is_valid) { + fatal_error("Error when genotyping using max likelihood path: ref index " + "is not valid"); + } + refpath.push_back(rpath[ref_i]); found_new_site = true; ref_i++; } else if (samplepath.back()->id < refpath.back()->id) { - assert(sample_path.size() > sample_id); + bool sample_id_is_valid = sample_path.size() > sample_id; + if (!sample_id_is_valid) { + fatal_error("Error when genotyping using max likelihood path: sample " + "is not valid"); + } + samplepath.push_back(sample_path[sample_id]); found_new_site = true; sample_id++; diff --git a/src/minihit.cpp b/src/minihit.cpp index fe2baa1b..c152cfa5 100644 --- a/src/minihit.cpp +++ b/src/minihit.cpp @@ -1,8 +1,5 @@ -#include #include #include -#include -#include #include "minirecord.h" #include "minihit.h" #include "prg/path.h" diff --git a/src/minihits.cpp b/src/minihits.cpp index 5d3579a2..77f89c21 100644 --- a/src/minihits.cpp +++ b/src/minihits.cpp @@ -1,13 +1,9 @@ -#include 
#include -#include -#include #include "minihits.h" #include "minihit.h" #include "minirecord.h" #include "minimizer.h" -#include "utils.h" // for pointer_values_equal void MinimizerHits::add_hit(const uint32_t i, const Minimizer& minimizer_from_read, const MiniRecord& minimizer_from_PRG) diff --git a/src/minimizer.cpp b/src/minimizer.cpp index 1373391e..6a7c65d3 100644 --- a/src/minimizer.cpp +++ b/src/minimizer.cpp @@ -1,8 +1,6 @@ #include -#include #include #include "minimizer.h" -#include "prg/path.h" #include "interval.h" Minimizer::Minimizer(uint64_t s, uint32_t a, uint32_t b, bool c) diff --git a/src/noise_filtering.cpp b/src/noise_filtering.cpp index 7bd57f79..c763b262 100644 --- a/src/noise_filtering.cpp +++ b/src/noise_filtering.cpp @@ -4,7 +4,6 @@ #include #include #include -#include #include "utils.h" #include "pangenome/pangraph.h" #include "pangenome/pannode.h" @@ -242,16 +241,25 @@ void remove_leaves(std::shared_ptr pangraph, debruijn::Graph& hashed_node_ids_to_ids_and_orientations( dbg.nodes[i]->hashed_node_ids, node_ids, node_orients); + bool dbg_node_has_no_reads = dbg.nodes[i]->read_ids.empty(); + if (dbg_node_has_no_reads) { + fatal_error("Error when removing leaves from DBG: node has no reads"); + } + // remove the last node from corresponding reads - assert(not dbg.nodes[i]->read_ids.empty()); for (const auto& r : dbg.nodes[i]->read_ids) { if (pangraph->reads[r]->get_nodes().size() == dbg.size) { pangraph->remove_read(r); } else { pos = pangraph->reads[r]->find_position(node_ids, node_orients); - assert(pos.first == 0 - or pos.first + node_ids.size() - == pangraph->reads[r]->get_nodes().size()); + + bool pos_of_nodes_in_read_is_valid = (pos.first == 0) or + (pos.first + node_ids.size() == pangraph->reads[r]->get_nodes().size()); + if (!pos_of_nodes_in_read_is_valid) { + fatal_error("Error when removing leaves from DBG: position of " + "DBG nodes in reads are not valid"); + } + if (pos.first == 0) { node = 
pangraph->reads[r]->get_nodes()[0]; pangraph->reads[r]->remove_node_with_iterator( @@ -518,7 +526,11 @@ pangenome::Node convert_node_debruijn_pangraph( const debruijn::Node& debruijn_node, std::shared_ptr pangraph) { auto node_id = get_pangraph_node_id(debruijn_node); - assert(pangraph->nodes.find(node_id) != pangraph->nodes.end()); + bool node_exists = pangraph->nodes.find(node_id) != pangraph->nodes.end(); + if (!node_exists) { + fatal_error("Error converting DBG node to pangraph node: the given DBG node " + "does not exist in the pangraph"); + } auto node_ptr = pangraph->nodes.at(node_id); auto node = *node_ptr; @@ -542,8 +554,13 @@ void write_pangraph_gfa( auto first_node_direction = get_pangraph_node_direction(first_debruijn_node); for (const auto& second_debruijn_node_id : first_debruijn_node.out_nodes) { - assert(debruijn_graph.nodes.find(second_debruijn_node_id) - != debruijn_graph.nodes.end()); + bool neighbour_node_exists_in_the_graph = + debruijn_graph.nodes.find(second_debruijn_node_id) != debruijn_graph.nodes.end(); + if (!neighbour_node_exists_in_the_graph) { + fatal_error("Error writing pangraph to GFA: a neighbour of a node does " + "not exist in the graph"); + } + auto& second_debruijn_node = *debruijn_graph.nodes[second_debruijn_node_id]; auto second_node diff --git a/src/pangenome/pangraph.cpp b/src/pangenome/pangraph.cpp index 5f6e527f..390f73c3 100644 --- a/src/pangenome/pangraph.cpp +++ b/src/pangenome/pangraph.cpp @@ -4,7 +4,6 @@ #include #include #include -#include #include #include @@ -418,10 +417,14 @@ std::vector pangenome::Graph::get_node_closest_vcf_reference( const auto& sample_paths = sample->paths.at(node.prg_id); for (const auto& sample_path : sample_paths) { for (uint32_t i = 0; i != sample_path.size(); ++i) { - assert( - sample_path[i]->id < kmer_prg_with_coverage.kmer_prg->nodes.size() - and kmer_prg_with_coverage.kmer_prg->nodes[sample_path[i]->id] - != nullptr); + bool sample_path_node_is_valid = + (sample_path[i]->id < 
kmer_prg_with_coverage.kmer_prg->nodes.size()) and + (kmer_prg_with_coverage.kmer_prg->nodes[sample_path[i]->id] != nullptr); + if (!sample_path_node_is_valid) { + fatal_error("When getting the path closest to VCF reference, " + "a sample path node is not valid"); + } + kmer_prg_with_coverage.increment_forward_covg(sample_path[i]->id, 0); kmer_prg_with_coverage.increment_reverse_covg(sample_path[i]->id, 0); } @@ -531,18 +534,26 @@ void pangenome::Graph::save_mapped_read_strings( readfile.get_nth_read(coord[0]); start = (uint32_t)std::max((int32_t)coord[1] - buff, 0); end = std::min(coord[2] + (uint32_t)buff, (uint32_t)readfile.read.length()); + + bool read_coordinates_are_valid = + (coord[1] < coord[2]) && + (start <= coord[1]) && + (start <= readfile.read.length()) && + (coord[2] <= readfile.read.length()) && + (end >= coord[2]) && + (start < end); + if (!read_coordinates_are_valid) { + fatal_error("When saving mapped reads, read coordinates are not valid"); + } + outhandle << ">" << readfile.name << " pandora: " << coord[0] << " " << start << ":" << end; - if (coord[3]) + if (coord[3]) { outhandle << " + " << std::endl; - else + } + else { outhandle << " - " << std::endl; - assert(coord[1] < coord[2]); - assert(start <= coord[1]); - assert(start <= readfile.read.length()); - assert(coord[2] <= readfile.read.length()); - assert(end >= coord[2]); - assert(start < end); + } outhandle << readfile.read.substr(start, end - start) << std::endl; } outhandle.close(); diff --git a/src/pangenome/pannode.cpp b/src/pangenome/pannode.cpp index 0f7ce3aa..163f458a 100644 --- a/src/pangenome/pannode.cpp +++ b/src/pangenome/pannode.cpp @@ -1,18 +1,13 @@ #include -#include -#include #include #include #include "pangenome/pannode.h" #include "pangenome/pansample.h" #include "pangenome/panread.h" #include "minihit.h" -#include "utils.h" #include "localPRG.h" #include "OptionsAggregator.h" -#define assert_msg(x) !(std::cerr << "Assertion failed: " << x << std::endl) - using namespace 
pangenome; // constructors @@ -90,8 +85,12 @@ void pangenome::Node::add_path( const std::vector& kmp, const uint32_t& sample_id) { for (uint32_t i = 0; i != kmp.size(); ++i) { - assert(kmp[i]->id < kmer_prg_with_coverage.kmer_prg->nodes.size() - and kmer_prg_with_coverage.kmer_prg->nodes[kmp[i]->id] != nullptr); + bool kmer_node_is_valid = + (kmp[i]->id < kmer_prg_with_coverage.kmer_prg->nodes.size()) and + (kmer_prg_with_coverage.kmer_prg->nodes[kmp[i]->id] != nullptr); + if (!kmer_node_is_valid) { + fatal_error("When adding a path to a Pangraph Node, a kmer node is not valid"); + } kmer_prg_with_coverage.increment_forward_covg(kmp[i]->id, sample_id); kmer_prg_with_coverage.increment_reverse_covg(kmp[i]->id, sample_id); } @@ -119,11 +118,13 @@ void pangenome::Node::get_read_overlap_coordinates( hit_ptr->get_read_start_position() + hit_ptr->get_prg_path().length()); } - assert(end > start - or assert_msg("Error finding the read overlap coordinates for node " - << name << " and read " << read_ptr->id << " (the " << read_count - << "th on this node)" << std::endl - << "Found end " << end << " after found start " << start)); + bool read_coordinates_are_valid = end > start; + if (!read_coordinates_are_valid) { + fatal_error("Error finding the read overlap coordinates for node ", + name, " and read ", read_ptr->id, " (the ", read_count, + "th on this node). Found end ", end, " after found start ", start); + } + coordinate = { read_ptr->id, start, end, (*hit_ptr_iter)->is_forward() }; read_overlap_coordinates.push_back(coordinate); } @@ -225,7 +226,12 @@ std::set pangenome::Node::get_read_overlap_coordinates( + read_hit->get_prg_path().length()); } - assert(end > start); + bool read_coordinates_are_valid = end > start; + if (!read_coordinates_are_valid) { + fatal_error("Error finding the read overlap coordinates for node ", + name, " and read ", current_read->id, ". 
Found end ", end, + " after found start ", start); + } read_overlap_coordinates.emplace( current_read->id, start, end, (*read_hits_iter)->is_forward()); diff --git a/src/pangenome/panread.cpp b/src/pangenome/panread.cpp index 6a87e50e..ad90b147 100644 --- a/src/pangenome/panread.cpp +++ b/src/pangenome/panread.cpp @@ -1,5 +1,4 @@ #include -#include #include #include #include @@ -9,8 +8,6 @@ #include "pangenome/pannode.h" #include "minihits.h" -#define assert_msg(x) !(std::cerr << "Assertion failed: " << x << std::endl) - using namespace pangenome; Read::Read(const uint32_t i) @@ -54,7 +51,10 @@ void Read::add_hits( hits.erase(last, hits.end()); hits.shrink_to_fit(); - assert(hits.size() == before_size + cluster.size()); + bool hits_were_correctly_inserted = hits.size() == before_size + cluster.size(); + if (!hits_were_correctly_inserted) { + fatal_error("Error when adding hits to Pangraph read"); + } // add the orientation/node accordingly bool orientation = !cluster.empty() and (*cluster.begin())->is_forward(); @@ -108,8 +108,14 @@ std::pair Read::find_position( const std::vector& node_ids, const std::vector& node_orients, const uint16_t min_overlap) { - assert(node_ids.size() == node_orients.size()); - assert(not node_ids.empty()); + bool nodes_ids_and_orientations_are_valid = + (not node_ids.empty()) and + (node_ids.size() == node_orients.size()); + if (!nodes_ids_and_orientations_are_valid) { + fatal_error("When finding positions of nodes in a Pangraph read, the node " + "ids and orientations are not valid"); + } + uint32_t search_pos = 0; uint32_t found_pos = 0; diff --git a/src/pangenome/pansample.cpp b/src/pangenome/pansample.cpp index 5977950a..c335bcd1 100644 --- a/src/pangenome/pansample.cpp +++ b/src/pangenome/pansample.cpp @@ -1,14 +1,11 @@ #include #include #include -#include #include #include #include "pangenome/pansample.h" #include "pangenome/pannode.h" -#define assert_msg(x) !(std::cerr << "Assertion failed: " << x << std::endl) - using namespace 
pangenome; Sample::Sample(const std::string& s, const uint32_t& id) diff --git a/src/prg/path.cpp b/src/prg/path.cpp index 39f82bb1..6998ed06 100644 --- a/src/prg/path.cpp +++ b/src/prg/path.cpp @@ -1,9 +1,6 @@ -#include #include #include "prg/path.h" -#define assert_msg(x) !(std::cerr << "Assertion failed: " << x << std::endl) - uint32_t prg::Path::get_start() const { if (path.size() < 1) @@ -29,21 +26,25 @@ uint32_t prg::Path::length() const void prg::Path::add_end_interval(const Interval& i) { memoizedDirty = true; - assert(i.start >= get_end() - || assert_msg("tried to add interval starting at " - << i.start << " to end of path finishing at " << get_end())); + + bool interval_is_valid = i.start >= get_end(); + if (!interval_is_valid) { + fatal_error("Error when adding a new interval to a path"); + } + path.push_back(i); } std::vector prg::Path::nodes_along_path(const LocalPRG& localPrg) { // sanity check - assert( - (isMemoized == false - || (isMemoized == true && localPRGIdOfMemoizedLocalNodePath == localPrg.id)) - || assert_msg("Memoization bug: memoized a local node path for PRG with id" - << localPRGIdOfMemoizedLocalNodePath << " but PRG id " << localPrg.id - << " is also trying to use this memoized path")); + bool memoization_is_valid = (isMemoized == false) || + (isMemoized == true && localPRGIdOfMemoizedLocalNodePath == localPrg.id); + if (!memoization_is_valid) { + fatal_error("Error when getting nodes along PRG path: memoized a local node path " + "for PRG with id", localPRGIdOfMemoizedLocalNodePath, " but PRG id ", + localPrg.id, " is also trying to use this memoized path"); + } if (isMemoized == false || memoizedDirty == true) { // checks if we must do memoization @@ -70,7 +71,11 @@ prg::Path prg::Path::subpath(const uint32_t start, const uint32_t len) const { // function now returns the path starting at position start along the path, rather // than at position start on linear PRG, and for length len - assert(start + len <= length()); + bool 
parameters_are_valid = start + len <= length(); + if (!parameters_are_valid) { + fatal_error("Error when getting subpath from PRG path: given parameters are not valid"); + } + prg::Path p; std::deque d; uint32_t covered_length = 0; @@ -79,7 +84,12 @@ prg::Path prg::Path::subpath(const uint32_t start, const uint32_t len) const if ((covered_length <= start and covered_length + interval.length > start and p.path.empty()) or (covered_length == start and interval.length == 0 and p.path.empty())) { - assert(added_len == 0); + bool no_interval_has_been_added_yet = added_len == 0; + if (!no_interval_has_been_added_yet) { + fatal_error("Error when getting subpath from PRG path: an interval " + "has already been added before the correct first one"); + } + d = { Interval(interval.start + start - covered_length, std::min(interval.get_end(), interval.start + start - covered_length + len - added_len)) }; @@ -96,7 +106,13 @@ prg::Path prg::Path::subpath(const uint32_t start, const uint32_t len) const break; } } - assert(added_len == len); + + bool subpath_length_is_correct = added_len == len; + if (!subpath_length_is_correct) { + fatal_error("Error when getting subpath from PRG path: built the subpath with " + "the wrong length"); + } + return p; } @@ -269,12 +285,16 @@ std::istream& prg::operator>>(std::istream& in, prg::Path& p) prg::Path prg::get_union(const prg::Path& x, const prg::Path& y) { + bool parameters_are_valid = x < y; + if (!parameters_are_valid) { + fatal_error("Error when getting the union of two paths: first path: ", x, + " must come before second path: ", y); + } + auto xit = x.path.begin(); auto yit = y.path.begin(); prg::Path p; - assert(x < y); - if (x.get_end() < y.get_start() or x.is_branching(y)) { return p; } else if (x.path.empty()) { diff --git a/src/seq.cpp b/src/seq.cpp index 84abbc32..b4b11b14 100644 --- a/src/seq.cpp +++ b/src/seq.cpp @@ -1,6 +1,5 @@ #include #include -#include #include #include diff --git a/src/utils.cpp b/src/utils.cpp index 
93e2133b..18d59718 100644 --- a/src/utils.cpp +++ b/src/utils.cpp @@ -4,7 +4,6 @@ #include #include #include -#include #include #include #include diff --git a/src/vcfrecord.cpp b/src/vcfrecord.cpp index b8cf15d3..a3867e0b 100644 --- a/src/vcfrecord.cpp +++ b/src/vcfrecord.cpp @@ -1,4 +1,3 @@ -#include #include #include #include diff --git a/test/pangraph_test.cpp b/test/pangraph_test.cpp index d7831bb9..6bcd9ced 100644 --- a/test/pangraph_test.cpp +++ b/test/pangraph_test.cpp @@ -405,9 +405,10 @@ TEST(PangenomeGraph_add_hits_between_PRG_and_read, AddTheSameClusterTwice) true); // is the node_orientation was inserted in the read? // add the cluster again - EXPECT_DEATH(pg.add_hits_between_PRG_and_read( + ASSERT_EXCEPTION(pg.add_hits_between_PRG_and_read( prg_pointer_1, read_id_1, *cluster_pointer_1), - ""); + FatalRuntimeError, + "Error when adding hits to Pangraph read"); /* EXPECT_EQ(pg.nodes.size(), 1); //should not change diff --git a/test/panread_test.cpp b/test/panread_test.cpp index cda38565..bfd87dd0 100644 --- a/test/panread_test.cpp +++ b/test/panread_test.cpp @@ -73,7 +73,8 @@ TEST(ReadAddHits, AddClusterSecondTime_DeathAndReadHitsNotChanged) auto local_prg_ptr { std::make_shared(prg_id, "four", "") }; PanNodePtr pan_node = make_shared(local_prg_ptr); read.add_hits(pan_node, cluster); - EXPECT_DEATH(read.add_hits(pan_node, cluster), ""); + ASSERT_EXCEPTION(read.add_hits(pan_node, cluster), FatalRuntimeError, + "Error when adding hits to Pangraph read"); EXPECT_EQ((uint)1, read.get_hits_as_unordered_map()[prg_id].size()); } diff --git a/test/path_test.cpp b/test/path_test.cpp index 9a6b49c3..fbbbe334 100644 --- a/test/path_test.cpp +++ b/test/path_test.cpp @@ -4,6 +4,7 @@ #include "prg/path.h" #include #include +#include "test_helpers.h" typedef prg::Path Path; using namespace std; @@ -47,7 +48,8 @@ TEST(PathTest, add_end_interval) p.add_end_interval(Interval(6, 9)); d.push_back(Interval(6, 9)); EXPECT_ITERABLE_EQ(vector, d, p.getPath()); - 
EXPECT_DEATH(p.add_end_interval(Interval(0, 1)), ""); + ASSERT_EXCEPTION(p.add_end_interval(Interval(0, 1)), FatalRuntimeError, + "Error when adding a new interval to a path"); } TEST(PathTest, subpath) @@ -350,5 +352,6 @@ TEST(PathTest, get_union) // wrong way round d2 = { Interval(0, 0) }; p2.initialize(d2); - EXPECT_DEATH(get_union(p1, p2), ""); + ASSERT_EXCEPTION(get_union(p1, p2), FatalRuntimeError, + "Error when getting the union of two paths"); } From 173a40aec76568a980189af249054e106be72520 Mon Sep 17 00:00:00 2001 From: Leandro Ishi Date: Fri, 5 Feb 2021 15:14:11 -0400 Subject: [PATCH 13/37] WIP: refactoring asserts --- src/localPRG.cpp | 150 +++++++++++++++++++++++++++-------------------- 1 file changed, 86 insertions(+), 64 deletions(-) diff --git a/src/localPRG.cpp b/src/localPRG.cpp index bb7ee093..9fcb51b0 100644 --- a/src/localPRG.cpp +++ b/src/localPRG.cpp @@ -13,8 +13,6 @@ #include "fastaq.h" #include "Maths.h" -#define assert_msg(x) !(std::cerr << "Assertion failed: " << x << std::endl) - bool LocalPRG::do_path_memoization_in_nodes_along_path_method = false; LocalPRG::LocalPRG(uint32_t id, const std::string& name, const std::string& seq) @@ -1252,24 +1250,53 @@ void LocalPRG:: std::string alt; bool found_new_site = false; - while (!refpath.back()->outNodes.empty() or refpath.size() > 1) { - if (refpath.back()->id < samplepath.back()->id) { - bool ref_index_is_valid = rpath.size() > ref_i; - if (!ref_index_is_valid) { - fatal_error("Error when genotyping using max likelihood path: ref index " - "is not valid"); + // functions that help with some checks - lambdas for easyness + const auto check_if_ref_index_is_valid = [&]() { + bool ref_index_is_valid = rpath.size() > ref_i; + if (!ref_index_is_valid) { + fatal_error("Error when genotyping using max likelihood path: ref index " + "is not valid"); + } + }; + const auto check_if_sample_id_is_valid = [&]() { + bool sample_id_is_valid = sample_path.size() > sample_id; + if (!sample_id_is_valid) { + 
fatal_error("Error when genotyping using max likelihood path: sample " + "is not valid"); + } + }; + + // function that helps preparing for next iteration in the following while - lambdas for easyness + const auto prepare_next_iteration = [&](uint32_t &pos) { + refpath.erase(refpath.begin(), refpath.end() - 1); + if (refpath.back()->id != prg.nodes.size() - 1) { + const bool reference_path_is_empty + = refpath.empty(); // NB: the previous similar check refers to rpath, not refpath + if (reference_path_is_empty) { + fatal_error("Error when genotyping using max likelihood path: reference path is empty"); } + check_if_ref_index_is_valid(); + check_if_sample_id_is_valid(); + + ref = ""; + alt = ""; + pos += refpath.back()->pos.length; + refpath.push_back(rpath[ref_i]); + ref_i++; + samplepath.erase(samplepath.begin(), samplepath.end() - 1); + samplepath.push_back(sample_path[sample_id]); + sample_id++; + } + }; + while (!refpath.back()->outNodes.empty() or refpath.size() > 1) { + if (refpath.back()->id < samplepath.back()->id) { + check_if_ref_index_is_valid(); refpath.push_back(rpath[ref_i]); found_new_site = true; ref_i++; } else if (samplepath.back()->id < refpath.back()->id) { - bool sample_id_is_valid = sample_path.size() > sample_id; - if (!sample_id_is_valid) { - fatal_error("Error when genotyping using max likelihood path: sample " - "is not valid"); - } - + check_if_sample_id_is_valid(); samplepath.push_back(sample_path[sample_id]); found_new_site = true; sample_id++; @@ -1295,36 +1322,11 @@ void LocalPRG:: for (uint32_t j = 1; j < refpath.size() - 1; ++j) { pos += refpath[j]->pos.length; } - refpath.erase(refpath.begin(), refpath.end() - 1); - if (refpath.back()->id != prg.nodes.size() - 1) { - ref = ""; - alt = ""; - assert(not refpath.empty()); - pos += refpath.back()->pos.length; - assert(rpath.size() > ref_i); - refpath.push_back(rpath[ref_i]); - ref_i++; - samplepath.erase(samplepath.begin(), samplepath.end() - 1); - assert(sample_path.size() > 
sample_id); - samplepath.push_back(sample_path[sample_id]); - sample_id++; - } + + prepare_next_iteration(pos); pos_to = pos; } else { - refpath.erase(refpath.begin(), refpath.end() - 1); - if (refpath.back()->id != prg.nodes.size() - 1) { - ref = ""; - alt = ""; - assert(not refpath.empty()); - pos_to += refpath.back()->pos.length; - assert(rpath.size() > ref_i); - refpath.push_back(rpath[ref_i]); - ref_i++; - samplepath.erase(samplepath.begin(), samplepath.end() - 1); - assert(sample_path.size() > sample_id); - samplepath.push_back(sample_path[sample_id]); - sample_id++; - } + prepare_next_iteration(pos_to); } } vcf.set_sample_gt_to_ref_allele_for_records_in_the_interval( @@ -1365,13 +1367,25 @@ std::vector LocalPRG::find_alt_path( ref_added += ref_path[pos_along_ref_path]->pos.length; pos_along_ref_path++; } - assert(pos_along_ref_path < ref_path.size()); + + // TODO: change this bool variable name to a more meaningful one + bool pos_along_ref_path_less_than_ref_path_size = pos_along_ref_path < ref_path.size(); + if (!pos_along_ref_path_less_than_ref_path_size) { + fatal_error("Error finding alternative path: pos along ref path is not less " + "than ref path size"); + } auto ref_node_to_find = ref_path[pos_along_ref_path]; // find an alt path with the required sequence if (alt_path.empty() and not ref_path.empty() and ref_path[0]->pos.length == 0) alt_path.push_back(ref_path[0]); - assert(!alt_path.empty()); + + bool we_have_found_alt_paths = !alt_path.empty(); + if (!we_have_found_alt_paths) { + fatal_error("Error finding alternative path: no alternative paths were found " + "but we should have found at least one"); + } + for (const auto& m : alt_path.back()->outNodes) { paths_in_progress.push_back({ m }); } @@ -1406,8 +1420,9 @@ std::vector LocalPRG::find_alt_path( } } } - assert(true or assert_msg("Should have found an alt path!!")); - return alt_path; // this never happens + + fatal_error("Error finding alternative path: no alternative paths were found " + 
"but we should have found at least one"); } uint32_t LocalPRG::get_number_of_bases_in_local_path_before_a_given_position( @@ -1470,16 +1485,11 @@ LocalPRG::get_forward_and_reverse_kmer_coverages_in_range( const std::vector& local_path, const uint32_t& range_pos_start, const uint32_t& range_pos_end, const uint32_t& sample_id) const { - assert(kmer_path.size() - > 1); // this is an assert because it is the programmers responsibility to - // ensure that the kmer_path given to this function has at least size 1 - // TODO: this assert could be removed if we represent std::vector as a - // concept (class) in such a way that this class could only be constructed if given - // a large enough kmer_path (or whatever condition to build a correct kmer_path) - // TODO: the existence of this class would transfer the responsibility of having a - // correct kmer_path to its constructor, instead of here - // TODO: kmer_path is used in lots of places and there are some hard-coded logic - // about it, it is worth upgrading it to a class, this will be done later + bool kmer_path_is_valid = kmer_path.size() > 1; + if (!kmer_path_is_valid) { + fatal_error("Error when geting forward and reverse kmer coverages: kmer path " + "is not valid"); + } uint32_t starting_position_of_first_non_trivial_kmer_in_kmer_path = kmer_path[1] @@ -1519,10 +1529,14 @@ LocalPRG::get_forward_and_reverse_kmer_coverages_in_range( and number_of_bases_in_local_path_which_were_already_considered < range_pos_end; if (is_inside_the_given_range) { - assert( - current_kmer_node->id < kmer_graph_with_coverage.kmer_prg->nodes.size() - and kmer_graph_with_coverage.kmer_prg->nodes[current_kmer_node->id] - != nullptr); + bool kmer_node_is_valid = + (current_kmer_node->id < kmer_graph_with_coverage.kmer_prg->nodes.size()) + and (kmer_graph_with_coverage.kmer_prg->nodes[current_kmer_node->id] != nullptr); + if (!kmer_node_is_valid) { + fatal_error("Error when geting forward and reverse kmer coverages: found " + "an invalid kmer 
node"); + } + forward_coverages.push_back( kmer_graph_with_coverage.get_forward_covg(current_kmer_node->id, sample_id)); reverse_coverages.push_back( @@ -1545,9 +1559,12 @@ void LocalPRG::add_sample_covgs_to_vcf(VCF& vcf, const KmerGraphWithCoverage& kg const std::vector& ref_path, const std::string& sample_name, const uint32_t& sample_id) const { - BOOST_LOG_TRIVIAL(debug) << "Update VCF with sample covgs"; + bool prg_is_empty = prg.nodes.empty(); + if (prg_is_empty) { + fatal_error("Error when adding sample coverages to VCF: PRG is empty"); + } - assert(!prg.nodes.empty()); // otherwise empty nodes -> segfault + BOOST_LOG_TRIVIAL(debug) << "Update VCF with sample covgs"; vcf.sort_records(); std::vector alt_path; @@ -1596,9 +1613,14 @@ void LocalPRG::add_sample_covgs_to_vcf(VCF& vcf, const KmerGraphWithCoverage& kg // if sample has alt path, we have the kmer path for this, but otherwise we will // need to work it out auto sample_it = find(vcf.samples.begin(), vcf.samples.end(), sample_name); - assert(sample_it != vcf.samples.end()); auto sample_index = distance(vcf.samples.begin(), sample_it); - assert((uint)sample_index != vcf.samples.size()); + + bool sample_is_valid = (sample_it != vcf.samples.end()) && + ((uint)sample_index != vcf.samples.size()); + if (!sample_is_valid) { + fatal_error("Error when adding sample coverages to VCF: sample is not valid"); + } + record.sampleIndex_to_sampleInfo[sample_index].set_coverage_information( all_forward_coverages, all_reverse_coverages); } From 3369882c6e955af646a5ba1fd46cd575a6d30f3b Mon Sep 17 00:00:00 2001 From: Leandro Ishi Date: Tue, 9 Feb 2021 01:28:27 -0400 Subject: [PATCH 14/37] Improving build process --- CMakeLists.txt | 11 +++++++---- cmake/Hunter/config.cmake | 12 ++++++------ 2 files changed, 13 insertions(+), 10 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index e750160a..bb517e51 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -14,7 +14,7 @@ set(PROJECT_NAME_STR pandora) 
project(${PROJECT_NAME_STR} VERSION "0.7.0" LANGUAGES C CXX) configure_file( include/version.h.in ${CMAKE_BINARY_DIR}/include/version.h ) - +# add or not feature to print the stack trace if(PRINT_STACKTRACE) message(STATUS "Printing meaningful stacktrace enabled, please have binutils-dev installed") add_compile_definitions(BACKWARD_HAS_BFD=1) @@ -23,8 +23,6 @@ else() set(BACKWARD_LIBRARIES "") endif() -# add a RELEASE_WITH_ASSERTS build type - TODO: FIX THIS -set(CMAKE_CXX_FLAGS_RELEASE_WITH_ASSERTS "-O3") # C++11 required include(CheckCXXCompilerFlag) @@ -41,6 +39,12 @@ endif () # default flags set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DBOOST_SYSTEM_NO_DEPRECATED -Wall -Wextra") +if(PRINT_STACKTRACE) + # -g is to add debug symbols, to make backtraces meaningful. + # it does not impact performance (see https://stackoverflow.com/a/39223245) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -g") +endif () + # compile with openmp only on Linux if (${CMAKE_SYSTEM_NAME} MATCHES "Darwin") set(MACOSX TRUE) @@ -122,7 +126,6 @@ file(GLOB_RECURSE SRC_FILES ${PROJECT_SOURCE_DIR}/include/*.h ${PROJECT_SOURCE_DIR}/include/*/*.hpp ${PROJECT_SOURCE_DIR}/include/*/*.h - ${PROJECT_SOURCE_DIR}/thirdparty/src/*.cpp ${PROJECT_SOURCE_DIR}/thirdparty/include/*.hpp ${PROJECT_SOURCE_DIR}/thirdparty/include/*.h diff --git a/cmake/Hunter/config.cmake b/cmake/Hunter/config.cmake index f661c65a..45f04b83 100644 --- a/cmake/Hunter/config.cmake +++ b/cmake/Hunter/config.cmake @@ -1,9 +1,9 @@ hunter_config( - Boost - VERSION - "1.62.0" - CONFIGURATION_TYPES - Debug + Boost + VERSION + "1.62.0" + CONFIGURATION_TYPES + Release ) hunter_config( @@ -11,5 +11,5 @@ hunter_config( VERSION "1.10.0" CONFIGURATION_TYPES - Debug + Release ) From 760b4d8fd9ad75c1ceee8535fafab7de5eacc09e Mon Sep 17 00:00:00 2001 From: Leandro Ishi Date: Tue, 9 Feb 2021 19:07:13 -0400 Subject: [PATCH 15/37] Changing build type to Release --- .travis.yml | 3 +-- Dockerfile | 2 +- 
scripts/portable_binary_builder/build_portable_binary_core.sh | 2 +- 3 files changed, 3 insertions(+), 4 deletions(-) diff --git a/.travis.yml b/.travis.yml index 72b12ab8..c0faeaee 100644 --- a/.travis.yml +++ b/.travis.yml @@ -19,7 +19,6 @@ include: apt: packages: - clang-format-8 - - zlib1g-dev jobs: fast_finish: true @@ -30,7 +29,7 @@ jobs: - if [ $? -ne 1 ]; then echo "Not all source and header files are formatted with clang-format"; exit 1; fi - stage: "Build and Test" env: - - BUILD_TYPE="Debug" + - BUILD_TYPE="Release" script: bash ci/script.sh stages: diff --git a/Dockerfile b/Dockerfile index e65b4ae6..10e24738 100644 --- a/Dockerfile +++ b/Dockerfile @@ -19,7 +19,7 @@ RUN apt update \ #============================================ # can override the build type with docker's --build-arg command # https://docs.docker.com/engine/reference/builder/#arg -ARG PANDORA_BUILD_TYPE="RELEASE_WITH_ASSERTS" +ARG PANDORA_BUILD_TYPE="Release" ENV PANDORA_DIR "/pandora/" COPY . $PANDORA_DIR diff --git a/scripts/portable_binary_builder/build_portable_binary_core.sh b/scripts/portable_binary_builder/build_portable_binary_core.sh index 26eca935..fa1c288d 100644 --- a/scripts/portable_binary_builder/build_portable_binary_core.sh +++ b/scripts/portable_binary_builder/build_portable_binary_core.sh @@ -14,7 +14,7 @@ yum install wget git binutils-devel -y cd io mkdir build_portable_executable cd build_portable_executable -cmake -DPRINT_STACKTRACE=True .. +cmake -DPRINT_STACKTRACE=True -DCMAKE_BUILD_TYPE=Release .. 
make VERBOSE=1 -j 4 ctest -VV From 5d5c8d71c512432b8b50dec0ad3a48d011b8a1fd Mon Sep 17 00:00:00 2001 From: Leandro Ishi Date: Tue, 9 Feb 2021 23:16:52 -0400 Subject: [PATCH 16/37] Removing the remaining asserts --- src/sampleinfo.cpp | 6 +++--- src/vcf.cpp | 6 +++--- src/vcfrecord.cpp | 6 +++--- test/sampleinfo_test.cpp | 6 ++++-- test/vcfrecord_test.cpp | 6 ++++-- 5 files changed, 17 insertions(+), 13 deletions(-) diff --git a/src/sampleinfo.cpp b/src/sampleinfo.cpp index 8047d6bf..48a7215e 100644 --- a/src/sampleinfo.cpp +++ b/src/sampleinfo.cpp @@ -274,9 +274,9 @@ std::string SampleInfo::to_string(bool genotyping_from_maximum_likelihood, bool only_one_flag_is_set = ((int)(genotyping_from_maximum_likelihood) + (int)(genotyping_from_compatible_coverage)) == 1; - // this will still remain an assert as it is responsibility of the dev to ensure - // this method is not called with the two flags set - assert(only_one_flag_is_set); + if (!only_one_flag_is_set) { + fatal_error("Error on stringifying VCF record sample info: incompatible genotyping options"); + } std::vector likelihoods_for_all_alleles = get_likelihoods_for_all_alleles(); diff --git a/src/vcf.cpp b/src/vcf.cpp index 4be52aa5..b778889f 100644 --- a/src/vcf.cpp +++ b/src/vcf.cpp @@ -520,9 +520,9 @@ std::string VCF::to_string(bool genotyping_from_maximum_likelihood, bool only_one_flag_is_set = ((int)(genotyping_from_maximum_likelihood) + (int)(genotyping_from_coverage)) == 1; - // this will still remain an assert as it is responsibility of the dev to ensure - // this method is not called with the two flags set - assert(only_one_flag_is_set); + if (!only_one_flag_is_set) { + fatal_error("Error on stringifying VCF record: incompatible genotyping options"); + } std::stringstream out; out << header(); diff --git a/src/vcfrecord.cpp b/src/vcfrecord.cpp index a3867e0b..a93b97d8 100644 --- a/src/vcfrecord.cpp +++ b/src/vcfrecord.cpp @@ -70,9 +70,9 @@ std::string VCFRecord::get_format( bool only_one_flag_is_set = 
((int)(genotyping_from_maximum_likelihood) + (int)(genotyping_from_coverage)) == 1; - // this will still remain an assert as it is responsibility of the dev to ensure - // this method is not called with the two flags set - assert(only_one_flag_is_set); + if (!only_one_flag_is_set) { + fatal_error("Error on getting format field from VCF record: incompatible genotyping options"); + } static std::vector format_for_genotyping_from_maximum_likelihood = { "GT", "MEAN_FWD_COVG", "MEAN_REV_COVG", "MED_FWD_COVG", "MED_REV_COVG", diff --git a/test/sampleinfo_test.cpp b/test/sampleinfo_test.cpp index f546df05..06f356fe 100644 --- a/test/sampleinfo_test.cpp +++ b/test/sampleinfo_test.cpp @@ -1126,12 +1126,14 @@ TEST_F(SampleInfoTest___get_genotype_from_coverage___Fixture, TEST_F(SampleInfoTest___Fixture, to_string___no_flags_set___expects_death) { - EXPECT_DEATH(default_sample_info.to_string(false, false), ""); + ASSERT_EXCEPTION(default_sample_info.to_string(false, false), FatalRuntimeError, + "Error on stringifying VCF record sample info: incompatible genotyping options"); } TEST_F(SampleInfoTest___Fixture, to_string___both_flags_set___expects_death) { - EXPECT_DEATH(default_sample_info.to_string(true, true), ""); + ASSERT_EXCEPTION(default_sample_info.to_string(true, true), FatalRuntimeError, + "Error on stringifying VCF record sample info: incompatible genotyping options"); } TEST_F(SampleInfoTest___Fixture, to_string___genotyping_from_maximum_likelihood) diff --git a/test/vcfrecord_test.cpp b/test/vcfrecord_test.cpp index 31707bcf..4b893036 100644 --- a/test/vcfrecord_test.cpp +++ b/test/vcfrecord_test.cpp @@ -319,14 +319,16 @@ TEST(VCFRecordTest, get_format___no_flags_set___expects_death) { VCF vcf = create_VCF_with_default_parameters(); VCFRecord vcf_record(&vcf); - EXPECT_DEATH(vcf_record.get_format(false, false), ""); + ASSERT_EXCEPTION(vcf_record.get_format(false, false), FatalRuntimeError, + "Error on getting format field from VCF record: incompatible genotyping 
options"); } TEST(VCFRecordTest, get_format___both_flags_set___expects_death) { VCF vcf = create_VCF_with_default_parameters(); VCFRecord vcf_record(&vcf); - EXPECT_DEATH(vcf_record.get_format(true, true), ""); + ASSERT_EXCEPTION(vcf_record.get_format(true, true), FatalRuntimeError, + "Error on getting format field from VCF record: incompatible genotyping options"); } TEST(VCFRecordTest, get_format___genotyping_from_maximum_likelihood) From 73dea549f5e89b260ca9991554ee8259bfb2a3ed Mon Sep 17 00:00:00 2001 From: Leandro Ishi Date: Sun, 14 Feb 2021 15:08:48 -0400 Subject: [PATCH 17/37] Updating Changelog --- CHANGELOG.md | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 30b1a401..3c62eb71 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,15 @@ project adheres to ## [Unreleased] +Some improvements to the build process: +* We now use the [Hunter](https://github.com/cpp-pm/hunter) package manager to remove the requirement of having ZLIB and +Boost system-wide installations; +* We now provide a portable precompiled binary as another option to run `pandora` easily; +* We refactored asserts into exceptions, and now `pandora` can be compiled correctly in the `Release` mode, and the build +process will thus be able to create a more optimized binary; +* `pandora` can now provide a meaningful stack trace in case of errors, to facilitate debugging + (need to pass flag `-DPRINT_STACKTRACE` to `CMake`); + ## [v0.7.0] There is a significant amount of changes to the project between version From a6511c2099078fb35439f21ff6f04d4f33113ac9 Mon Sep 17 00:00:00 2001 From: Leandro Ishi Date: Sat, 27 Feb 2021 03:48:50 -0400 Subject: [PATCH 18/37] Updating precompiled binary link --- README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index d4ddb82f..7fcc99bf 100644 --- a/README.md +++ b/README.md @@ -83,13 +83,13 @@ can be found [here](https://gcc.gnu.org/onlinedocs/gfortran/OpenMP.html). 
* **Download**: ``` - wget "https://www.dropbox.com/s/74ptrnk4k5qcc6o/pandora-linux-precompiled_v0.8.0_beta?dl=1" -O pandora-linux-precompiled_v0.8.0_beta + wget "https://www.dropbox.com/s/ltq2gti9t6wav1j/pandora-linux-precompiled_v0.8.1_beta?dl=1" -O pandora-linux-precompiled_v0.8.1_beta ``` * **TODO: updated to a github link when we make the release;** * **Running**: ``` -chmod +x pandora-linux-precompiled_v0.8.0_beta -./pandora-linux-precompiled_v0.8.0_beta -h +chmod +x pandora-linux-precompiled_v0.8.1_beta +./pandora-linux-precompiled_v0.8.1_beta -h ``` * **Compatibility**: This precompiled binary works on pretty much any glibc-2.12-or-later-based x86 and x86-64 Linux distribution From ecc18de51a7f40288bc68c67d003686b7ead3ec5 Mon Sep 17 00:00:00 2001 From: Leandro Ishi Date: Tue, 2 Mar 2021 11:06:19 -0300 Subject: [PATCH 19/37] Updating Changelog --- CHANGELOG.md | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3c62eb71..ad9c9033 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,14 +9,23 @@ project adheres to ## [Unreleased] -Some improvements to the build process: -* We now use the [Hunter](https://github.com/cpp-pm/hunter) package manager to remove the requirement of having ZLIB and -Boost system-wide installations; -* We now provide a portable precompiled binary as another option to run `pandora` easily; -* We refactored asserts into exceptions, and now `pandora` can be compiled correctly in the `Release` mode, and the build -process will thus be able to create a more optimized binary; -* `pandora` can now provide a meaningful stack trace in case of errors, to facilitate debugging +Improvements to the build process and performance. + +### Added +- We now provide a script to build a portable precompiled binary as another option to run `pandora` easily. 
+ The portable binary is now provided with the release; +- `pandora` can now provide a meaningful stack trace in case of errors, to facilitate debugging (need to pass flag `-DPRINT_STACKTRACE` to `CMake`); + +### Changed +- We now use the [Hunter](https://github.com/cpp-pm/hunter) package manager, removing the requirement of having ZLIB and + Boost system-wide installations; + +### Fixed +- We refactored asserts into exceptions and errors, and now `pandora` can be compiled correctly in the `Release` mode. + The build process is thus be able to create a more optimized binary, resulting in improved performance; +- Refactored thirdparty libraries into a directory of their own; + ## [v0.7.0] From f7146d660e3a34b06f7826a7beec291a5c312b1c Mon Sep 17 00:00:00 2001 From: Leandro Ishi Date: Tue, 2 Mar 2021 11:15:09 -0300 Subject: [PATCH 20/37] Updating README.md --- README.md | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 7fcc99bf..f021dcb1 100644 --- a/README.md +++ b/README.md @@ -110,7 +110,7 @@ chmod +x pandora-linux-precompiled_v0.8.1_beta ![Docker Cloud Build Status](https://img.shields.io/docker/cloud/build/rmcolq/pandora) -We highly recommend that you download a containerized image of Pandora. +You can also download a containerized image of Pandora. Pandora is hosted on Dockerhub and images can be downloaded with the command: @@ -128,6 +128,8 @@ NB For consistency, we no longer maintain images on singularity hub. ### Installation from source +This is the hardest way to install `pandora`, but that yields the most optimised binary. + Requirements: - A Unix or Mac OS, with a C++11 compiler toolset (e.g. `g++`, `ld`, `make`, `ctest`, etc), `cmake`, `git` and `wget`. @@ -138,15 +140,13 @@ git clone --single-branch https://github.com/rmcolq/pandora.git --recursive cd pandora mkdir -p build cd build -cmake .. +cmake -DCMAKE_BUILD_TYPE=Release .. 
make -j4 ctest -VV ``` * If you want to produce meaningful stack traces in case `pandora` errors out, `binutils-dev` must be installed and the - `cmake` command above must be changed to: - -`cmake -DPRINT_STACKTRACE=True ..` + `cmake` must receive this additional parameter: `-DPRINT_STACKTRACE=True`. ## Usage From 3c29aacb9c2169ce95703aaac3008cc210758da1 Mon Sep 17 00:00:00 2001 From: Leandro Ishi Date: Tue, 2 Mar 2021 15:16:35 -0300 Subject: [PATCH 21/37] Refactoring exit()s into fatal_error()s --- src/compare_main.cpp | 4 +--- src/get_vcf_ref_main.cpp | 3 +-- src/index.cpp | 4 +--- src/kmergraph.cpp | 3 +-- src/kmergraphwithcoverage.cpp | 11 ++++------- src/localPRG.cpp | 24 +++++++++++------------- src/localgraph.cpp | 3 +-- src/seq2path_main.cpp | 10 +++------- src/utils.cpp | 3 +-- src/walk_main.cpp | 3 +-- 10 files changed, 25 insertions(+), 43 deletions(-) diff --git a/src/compare_main.cpp b/src/compare_main.cpp index 4f82945c..1267085d 100644 --- a/src/compare_main.cpp +++ b/src/compare_main.cpp @@ -198,9 +198,7 @@ std::vector> load_read_index( } } } else { - BOOST_LOG_TRIVIAL(error) - << "Unable to open read index file " << read_index_fpath; - exit(1); + fatal_error("Unable to open read index file ", read_index_fpath); } BOOST_LOG_TRIVIAL(info) << "Finished loading " << samples.size() << " samples from read index"; diff --git a/src/get_vcf_ref_main.cpp b/src/get_vcf_ref_main.cpp index 6daf4359..973c1a86 100644 --- a/src/get_vcf_ref_main.cpp +++ b/src/get_vcf_ref_main.cpp @@ -69,8 +69,7 @@ int pandora_get_vcf_ref(GetVcfRefOptions const& opt) if (!found) { if (npath.empty()) { - BOOST_LOG_TRIVIAL(error) << "PRG is empty"; - exit(1); + fatal_error("PRG is empty"); } BOOST_LOG_TRIVIAL(debug) << "Using top path as ref for " << prg_ptr->name; diff --git a/src/index.cpp b/src/index.cpp index 320b8e95..98cbde7a 100644 --- a/src/index.cpp +++ b/src/index.cpp @@ -124,9 +124,7 @@ void Index::load(const fs::path& indexfile) } } } else { - BOOST_LOG_TRIVIAL(warning) << 
"Unable to open index file " << indexfile - << ". Does it exist? Have you run pandora index?"; - exit(1); + fatal_error("Unable to open index file ", indexfile, ". Does it exist? Have you run pandora index?"); } if (minhash.size() <= 1) { diff --git a/src/kmergraph.cpp b/src/kmergraph.cpp index cf2fcc33..af427eb4 100644 --- a/src/kmergraph.cpp +++ b/src/kmergraph.cpp @@ -276,8 +276,7 @@ void KmerGraph::save(const fs::path& filepath, const std::shared_ptr l } handle.close(); } else { - BOOST_LOG_TRIVIAL(error) << "Unable to open kmergraph file " << filepath; - std::exit(EXIT_FAILURE); + fatal_error("Unable to open kmergraph file ", filepath); } } diff --git a/src/kmergraphwithcoverage.cpp b/src/kmergraphwithcoverage.cpp index 2cb1a02b..241c8eb1 100644 --- a/src/kmergraphwithcoverage.cpp +++ b/src/kmergraphwithcoverage.cpp @@ -300,9 +300,8 @@ float KmerGraphWithCoverage::find_max_path(std::vector& maxpath, prev_node = prev_node_along_maxpath[prev_node]; if (maxpath.size() > 1000000) { - BOOST_LOG_TRIVIAL(warning) << "I think I've found an infinite loop - is " - "something wrong with this kmergraph?"; - exit(1); + fatal_error("I think I've found an infinite loop - is " + "something wrong with this kmergraph?"); } } @@ -417,8 +416,7 @@ void KmerGraphWithCoverage::save( } handle.close(); } else { - BOOST_LOG_TRIVIAL(error) << "Unable to open kmergraph file " << filepath; - std::exit(EXIT_FAILURE); + fatal_error("Unable to open kmergraph file ", filepath); } } @@ -571,7 +569,6 @@ void KmerGraphWithCoverage::load(const std::string& filepath) } } } else { - fatal_error("Error reading GFA: unable to open kmergraph file", filepath); - exit(1); + fatal_error("Error reading GFA: unable to open kmergraph file: ", filepath); } } \ No newline at end of file diff --git a/src/localPRG.cpp b/src/localPRG.cpp index 9fcb51b0..94544100 100644 --- a/src/localPRG.cpp +++ b/src/localPRG.cpp @@ -311,16 +311,15 @@ LocalPRG::build_graph( // first into the invariant region coming before it, 
all // its alleles and then the rest of the PRG. if (v.size() < (uint32_t)4) { - BOOST_LOG_TRIVIAL(warning) - << "In conversion from linear localPRG string to graph, splitting the " + fatal_error( + "In conversion from linear localPRG string to graph, splitting the " "string by " "the next var site resulted in the wrong number of intervals. " "Please check that site numbers " "are flanked by a space on either side. Or perhaps ordering of " "numbers in GFA is irregular?! " - "Size of partition based on site " - << next_site << " is " << v.size() << "\nLocalPRG name: " << name; - std::exit(-1); + "Size of partition based on site ", + next_site, " is ", v.size(), "\nLocalPRG name: ", name); } next_site += 2; // update next site // add first interval (should be the invariable seq, and thus composed only by @@ -329,19 +328,18 @@ LocalPRG::build_graph( v[0].start, v[0].length); // gets the sequence of the invariable part if (!(isalpha_string( s))) { // verify that the invariable part is indeed invariable - BOOST_LOG_TRIVIAL(warning) - << "In conversion from linear localPRG string to graph, splitting the " + fatal_error( + "In conversion from linear localPRG string to graph, splitting the " "string by " "the next var site resulted in the first interval being non " "alphabetic. Please check that site " "numbers are flanked by a space on either side. Or perhaps ordering " "of numbers in GFA is " - "irregular?! After splitting by site " - << next_site - << " do not have alphabetic sequence before " - "var site: " - << v[0]; - std::exit(-1); + "irregular?! 
After splitting by site ", + next_site, + " do not have alphabetic sequence before " + "var site: ", + v[0]); } prg.add_node( next_id, s, v[0]); // adds the invariable part as a node in the graph diff --git a/src/localgraph.cpp b/src/localgraph.cpp index 6574cc68..f63b452c 100644 --- a/src/localgraph.cpp +++ b/src/localgraph.cpp @@ -123,8 +123,7 @@ void LocalGraph::read_gfa(const std::string& filepath) } } } else { - BOOST_LOG_TRIVIAL(error) << "Unable to open GFA file " << filepath; - std::exit(1); + fatal_error("Unable to open GFA file: ", filepath); } } diff --git a/src/seq2path_main.cpp b/src/seq2path_main.cpp index 80d45bd2..0d544ec8 100644 --- a/src/seq2path_main.cpp +++ b/src/seq2path_main.cpp @@ -68,8 +68,7 @@ int pandora_seq2path(Seq2PathOptions const& opt) load_PRG_kmergraphs(prgs, opt.window_size, opt.kmer_size, opt.prgfile); if (prgs.empty()) { - BOOST_LOG_TRIVIAL(error) << "PRG is empty!"; - exit(1); + fatal_error("PRG is empty!"); } if (opt.top) { @@ -124,9 +123,7 @@ int pandora_seq2path(Seq2PathOptions const& opt) rev_complement(seq_handle.read)); } } else { - BOOST_LOG_TRIVIAL(error) - << "Different numbers of PRGs and reads, exiting"; - exit(1); + fatal_error("Different numbers of PRGs and reads"); } if (opt.flag) { if (npath.empty() and seq_handle.read.size() < 300) { @@ -145,8 +142,7 @@ int pandora_seq2path(Seq2PathOptions const& opt) } seq_handle.close(); } else { - BOOST_LOG_TRIVIAL(error) << "One of --top, --bottom or --input must be given"; - exit(1); + fatal_error("One of --top, --bottom or --input must be given"); } return 0; diff --git a/src/utils.cpp b/src/utils.cpp index 18d59718..80744557 100644 --- a/src/utils.cpp +++ b/src/utils.cpp @@ -127,8 +127,7 @@ void read_prg_file( prgs.push_back(s); id++; } else { - BOOST_LOG_TRIVIAL(error) << "Failed to make LocalPRG for " << fh.name; - exit(1); + fatal_error("Failed to make LocalPRG for ", fh.name); } } BOOST_LOG_TRIVIAL(debug) << "Number of LocalPRGs read: " << prgs.size(); diff --git 
a/src/walk_main.cpp b/src/walk_main.cpp index 0785b11e..702af3cb 100644 --- a/src/walk_main.cpp +++ b/src/walk_main.cpp @@ -78,8 +78,7 @@ int pandora_walk(WalkOptions const& opt) } } } else { - std::cerr << "One of --top, --bottom or --input must be given" << std::endl; - exit(1); + fatal_error("One of --top, --bottom or --input must be given"); } return 0; } From 099bd138067820a57d5e881e03d686c241180b42 Mon Sep 17 00:00:00 2001 From: Leandro Ishi Date: Tue, 2 Mar 2021 16:40:13 -0300 Subject: [PATCH 22/37] Make GATB a git submodule instead of downloading it during compilation Closes #264 --- .gitmodules | 3 +++ CMakeLists.txt | 2 +- ext/gatb.cmake | 4 +--- thirdparty/gatb-core | 1 + 4 files changed, 6 insertions(+), 4 deletions(-) create mode 160000 thirdparty/gatb-core diff --git a/.gitmodules b/.gitmodules index 0a7daea7..3c9fb9ed 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,3 +1,6 @@ [submodule "cgranges"] path = cgranges url = https://github.com/lh3/cgranges +[submodule "thirdparty/gatb-core"] + path = thirdparty/gatb-core + url = https://github.com/leoisl/gatb-core diff --git a/CMakeLists.txt b/CMakeLists.txt index bb517e51..543a264c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -71,7 +71,7 @@ set(ZLIB_LIBRARY ZLIB::ZLIB) # INSTALL GATB include(${PROJECT_SOURCE_DIR}/ext/gatb.cmake) include_directories(SYSTEM - ${gatb_source_dir}/gatb-core/src + ${gatb_source_dir}/src ${gatb_binary_dir}/include ) link_directories(${gatb_binary_dir}/lib) diff --git a/ext/gatb.cmake b/ext/gatb.cmake index 7d048a3e..cc635935 100644 --- a/ext/gatb.cmake +++ b/ext/gatb.cmake @@ -6,10 +6,8 @@ SET (GATB_CORE_EXCLUDE_TESTS 1) SET (GATB_CORE_INCLUDE_EXAMPLES 1) ExternalProject_Add(gatb - GIT_REPOSITORY https://github.com/leoisl/gatb-core - GIT_TAG "1.4.1_zlib" + SOURCE_DIR "${CMAKE_SOURCE_DIR}/thirdparty/gatb-core/gatb-core" PREFIX "${CMAKE_CURRENT_BINARY_DIR}/gatb" - SOURCE_SUBDIR gatb-core CMAKE_ARGS -DKSIZE_LIST=32 -DZLIB_ROOT=${ZLIB_ROOT} INSTALL_COMMAND "") diff --git 
a/thirdparty/gatb-core b/thirdparty/gatb-core new file mode 160000 index 00000000..75cd4921 --- /dev/null +++ b/thirdparty/gatb-core @@ -0,0 +1 @@ +Subproject commit 75cd4921dc8a6f556f62baa0f70f60af73b3413e From b2cbb4fb8e9be6d8fcf9ab9f012945db018c90f8 Mon Sep 17 00:00:00 2001 From: Leandro Ishi Date: Tue, 2 Mar 2021 18:34:06 -0300 Subject: [PATCH 23/37] Making condition-related bool variables const --- include/Maths.h | 2 +- include/sampleinfo.h | 8 +-- include/utils.h | 1 - src/de_bruijn/graph.cpp | 10 ++-- src/denovo_discovery/local_assembly.cpp | 8 +-- src/fastaq.cpp | 6 +-- src/interval.cpp | 2 +- src/kmergraphwithcoverage.cpp | 8 +-- src/localPRG.cpp | 72 ++++++++++++------------- src/localgraph.cpp | 2 +- src/minihit.cpp | 2 +- src/minimizer.cpp | 2 +- src/noise_filtering.cpp | 16 +++--- src/pangenome/pangraph.cpp | 22 ++++---- src/pangenome/pannode.cpp | 6 +-- src/pangenome/panread.cpp | 6 +-- src/prg/path.cpp | 12 ++--- src/sampleinfo.cpp | 26 ++++----- src/seq.cpp | 4 +- src/vcf.cpp | 46 ++++++++-------- src/vcfrecord.cpp | 16 +++--- 21 files changed, 138 insertions(+), 139 deletions(-) diff --git a/include/Maths.h b/include/Maths.h index 46c0b961..79696453 100644 --- a/include/Maths.h +++ b/include/Maths.h @@ -35,7 +35,7 @@ class Maths { typename std::iterator_traits::difference_type difference_type; difference_type number_of_elements = std::distance(begin, end); - bool no_elements_in_container = number_of_elements == 0; + const bool no_elements_in_container = number_of_elements == 0; if (no_elements_in_container) { return get_default_value(); } diff --git a/include/sampleinfo.h b/include/sampleinfo.h index 32e528e7..2aa0286d 100644 --- a/include/sampleinfo.h +++ b/include/sampleinfo.h @@ -28,7 +28,7 @@ class SampleInfo { , exp_depth_covg_for_this_sample( genotyping_options->get_sample_index_to_exp_depth_covg()[sample_index]) { - bool at_least_one_allele = number_of_alleles >= 1; + const bool at_least_one_allele = number_of_alleles >= 1; if 
(!at_least_one_allele) { fatal_error("Error on creating VCF Sample INFOs: the VCF record has no alleles"); } @@ -387,7 +387,7 @@ template class SampleIndexToSampleInfoTemplate { out << sample_info.to_string( genotyping_from_maximum_likelihood, genotyping_from_coverage); - bool is_the_last_sample_info = sample_info_index == this->size() - 1; + const bool is_the_last_sample_info = sample_info_index == this->size() - 1; if (not is_the_last_sample_info) out << "\t"; } @@ -414,7 +414,7 @@ template class SampleIndexToSampleInfoTemplate { virtual inline void solve_incompatible_gt_conflict_with( SampleIndexToSampleInfoTemplate& other) { - bool same_number_of_samples = this->size() == other.size(); + const bool same_number_of_samples = this->size() == other.size(); if(!same_number_of_samples) { fatal_error("Error solving genotype conflicts between two records: " "number of samples is not consistent between both records"); @@ -462,7 +462,7 @@ template class SampleIndexToSampleInfoTemplate { virtual inline void merge_other_samples_infos_into_this( const SampleIndexToSampleInfoTemplate& other) { - bool same_number_of_samples = this->size() == other.size(); + const bool same_number_of_samples = this->size() == other.size(); if(!same_number_of_samples) { fatal_error("Error merging two records: " "number of samples is not consistent between both records"); diff --git a/include/utils.h b/include/utils.h index 1866725c..9b7c808b 100644 --- a/include/utils.h +++ b/include/utils.h @@ -100,7 +100,6 @@ uint32_t pangraph_from_read_file(const std::string&, std::shared_ptr>&, PanNode*, uint32_t, float); diff --git a/src/de_bruijn/graph.cpp b/src/de_bruijn/graph.cpp index 1a2adbe5..4f9091c8 100644 --- a/src/de_bruijn/graph.cpp +++ b/src/de_bruijn/graph.cpp @@ -78,7 +78,7 @@ bool edge_is_valid(OrientedNodePtr from, OrientedNodePtr to) // Add directed edge between from and to void debruijn::Graph::add_edge(OrientedNodePtr from, OrientedNodePtr to) { - bool nodes_are_valid = from.first != 
nullptr and to.first != nullptr; + const bool nodes_are_valid = from.first != nullptr and to.first != nullptr; if(!nodes_are_valid) { fatal_error("Error adding edge to de Bruijn Graph: from or to node is invalid"); } @@ -209,8 +209,8 @@ std::set> debruijn::Graph::get_unitigs() const auto& id = node_entry.first; const auto& node_ptr = node_entry.second; - bool node_seen = seen.find(id) != seen.end(); - bool at_branch + const bool node_seen = seen.find(id) != seen.end(); + const bool at_branch = (node_ptr->out_nodes.size() > 1) or (node_ptr->in_nodes.size() > 1); if (node_seen or at_branch) continue; @@ -227,8 +227,8 @@ std::set> debruijn::Graph::get_unitigs() // Extend a dbg path on either end until reaching a branch point void debruijn::Graph::extend_unitig(std::deque& tig) { - bool tig_is_empty = (tig.empty()); - bool node_is_isolated = (tig.size() == 1 + const bool tig_is_empty = (tig.empty()); + const bool node_is_isolated = (tig.size() == 1 and (nodes[tig.back()]->out_nodes.size() + nodes[tig.back()]->in_nodes.size()) == 0); if (tig_is_empty or node_is_isolated) { diff --git a/src/denovo_discovery/local_assembly.cpp b/src/denovo_discovery/local_assembly.cpp index 9cc08f62..25d4196d 100644 --- a/src/denovo_discovery/local_assembly.cpp +++ b/src/denovo_discovery/local_assembly.cpp @@ -70,7 +70,7 @@ DfsTree LocalAssemblyGraph::depth_first_search_from( auto& current_node { nodes_to_explore.top() }; nodes_to_explore.pop(); - bool previously_explored { explored_nodes.find(toString(current_node)) + const bool previously_explored { explored_nodes.find(toString(current_node)) != explored_nodes.end() }; if (previously_explored) { continue; @@ -110,7 +110,7 @@ BfsDistanceMap LocalAssemblyGraph::breadth_first_search_from( auto current_kmer = toString(current_node); auto parent_kmer = child_kmer_to_parent_kmer.at(current_kmer); - bool previously_explored { explored_nodes.find(current_kmer) + const bool previously_explored { explored_nodes.find(current_kmer) != 
explored_nodes.end() }; if (previously_explored) { continue; @@ -152,7 +152,7 @@ std::pair LocalAssemblyGraph::get_paths_between( auto tree { depth_first_search_from(start_node) }; // check if end node is in forward tree, if not just return - bool end_kmer_not_reachable_from_start_kmer = tree.find(end_kmer) == tree.end(); + const bool end_kmer_not_reachable_from_start_kmer = tree.find(end_kmer) == tree.end(); if (end_kmer_not_reachable_from_start_kmer) { BOOST_LOG_TRIVIAL(trace) << "End kmer " << end_kmer << " is not reachable from start kmer " @@ -206,7 +206,7 @@ void LocalAssemblyGraph::build_paths_between(const std::string& start_kmer, return; } - bool start_kmer_can_reach_end_kmer_with_distance_max_path_length + const bool start_kmer_can_reach_end_kmer_with_distance_max_path_length = (node_to_distance_to_the_end_node.find(start_kmer) != node_to_distance_to_the_end_node.end() and path_accumulator.length() diff --git a/src/fastaq.cpp b/src/fastaq.cpp index 4eab3183..6c67c52b 100644 --- a/src/fastaq.cpp +++ b/src/fastaq.cpp @@ -49,11 +49,11 @@ void Fastaq::add_entry(const std::string& name, const std::string& sequence, const std::vector& covgs, const uint_least16_t global_covg, const std::string header) { - bool fasta_entry_has_a_name = name.length() > 0; + const bool fasta_entry_has_a_name = name.length() > 0; if (!fasta_entry_has_a_name) { fatal_error("Error adding entry to Fasta/q file: empty names are invalid"); } - bool quality_string_has_same_length_as_sequence = covgs.size() == sequence.length(); + const bool quality_string_has_same_length_as_sequence = covgs.size() == sequence.length(); if (!quality_string_has_same_length_as_sequence) { fatal_error("Error adding entry to Fasta/q file: sequence and quality string have different lengths"); } @@ -81,7 +81,7 @@ void Fastaq::add_entry(const std::string& name, const std::string& sequence, void Fastaq::add_entry( const std::string& name, const std::string& sequence, const std::string header) { - bool 
fasta_entry_has_a_name = name.length() > 0; + const bool fasta_entry_has_a_name = name.length() > 0; if (!fasta_entry_has_a_name) { fatal_error("Error adding entry to Fasta/q file: empty names are invalid"); } diff --git a/src/interval.cpp b/src/interval.cpp index 7fff067e..5bb74ea4 100644 --- a/src/interval.cpp +++ b/src/interval.cpp @@ -97,7 +97,7 @@ void merge_intervals_within(std::vector& intervals, const uint32_t dis bool Interval::sorted_interval_vector_has_overlapping_intervals (const std::vector &intervals) { for (uint32_t index = 1; index < intervals.size(); ++index) { - bool there_is_overlap = intervals[index - 1].get_end() > intervals[index].start; + const bool there_is_overlap = intervals[index - 1].get_end() > intervals[index].start; if (there_is_overlap) { return true; } diff --git a/src/kmergraphwithcoverage.cpp b/src/kmergraphwithcoverage.cpp index 241c8eb1..c61e494c 100644 --- a/src/kmergraphwithcoverage.cpp +++ b/src/kmergraphwithcoverage.cpp @@ -243,18 +243,18 @@ float KmerGraphWithCoverage::find_max_path(std::vector& maxpath, const auto& current_node = sorted_nodes[j - 1]; for (uint32_t i = 0; i != current_node->out_nodes.size(); ++i) { const auto& considered_outnode = current_node->out_nodes[i].lock(); - bool is_terminus_and_most_likely + const bool is_terminus_and_most_likely = considered_outnode->id == sorted_nodes.back()->id and thresh > max_mean + tolerance; - bool avg_log_likelihood_is_most_likely + const bool avg_log_likelihood_is_most_likely = max_sum_of_log_probs_from_node[considered_outnode->id] / length_of_maxpath_from_node[considered_outnode->id] > max_mean + tolerance; - bool avg_log_likelihood_is_close_to_most_likely = max_mean + const bool avg_log_likelihood_is_close_to_most_likely = max_mean - max_sum_of_log_probs_from_node[considered_outnode->id] / length_of_maxpath_from_node[considered_outnode->id] <= tolerance; - bool is_longer_path = length_of_maxpath_from_node[considered_outnode->id] + const bool is_longer_path = 
length_of_maxpath_from_node[considered_outnode->id] > (uint)max_length; if (is_terminus_and_most_likely or avg_log_likelihood_is_most_likely diff --git a/src/localPRG.cpp b/src/localPRG.cpp index 94544100..ec8d1055 100644 --- a/src/localPRG.cpp +++ b/src/localPRG.cpp @@ -59,7 +59,7 @@ std::string LocalPRG::string_along_path(const prg::Path& p) const s += seq.substr(it.start, it.length); } - bool sequence_and_path_have_the_same_length = s.length() == p.length(); + const bool sequence_and_path_have_the_same_length = s.length() == p.length(); if(!sequence_and_path_have_the_same_length) { fatal_error("Error getting sequence along PRG path: the sequence generated ", "has a length different from the path"); @@ -135,19 +135,19 @@ std::vector LocalPRG::nodes_along_path_core(const prg::Path& p) co void LocalPRG::check_if_vector_of_subintervals_is_consistent_with_envelopping_interval( const std::vector &subintervals, const Interval& envelopping_interval ) { - bool invariant_region_starts_at_or_after_given_interval = subintervals[0].start >= envelopping_interval.start; + const bool invariant_region_starts_at_or_after_given_interval = subintervals[0].start >= envelopping_interval.start; if (!invariant_region_starts_at_or_after_given_interval) { fatal_error("When splitting PRG by site, invariant region starts before (", subintervals[0].start, ") the given interval (", envelopping_interval.start, ")"); } - bool there_is_overlap = Interval::sorted_interval_vector_has_overlapping_intervals(subintervals); + const bool there_is_overlap = Interval::sorted_interval_vector_has_overlapping_intervals(subintervals); if (there_is_overlap) { fatal_error("When splitting PRG by site, there are overlapping intervals"); } - bool site_ends_before_given_interval = subintervals.back().get_end() <= envelopping_interval.get_end(); + const bool site_ends_before_given_interval = subintervals.back().get_end() <= envelopping_interval.get_end(); if (!site_ends_before_given_interval) { fatal_error("When 
splitting PRG by site, site ends (", subintervals.back().get_end(), ") after given interval (", envelopping_interval.get_end(), ")"); @@ -364,7 +364,7 @@ LocalPRG::build_graph( end_ids = build_graph(v.back(), end_ids, current_level); } if (start_id == 0) { - bool graph_has_a_sink_node = end_ids.size() == 1; + const bool graph_has_a_sink_node = end_ids.size() == 1; if (!graph_has_a_sink_node) { fatal_error("Error building local PRG graph from interval: built graph has no sink node"); } @@ -603,7 +603,7 @@ void LocalPRG::minimizer_sketch(const std::shared_ptr& index, const uint3 v = shifts.front(); // get the first shifted path shifts.pop_front(); - bool shifted_path_has_k_bases = v.back()->length() == k; + const bool shifted_path_has_k_bases = v.back()->length() == k; if(!shifted_path_has_k_bases) { fatal_error("Error when minimizing a local PRG: shifted path does not have k (", k, ") bases"); @@ -727,7 +727,7 @@ void LocalPRG::minimizer_sketch(const std::shared_ptr& index, const uint3 } // create a null end node, and for each end leaf add an edge to this terminus - bool kmer_graph_has_leaves = !end_leaves.empty(); + const bool kmer_graph_has_leaves = !end_leaves.empty(); if (!kmer_graph_has_leaves) { fatal_error("Error when minimizing a local PRG: kmer graph does not have any leaves"); } @@ -742,7 +742,7 @@ void LocalPRG::minimizer_sketch(const std::shared_ptr& index, const uint3 } // print, check and return - bool number_of_kmers_added_is_consistent = (num_kmers_added == 0) or + const bool number_of_kmers_added_is_consistent = (num_kmers_added == 0) or (kmer_prg.nodes.size() == num_kmers_added); if (!number_of_kmers_added_is_consistent) { fatal_error("Error when minimizing a local PRG: incorrect number of kmers added"); @@ -791,7 +791,7 @@ std::vector LocalPRG::kmernode_path_from_localnode_path( } } - bool kmernode_path_is_empty = kmernode_path.empty(); + const bool kmernode_path_is_empty = kmernode_path.empty(); if (kmernode_path_is_empty) { fatal_error("Error 
when converting local node path to kmer node path: received " "non-empty local node path and returned an empty kmer node path"); @@ -949,7 +949,7 @@ std::vector get_covgs_along_localnode_path(const PanNodePtr pan_node, k = j; for (const auto& interval : kmernode_ptr->path) { const LocalNodePtr &localnode = localnode_path[k]; - bool local_node_is_inside_kmer_path_interval = + const bool local_node_is_inside_kmer_path_interval = (localnode->pos.start <= interval.start) and (localnode->pos.get_end() >= interval.get_end()); @@ -962,7 +962,7 @@ std::vector get_covgs_along_localnode_path(const PanNodePtr pan_node, end = std::min(start + interval.length, localnode->pos.get_end()); for (uint32_t l = start; l < end; ++l) { - bool kmernode_is_valid = + const bool kmernode_is_valid = (kmernode_ptr->id < pan_node->kmer_prg_with_coverage.kmer_prg->nodes.size()) and (pan_node->kmer_prg_with_coverage.kmer_prg->nodes[kmernode_ptr->id] != nullptr); if (!kmernode_is_valid) { @@ -1066,7 +1066,7 @@ void LocalPRG::build_vcf_from_reference_path( { BOOST_LOG_TRIVIAL(debug) << "Build VCF for prg " << name; - bool prg_is_empty = prg.nodes.empty(); + const bool prg_is_empty = prg.nodes.empty(); if (prg_is_empty) { fatal_error("Error when building VCF from reference path: PRG is empty"); } @@ -1110,12 +1110,12 @@ void LocalPRG::build_vcf_from_reference_path( // we have come down a level, add the alts compared to this region level -= 1; - bool level_is_valid = level >= 0; + const bool level_is_valid = level >= 0; if (!level_is_valid) { fatal_error("Error when building VCF from reference path: PRG level is negative"); } - bool previous_levels_are_empty = level_start.empty(); + const bool previous_levels_are_empty = level_start.empty(); if (previous_levels_are_empty) { fatal_error("Error when building VCF from reference path: PRG or path is inconsistent (a site was closed without opening it)"); } @@ -1180,7 +1180,7 @@ void LocalPRG::build_vcf_from_reference_path( } // add sites to vcf - bool 
record_sequence_is_valid = pos + ref_seq.length() <= ref_length; + const bool record_sequence_is_valid = pos + ref_seq.length() <= ref_length; if (!record_sequence_is_valid) { fatal_error("Error when building VCF from reference path: record sequence end (", pos + ref_seq.length(), ") overflows reference length (", ref_length, ")"); @@ -1198,7 +1198,7 @@ void LocalPRG::build_vcf_from_reference_path( level_start.pop_back(); if (level == 0) { - bool all_sites_were_closed = level_start.empty(); + const bool all_sites_were_closed = level_start.empty(); if (!all_sites_were_closed) { fatal_error("Error when building VCF from reference path: PRG or path is inconsistent (reached level 0 without closing all sites)"); } @@ -1218,15 +1218,15 @@ void LocalPRG:: { BOOST_LOG_TRIVIAL(debug) << "Update VCF with sample path"; - bool prg_is_empty = prg.nodes.empty(); + const bool prg_is_empty = prg.nodes.empty(); if (prg_is_empty) { fatal_error("Error when genotyping using max likelihood path: PRG is empty"); } - bool reference_path_is_empty = rpath.empty(); + const bool reference_path_is_empty = rpath.empty(); if (reference_path_is_empty) { fatal_error("Error when genotyping using max likelihood path: reference path is empty"); } - bool sample_path_is_empty = sample_path.empty(); + const bool sample_path_is_empty = sample_path.empty(); if (sample_path_is_empty) { fatal_error("Error when genotyping using max likelihood path: sample path is empty"); } @@ -1250,14 +1250,14 @@ void LocalPRG:: // functions that help with some checks - lambdas for easyness const auto check_if_ref_index_is_valid = [&]() { - bool ref_index_is_valid = rpath.size() > ref_i; + const bool ref_index_is_valid = rpath.size() > ref_i; if (!ref_index_is_valid) { fatal_error("Error when genotyping using max likelihood path: ref index " "is not valid"); } }; const auto check_if_sample_id_is_valid = [&]() { - bool sample_id_is_valid = sample_path.size() > sample_id; + const bool sample_id_is_valid = sample_path.size() 
> sample_id; if (!sample_id_is_valid) { fatal_error("Error when genotyping using max likelihood path: sample " "is not valid"); @@ -1367,7 +1367,7 @@ std::vector LocalPRG::find_alt_path( } // TODO: change this bool variable name to a more meaningful one - bool pos_along_ref_path_less_than_ref_path_size = pos_along_ref_path < ref_path.size(); + const bool pos_along_ref_path_less_than_ref_path_size = pos_along_ref_path < ref_path.size(); if (!pos_along_ref_path_less_than_ref_path_size) { fatal_error("Error finding alternative path: pos along ref path is not less " "than ref path size"); @@ -1378,7 +1378,7 @@ std::vector LocalPRG::find_alt_path( if (alt_path.empty() and not ref_path.empty() and ref_path[0]->pos.length == 0) alt_path.push_back(ref_path[0]); - bool we_have_found_alt_paths = !alt_path.empty(); + const bool we_have_found_alt_paths = !alt_path.empty(); if (!we_have_found_alt_paths) { fatal_error("Error finding alternative path: no alternative paths were found " "but we should have found at least one"); @@ -1428,13 +1428,13 @@ uint32_t LocalPRG::get_number_of_bases_in_local_path_before_a_given_position( { uint32_t number_of_bases_in_local_path_before_the_position = 0; for (const auto& local_node : local_path) { - bool local_node_is_empty = local_node->pos.length == 0; + const bool local_node_is_empty = local_node->pos.length == 0; if (local_node_is_empty) { continue; } - bool local_node_starts_before_position = local_node->pos.start < position; - bool local_node_ends_before_position = local_node->pos.get_end() <= position; + const bool local_node_starts_before_position = local_node->pos.start < position; + const bool local_node_ends_before_position = local_node->pos.get_end() <= position; if (local_node_ends_before_position) { number_of_bases_in_local_path_before_the_position += local_node->pos.length; } else if (local_node_starts_before_position) { @@ -1483,7 +1483,7 @@ LocalPRG::get_forward_and_reverse_kmer_coverages_in_range( const std::vector& 
local_path, const uint32_t& range_pos_start, const uint32_t& range_pos_end, const uint32_t& sample_id) const { - bool kmer_path_is_valid = kmer_path.size() > 1; + const bool kmer_path_is_valid = kmer_path.size() > 1; if (!kmer_path_is_valid) { fatal_error("Error when geting forward and reverse kmer coverages: kmer path " "is not valid"); @@ -1507,12 +1507,12 @@ LocalPRG::get_forward_and_reverse_kmer_coverages_in_range( KmerNodePtr previous_kmer_node = nullptr; for (const auto& current_kmer_node : kmer_path) { - bool current_kmer_node_is_empty = current_kmer_node->path.length() == 0; + const bool current_kmer_node_is_empty = current_kmer_node->path.length() == 0; if (current_kmer_node_is_empty) { continue; } - bool there_is_previous_kmer_node = previous_kmer_node != nullptr; + const bool there_is_previous_kmer_node = previous_kmer_node != nullptr; if (there_is_previous_kmer_node) { uint32_t number_of_bases_that_are_exclusively_in_the_previous_kmer_node = get_number_of_bases_that_are_exclusively_in_the_previous_kmer_node( @@ -1521,13 +1521,13 @@ LocalPRG::get_forward_and_reverse_kmer_coverages_in_range( += number_of_bases_that_are_exclusively_in_the_previous_kmer_node; } - bool is_inside_the_given_range = range_pos_start + const bool is_inside_the_given_range = range_pos_start <= number_of_bases_in_local_path_which_were_already_considered + kmer_size and number_of_bases_in_local_path_which_were_already_considered < range_pos_end; if (is_inside_the_given_range) { - bool kmer_node_is_valid = + const bool kmer_node_is_valid = (current_kmer_node->id < kmer_graph_with_coverage.kmer_prg->nodes.size()) and (kmer_graph_with_coverage.kmer_prg->nodes[current_kmer_node->id] != nullptr); if (!kmer_node_is_valid) { @@ -1540,7 +1540,7 @@ LocalPRG::get_forward_and_reverse_kmer_coverages_in_range( reverse_coverages.push_back( kmer_graph_with_coverage.get_reverse_covg(current_kmer_node->id, sample_id)); } else { - bool has_gone_past_the_given_range + const bool 
has_gone_past_the_given_range = number_of_bases_in_local_path_which_were_already_considered > range_pos_end; if (has_gone_past_the_given_range) @@ -1557,7 +1557,7 @@ void LocalPRG::add_sample_covgs_to_vcf(VCF& vcf, const KmerGraphWithCoverage& kg const std::vector& ref_path, const std::string& sample_name, const uint32_t& sample_id) const { - bool prg_is_empty = prg.nodes.empty(); + const bool prg_is_empty = prg.nodes.empty(); if (prg_is_empty) { fatal_error("Error when adding sample coverages to VCF: PRG is empty"); } @@ -1613,7 +1613,7 @@ void LocalPRG::add_sample_covgs_to_vcf(VCF& vcf, const KmerGraphWithCoverage& kg auto sample_it = find(vcf.samples.begin(), vcf.samples.end(), sample_name); auto sample_index = distance(vcf.samples.begin(), sample_it); - bool sample_is_valid = (sample_it != vcf.samples.end()) && + const bool sample_is_valid = (sample_it != vcf.samples.end()) && ((uint)sample_index != vcf.samples.size()); if (!sample_is_valid) { fatal_error("Error when adding sample coverages to VCF: sample is not valid"); @@ -1705,11 +1705,11 @@ std::vector LocalPRG::get_valid_vcf_reference( if (reference_path.empty()) return reference_path; - bool not_starting_at_prg_start = reference_path.front()->pos.start != 0; + const bool not_starting_at_prg_start = reference_path.front()->pos.start != 0; LocalNode last_prg_node = *(*(prg.nodes.rbegin())).second; auto final_prg_coordinate = last_prg_node.pos.get_end(); - bool not_ending_at_prg_end + const bool not_ending_at_prg_end = reference_path.back()->pos.get_end() != final_prg_coordinate; if (not_starting_at_prg_start or not_ending_at_prg_end) { diff --git a/src/localgraph.cpp b/src/localgraph.cpp index f63b452c..81eafe5e 100644 --- a/src/localgraph.cpp +++ b/src/localgraph.cpp @@ -23,7 +23,7 @@ void LocalGraph::add_node( intervalTree.add(pos.start, pos.get_end(), n); startIndexOfAllIntervals[pos.start] = n; } else { - bool node_with_same_id_seq_and_pos_already_added = (it->second->seq == seq) && (it->second->pos == 
pos); + const bool node_with_same_id_seq_and_pos_already_added = (it->second->seq == seq) && (it->second->pos == pos); if (!node_with_same_id_seq_and_pos_already_added) { fatal_error("Error adding node to Local Graph: node with ID ", id, " already exists in graph, but with different sequence or pos"); diff --git a/src/minihit.cpp b/src/minihit.cpp index c152cfa5..d3a901f0 100644 --- a/src/minihit.cpp +++ b/src/minihit.cpp @@ -11,7 +11,7 @@ MinimizerHit::MinimizerHit(const uint32_t i, const Minimizer& minimizer_from_rea , read_strand { minimizer_from_read.is_forward_strand } , minimizer_from_PRG { minimizer_from_PRG } { - bool both_minimizers_have_same_length = minimizer_from_read.pos_of_kmer_in_read.length + const bool both_minimizers_have_same_length = minimizer_from_read.pos_of_kmer_in_read.length == minimizer_from_PRG.path.length(); if(!both_minimizers_have_same_length) { fatal_error("Error when storing minimizers: minimizer from read/sequence " diff --git a/src/minimizer.cpp b/src/minimizer.cpp index 6a7c65d3..caa04d32 100644 --- a/src/minimizer.cpp +++ b/src/minimizer.cpp @@ -8,7 +8,7 @@ Minimizer::Minimizer(uint64_t s, uint32_t a, uint32_t b, bool c) , pos_of_kmer_in_read(Interval(a, b)) , is_forward_strand(c) { - bool hash_value_is_consistend_with_kmer_interval_size + const bool hash_value_is_consistend_with_kmer_interval_size = s <= pow(4, pos_of_kmer_in_read.length); if (!hash_value_is_consistend_with_kmer_interval_size) { fatal_error("Error when building minimizer: hash value (", s, diff --git a/src/noise_filtering.cpp b/src/noise_filtering.cpp index c763b262..4401978d 100644 --- a/src/noise_filtering.cpp +++ b/src/noise_filtering.cpp @@ -62,7 +62,7 @@ bool overlap_forwards( const std::deque& node1, const std::deque& node2) { // second deque should extend first by 1 - bool first_node_is_larger_or_same_size = node1.size() >= node2.size(); + const bool first_node_is_larger_or_same_size = node1.size() >= node2.size(); if(!first_node_is_larger_or_same_size) { 
fatal_error("Error on checking for overlaps in noise filtering: first node must be larger or have the same size as the second"); } @@ -167,8 +167,8 @@ void dbg_node_ids_to_ids_and_orientations(const debruijn::Graph& dbg, hashed_pg_node_ids = extend_hashed_pg_node_ids_forwards(dbg, dbg_node_ids); } - // TODO: give a better name to this bool once we understand what id does - bool hashed_pg_node_ids_is_empty = hashed_pg_node_ids.empty(); + // TODO: give a better name to this bool once we understand what it does + const bool hashed_pg_node_ids_is_empty = hashed_pg_node_ids.empty(); if(hashed_pg_node_ids_is_empty) { // TODO: improve this message fatal_error("Error when noise filtering: hashed_pg_node_ids is empty"); @@ -241,7 +241,7 @@ void remove_leaves(std::shared_ptr pangraph, debruijn::Graph& hashed_node_ids_to_ids_and_orientations( dbg.nodes[i]->hashed_node_ids, node_ids, node_orients); - bool dbg_node_has_no_reads = dbg.nodes[i]->read_ids.empty(); + const bool dbg_node_has_no_reads = dbg.nodes[i]->read_ids.empty(); if (dbg_node_has_no_reads) { fatal_error("Error when removing leaves from DBG: node has no leaves"); } @@ -253,7 +253,7 @@ void remove_leaves(std::shared_ptr pangraph, debruijn::Graph& } else { pos = pangraph->reads[r]->find_position(node_ids, node_orients); - bool pos_of_nodes_in_read_is_valid = (pos.first == 0) or + const bool pos_of_nodes_in_read_is_valid = (pos.first == 0) or (pos.first + node_ids.size() == pangraph->reads[r]->get_nodes().size()); if (!pos_of_nodes_in_read_is_valid) { fatal_error("Error when removing leaves from DBG: position of " @@ -471,7 +471,7 @@ enum NodeDirection { forward, reverse }; NodeDirection get_pangraph_node_direction(const debruijn::Node& debruijn_node) { - bool forward_node = debruijn_node.hashed_node_ids[0] % 2 != 0; + const bool forward_node = debruijn_node.hashed_node_ids[0] % 2 != 0; if (forward_node) return NodeDirection::forward; else @@ -526,7 +526,7 @@ pangenome::Node convert_node_debruijn_pangraph( const 
debruijn::Node& debruijn_node, std::shared_ptr pangraph) { auto node_id = get_pangraph_node_id(debruijn_node); - bool node_exists = pangraph->nodes.find(node_id) != pangraph->nodes.end(); + const bool node_exists = pangraph->nodes.find(node_id) != pangraph->nodes.end(); if (!node_exists) { fatal_error("Error converting DBG node to pangraph node: the given DBG node " "does not exist in the pangraph"); @@ -554,7 +554,7 @@ void write_pangraph_gfa( auto first_node_direction = get_pangraph_node_direction(first_debruijn_node); for (const auto& second_debruijn_node_id : first_debruijn_node.out_nodes) { - bool neighbour_node_exists_in_the_graph = + const bool neighbour_node_exists_in_the_graph = debruijn_graph.nodes.find(second_debruijn_node_id) != debruijn_graph.nodes.end(); if (!neighbour_node_exists_in_the_graph) { fatal_error("Error writing pangraph to GFA: a neighbour of a node does " diff --git a/src/pangenome/pangraph.cpp b/src/pangenome/pangraph.cpp index 390f73c3..7d4904ad 100644 --- a/src/pangenome/pangraph.cpp +++ b/src/pangenome/pangraph.cpp @@ -33,7 +33,7 @@ pangenome::Graph::Graph(const std::vector& sample_names) void pangenome::Graph::add_read(const uint32_t& read_id) { auto it = reads.find(read_id); - bool found = it != reads.end(); + const bool found = it != reads.end(); if (not found) { auto read_ptr = std::make_shared(read_id); reads[read_id] = read_ptr; @@ -44,7 +44,7 @@ void pangenome::Graph::add_node(const std::shared_ptr& prg, uint32_t n { NodePtr node_ptr; auto it = nodes.find(node_id); - bool found_node = it != nodes.end(); + const bool found_node = it != nodes.end(); if (not found_node) { node_ptr = std::make_shared( prg, node_id, samples.size()); // TODO: refactor this - holding the @@ -65,7 +65,7 @@ void update_node_info_with_this_read(const NodePtr& node_ptr, const ReadPtr& rea node_ptr->covg += 1; node_ptr->reads.insert(read_ptr); - bool coverage_information_is_consistent_with_read_information = + const bool 
coverage_information_is_consistent_with_read_information = node_ptr->covg == node_ptr->reads.size(); if (!coverage_information_is_consistent_with_read_information) { fatal_error("Error updating Pangraph node with read: coverage information " @@ -78,13 +78,13 @@ void check_correct_hits(const uint32_t prg_id, const uint32_t read_id, const std::set& cluster) { for (const auto& hit_ptr : cluster) { - bool hits_correspond_to_correct_read = read_id == hit_ptr->get_read_id(); + const bool hits_correspond_to_correct_read = read_id == hit_ptr->get_read_id(); if (!hits_correspond_to_correct_read) { fatal_error("Minimizer hits error: hit should be on read id ", read_id, ", but it is on read id ", hit_ptr->get_read_id()); } - bool hits_correspond_to_correct_prg = prg_id == hit_ptr->get_prg_id(); + const bool hits_correspond_to_correct_prg = prg_id == hit_ptr->get_prg_id(); if (!hits_correspond_to_correct_prg) { fatal_error("Minimizer hits error: hit should be on PRG id ", prg_id, ", but it is on PRG id ", hit_ptr->get_prg_id()); @@ -284,7 +284,7 @@ void pangenome::Graph::add_hits_to_kmergraphs( { for (const auto& node_entries : nodes) { Node& pangraph_node = *node_entries.second; - bool pangraph_node_has_a_valid_kmer_prg_with_coverage = + const bool pangraph_node_has_a_valid_kmer_prg_with_coverage = (pangraph_node.kmer_prg_with_coverage.kmer_prg != nullptr) and (not pangraph_node.kmer_prg_with_coverage.kmer_prg->nodes.empty()); if (!pangraph_node_has_a_valid_kmer_prg_with_coverage) { @@ -301,7 +301,7 @@ void pangenome::Graph::add_hits_to_kmergraphs( for (const auto& minimizer_hit_ptr : hits.at(pangraph_node.prg_id)) { const auto& minimizer_hit = *minimizer_hit_ptr; - bool minimizer_hit_kmer_node_id_is_valid = + const bool minimizer_hit_kmer_node_id_is_valid = (minimizer_hit.get_kmer_node_id() < pangraph_node.kmer_prg_with_coverage.kmer_prg->nodes.size()) && (pangraph_node.kmer_prg_with_coverage.kmer_prg->nodes[minimizer_hit.get_kmer_node_id()] != nullptr); if 
(!minimizer_hit_kmer_node_id_is_valid) { @@ -349,7 +349,7 @@ void pangenome::Graph::copy_coverages_to_kmergraphs( for (const auto& ref_node_entry : ref_pangraph.nodes) { const Node& ref_node = *ref_node_entry.second; - bool ref_node_is_in_this_pangraph = nodes.find(ref_node.node_id) != nodes.end(); + const bool ref_node_is_in_this_pangraph = nodes.find(ref_node.node_id) != nodes.end(); if (!ref_node_is_in_this_pangraph) { fatal_error("Error copying coverages to kmer graphs: reference node does not " "exist in pangraph"); @@ -360,7 +360,7 @@ void pangenome::Graph::copy_coverages_to_kmergraphs( pangraph_node.kmer_prg_with_coverage.kmer_prg->nodes) { const auto& knode_id = kmergraph_node_ptr->id; - bool kmer_graph_node_id_is_valid = + const bool kmer_graph_node_id_is_valid = knode_id < ref_node.kmer_prg_with_coverage.kmer_prg->nodes.size(); if (!kmer_graph_node_id_is_valid) { fatal_error("Error copying coverages to kmer graphs: kmer graph node " @@ -417,7 +417,7 @@ std::vector pangenome::Graph::get_node_closest_vcf_reference( const auto& sample_paths = sample->paths.at(node.prg_id); for (const auto& sample_path : sample_paths) { for (uint32_t i = 0; i != sample_path.size(); ++i) { - bool sample_path_node_is_valid = + const bool sample_path_node_is_valid = (sample_path[i]->id < kmer_prg_with_coverage.kmer_prg->nodes.size()) and (kmer_prg_with_coverage.kmer_prg->nodes[sample_path[i]->id] != nullptr); if (!sample_path_node_is_valid) { @@ -535,7 +535,7 @@ void pangenome::Graph::save_mapped_read_strings( start = (uint32_t)std::max((int32_t)coord[1] - buff, 0); end = std::min(coord[2] + (uint32_t)buff, (uint32_t)readfile.read.length()); - bool read_coordinates_are_valid = + const bool read_coordinates_are_valid = (coord[1] < coord[2]) && (start <= coord[1]) && (start <= readfile.read.length()) && diff --git a/src/pangenome/pannode.cpp b/src/pangenome/pannode.cpp index 163f458a..94b08b62 100644 --- a/src/pangenome/pannode.cpp +++ b/src/pangenome/pannode.cpp @@ -85,7 +85,7 @@ 
void pangenome::Node::add_path( const std::vector& kmp, const uint32_t& sample_id) { for (uint32_t i = 0; i != kmp.size(); ++i) { - bool kmer_node_is_valid = + const bool kmer_node_is_valid = (kmp[i]->id < kmer_prg_with_coverage.kmer_prg->nodes.size()) and (kmer_prg_with_coverage.kmer_prg->nodes[kmp[i]->id] != nullptr); if (!kmer_node_is_valid) { @@ -118,7 +118,7 @@ void pangenome::Node::get_read_overlap_coordinates( hit_ptr->get_read_start_position() + hit_ptr->get_prg_path().length()); } - bool read_coordinates_are_valid = end > start; + const bool read_coordinates_are_valid = end > start; if (!read_coordinates_are_valid) { fatal_error("Error finding the read overlap coordinates for node ", name, " and read ", read_ptr->id, " (the ", read_count, @@ -226,7 +226,7 @@ std::set pangenome::Node::get_read_overlap_coordinates( + read_hit->get_prg_path().length()); } - bool read_coordinates_are_valid = end > start; + const bool read_coordinates_are_valid = end > start; if (!read_coordinates_are_valid) { fatal_error("Error finding the read overlap coordinates for node ", name, " and read ", current_read->id, ". 
Found end ", end, diff --git a/src/pangenome/panread.cpp b/src/pangenome/panread.cpp index ad90b147..9f2e67e4 100644 --- a/src/pangenome/panread.cpp +++ b/src/pangenome/panread.cpp @@ -51,13 +51,13 @@ void Read::add_hits( hits.erase(last, hits.end()); hits.shrink_to_fit(); - bool hits_were_correctly_inserted = hits.size() == before_size + cluster.size(); + const bool hits_were_correctly_inserted = hits.size() == before_size + cluster.size(); if (!hits_were_correctly_inserted) { fatal_error("Error when adding hits to Pangraph read"); } // add the orientation/node accordingly - bool orientation = !cluster.empty() and (*cluster.begin())->is_forward(); + const bool orientation = !cluster.empty() and (*cluster.begin())->is_forward(); if (get_nodes().empty() or node_ptr != get_nodes().back().lock() or orientation != node_orientations.back() // or we think there really are 2 copies of gene @@ -108,7 +108,7 @@ std::pair Read::find_position( const std::vector& node_ids, const std::vector& node_orients, const uint16_t min_overlap) { - bool nodes_ids_and_orientations_are_valid = + const bool nodes_ids_and_orientations_are_valid = (not node_ids.empty()) and (node_ids.size() == node_orients.size()); if (!nodes_ids_and_orientations_are_valid) { diff --git a/src/prg/path.cpp b/src/prg/path.cpp index 6998ed06..36960a52 100644 --- a/src/prg/path.cpp +++ b/src/prg/path.cpp @@ -27,7 +27,7 @@ void prg::Path::add_end_interval(const Interval& i) { memoizedDirty = true; - bool interval_is_valid = i.start >= get_end(); + const bool interval_is_valid = i.start >= get_end(); if (!interval_is_valid) { fatal_error("Error when adding a new interval to a path"); } @@ -38,7 +38,7 @@ void prg::Path::add_end_interval(const Interval& i) std::vector prg::Path::nodes_along_path(const LocalPRG& localPrg) { // sanity check - bool memoization_is_valid = (isMemoized == false) || + const bool memoization_is_valid = (isMemoized == false) || (isMemoized == true && localPRGIdOfMemoizedLocalNodePath == 
localPrg.id); if (!memoization_is_valid) { fatal_error("Error when getting nodes along PRG path: memoized a local node path " @@ -71,7 +71,7 @@ prg::Path prg::Path::subpath(const uint32_t start, const uint32_t len) const { // function now returns the path starting at position start along the path, rather // than at position start on linear PRG, and for length len - bool parameters_are_valid = start + len <= length(); + const bool parameters_are_valid = start + len <= length(); if (!parameters_are_valid) { fatal_error("Error when getting subpath from PRG path: given parameters are not valid"); } @@ -84,7 +84,7 @@ prg::Path prg::Path::subpath(const uint32_t start, const uint32_t len) const if ((covered_length <= start and covered_length + interval.length > start and p.path.empty()) or (covered_length == start and interval.length == 0 and p.path.empty())) { - bool no_interval_has_been_added_yet = added_len == 0; + const bool no_interval_has_been_added_yet = added_len == 0; if (!no_interval_has_been_added_yet) { fatal_error("Error when getting subpath from PRG path: an interval " "has already been added before the correct first one"); @@ -107,7 +107,7 @@ prg::Path prg::Path::subpath(const uint32_t start, const uint32_t len) const } } - bool subpath_length_is_correct = added_len == len; + const bool subpath_length_is_correct = added_len == len; if (!subpath_length_is_correct) { fatal_error("Error when getting subpath from PRG path: built the subpath with " "the wrong length"); @@ -285,7 +285,7 @@ std::istream& prg::operator>>(std::istream& in, prg::Path& p) prg::Path prg::get_union(const prg::Path& x, const prg::Path& y) { - bool parameters_are_valid = x < y; + const bool parameters_are_valid = x < y; if (!parameters_are_valid) { fatal_error("Error when getting the union of two paths: first path: ", x, " must come before second path: ", y); diff --git a/src/sampleinfo.cpp b/src/sampleinfo.cpp index 48a7215e..98849021 100644 --- a/src/sampleinfo.cpp +++ 
b/src/sampleinfo.cpp @@ -46,7 +46,7 @@ void SampleInfo::genotype_from_coverage_using_maximum_likelihood_path_as_referen std::tie(GT_from_coverages, likelihood_of_GT_from_coverages) = *genotype_and_max_likelihood_optional; - bool global_and_local_choices_are_not_compatible + const bool global_and_local_choices_are_not_compatible = GT_from_coverages != valid_GT_from_maximum_likelihood_path; if (global_and_local_choices_are_not_compatible) { GT_from_coverages = boost::none; @@ -64,18 +64,18 @@ void SampleInfo::genotype_from_coverage_using_maximum_likelihood_path_as_referen bool SampleInfo::check_if_coverage_information_is_correct() const { - bool there_are_at_least_one_allele = allele_to_forward_coverages.size() >= 1 + const bool there_are_at_least_one_allele = allele_to_forward_coverages.size() >= 1 and allele_to_reverse_coverages.size() >= 1; - bool forward_and_reverse_coverages_have_the_same_number_of_alleles + const bool forward_and_reverse_coverages_have_the_same_number_of_alleles = allele_to_forward_coverages.size() == allele_to_reverse_coverages.size(); - bool correct_number_of_alleles + const bool correct_number_of_alleles = allele_to_forward_coverages.size() == get_number_of_alleles(); bool all_alleles_in_forward_and_reverse_have_the_same_number_of_bases = true; for (size_t allele_index = 0; allele_index < get_number_of_alleles(); ++allele_index) { - bool alleles_in_forward_and_reverse_have_the_same_number_of_bases + const bool alleles_in_forward_and_reverse_have_the_same_number_of_bases = allele_to_forward_coverages[allele_index].size() == allele_to_reverse_coverages[allele_index].size(); if (not alleles_in_forward_and_reverse_have_the_same_number_of_bases) { @@ -121,7 +121,7 @@ std::vector SampleInfo::get_likelihoods_for_all_alleles() const uint32_t total_mean_coverage_of_allele_above_threshold = get_total_mean_coverage_given_a_minimum_threshold( allele, min_coverage_threshold); - bool min_coverage_threshold_is_satisfied + const bool 
min_coverage_threshold_is_satisfied = total_mean_coverage_of_allele_above_threshold > 0; uint32_t total_mean_coverage_of_all_other_alleles_above_threshold @@ -203,7 +203,7 @@ SampleInfo::get_confidence() const = Maths::arg_max(likelihoods_for_all_alleles_without_max_element.begin(), likelihoods_for_all_alleles_without_max_element.end()); - bool index_of_second_max_likelihood_is_past_index_of_max_likelihood + const bool index_of_second_max_likelihood_is_past_index_of_max_likelihood = index_of_second_max_likelihood >= index_of_max_likelihood; if (index_of_second_max_likelihood_is_past_index_of_max_likelihood) { ++index_of_second_max_likelihood; @@ -219,10 +219,10 @@ SampleInfo::get_confidence() const = get_mean_coverage_both_alleles(index_of_max_likelihood); uint32_t total_mean_coverage_of_second_max_likelihood_allele = get_mean_coverage_both_alleles(index_of_second_max_likelihood); - bool enough_total_covg = (total_mean_coverage_of_max_likelihood_allele + const bool enough_total_covg = (total_mean_coverage_of_max_likelihood_allele + total_mean_coverage_of_second_max_likelihood_allele >= genotyping_options->get_min_site_total_covg()); - bool enough_difference_in_covg + const bool enough_difference_in_covg = (std::abs((int64_t)(total_mean_coverage_of_max_likelihood_allele) - (int64_t)(total_mean_coverage_of_second_max_likelihood_allele)) >= genotyping_options->get_min_site_diff_covg()); @@ -259,7 +259,7 @@ SampleInfo::get_genotype_from_coverage() const std::tie(index_of_max_likelihood, confidence, max_likelihood) = *index_and_confidence_and_max_likelihood_optional; - bool satisfy_confidence_threshold + const bool satisfy_confidence_threshold = confidence > genotyping_options->get_confidence_threshold(); if (satisfy_confidence_threshold) { return std::make_pair((uint32_t)index_of_max_likelihood, max_likelihood); @@ -271,7 +271,7 @@ SampleInfo::get_genotype_from_coverage() const std::string SampleInfo::to_string(bool genotyping_from_maximum_likelihood, bool 
genotyping_from_compatible_coverage) const { - bool only_one_flag_is_set = ((int)(genotyping_from_maximum_likelihood) + const bool only_one_flag_is_set = ((int)(genotyping_from_maximum_likelihood) + (int)(genotyping_from_compatible_coverage)) == 1; if (!only_one_flag_is_set) { @@ -373,13 +373,13 @@ void SampleInfo::merge_other_sample_gt_from_max_likelihood_path_into_this( void SampleInfo::solve_incompatible_gt_conflict_with(SampleInfo& other) { - bool any_of_gts_are_invalid_thus_no_conflict + const bool any_of_gts_are_invalid_thus_no_conflict = not this->is_gt_from_coverages_compatible_valid() or not other.is_gt_from_coverages_compatible_valid(); if (any_of_gts_are_invalid_thus_no_conflict) return; - bool both_gts_are_to_ref_thus_no_conflict + const bool both_gts_are_to_ref_thus_no_conflict = this->get_gt_from_coverages_compatible() == 0 and other.get_gt_from_coverages_compatible() == 0; if (both_gts_are_to_ref_thus_no_conflict) diff --git a/src/seq.cpp b/src/seq.cpp index b4b11b14..951fc606 100644 --- a/src/seq.cpp +++ b/src/seq.cpp @@ -97,7 +97,7 @@ void Seq::add_new_smallest_minimizer(vector& window, uint64_t& smalle void Seq::minimizer_sketch(const uint32_t w, const uint32_t k) { - bool sequence_too_short_to_sketch = seq.length() + 1 < w + k; + const bool sequence_too_short_to_sketch = seq.length() + 1 < w + k; if (sequence_too_short_to_sketch) return; @@ -110,7 +110,7 @@ void Seq::minimizer_sketch(const uint32_t w, const uint32_t k) window.reserve(w); for (const char letter : seq) { - bool added = add_letter_to_get_next_kmer(letter, shift1, mask, buff, kmer, + const bool added = add_letter_to_get_next_kmer(letter, shift1, mask, buff, kmer, kh); // add the next base and remove the first one to get the next kmer if (not added) return; diff --git a/src/vcf.cpp b/src/vcf.cpp index b778889f..58c40a9a 100644 --- a/src/vcf.cpp +++ b/src/vcf.cpp @@ -100,7 +100,7 @@ void VCF::add_a_new_record_discovered_in_a_sample_and_genotype_it( auto vcf_record_iterator = 
find_record_in_records( vcf_record); // TODO: improve this search to log(n) using alt map or sth - bool vcf_record_was_found = vcf_record_iterator != records.end(); + const bool vcf_record_was_found = vcf_record_iterator != records.end(); if (vcf_record_was_found) { (*vcf_record_iterator) ->sampleIndex_to_sampleInfo[sample_index] @@ -111,7 +111,7 @@ void VCF::add_a_new_record_discovered_in_a_sample_and_genotype_it( // or alt mistake bool vcf_record_was_processed = false; - bool sample_genotyped_towards_ref_allele = ref == alt; + const bool sample_genotyped_towards_ref_allele = ref == alt; if (sample_genotyped_towards_ref_allele) { // TODO: create a method to find records based on chrom, pos and ref only for (const auto& record : records) { @@ -146,9 +146,9 @@ void VCF::update_other_samples_of_this_record(VCFRecord* reference_record) { // update other samples at this site if they have ref allele at this pos for (const auto& other_record : records) { - bool both_records_are_on_the_same_site + const bool both_records_are_on_the_same_site = other_record->get_chrom() == reference_record->get_chrom(); - bool reference_record_start_overlaps_other_record + const bool reference_record_start_overlaps_other_record = other_record->get_pos() <= reference_record->get_pos() and reference_record->get_pos() < other_record->get_pos() + other_record->get_ref().length(); @@ -249,10 +249,10 @@ bool VCF::pos_in_range( void VCF::genotype(const bool do_local_genotyping) { - bool all_SV_types = not genotyping_options->is_snps_only(); + const bool all_SV_types = not genotyping_options->is_snps_only(); for (auto& vcf_record : records) { - bool should_genotype_record = all_SV_types + const bool should_genotype_record = all_SV_types or (genotyping_options->is_snps_only() and vcf_record->is_SNP()); if (should_genotype_record) { if (do_local_genotyping) { @@ -272,13 +272,13 @@ void VCF::genotype(const bool do_local_genotyping) void VCF::merge_multi_allelic_core(VCF& merged_VCF, uint32_t 
max_allele_length) const { VCF empty_vcf = VCF(merged_VCF.genotyping_options); - bool merged_VCF_passed_as_parameter_is_initially_empty = merged_VCF == empty_vcf; + const bool merged_VCF_passed_as_parameter_is_initially_empty = merged_VCF == empty_vcf; if(!merged_VCF_passed_as_parameter_is_initially_empty) { fatal_error("Error on merging VCFs: initial VCF is not empty"); } size_t vcf_size = this->get_VCF_size(); - bool no_need_for_merging = vcf_size <= 1; + const bool no_need_for_merging = vcf_size <= 1; if (no_need_for_merging) { merged_VCF = *this; return; @@ -290,7 +290,7 @@ void VCF::merge_multi_allelic_core(VCF& merged_VCF, uint32_t max_allele_length) = (records[0])->make_copy_as_shared_ptr(); for_each(records.begin() + 1, records.end(), [&](const std::shared_ptr& vcf_record_to_be_merged_in_pointer) { - bool vcf_record_should_be_merged_in + const bool vcf_record_should_be_merged_in = vcf_record_merged->can_biallelic_record_be_merged_into_this( *vcf_record_to_be_merged_in_pointer, max_allele_length); @@ -307,7 +307,7 @@ void VCF::merge_multi_allelic_core(VCF& merged_VCF, uint32_t max_allele_length) merged_VCF.sort_records(); - bool merging_did_not_create_any_record = merged_VCF.get_VCF_size() <= vcf_size; + const bool merging_did_not_create_any_record = merged_VCF.get_VCF_size() <= vcf_size; if(!merging_did_not_create_any_record) { fatal_error("Error on merging VCFs: new VCF records were created, whereas " "this should not be the case"); @@ -327,7 +327,7 @@ VCF VCF::correct_dot_alleles(const std::string& vcf_ref, const std::string& chro for (auto& recordPointer : records) { auto& record = *recordPointer; - bool we_are_not_in_the_given_chrom = record.get_chrom() != chrom; + const bool we_are_not_in_the_given_chrom = record.get_chrom() != chrom; if (we_are_not_in_the_given_chrom) { vcf_with_dot_alleles_corrected.add_record(record); @@ -340,11 +340,11 @@ VCF VCF::correct_dot_alleles(const std::string& vcf_ref, const std::string& chro fatal_error("When correcting 
dot alleles, a VCF record has an inexistent " "position (", record.get_pos(), ") in VCF ref with length ", vcf_ref.length()); } - bool record_contains_dot_allele = record.contains_dot_allele(); - bool record_did_not_contain_dot_allele_or_was_corrected = true; - bool there_is_a_previous_letter = record.get_pos() > 0; - bool there_is_a_next_letter + const bool record_contains_dot_allele = record.contains_dot_allele(); + const bool there_is_a_previous_letter = record.get_pos() > 0; + const bool there_is_a_next_letter = record.get_pos() + record.get_ref().length() + 1 < vcf_ref.length(); + bool record_did_not_contain_dot_allele_or_was_corrected = true; if (record_contains_dot_allele and there_is_a_previous_letter) { char prev_letter = vcf_ref[record.get_pos() - 1]; record.correct_dot_alleles_adding_nucleotide_before(prev_letter); @@ -367,7 +367,7 @@ VCF VCF::correct_dot_alleles(const std::string& vcf_ref, const std::string& chro vcf_with_dot_alleles_corrected.sort_records(); - bool correcting_dot_alleles_did_not_create_any_record = vcf_with_dot_alleles_corrected.get_VCF_size() <= this->get_VCF_size(); + const bool correcting_dot_alleles_did_not_create_any_record = vcf_with_dot_alleles_corrected.get_VCF_size() <= this->get_VCF_size(); if(!correcting_dot_alleles_did_not_create_any_record) { fatal_error("Error on correcting dot alleles: new VCF records were created, whereas " "this should not be the case"); @@ -394,7 +394,7 @@ void VCF::make_gt_compatible() = record.get_pos() == overlapping_record.get_pos() and record.get_ref() < overlapping_record.get_ref(); - bool this_record_starts_before_the_overlapping_record + const bool this_record_starts_before_the_overlapping_record = record.get_pos() < overlapping_record.get_pos() or record_starts_at_the_same_position_but_ref_is_smaller_than_overlapping_record_ref; @@ -517,7 +517,7 @@ std::string VCF::to_string(bool genotyping_from_maximum_likelihood, bool graph_is_nested, bool graph_has_too_many_alts, bool sv_type_is_snp, bool 
sv_type_is_indel, bool sv_type_is_ph_snps, bool sv_type_is_complex) { - bool only_one_flag_is_set + const bool only_one_flag_is_set = ((int)(genotyping_from_maximum_likelihood) + (int)(genotyping_from_coverage)) == 1; if (!only_one_flag_is_set) { @@ -532,22 +532,22 @@ std::string VCF::to_string(bool genotyping_from_maximum_likelihood, sort_records(); for (const auto& record : this->records) { - bool record_has_dot_allele_and_should_be_output + const bool record_has_dot_allele_and_should_be_output = output_dot_allele and record->contains_dot_allele(); - bool graph_type_condition_is_satisfied + const bool graph_type_condition_is_satisfied = (graph_is_simple and record->graph_type_is_simple()) or (graph_is_nested and record->graph_type_is_nested()) or (graph_has_too_many_alts and record->graph_type_has_too_many_alts()); - bool sv_type_condition_is_satisfied + const bool sv_type_condition_is_satisfied = (sv_type_is_snp and record->svtype_is_SNP()) or (sv_type_is_indel and record->svtype_is_indel()) or (sv_type_is_ph_snps and record->svtype_is_PH_SNPs()) or (sv_type_is_complex and record->svtype_is_complex()); - bool graph_and_sv_type_conditions_are_satisfied + const bool graph_and_sv_type_conditions_are_satisfied = graph_type_condition_is_satisfied and sv_type_condition_is_satisfied; - bool record_should_be_output = record_has_dot_allele_and_should_be_output + const bool record_should_be_output = record_has_dot_allele_and_should_be_output or graph_and_sv_type_conditions_are_satisfied; if (record_should_be_output) { diff --git a/src/vcfrecord.cpp b/src/vcfrecord.cpp index a93b97d8..7106c4b1 100644 --- a/src/vcfrecord.cpp +++ b/src/vcfrecord.cpp @@ -67,7 +67,7 @@ std::string VCFRecord::infer_SVTYPE() const std::string VCFRecord::get_format( bool genotyping_from_maximum_likelihood, bool genotyping_from_coverage) const { - bool only_one_flag_is_set + const bool only_one_flag_is_set = ((int)(genotyping_from_maximum_likelihood) + (int)(genotyping_from_coverage)) == 1; if 
(!only_one_flag_is_set) { @@ -90,7 +90,7 @@ std::string VCFRecord::get_format( std::stringstream out; for (const auto& field : *format) { out << field; - bool is_not_last_field = field != format->back(); + const bool is_not_last_field = field != format->back(); if (is_not_last_field) { out << ":"; } @@ -207,7 +207,7 @@ size_t VCFRecord::get_longest_allele_length() const void VCFRecord::merge_record_into_this(const VCFRecord& other) { // no need for merge - bool other_record_has_no_alt = other.alts.size() == 0; + const bool other_record_has_no_alt = other.alts.size() == 0; if (other_record_has_no_alt) return; @@ -228,19 +228,19 @@ bool VCFRecord::can_biallelic_record_be_merged_into_this( // TODO : maybe fix this? // bool ensure_we_are_merging_only_biallelic_records = // vcf_record_to_be_merged_in.alts.size() == 1; - bool we_are_merging_only_biallelic_records + const bool we_are_merging_only_biallelic_records = vcf_record_to_be_merged_in.alts.size() <= 1; if(!we_are_merging_only_biallelic_records) { fatal_error("When merging two biallelic records, one of them is not biallelic"); } - bool both_records_have_the_same_ref = this->ref == vcf_record_to_be_merged_in.ref; + const bool both_records_have_the_same_ref = this->ref == vcf_record_to_be_merged_in.ref; - bool all_alleles_have_at_most_max_allele_length + const bool all_alleles_have_at_most_max_allele_length = this->get_longest_allele_length() <= max_allele_length and vcf_record_to_be_merged_in.get_longest_allele_length() <= max_allele_length; - bool vcf_record_should_be_merged_in = vcf_record_to_be_merged_in != (*this) + const bool vcf_record_should_be_merged_in = vcf_record_to_be_merged_in != (*this) and this->has_the_same_position(vcf_record_to_be_merged_in) and both_records_have_the_same_ref and all_alleles_have_at_most_max_allele_length @@ -298,7 +298,7 @@ void VCFRecord::add_new_alt(std::string alt) alt = "."; } - bool alt_already_present = std::find(alts.begin(), alts.end(), alt) != alts.end(); + const bool 
alt_already_present = std::find(alts.begin(), alts.end(), alt) != alts.end(); if (alt_already_present) { fatal_error("Error adding new ALT to a VCF record: ALT already exists"); } From c27eae1b7b24b68106710ce62aa14a66d9c63cf7 Mon Sep 17 00:00:00 2001 From: Leandro Ishi Date: Thu, 4 Mar 2021 11:51:07 +0000 Subject: [PATCH 24/37] Improving a few fatal error messages --- src/kmergraph.cpp | 2 +- src/localPRG.cpp | 2 +- src/minimizer.cpp | 2 +- src/prg/path.cpp | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/kmergraph.cpp b/src/kmergraph.cpp index af427eb4..4c7dd518 100644 --- a/src/kmergraph.cpp +++ b/src/kmergraph.cpp @@ -419,7 +419,7 @@ void KmerGraph::load(const fs::path& filepath) } } } else { - fatal_error("Error reading GFA: unable to open kmergraph file", filepath); + fatal_error("Error reading GFA: unable to open kmergraph file: ", filepath); } } diff --git a/src/localPRG.cpp b/src/localPRG.cpp index ec8d1055..75a38ed6 100644 --- a/src/localPRG.cpp +++ b/src/localPRG.cpp @@ -52,7 +52,7 @@ std::string LocalPRG::string_along_path(const prg::Path& p) const const bool path_is_inside_the_PRG = (p.get_start() <= seq.length()) && (p.get_end() <= seq.length()); if(!path_is_inside_the_PRG) { - fatal_error("Error getting sequence along PRG path: path is out of range"); + fatal_error("Error getting sequence along PRG path: path goes beyond PRG limits"); } std::string s; for (const auto& it : p) { diff --git a/src/minimizer.cpp b/src/minimizer.cpp index caa04d32..05a8be7c 100644 --- a/src/minimizer.cpp +++ b/src/minimizer.cpp @@ -12,7 +12,7 @@ Minimizer::Minimizer(uint64_t s, uint32_t a, uint32_t b, bool c) = s <= pow(4, pos_of_kmer_in_read.length); if (!hash_value_is_consistend_with_kmer_interval_size) { fatal_error("Error when building minimizer: hash value (", s, - ") is too big for kmer ", "of interval size ", pos_of_kmer_in_read.length); + ") is too big for kmer of interval size ", pos_of_kmer_in_read.length); } } diff --git 
a/src/prg/path.cpp b/src/prg/path.cpp index 36960a52..10435d86 100644 --- a/src/prg/path.cpp +++ b/src/prg/path.cpp @@ -63,7 +63,7 @@ std::vector prg::Path::nodes_along_path(const LocalPRG& localPrg) // redudant call, return the memoized local node path return memoizedLocalNodePath; } else { - fatal_error("Bug on prg::Path::nodes_along_path()"); + fatal_error("Error when getting nodes along PRG path: memoization state is invalid"); } } From 0a4ccd3c3d5c744b6bc47caabf541024c9b37706 Mon Sep 17 00:00:00 2001 From: Leandro Ishi Date: Thu, 4 Mar 2021 11:58:58 +0000 Subject: [PATCH 25/37] Removing cgranges submodule --- .gitmodules | 3 --- 1 file changed, 3 deletions(-) diff --git a/.gitmodules b/.gitmodules index 3c9fb9ed..25c1f70d 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,6 +1,3 @@ -[submodule "cgranges"] - path = cgranges - url = https://github.com/lh3/cgranges [submodule "thirdparty/gatb-core"] path = thirdparty/gatb-core url = https://github.com/leoisl/gatb-core From f16bc939a9ddcc4c9a92d942b775a2261bb48b6f Mon Sep 17 00:00:00 2001 From: Leandro Ishi Date: Thu, 4 Mar 2021 11:59:24 +0000 Subject: [PATCH 26/37] Removing cgranges module --- cgranges | 1 - 1 file changed, 1 deletion(-) delete mode 160000 cgranges diff --git a/cgranges b/cgranges deleted file mode 160000 index ce6ba0ea..00000000 --- a/cgranges +++ /dev/null @@ -1 +0,0 @@ -Subproject commit ce6ba0ea27938c25e95f4a2ed31193cecc3f7606 From e02d8eaa26a9e5a666d1cabb4bdc4fe9eb4e5527 Mon Sep 17 00:00:00 2001 From: Leandro Ishi Date: Thu, 4 Mar 2021 12:36:02 +0000 Subject: [PATCH 27/37] Adding cgranges back to thirdparty dir --- .gitmodules | 3 +++ CMakeLists.txt | 2 +- thirdparty/cgranges | 1 + 3 files changed, 5 insertions(+), 1 deletion(-) create mode 160000 thirdparty/cgranges diff --git a/.gitmodules b/.gitmodules index 25c1f70d..55a3a322 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,3 +1,6 @@ [submodule "thirdparty/gatb-core"] path = thirdparty/gatb-core url = https://github.com/leoisl/gatb-core 
+[submodule "thirdparty/cgranges"] + path = thirdparty/cgranges + url = https://github.com/lh3/cgranges diff --git a/CMakeLists.txt b/CMakeLists.txt index 543a264c..a8e00173 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -107,7 +107,7 @@ set(Boost_USE_STATIC_LIBS ON) #include directories as SYSTEM includes, thus warnings will be ignored for these include_directories(SYSTEM ${CMAKE_BINARY_DIR}/include - ${PROJECT_SOURCE_DIR}/cgranges/cpp + ${PROJECT_SOURCE_DIR}/thirdparty/cgranges/cpp ) # normal includes: warnings will be reported for these diff --git a/thirdparty/cgranges b/thirdparty/cgranges new file mode 160000 index 00000000..22b85aa5 --- /dev/null +++ b/thirdparty/cgranges @@ -0,0 +1 @@ +Subproject commit 22b85aa52ea68338f95baee8e18ffc398d134289 From 138eef4e91f46782d27fa4ba57a34271fe3de1a4 Mon Sep 17 00:00:00 2001 From: Leandro Ishi Date: Thu, 4 Mar 2021 12:38:16 +0000 Subject: [PATCH 28/37] Expecting FatalRuntimeError instead of death in tests --- test/de_bruijn_graph_test.cpp | 2 +- test/panread_test.cpp | 2 +- test/sampleinfo_test.cpp | 30 +++++++++++++++--------------- test/vcfrecord_test.cpp | 8 ++++---- 4 files changed, 21 insertions(+), 21 deletions(-) diff --git a/test/de_bruijn_graph_test.cpp b/test/de_bruijn_graph_test.cpp index 52104087..13b48a2c 100644 --- a/test/de_bruijn_graph_test.cpp +++ b/test/de_bruijn_graph_test.cpp @@ -225,7 +225,7 @@ TEST(DeBruijnGraphAddEdge, AddEdgeNodesBothRC_EdgeAdded) EXPECT_FALSE(found_outnode_n2); } -TEST(DeBruijnGraphAddEdge, AddEdgeNoOverlap_Death) +TEST(DeBruijnGraphAddEdge, AddEdgeNoOverlap_FatalRuntimeError) { GraphTester g(3); diff --git a/test/panread_test.cpp b/test/panread_test.cpp index bfd87dd0..e2b75895 100644 --- a/test/panread_test.cpp +++ b/test/panread_test.cpp @@ -54,7 +54,7 @@ TEST(ReadAddHits, AddOneEmptyClusterToHits_ReadHitsMapContainsCorrectPrgId) EXPECT_TRUE(result); } -TEST(ReadAddHits, AddClusterSecondTime_DeathAndReadHitsNotChanged) +TEST(ReadAddHits, 
AddClusterSecondTime_FatalRuntimeErrorAndReadHitsNotChanged) { uint32_t read_id = 1; Read read(read_id); diff --git a/test/sampleinfo_test.cpp b/test/sampleinfo_test.cpp index 06f356fe..fd4a1db0 100644 --- a/test/sampleinfo_test.cpp +++ b/test/sampleinfo_test.cpp @@ -14,7 +14,7 @@ using ::testing::DoubleNear; using ::testing::Property; using ::testing::Return; -TEST(SampleInfoTest, constructor___zero_alleles___expects_death) +TEST(SampleInfoTest, constructor___zero_alleles___expects_FatalRuntimeError) { ASSERT_EXCEPTION(SampleInfo(0, 0, &default_genotyping_options), FatalRuntimeError, "Error on creating VCF Sample INFOs: the VCF record has no alleles"); @@ -203,7 +203,7 @@ TEST_F(SampleInfoTest___Fixture, get_allele_to_reverse_coverages___default_sampl } TEST_F(SampleInfoTest___Fixture, - set_coverage_information___forward_coverage_has_no_alleles___expects_death) + set_coverage_information___forward_coverage_has_no_alleles___expects_FatalRuntimeError) { ASSERT_EXCEPTION(default_sample_info.set_coverage_information( allele_to_coverage_empty, allele_to_coverage_three_alleles), @@ -212,7 +212,7 @@ TEST_F(SampleInfoTest___Fixture, } TEST_F(SampleInfoTest___Fixture, - set_coverage_information___forward_coverage_has_one_allele___expects_death) + set_coverage_information___forward_coverage_has_one_allele___expects_FatalRuntimeError) { ASSERT_EXCEPTION(default_sample_info.set_coverage_information( allele_to_coverage_one_allele, allele_to_coverage_three_alleles), @@ -221,7 +221,7 @@ TEST_F(SampleInfoTest___Fixture, } TEST_F(SampleInfoTest___Fixture, - set_coverage_information___reverse_coverage_has_no_alleles___expects_death) + set_coverage_information___reverse_coverage_has_no_alleles___expects_FatalRuntimeError) { ASSERT_EXCEPTION(default_sample_info.set_coverage_information( allele_to_coverage_three_alleles, allele_to_coverage_empty), @@ -230,7 +230,7 @@ TEST_F(SampleInfoTest___Fixture, } TEST_F(SampleInfoTest___Fixture, - 
set_coverage_information___reverse_coverage_has_one_allele___expects_death) + set_coverage_information___reverse_coverage_has_one_allele___expects_FatalRuntimeError) { ASSERT_EXCEPTION(default_sample_info.set_coverage_information( allele_to_coverage_three_alleles, allele_to_coverage_one_allele), @@ -239,7 +239,7 @@ TEST_F(SampleInfoTest___Fixture, } TEST_F(SampleInfoTest___Fixture, - set_coverage_information___both_coverages_have_two_alleles___different_number_of_bases___expects_death) + set_coverage_information___both_coverages_have_two_alleles___different_number_of_bases___expects_FatalRuntimeError) { ASSERT_EXCEPTION(default_sample_info.set_coverage_information( allele_to_coverage_two_alleles, { { 1, 2 }, { 3 } }), @@ -263,7 +263,7 @@ TEST_F(SampleInfoTest___Fixture, } TEST_F(SampleInfoTest___Fixture, - set_coverage_information___fwd_coverage_has_two_alleles___rev_coverage_has_three_alleles___sample_info_expects_three_alleles___expects_death) + set_coverage_information___fwd_coverage_has_two_alleles___rev_coverage_has_three_alleles___sample_info_expects_three_alleles___expects_FatalRuntimeError) { ASSERT_EXCEPTION(default_sample_info_three_alleles.set_coverage_information( allele_to_coverage_two_alleles, allele_to_coverage_three_alleles), @@ -272,7 +272,7 @@ TEST_F(SampleInfoTest___Fixture, } TEST_F(SampleInfoTest___Fixture, - set_coverage_information___fwd_coverage_has_three_alleles___rev_coverage_has_two_alleles___sample_info_expects_three_alleles___expects_death) + set_coverage_information___fwd_coverage_has_three_alleles___rev_coverage_has_two_alleles___sample_info_expects_three_alleles___expects_FatalRuntimeError) { ASSERT_EXCEPTION(default_sample_info_three_alleles.set_coverage_information( allele_to_coverage_three_alleles, allele_to_coverage_two_alleles), @@ -295,7 +295,7 @@ TEST_F(SampleInfoTest___Fixture, } TEST_F(SampleInfoTest___Fixture, - set_coverage_information___forward_covg_has_two_alleles___reverse_covg_has_three_alleles___expects_death) + 
set_coverage_information___forward_covg_has_two_alleles___reverse_covg_has_three_alleles___expects_FatalRuntimeError) { ASSERT_EXCEPTION(default_sample_info.set_coverage_information( allele_to_coverage_two_alleles, allele_to_coverage_three_alleles), @@ -304,7 +304,7 @@ TEST_F(SampleInfoTest___Fixture, } TEST_F(SampleInfoTest___Fixture, - set_coverage_information___both_covgs_have_two_alleles_but_have_different_number_of_bases_on_first_allele___expects_death) + set_coverage_information___both_covgs_have_two_alleles_but_have_different_number_of_bases_on_first_allele___expects_FatalRuntimeError) { std::vector> allele_to_coverage_two_alleles_first_allele_has_only_one_base( @@ -319,7 +319,7 @@ TEST_F(SampleInfoTest___Fixture, } TEST_F(SampleInfoTest___Fixture, - set_coverage_information___both_covgs_have_two_alleles_but_have_different_number_of_bases_on_second_allele___expects_death) + set_coverage_information___both_covgs_have_two_alleles_but_have_different_number_of_bases_on_second_allele___expects_FatalRuntimeError) { std::vector> allele_to_coverage_two_alleles_second_allele_has_only_one_base( @@ -334,7 +334,7 @@ TEST_F(SampleInfoTest___Fixture, } TEST_F(SampleInfoTest___Fixture, - set_number_of_alleles_and_resize_coverage_information___resize_to_zero_alleles___expects_death) + set_number_of_alleles_and_resize_coverage_information___resize_to_zero_alleles___expects_FatalRuntimeError) { default_sample_info_three_alleles.set_coverage_information( allele_to_coverage_three_alleles, allele_to_coverage_three_alleles); @@ -1124,13 +1124,13 @@ TEST_F(SampleInfoTest___get_genotype_from_coverage___Fixture, EXPECT_NEAR(-50.5, actual->second, 0.000001); } -TEST_F(SampleInfoTest___Fixture, to_string___no_flags_set___expects_death) +TEST_F(SampleInfoTest___Fixture, to_string___no_flags_set___expects_FatalRuntimeError) { ASSERT_EXCEPTION(default_sample_info.to_string(false, false), FatalRuntimeError, "Error on stringifying VCF record sample info: incompatible genotyping options"); 
} -TEST_F(SampleInfoTest___Fixture, to_string___both_flags_set___expects_death) +TEST_F(SampleInfoTest___Fixture, to_string___both_flags_set___expects_FatalRuntimeError) { ASSERT_EXCEPTION(default_sample_info.to_string(true, true), FatalRuntimeError, "Error on stringifying VCF record sample info: incompatible genotyping options"); @@ -1294,7 +1294,7 @@ TEST_F(SampleIndexToSampleInfoTemplate___Fixture, } TEST_F(SampleIndexToSampleInfoTemplate___Fixture, - merge_other_samples_infos_into_this___different_nb_of_samples___expects_death) + merge_other_samples_infos_into_this___different_nb_of_samples___expects_FatalRuntimeError) { SampleIndexToSampleInfoTemplateAllVisible another_sample_index_to_sample_info; diff --git a/test/vcfrecord_test.cpp b/test/vcfrecord_test.cpp index 4b893036..f26c284a 100644 --- a/test/vcfrecord_test.cpp +++ b/test/vcfrecord_test.cpp @@ -315,7 +315,7 @@ TEST(VCFRecordTest, alts_to_string___three_alts) EXPECT_EQ("A1,A2,A3", actual); } -TEST(VCFRecordTest, get_format___no_flags_set___expects_death) +TEST(VCFRecordTest, get_format___no_flags_set___expects_FatalRuntimeError) { VCF vcf = create_VCF_with_default_parameters(); VCFRecord vcf_record(&vcf); @@ -323,7 +323,7 @@ TEST(VCFRecordTest, get_format___no_flags_set___expects_death) "Error on getting format field from VCF record: incompatible genotyping options"); } -TEST(VCFRecordTest, get_format___both_flags_set___expects_death) +TEST(VCFRecordTest, get_format___both_flags_set___expects_FatalRuntimeError) { VCF vcf = create_VCF_with_default_parameters(); VCFRecord vcf_record(&vcf); @@ -582,7 +582,7 @@ TEST_F(VCFRecordTest___merge_record_into_this______Fixture, } TEST_F(VCFRecordTest___merge_record_into_this______Fixture, - merge_last_alt_is_common___expects_death) + merge_last_alt_is_common___expects_FatalRuntimeError) { ASSERT_EXCEPTION( vcf_record_ref_A_alt_T_TT_TTT.merge_record_into_this(vcf_record_ref_A_alt_TTT), @@ -633,7 +633,7 @@ class 
VCFRecordTest___can_biallelic_record_be_merged_into_this______Fixture }; TEST_F(VCFRecordTest___can_biallelic_record_be_merged_into_this______Fixture, - merge_only_ref_no_alts___expects_death) + merge_only_ref_no_alts___expects_FatalRuntimeError) { bool actual = vcf_record_ref_A.can_biallelic_record_be_merged_into_this( vcf_record_only_ref_no_alts); From 5f762b5ec712b1db5abac948ccd91e2ee32a99af Mon Sep 17 00:00:00 2001 From: Leandro Ishi Date: Thu, 4 Mar 2021 12:54:05 +0000 Subject: [PATCH 29/37] Updating Changelog --- CHANGELOG.md | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index ad9c9033..96fe63e8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,7 +7,7 @@ The format is based on project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). -## [Unreleased] +## [v0.8.0] Improvements to the build process and performance. @@ -15,16 +15,24 @@ Improvements to the build process and performance. - We now provide a script to build a portable precompiled binary as another option to run `pandora` easily. The portable binary is now provided with the release; - `pandora` can now provide a meaningful stack trace in case of errors, to facilitate debugging - (need to pass flag `-DPRINT_STACKTRACE` to `CMake`); + (need to pass flag `-DPRINT_STACKTRACE` to `CMake`). Due to this, we now add debug symbols (`-g` flag) + to every `pandora` build type, but this [does not impact performance](https://stackoverflow.com/a/39223245). + The precompiled binary has this enabled. 
### Changed -- We now use the [Hunter](https://github.com/cpp-pm/hunter) package manager, removing the requirement of having ZLIB and - Boost system-wide installations; +- We now use the [Hunter](https://github.com/cpp-pm/hunter) package manager, removing the requirement of having `ZLIB` and + `Boost` system-wide installations; +- `GATB` is now a git submodule instead of an external project downloaded and compiled during compilation time. + This means that when git cloning `pandora`, `cgranges` and `GATB` are also downloaded/cloned, and when preparing + the build (running `cmake`), `Hunter` downloads and installs `Boost`, `GTest` and `ZLIB`. + Thus we still need internet connection to prepare the build (running `cmake`) but not for compiling (running `make`). +- We now use a GATB fork that accepts a `ZLIB` custom installation; +- Refactored all thirdparty libraries (`cgranges`, `GATB`, `backward`, `CLI11`, `inthash`) into their own directory `thirdparty`. ### Fixed -- We refactored asserts into exceptions and errors, and now `pandora` can be compiled correctly in the `Release` mode. - The build process is thus be able to create a more optimized binary, resulting in improved performance; -- Refactored thirdparty libraries into a directory of their own; +- Refactored asserts into exceptions, and now `pandora` can be compiled correctly in the `Release` mode. + The build process is thus able to create a more optimized binary, resulting in improved performance. + ## [v0.7.0] From ed47b655762393fe9b4f1bff08cdef2d4a47a848 Mon Sep 17 00:00:00 2001 From: Leandro Ishi Date: Thu, 4 Mar 2021 15:03:25 +0000 Subject: [PATCH 30/37] Updating README with pandora v0.8.0-alpha version precompiled binary --- README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index f021dcb1..6bdbb5d0 100644 --- a/README.md +++ b/README.md @@ -83,13 +83,13 @@ can be found [here](https://gcc.gnu.org/onlinedocs/gfortran/OpenMP.html). 
* **Download**: ``` - wget "https://www.dropbox.com/s/ltq2gti9t6wav1j/pandora-linux-precompiled_v0.8.1_beta?dl=1" -O pandora-linux-precompiled_v0.8.1_beta + wget "https://www.dropbox.com/s/3ofko0q5fcec5xe/pandora-linux-precompiled-v0.8.0-alpha?dl=0" -O pandora-linux-precompiled-v0.8.0-alpha ``` * **TODO: updated to a github link when we make the release;** * **Running**: ``` -chmod +x pandora-linux-precompiled_v0.8.1_beta -./pandora-linux-precompiled_v0.8.1_beta -h +chmod +x pandora-linux-precompiled-v0.8.0-alpha +./pandora-linux-precompiled-v0.8.0-alpha -h ``` * **Compatibility**: This precompiled binary works on pretty much any glibc-2.12-or-later-based x86 and x86-64 Linux distribution From 638f6c656a8a2c843d89d88bead9dc8cb85236ee Mon Sep 17 00:00:00 2001 From: Leandro Ishi Date: Thu, 4 Mar 2021 15:17:59 +0000 Subject: [PATCH 31/37] Fixing small issue in README --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 6bdbb5d0..c223514f 100644 --- a/README.md +++ b/README.md @@ -83,7 +83,7 @@ can be found [here](https://gcc.gnu.org/onlinedocs/gfortran/OpenMP.html). 
* **Download**: ``` - wget "https://www.dropbox.com/s/3ofko0q5fcec5xe/pandora-linux-precompiled-v0.8.0-alpha?dl=0" -O pandora-linux-precompiled-v0.8.0-alpha + wget "https://www.dropbox.com/s/3ofko0q5fcec5xe/pandora-linux-precompiled-v0.8.0-alpha?dl=1" -O pandora-linux-precompiled-v0.8.0-alpha ``` * **TODO: updated to a github link when we make the release;** * **Running**: From a36c75f2af07dc99e88be294f19b23955c61f578 Mon Sep 17 00:00:00 2001 From: Leandro Ishi Date: Thu, 4 Mar 2021 15:48:40 +0000 Subject: [PATCH 32/37] Updating example to use the precompiled binary --- example/README.md | 6 ++--- ...-linux-precompiled-v0.8.0-alpha.md5sum.txt | 1 + example/run_pandora_nodenovo.sh | 22 +++++++++++++++++-- 3 files changed, 23 insertions(+), 6 deletions(-) create mode 100644 example/pandora-linux-precompiled-v0.8.0-alpha.md5sum.txt diff --git a/example/README.md b/example/README.md index a39633f0..92b77944 100644 --- a/example/README.md +++ b/example/README.md @@ -15,7 +15,7 @@ pandora_workflow_data/ : contains other input and configuration files to run the ### Dependencies -* [`singularity`](https://sylabs.io/) +* `md5sum`, `wget`, `GCC` 4.9+ (see [why](../README.md#no-installation-needed---precompiled-portable-binary)). 
### Running ``` @@ -41,9 +41,7 @@ We can see samples `toy_sample_1` and `toy_sample_2` genotype towards different ### Dependencies -* [`singularity`](https://sylabs.io/) -* `git` -* `python 3.6+` +* [`singularity`](https://sylabs.io/), `git`, `python 3.6+` ### Running ``` diff --git a/example/pandora-linux-precompiled-v0.8.0-alpha.md5sum.txt b/example/pandora-linux-precompiled-v0.8.0-alpha.md5sum.txt new file mode 100644 index 00000000..e86378ea --- /dev/null +++ b/example/pandora-linux-precompiled-v0.8.0-alpha.md5sum.txt @@ -0,0 +1 @@ +3dfbf91158e3d1e8e1c9515800f8133e pandora-linux-precompiled-v0.8.0-alpha diff --git a/example/run_pandora_nodenovo.sh b/example/run_pandora_nodenovo.sh index 470f4580..2dd05a3c 100755 --- a/example/run_pandora_nodenovo.sh +++ b/example/run_pandora_nodenovo.sh @@ -1,4 +1,22 @@ #!/usr/bin/env bash set -eu -pandora_command="pandora index prgs/toy_prg.fa && pandora compare --genotype -o output_toy_example_no_denovo prgs/toy_prg.fa reads/read_index.tsv" -singularity exec docker://rmcolq/pandora:latest bash -c "${pandora_command}" + +# configs +pandora_URL="https://www.dropbox.com/s/3ofko0q5fcec5xe/pandora-linux-precompiled-v0.8.0-alpha?dl=1" +pandora_executable="./pandora-linux-precompiled-v0.8.0-alpha" +pandora_md5sum_file="./pandora-linux-precompiled-v0.8.0-alpha.md5sum.txt" + + +if md5sum -c "${pandora_md5sum_file}"; then + # The MD5 sum match + echo "${pandora_executable} has correct MD5 sum, proceeding..." +else + # The MD5 sum didn't match + echo "${pandora_executable} does not exist or does not have correct MD5 sum, downloading..." 
+ wget "${pandora_URL}" -O "${pandora_executable}" + chmod +x "${pandora_executable}" +fi + +"${pandora_executable}" index prgs/toy_prg.fa +"${pandora_executable}" compare --genotype -o output_toy_example_no_denovo prgs/toy_prg.fa reads/read_index.tsv + From 876ded7a5ccecae86fdda9f3c51fe7d7ec822379 Mon Sep 17 00:00:00 2001 From: Leandro Ishi Date: Thu, 4 Mar 2021 16:10:09 +0000 Subject: [PATCH 33/37] Forgot to bump version, updating pre-release now --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index a8e00173..de938609 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -11,7 +11,7 @@ HunterGate( # project configuration set(PROJECT_NAME_STR pandora) -project(${PROJECT_NAME_STR} VERSION "0.7.0" LANGUAGES C CXX) +project(${PROJECT_NAME_STR} VERSION "0.8.0" LANGUAGES C CXX) configure_file( include/version.h.in ${CMAKE_BINARY_DIR}/include/version.h ) # add or not feature to print the stack trace From 9b2e925ce40110c4cabb64b56e57e88924af713a Mon Sep 17 00:00:00 2001 From: Leandro Ishi Date: Thu, 4 Mar 2021 17:42:46 +0000 Subject: [PATCH 34/37] Updating v0.8.0-alpha pre-compiled binary link to github link --- README.md | 3 +-- example/pandora-linux-precompiled-v0.8.0-alpha.md5sum.txt | 2 +- example/run_pandora_nodenovo.sh | 2 +- 3 files changed, 3 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index c223514f..c87b37b4 100644 --- a/README.md +++ b/README.md @@ -83,9 +83,8 @@ can be found [here](https://gcc.gnu.org/onlinedocs/gfortran/OpenMP.html). 
* **Download**: ``` - wget "https://www.dropbox.com/s/3ofko0q5fcec5xe/pandora-linux-precompiled-v0.8.0-alpha?dl=1" -O pandora-linux-precompiled-v0.8.0-alpha + wget https://github.com/rmcolq/pandora/releases/download/v0.8.0-alpha/pandora-linux-precompiled-v0.8.0-alpha ``` - * **TODO: updated to a github link when we make the release;** * **Running**: ``` chmod +x pandora-linux-precompiled-v0.8.0-alpha diff --git a/example/pandora-linux-precompiled-v0.8.0-alpha.md5sum.txt b/example/pandora-linux-precompiled-v0.8.0-alpha.md5sum.txt index e86378ea..f45b2560 100644 --- a/example/pandora-linux-precompiled-v0.8.0-alpha.md5sum.txt +++ b/example/pandora-linux-precompiled-v0.8.0-alpha.md5sum.txt @@ -1 +1 @@ -3dfbf91158e3d1e8e1c9515800f8133e pandora-linux-precompiled-v0.8.0-alpha +0703e724c62cfe41c048519f53298825 pandora-linux-precompiled-v0.8.0-alpha diff --git a/example/run_pandora_nodenovo.sh b/example/run_pandora_nodenovo.sh index 2dd05a3c..071cb5be 100755 --- a/example/run_pandora_nodenovo.sh +++ b/example/run_pandora_nodenovo.sh @@ -2,7 +2,7 @@ set -eu # configs -pandora_URL="https://www.dropbox.com/s/3ofko0q5fcec5xe/pandora-linux-precompiled-v0.8.0-alpha?dl=1" +pandora_URL="https://github.com/rmcolq/pandora/releases/download/v0.8.0-alpha/pandora-linux-precompiled-v0.8.0-alpha" pandora_executable="./pandora-linux-precompiled-v0.8.0-alpha" pandora_md5sum_file="./pandora-linux-precompiled-v0.8.0-alpha.md5sum.txt" From fa0228de7d28afdc51d08e9dc04007de81c7717e Mon Sep 17 00:00:00 2001 From: Leandro Ishi Date: Thu, 4 Mar 2021 17:45:53 +0000 Subject: [PATCH 35/37] Adding a news section to the README --- README.md | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/README.md b/README.md index c87b37b4..b03498b5 100644 --- a/README.md +++ b/README.md @@ -12,6 +12,7 @@ [TOC]: # # Table of Contents +- [News](#news) - [Introduction](#introduction) - [Quick Start](#quick-start) - [Hands-on toy example](#hands-on-toy-example) @@ -21,6 +22,12 @@ - [Usage](#usage) +## News + +* A 
new pre-release version is out: [v0.8.0-alpha](https://github.com/rmcolq/pandora/releases/tag/v0.8.0-alpha). +Note that this is yet an unstable version. The latest stable release is [v0.7.0](https://github.com/rmcolq/pandora/releases/tag/v0.7.0). + + ## Introduction Pandora is a tool for bacterial genome analysis using a pangenome reference graph (PanRG). It allows gene presence/absence detection and genotyping of SNPs, indels and longer variants in one or a number of samples. Pandora works with Illumina or Nanopore data. For more details, see [our paper][pandora_2020_paper]. From 2c67f17813fdb6cdcab3360dc3be1ac655783842 Mon Sep 17 00:00:00 2001 From: Leandro Ishi Date: Thu, 4 Mar 2021 17:45:53 +0000 Subject: [PATCH 36/37] Revert "Adding a news section to the README" This reverts commit fa0228de7d28afdc51d08e9dc04007de81c7717e. --- README.md | 7 ------- 1 file changed, 7 deletions(-) diff --git a/README.md b/README.md index b03498b5..c87b37b4 100644 --- a/README.md +++ b/README.md @@ -12,7 +12,6 @@ [TOC]: # # Table of Contents -- [News](#news) - [Introduction](#introduction) - [Quick Start](#quick-start) - [Hands-on toy example](#hands-on-toy-example) @@ -22,12 +21,6 @@ - [Usage](#usage) -## News - -* A new pre-release version is out: [v0.8.0-alpha](https://github.com/rmcolq/pandora/releases/tag/v0.8.0-alpha). -Note that this is yet an unstable version. The latest stable release is [v0.7.0](https://github.com/rmcolq/pandora/releases/tag/v0.7.0). - - ## Introduction Pandora is a tool for bacterial genome analysis using a pangenome reference graph (PanRG). It allows gene presence/absence detection and genotyping of SNPs, indels and longer variants in one or a number of samples. Pandora works with Illumina or Nanopore data. For more details, see [our paper][pandora_2020_paper]. 
From f5d7a137df059af0ca9a64dc2ab673c743dae75a Mon Sep 17 00:00:00 2001 From: Leandro Ishi Date: Mon, 8 Mar 2021 08:16:30 +0000 Subject: [PATCH 37/37] Updating .dockerignore and .gitignore --- .dockerignore | 5 ++++- .gitignore | 2 +- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/.dockerignore b/.dockerignore index e92dc343..1de79275 100644 --- a/.dockerignore +++ b/.dockerignore @@ -20,4 +20,7 @@ tmp/ cmake_install.cmake compile_commands.json Makefile -pandora.cbp \ No newline at end of file +pandora.cbp +build_portable_executable +pandora-linux-precompiled +/cmake-build-release/ diff --git a/.gitignore b/.gitignore index 16258254..bef1341e 100644 --- a/.gitignore +++ b/.gitignore @@ -115,6 +115,6 @@ example/pandora_workflow #portable binary build dir build_portable_executable - pandora-linux-precompiled + /cmake-build-release/