diff --git a/src/TN93.cpp b/src/TN93.cpp index 645d949..947b90d 100644 --- a/src/TN93.cpp +++ b/src/TN93.cpp @@ -330,7 +330,7 @@ int main(int argc, const char *argv[]) { args, nameLengths, names, pairwise, percentDone, cerr, max, randFlag, \ distanceMatrix, upperBound, seqLengthInFile1, seqLengthInFile2, mean, \ randSeqs, weighted_counts, do_fst, randomized_fst, randomized_idx, \ - recounts, cross_comparison_only, report_self) + recounts, cross_comparison_only, report_self) { @@ -341,8 +341,9 @@ int main(int argc, const char *argv[]) { } } -#pragma omp for schedule(dynamic) +#pragma omp for schedule(guided) for (long seq1 = 0; seq1 < upperBound; seq1++) { + long mapped_id = randomized_fst ? randomized_idx.value(seq1) : (randSeqs ? randSeqs[seq1] : seq1); @@ -402,6 +403,8 @@ int main(int argc, const char *argv[]) { } } + + double thisD = sequence_descriptors ? computeTN93(s1, stringText(sequences, seqLengths, mapped_id2), @@ -415,6 +418,7 @@ int main(int argc, const char *argv[]) { args.overlap, &(histogram_counts[which_bin][0]), HISTOGRAM_SLICE, HISTOGRAM_BINS, weighted_count); + if (thisD >= -1.e-10 && thisD <= args.distance) { local_links_found += weighted_count; // char *s2 = stringText(sequences, seqLengths, seq1); @@ -488,7 +492,7 @@ int main(int argc, const char *argv[]) { } } } - } + } // end of the parallel loop if (args.do_count == false && args.format == hyphy) { fprintf(args.output, "{"); @@ -523,6 +527,14 @@ int main(int argc, const char *argv[]) { << endl; (*outStream) << "\t\"Links found\" : " << foundLinks << ',' << endl; (*outStream) << "\t\"Maximum distance\" : " << max[0] << ',' << endl; + + if (args.input2 == NULL) { + (*outStream) << "\t\"Sequences\" : " << sequenceCount << ',' << endl; + } else { + (*outStream) << "\t\"Sequences in first file\" : " << seqLengthInFile1 << ',' << endl; + (*outStream) << "\t\"Sequences in second file\" : " << seqLengthInFile2 << ',' << endl; + } + if (do_fst) { const char *keys[4] = {"File 1", "File 2", "Between", "Combined"}; for (unsigned long k = 0; k < 3; k++) { diff --git a/src/tn93_shared.cc b/src/tn93_shared.cc index 6107e00..696569b 100644 --- a/src/tn93_shared.cc +++ b/src/tn93_shared.cc @@ -360,8 +360,11 @@ long perfect_match (const char* source, char* target, const long sequence_length struct sequence_gap_structure describe_sequence (const char* source, const unsigned long sequence_length, const unsigned long char_count) { sequence_gap_structure result; - unsigned long start_run = sequence_length + 1UL, - end_run = 0L; + long start_run = sequence_length + 1UL, + end_run = 0L; + + result.resolved_end = end_run; + result.resolved_start = start_run; for (unsigned long char_idx = 0UL; char_idx < sequence_length; char_idx++) { unsigned long this_char = source[char_idx]; @@ -397,6 +400,10 @@ struct sequence_gap_structure describe_sequence (const char* source, const unsig result.resolved_end = end_run; } } + + //cout << sequence_length << "/" << char_count << endl << result.first_nongap << "," + // << result.last_nongap << " : " << result.resolved_start << "," << result.resolved_end << endl; + return result; } @@ -432,12 +439,17 @@ double computeTN93 (const char * s1, const char * s2, const unsigned long L, c if (sequence_descriptor1 && sequence_descriptor2 && matchMode != GAPMM) { + +//#pragma omp critical +// cout << "HERE" << endl; + + unsigned long first_nongap = MAX (sequence_descriptor1->first_nongap, sequence_descriptor2->first_nongap), last_nongap = MIN (sequence_descriptor1->last_nongap, sequence_descriptor2->last_nongap), span_start = MAX (sequence_descriptor1->resolved_start, sequence_descriptor2->resolved_start), span_end = MIN (sequence_descriptor1->resolved_end, sequence_descriptor2->resolved_end); - - //cout << first_nongap << " " << last_nongap << " " << span_start << " " << span_end << endl; +//#pragma omp critical +// cout << first_nongap << " " << last_nongap << " " << span_start << " " << span_end << endl; if (span_start > span_end) {