diff --git a/README.md b/README.md index 00d53379..15911038 100644 --- a/README.md +++ b/README.md @@ -30,13 +30,13 @@ If you can't find an answer in the VSEARCH documentation, please visit the [VSEA In the example below, VSEARCH will identify sequences in the file database.fsa that are at least 90% identical on the plus strand to the query sequences in the file queries.fsa and write the results to the file alnout.txt. -`./vsearch-1.0.6-linux-x86_64 --usearch_global queries.fsa --db database.fsa --id 0.9 --alnout alnout.txt` +`./vsearch-1.0.7-linux-x86_64 --usearch_global queries.fsa --db database.fsa --id 0.9 --alnout alnout.txt` ## Download and install The latest releases of VSEARCH are available [here](https://github.com/torognes/vsearch/releases). -Binary executables of VSEARCH are available in the `bin` folder for [GNU/Linux on x86-64 systems](https://github.com/torognes/vsearch/blob/master/bin/vsearch-1.0.6-linux-x86_64) and [Apple Mac OS X on x86-64 systems](https://github.com/torognes/vsearch/blob/master/bin/vsearch-1.0.6-osx-x86_64). These executables include support for input files compressed by zlib and bzip2 (with files usually ending in .gz or .bz2). +Binary executables of VSEARCH are available in the `bin` folder for [GNU/Linux on x86-64 systems](https://github.com/torognes/vsearch/blob/master/bin/vsearch-1.0.7-linux-x86_64) and [Apple Mac OS X on x86-64 systems](https://github.com/torognes/vsearch/blob/master/bin/vsearch-1.0.7-osx-x86_64). These executables include support for input files compressed by zlib and bzip2 (with files usually ending in .gz or .bz2). Download the appropriate executable and make a symbolic link in a folder included in your `$PATH` from `vsearch` to the appropriate binary. You may use the following commands (assuming `~/bin` is in your `$PATH`): @@ -44,8 +44,8 @@ Download the appropriate executable and make a symbolic link in a folder include cd ~ mkdir -p bin cd bin -wget https://github.com/torognes/vsearch/releases/download/v1.0.6/vsearch-1.0.6-linux-x86_64 -ln -s vsearch-1.0.6-linux-x86_64 vsearch +wget https://github.com/torognes/vsearch/releases/download/v1.0.7/vsearch-1.0.7-linux-x86_64 +ln -s vsearch-1.0.7-linux-x86_64 vsearch ``` Substitute `linux` with `osx` in those lines if you're on a Mac. diff --git a/doc/vsearch.1 b/doc/vsearch.1 index 9e19b151..f8bc892a 100644 --- a/doc/vsearch.1 +++ b/doc/vsearch.1 @@ -1,5 +1,5 @@ .\" ============================================================================ -.TH vsearch 1 "December 14, 2014" "version 1.0.6" "USER COMMANDS" +.TH vsearch 1 "December 19, 2014" "version 1.0.7" "USER COMMANDS" .\" ============================================================================ .SH NAME vsearch \(em chimera detection, clustering, dereplication, masking, pairwise alignment, searching, shuffling and sorting of amplicons from metagenomic projects. @@ -1411,6 +1411,9 @@ Fixes a minor bug with --allpairs_global and --acceptall options. .TP .BR v1.0.6\~ "released December 14th, 2014" Fixes a memory allocation bug in chimera detection (--uchime_ref option). +.TP +.BR v1.0.7\~ "released December 19th, 2014" +Fixes a bug in the output from chimera detection with the --uchimeout option. .LP .\" ============================================================================ .\" TODO: diff --git a/doc/vsearch_manual.pdf b/doc/vsearch_manual.pdf index ce0d63ef..f5a85138 100644 Binary files a/doc/vsearch_manual.pdf and b/doc/vsearch_manual.pdf differ diff --git a/eval/stats.pl b/eval/stats.pl index e2bf937d..ce10c013 100755 --- a/eval/stats.pl +++ b/eval/stats.pl @@ -40,7 +40,7 @@ close I; my $pos = $truepos + $falsepos; -my $falseneg = $gold - $ truepos; +my $falseneg = $gold - $truepos; my $prec = 100.0 * $truepos / $pos; my $fdr = 100.0 * $falsepos / $pos; my $recall = 100.0 * $truepos / $gold; diff --git a/src/align_simd.cc b/src/align_simd.cc index 595f6823..6de3a41c 100644 --- a/src/align_simd.cc +++ b/src/align_simd.cc @@ -732,7 +732,7 @@ void search16(s16info_s * s, __m128i M_QR_target_left, M_R_target_left; - __m128i QR_query_left, R_query_left; + __m128i R_query_left; __m128i QR_query_interior, R_query_interior; __m128i QR_query_right, R_query_right; __m128i QR_target_left, R_target_left; @@ -760,8 +760,6 @@ void search16(s16info_s * s, T0 = _mm_set_epi16(0, 0, 0, 0, 0, 0, 0, 0xffff); - QR_query_left = _mm_set1_epi16(s->penalty_gap_open_query_left + - s->penalty_gap_extension_query_left); R_query_left = _mm_set1_epi16(s->penalty_gap_extension_query_left); QR_query_interior = _mm_set1_epi16(s->penalty_gap_open_query_interior + diff --git a/src/chimera.cc b/src/chimera.cc index 43b6922c..2402cbc0 100644 --- a/src/chimera.cc +++ b/src/chimera.cc @@ -337,6 +337,8 @@ int find_best_parents(struct chimera_info_s * ci) int eval_parents(struct chimera_info_s * ci) { + int status = 1; + /* create msa */ /* find max insertions in front of each position in the query sequence */ @@ -622,11 +624,10 @@ int eval_parents(struct chimera_info_s * ci) } } - int ischimeric = 0; - int isnonchimeric = 0; - if (best_h >= 0.0) { + status = 2; + /* flip A and B if necessary */ if (best_reverse) @@ -733,19 +734,18 @@ int eval_parents(struct chimera_info_s * ci) if (best_h >= opt_minh) { + status = 3; if ((divdiff >= opt_mindiv) && (sumL >= opt_mindiffs) && (sumR >= opt_mindiffs)) - ischimeric = 1; + status = 4; } - else - isnonchimeric = 1; /* print alignment */ pthread_mutex_lock(&mutex_output); - if (opt_uchimealns && ischimeric) + if (opt_uchimealns && (status == 4)) { fprintf(fp_uchimealns, "\n"); fprintf(fp_uchimealns, "----------------------------------------" @@ -851,7 +851,7 @@ int eval_parents(struct chimera_info_s * ci) best_right_n, best_right_a, divdiff, - ischimeric ? 'Y' : (isnonchimeric ? 'N' : '?')); + status == 4 ? 'Y' : (status == 2 ? 'N' : '?')); } else { @@ -875,24 +875,24 @@ int eval_parents(struct chimera_info_s * ci) best_right_n, best_right_a, divdiff, - ischimeric ? 'Y' : (isnonchimeric ? 'N' : '?')); + status == 4 ? 'Y' : (status == 2 ? 'N' : '?')); } } pthread_mutex_unlock(&mutex_output); } - else - { - isnonchimeric = 1; - } - - if (ischimeric) - return 1; - else if (isnonchimeric) - return -1; - else - return 0; + + return status; } +/* + new chimeric status: + 0: no parents, non-chimeric + 1: score < 0 (no alignment), non-chimeric + 2: score < minh, non-chimeric + 3: score >= minh, suspicious + 4: score >= minh && (divdiff >= opt_mindiv) && ..., chimeric +*/ + void query_init(struct searchinfo_s * si) { si->qsequence = 0; @@ -1152,13 +1152,13 @@ unsigned long chimera_thread_core(struct chimera_info_s * ci) if (find_best_parents(ci)) status = eval_parents(ci); else - status = -1; + status = 0; /* output results */ pthread_mutex_lock(&mutex_output); - if (status > 0) + if (status == 4) { chimera_count++; @@ -1170,12 +1170,12 @@ unsigned long chimera_thread_core(struct chimera_info_s * ci) } } - if (status < 0) + if (status < 3) { nonchimera_count++; /* output no parents, no chimeras */ - if (opt_uchimeout) + if ((status < 2) && opt_uchimeout) { if (opt_uchimeout5) fprintf(fp_uchimeout, diff --git a/src/db.cc b/src/db.cc index 00fe805c..da752aae 100644 --- a/src/db.cc +++ b/src/db.cc @@ -82,7 +82,7 @@ void db_read(const char * filename, int upcase) rewind(fp); char * prompt; - asprintf(& prompt, "Reading file %s", filename); + (void) asprintf(& prompt, "Reading file %s", filename); progress_init(prompt, filesize); #ifdef HAVE_BZLIB @@ -124,7 +124,7 @@ void db_read(const char * filename, int upcase) switch (db_format) { case FORMAT_PLAIN: - fgets(line, LINEALLOC, fp); + (void) fgets(line, LINEALLOC, fp); break; case FORMAT_BZIP: #ifdef HAVE_BZLIB @@ -205,7 +205,7 @@ void db_read(const char * filename, int upcase) switch (db_format) { case FORMAT_PLAIN: - fgets(line, LINEALLOC, fp); + (void) fgets(line, LINEALLOC, fp); break; case FORMAT_BZIP: #ifdef HAVE_BZLIB @@ -294,7 +294,7 @@ void db_read(const char * filename, int upcase) switch (db_format) { case FORMAT_PLAIN: - fgets(line, LINEALLOC, fp); + (void) fgets(line, LINEALLOC, fp); break; case FORMAT_BZIP: #ifdef HAVE_BZLIB diff --git a/src/query.cc b/src/query.cc index 0fc94558..95f35470 100644 --- a/src/query.cc +++ b/src/query.cc @@ -84,7 +84,7 @@ static char * FGETS(char * query_line, int size) switch (query_format) { case FORMAT_PLAIN: - fgets(query_line, size, query_fp); + (void) fgets(query_line, size, query_fp); break; case FORMAT_BZIP: #ifdef HAVE_BZLIB diff --git a/src/sortbysize.cc b/src/sortbysize.cc index 3e054837..7456e18d 100644 --- a/src/sortbysize.cc +++ b/src/sortbysize.cc @@ -75,12 +75,12 @@ void sortbysize() for(int i=0; i= opt_minsize) && (size <= opt_maxsize)) { sortinfo[passed].seqno = i; - sortinfo[passed].size = size; + sortinfo[passed].size = (unsigned int) size; passed++; } progress_update(i); diff --git a/src/util.cc b/src/util.cc index d9ed8800..cccaf69a 100644 --- a/src/util.cc +++ b/src/util.cc @@ -91,7 +91,7 @@ void * xmalloc(size_t size) { const size_t alignment = 16; void * t; - posix_memalign(& t, alignment, size); + (void) posix_memalign(& t, alignment, size); if (t==NULL) fatal("Unable to allocate enough memory."); diff --git a/src/vsearch.cc b/src/vsearch.cc index 40f78236..ab381ad6 100644 --- a/src/vsearch.cc +++ b/src/vsearch.cc @@ -439,7 +439,7 @@ void args_init(int argc, char **argv) opt_maxqsize = INT_MAX; opt_maxqt = DBL_MAX; opt_maxrejects = -1; - opt_maxseqlength = 50000; + opt_maxseqlength = 15000; opt_maxsize = LONG_MAX; opt_maxsizeratio = DBL_MAX; opt_maxsl = DBL_MAX; diff --git a/src/vsearch.h b/src/vsearch.h index 3647ea89..6f00a0e6 100644 --- a/src/vsearch.h +++ b/src/vsearch.h @@ -102,7 +102,7 @@ #define FORMAT_GZIP 3 #define PROG_NAME "vsearch" -#define PROG_VERSION "v1.0.6" +#define PROG_VERSION "v1.0.7" /* options */ diff --git a/test/clusterf.sh b/test/clusterf.sh index 9af18887..5caf16ae 100755 --- a/test/clusterf.sh +++ b/test/clusterf.sh @@ -2,9 +2,9 @@ P=$1 -INPUT=../data/Rfam_9_1.fasta +#INPUT=../data/Rfam_9_1.fasta #INPUT=../data/AF091148.fsa -#INPUT=../data/BioMarKs50k.fsa +INPUT=../data/BioMarKs50k.fsa THREADS=0 diff --git a/test/clusters.sh b/test/clusters.sh index c8d72272..779cd69a 100755 --- a/test/clusters.sh +++ b/test/clusters.sh @@ -29,7 +29,6 @@ CMD="/usr/bin/time $PROG \ --id 0.9 \ --maxaccepts 1 \ --maxrejects 8 \ - --sizein \ --sizeout \ --centroids s.$P.centroids \ --uc s.$P.uc \