Skip to content

Commit

Permalink
VSEARCH 1.0.7: Fixed bug in output from chimera detection with --uchimeout option, reduced default for --maxseqlength option, and removed some compiler warnings
Browse files Browse the repository at this point in the history
  • Loading branch information
torognes committed Dec 19, 2014
1 parent b8d4828 commit e390b73
Show file tree
Hide file tree
Showing 14 changed files with 46 additions and 46 deletions.
8 changes: 4 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -30,22 +30,22 @@ If you can't find an answer in the VSEARCH documentation, please visit the [VSEA

In the example below, VSEARCH will identify sequences in the file database.fsa that are at least 90% identical on the plus strand to the query sequences in the file queries.fsa and write the results to the file alnout.txt.

`./vsearch-1.0.6-linux-x86_64 --usearch_global queries.fsa --db database.fsa --id 0.9 --alnout alnout.txt`
`./vsearch-1.0.7-linux-x86_64 --usearch_global queries.fsa --db database.fsa --id 0.9 --alnout alnout.txt`

## Download and install

The latest releases of VSEARCH are available [here](https://github.com/torognes/vsearch/releases).

Binary executables of VSEARCH are available in the `bin` folder for [GNU/Linux on x86-64 systems](https://github.com/torognes/vsearch/blob/master/bin/vsearch-1.0.6-linux-x86_64) and [Apple Mac OS X on x86-64 systems](https://github.com/torognes/vsearch/blob/master/bin/vsearch-1.0.6-osx-x86_64). These executables include support for input files compressed by zlib and bzip2 (with files usually ending in .gz or .bz2).
Binary executables of VSEARCH are available in the `bin` folder for [GNU/Linux on x86-64 systems](https://github.com/torognes/vsearch/blob/master/bin/vsearch-1.0.7-linux-x86_64) and [Apple Mac OS X on x86-64 systems](https://github.com/torognes/vsearch/blob/master/bin/vsearch-1.0.7-osx-x86_64). These executables include support for input files compressed by zlib and bzip2 (with files usually ending in .gz or .bz2).

Download the appropriate executable and make a symbolic link in a folder included in your `$PATH` from `vsearch` to the appropriate binary. You may use the following commands (assuming `~/bin` is in your `$PATH`):

```sh
cd ~
mkdir -p bin
cd bin
wget https://github.com/torognes/vsearch/releases/download/v1.0.6/vsearch-1.0.6-linux-x86_64
ln -s vsearch-1.0.6-linux-x86_64 vsearch
wget https://github.com/torognes/vsearch/releases/download/v1.0.7/vsearch-1.0.7-linux-x86_64
ln -s vsearch-1.0.7-linux-x86_64 vsearch
```

Substitute `linux` with `osx` in those lines if you're on a Mac.
Expand Down
5 changes: 4 additions & 1 deletion doc/vsearch.1
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
.\" ============================================================================
.TH vsearch 1 "December 14, 2014" "version 1.0.6" "USER COMMANDS"
.TH vsearch 1 "December 19, 2014" "version 1.0.7" "USER COMMANDS"
.\" ============================================================================
.SH NAME
vsearch \(em chimera detection, clustering, dereplication, masking, pairwise alignment, searching, shuffling and sorting of amplicons from metagenomic projects.
Expand Down Expand Up @@ -1411,6 +1411,9 @@ Fixes a minor bug with --allpairs_global and --acceptall options.
.TP
.BR v1.0.6\~ "released December 14th, 2014"
Fixes a memory allocation bug in chimera detection (--uchime_ref option).
.TP
.BR v1.0.7\~ "released December 19th, 2014"
Fixes a bug in the output from chimera detection with the --uchimeout option.
.LP
.\" ============================================================================
.\" TODO:
Expand Down
Binary file modified doc/vsearch_manual.pdf
Binary file not shown.
2 changes: 1 addition & 1 deletion eval/stats.pl
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@
close I;

my $pos = $truepos + $falsepos;
my $falseneg = $gold - $ truepos;
my $falseneg = $gold - $truepos;
my $prec = 100.0 * $truepos / $pos;
my $fdr = 100.0 * $falsepos / $pos;
my $recall = 100.0 * $truepos / $gold;
Expand Down
4 changes: 1 addition & 3 deletions src/align_simd.cc
Original file line number Diff line number Diff line change
Expand Up @@ -732,7 +732,7 @@ void search16(s16info_s * s,

__m128i M_QR_target_left, M_R_target_left;

__m128i QR_query_left, R_query_left;
__m128i R_query_left;
__m128i QR_query_interior, R_query_interior;
__m128i QR_query_right, R_query_right;
__m128i QR_target_left, R_target_left;
Expand Down Expand Up @@ -760,8 +760,6 @@ void search16(s16info_s * s,

T0 = _mm_set_epi16(0, 0, 0, 0, 0, 0, 0, 0xffff);

QR_query_left = _mm_set1_epi16(s->penalty_gap_open_query_left +
s->penalty_gap_extension_query_left);
R_query_left = _mm_set1_epi16(s->penalty_gap_extension_query_left);

QR_query_interior = _mm_set1_epi16(s->penalty_gap_open_query_interior +
Expand Down
48 changes: 24 additions & 24 deletions src/chimera.cc
Original file line number Diff line number Diff line change
Expand Up @@ -337,6 +337,8 @@ int find_best_parents(struct chimera_info_s * ci)

int eval_parents(struct chimera_info_s * ci)
{
int status = 1;

/* create msa */

/* find max insertions in front of each position in the query sequence */
Expand Down Expand Up @@ -622,11 +624,10 @@ int eval_parents(struct chimera_info_s * ci)
}
}

int ischimeric = 0;
int isnonchimeric = 0;

if (best_h >= 0.0)
{
status = 2;

/* flip A and B if necessary */

if (best_reverse)
Expand Down Expand Up @@ -733,19 +734,18 @@ int eval_parents(struct chimera_info_s * ci)

if (best_h >= opt_minh)
{
status = 3;
if ((divdiff >= opt_mindiv) &&
(sumL >= opt_mindiffs) &&
(sumR >= opt_mindiffs))
ischimeric = 1;
status = 4;
}
else
isnonchimeric = 1;

/* print alignment */

pthread_mutex_lock(&mutex_output);

if (opt_uchimealns && ischimeric)
if (opt_uchimealns && (status == 4))
{
fprintf(fp_uchimealns, "\n");
fprintf(fp_uchimealns, "----------------------------------------"
Expand Down Expand Up @@ -851,7 +851,7 @@ int eval_parents(struct chimera_info_s * ci)
best_right_n,
best_right_a,
divdiff,
ischimeric ? 'Y' : (isnonchimeric ? 'N' : '?'));
status == 4 ? 'Y' : (status == 2 ? 'N' : '?'));
}
else
{
Expand All @@ -875,24 +875,24 @@ int eval_parents(struct chimera_info_s * ci)
best_right_n,
best_right_a,
divdiff,
ischimeric ? 'Y' : (isnonchimeric ? 'N' : '?'));
status == 4 ? 'Y' : (status == 2 ? 'N' : '?'));
}
}
pthread_mutex_unlock(&mutex_output);
}
else
{
isnonchimeric = 1;
}

if (ischimeric)
return 1;
else if (isnonchimeric)
return -1;
else
return 0;

return status;
}

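/*
  new chimeric status codes:
  0: no parents found, non-chimeric
  1: score < 0 (no alignment), non-chimeric
  2: 0 <= score < minh, non-chimeric
  3: score >= minh, but the divdiff/sumL/sumR thresholds are not all met, suspicious
  4: score >= minh && (divdiff >= opt_mindiv) && (sumL >= opt_mindiffs) && (sumR >= opt_mindiffs), chimeric
*/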

void query_init(struct searchinfo_s * si)
{
si->qsequence = 0;
Expand Down Expand Up @@ -1152,13 +1152,13 @@ unsigned long chimera_thread_core(struct chimera_info_s * ci)
if (find_best_parents(ci))
status = eval_parents(ci);
else
status = -1;
status = 0;

/* output results */

pthread_mutex_lock(&mutex_output);

if (status > 0)
if (status == 4)
{
chimera_count++;

Expand All @@ -1170,12 +1170,12 @@ unsigned long chimera_thread_core(struct chimera_info_s * ci)
}
}

if (status < 0)
if (status < 3)
{
nonchimera_count++;

/* output no parents, no chimeras */
if (opt_uchimeout)
if ((status < 2) && opt_uchimeout)
{
if (opt_uchimeout5)
fprintf(fp_uchimeout,
Expand Down
8 changes: 4 additions & 4 deletions src/db.cc
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ void db_read(const char * filename, int upcase)
rewind(fp);

char * prompt;
asprintf(& prompt, "Reading file %s", filename);
(void) asprintf(& prompt, "Reading file %s", filename);
progress_init(prompt, filesize);

#ifdef HAVE_BZLIB
Expand Down Expand Up @@ -124,7 +124,7 @@ void db_read(const char * filename, int upcase)
switch (db_format)
{
case FORMAT_PLAIN:
fgets(line, LINEALLOC, fp);
(void) fgets(line, LINEALLOC, fp);
break;
case FORMAT_BZIP:
#ifdef HAVE_BZLIB
Expand Down Expand Up @@ -205,7 +205,7 @@ void db_read(const char * filename, int upcase)
switch (db_format)
{
case FORMAT_PLAIN:
fgets(line, LINEALLOC, fp);
(void) fgets(line, LINEALLOC, fp);
break;
case FORMAT_BZIP:
#ifdef HAVE_BZLIB
Expand Down Expand Up @@ -294,7 +294,7 @@ void db_read(const char * filename, int upcase)
switch (db_format)
{
case FORMAT_PLAIN:
fgets(line, LINEALLOC, fp);
(void) fgets(line, LINEALLOC, fp);
break;
case FORMAT_BZIP:
#ifdef HAVE_BZLIB
Expand Down
2 changes: 1 addition & 1 deletion src/query.cc
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ static char * FGETS(char * query_line, int size)
switch (query_format)
{
case FORMAT_PLAIN:
fgets(query_line, size, query_fp);
(void) fgets(query_line, size, query_fp);
break;
case FORMAT_BZIP:
#ifdef HAVE_BZLIB
Expand Down
4 changes: 2 additions & 2 deletions src/sortbysize.cc
Original file line number Diff line number Diff line change
Expand Up @@ -75,12 +75,12 @@ void sortbysize()

for(int i=0; i<dbsequencecount; i++)
{
unsigned int size = db_getabundance(i);
long size = db_getabundance(i);

if((size >= opt_minsize) && (size <= opt_maxsize))
{
sortinfo[passed].seqno = i;
sortinfo[passed].size = size;
sortinfo[passed].size = (unsigned int) size;
passed++;
}
progress_update(i);
Expand Down
2 changes: 1 addition & 1 deletion src/util.cc
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ void * xmalloc(size_t size)
{
const size_t alignment = 16;
void * t;
posix_memalign(& t, alignment, size);
(void) posix_memalign(& t, alignment, size);

if (t==NULL)
fatal("Unable to allocate enough memory.");
Expand Down
2 changes: 1 addition & 1 deletion src/vsearch.cc
Original file line number Diff line number Diff line change
Expand Up @@ -439,7 +439,7 @@ void args_init(int argc, char **argv)
opt_maxqsize = INT_MAX;
opt_maxqt = DBL_MAX;
opt_maxrejects = -1;
opt_maxseqlength = 50000;
opt_maxseqlength = 15000;
opt_maxsize = LONG_MAX;
opt_maxsizeratio = DBL_MAX;
opt_maxsl = DBL_MAX;
Expand Down
2 changes: 1 addition & 1 deletion src/vsearch.h
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@
#define FORMAT_GZIP 3

#define PROG_NAME "vsearch"
#define PROG_VERSION "v1.0.6"
#define PROG_VERSION "v1.0.7"

/* options */

Expand Down
4 changes: 2 additions & 2 deletions test/clusterf.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,9 @@

P=$1

INPUT=../data/Rfam_9_1.fasta
#INPUT=../data/Rfam_9_1.fasta
#INPUT=../data/AF091148.fsa
#INPUT=../data/BioMarKs50k.fsa
INPUT=../data/BioMarKs50k.fsa

THREADS=0

Expand Down
1 change: 0 additions & 1 deletion test/clusters.sh
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,6 @@ CMD="/usr/bin/time $PROG \
--id 0.9 \
--maxaccepts 1 \
--maxrejects 8 \
--sizein \
--sizeout \
--centroids s.$P.centroids \
--uc s.$P.uc \
Expand Down

0 comments on commit e390b73

Please sign in to comment.