From 989fc49f97ee6d4985f42f15cebf51d5fc555575 Mon Sep 17 00:00:00 2001
From: Marco van Zwetselaar
Date: Wed, 27 Jan 2016 01:32:40 +0300
Subject: [PATCH] Fix the race and deadlock in uf-map, refer from uf-dress.

uf-bare:  write the headers to the awk variable F instead of splicing the
          shell variable $HDRSFILE into the awk program text.
uf-dress: generate a dummy header whenever HDRSFILE is not given or yields
          no (more) header lines; point to uf-map in the usage text.
uf-map:   stop piping from inside awk, which let lines come out in the
          wrong order.  Headers now go to a FIFO and paste interleaves
          them with the standard output of OPERATION.  Under bash,
          pipefail is switched on so a failing filter sets the exit code.
---
 uf-bare  |  2 +-
 uf-dress | 16 +++++++++++-----
 uf-map   | 36 ++++++++++++++++++++++++++++++++++--
 3 files changed, 46 insertions(+), 8 deletions(-)

diff --git a/uf-bare b/uf-bare
index 0a9c40b..c4916a3 100755
--- a/uf-bare
+++ b/uf-bare
@@ -71,6 +71,6 @@ done
 # Do the work
 
 awk -b -O -v F="$HDRSFILE" '
-    NR%2==1 && F { print > "'"$HDRSFILE"'" }
+    NR%2==1 && F { print > F }
     NR%2==0' "$@"
 
diff --git a/uf-dress b/uf-dress
index c737b96..6e85a5b 100755
--- a/uf-dress
+++ b/uf-dress
@@ -29,18 +29,22 @@ usage_exit() {
     echo
     echo "Usage: $(basename $0) [OPTIONS] [FILE] ..."
     echo
-    echo " Insert headers to turn bare sequence FILE(s) into valid unfasta. If no FILE or"
-    echo " FILE is '-' read from standard input. Write unfasta to standard output."
+    echo " Insert headers to turn bare sequences into valid unfasta. Reads each line from"
+    echo " each FILE and writes it to standard output preceded by a header line. If no"
+    echo " FILE is present or FILE is '-' read from standard input."
     echo
     echo " Options"
-    echo " -r HDRSFILE Optional file to read header lines from. If not specified then"
-    echo " default headers 'lcl|NUM' will be generated."
+    echo " -r HDRSFILE Optional file to read header lines from. If not specified or if"
+    echo " this file has fewer header lines than sequences, then dummy header"
+    echo " lines are generated for the remaining sequences."
     echo
     echo " The HDRSFILE could be a file previously written by 'uf-bare', or output from"
     echo " the 'uf-hdrs' filter. Tip: use bash process substitution to avoid temp files:"
     echo
     echo ' $ uf file.fna | uf-bare | ..processing.. | uf-dress -r <(uf file.fna | uf-hdrs)'
     echo
+    echo " See also: 'uf-map' which implements this idiom in a single command."
+    echo
     exit ${1:-1}
 }
 
@@ -70,7 +74,9 @@ done
 # Do the work
 
 awk -b -O -v F="$HDRSFILE" '{
-    if (F) getline HDR < F; else HDR = ">lcl|" NR
+    HDR = ""
+    if (F) getline HDR < F
+    if (!HDR) HDR = ">lcl|" NR " Dummy header " NR
     print HDR
     print
 }
diff --git a/uf-map b/uf-map
index f25d17a..f51ece2 100755
--- a/uf-map
+++ b/uf-map
@@ -62,7 +62,39 @@ done
 OPERATION="$1"
 shift
 
-# AWK it
+# Do it
 
-awk -b -O "NR%2==1 { print }; NR%2==0 { print | \"$OPERATION\" }"
+# FAIL 1: using pipe within awk
+# Fails because of race: external operation is buffering its output, so lines do not come out in the right order.
+#awk -b -O -v OPER="sh -c '$OPERATION'" '
+#    NR%2==1 { print }
+#    NR%2==0 { print | OPER }
+#    { fflush() }' "$@"
+
+# FAIL 2: using a FIFO between uf-bare and uf-dress
+# Fails because of deadlock: uf-bare blocks on FIFO until uf-dress consumes,
+# but uf-dress is waiting for standard input before consuming from FIFO.
+#FIFO="/tmp/$(basename "$0").$$"
+#mkfifo "$FIFO"
+#./uf-bare -f -w "$FIFO" | ./uf-dress -r "$FIFO"
+#RETVAL=$?
+#rm -f $FIFO
+
+# Maybe using paste to pull things together?
+
+# Set pipefail to have the errorcode of the rightmost failing filter (OPERATION)
+[ -z "$BASH" ] || set -o pipefail
+
+# Make a FIFO to hold the headers and from which paste merges them with OPERATION standard output
+FIFO="/tmp/$(basename "$0").$$"
+mkfifo "$FIFO" || exit 1
+
+# Run the pipeline
+awk -b -O -v FIFO="$FIFO" 'NR%2==1 { print > FIFO }; NR%2==0' "$@" | $OPERATION | paste -d '\n' "$FIFO" -
+RETVAL=$?
+
+# Clean up - @TODO@ set a shell trap, is cleaner
+rm -f "$FIFO"
+
+exit $RETVAL
 