Skip to content

Commit

Permalink
Add uf-dress as inverse of uf-bare.
Browse files Browse the repository at this point in the history
  • Loading branch information
zwets committed Jan 26, 2016
1 parent ee2c0a3 commit 8847c50
Show file tree
Hide file tree
Showing 2 changed files with 105 additions and 3 deletions.
30 changes: 27 additions & 3 deletions uf-bare
Original file line number Diff line number Diff line change
Expand Up @@ -29,16 +29,32 @@ usage_exit() {
echo
echo "Usage: $(basename $0) [OPTIONS] [FILE] ..."
echo
echo " Drop the headers and output only the bare sequences from each FILE."
echo " If no FILE is present or FILE is '-', read from standard input."
echo " Output only the bare sequences from each FILE to stanard output, dropping the"
echo " headers. If no FILE is present or FILE is '-', read from standard input."
echo
echo " Options"
echo " -w HDRSFILE Write the headers to HDRSFILE, which must not already exist."
echo " -f|--force Overwrite HDRSFILE if it exists."
echo
echo " By default headers go to /dev/null. Use option -w to store them in a file."
echo " The file can be merged back in with bare sequences using 'uf-dress'."
echo
exit ${1:-1}
}

# Parse options

unset HDRSFILE FORCE

while [ $# -ne 0 -a "$(expr "$1" : '\(.\).*')" = "-" ]; do
case $1 in
-w) shift
[ $# -ge 1 ] || usage_exit
HDRSFILE="$1"
;;
-f|--force)
FORCE="yes"
;;
--help)
usage_exit 0
;;
Expand All @@ -48,5 +64,13 @@ while [ $# -ne 0 -a "$(expr "$1" : '\(.\).*')" = "-" ]; do
shift
done

awk -b -O 'NR%2==0'
# Check options validity

[ -z "$HDRSFILE" ] || [ -n "$FORCE" ] || [ ! -e "$HDRSFILE" ] || err_exit "file exists (use --force to overwrite): '$HDRSFILE'"

# Do the work

awk -b -O -v F="$HDRSFILE" '
NR%2==1 && F { print > "'"$HDRSFILE"'" }
NR%2==0' "$@"

78 changes: 78 additions & 0 deletions uf-dress
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
#!/bin/sh
#
# uf-dress - Turn bare sequence data into an unfasta file.
# Copyright (C) 2016 Marco van Zwetselaar <[email protected]>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
# Created on 2016-01-25

# Function to exit this script with an error message on stderr
err_exit() {
echo "$(basename "$0"): $*" >&2
exit 1
}

# Function to show usage information and exit
usage_exit() {
echo
echo "Usage: $(basename $0) [OPTIONS] [FILE] ..."
echo
echo " Insert headers to turn bare sequence FILE(s) into valid unfasta. If no FILE or"
echo " FILE is '-' read from standard input. Write unfasta to standard output."
echo
echo " Options"
echo " -r HDRSFILE Optional file to read header lines from. If not specified then"
echo " default headers 'lcl|NUM' will be generated."
echo
echo " The HDRSFILE could be a file previously written by 'uf-bare', or output from"
echo " the 'uf-hdrs' filter. Tip: use bash process substitution to avoid temp files:"
echo
echo ' $ uf file.fna | uf-bare | ..processing.. | uf-dress -r <(uf file.fna | uf-hdrs)'
echo
exit ${1:-1}
}

# Parse options

unset HDRSFILE

while [ $# -ne 0 -a "$(expr "$1" : '\(.\).*')" = "-" ]; do
case $1 in
-r) shift
[ $# -ge 1 ] || usage_exit
HDRSFILE="$1"
;;
--help)
usage_exit 0
;;
*) usage_exit
;;
esac
shift
done

# Check options validity

[ -z "$HDRSFILE" ] || [ -r "$HDRSFILE" ] || err_exit "cannot read file: '$HDRSFILE'"

# Do the work

awk -b -O -v F="$HDRSFILE" '{
if (F) getline HDR < F; else HDR = ">lcl|" NR
print HDR
print
}
END { if (F) close (F) }' "$@"

0 comments on commit 8847c50

Please sign in to comment.