From 8847c5070b5309a066f05b807c2b76f07abbe680 Mon Sep 17 00:00:00 2001
From: Marco van Zwetselaar
Date: Tue, 26 Jan 2016 23:33:17 +0300
Subject: [PATCH] Add uf-dress as inverse of uf-bare.

uf-bare gains option -w HDRSFILE to store the header lines it drops,
refusing to overwrite an existing file unless -f|--force is given.

The new script uf-dress does the reverse: it prints a header line
before every bare sequence line, taking the headers from -r HDRSFILE
or, without that option, generating '>lcl|NUM' headers.  It exits with
status 1 when HDRSFILE has no header left for a sequence.

In both scripts the awk program receives the file name only through
'-v F=...' and tests it against the empty string, so the name is never
spliced into the awk source text.
---
 uf-bare  | 30 +++++++++++++++++++---
 uf-dress | 78 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 105 insertions(+), 3 deletions(-)
 create mode 100755 uf-dress

diff --git a/uf-bare b/uf-bare
index 2816c1b..0a9c40b 100755
--- a/uf-bare
+++ b/uf-bare
@@ -29,16 +29,32 @@ usage_exit() {
     echo
     echo "Usage: $(basename $0) [OPTIONS] [FILE] ..."
     echo
-    echo " Drop the headers and output only the bare sequences from each FILE."
-    echo " If no FILE is present or FILE is '-', read from standard input."
+    echo " Output only the bare sequences from each FILE to standard output, dropping the"
+    echo " headers. If no FILE is present or FILE is '-', read from standard input."
+    echo
+    echo " Options"
+    echo " -w HDRSFILE Write the headers to HDRSFILE, which must not already exist."
+    echo " -f|--force Overwrite HDRSFILE if it exists."
     echo
+    echo " By default headers go to /dev/null. Use option -w to store them in a file."
+    echo " The file can be merged back in with bare sequences using 'uf-dress'."
+    echo
     exit ${1:-1}
 }
 
 # Parse options
 
+unset HDRSFILE FORCE
+
 while [ $# -ne 0 -a "$(expr "$1" : '\(.\).*')" = "-" ]; do
     case $1 in
+    -w) shift
+        [ $# -ge 1 ] || usage_exit
+        HDRSFILE="$1"
+        ;;
+    -f|--force)
+        FORCE="yes"
+        ;;
     --help)
         usage_exit 0
         ;;
@@ -48,5 +64,13 @@ while [ $# -ne 0 -a "$(expr "$1" : '\(.\).*')" = "-" ]; do
     shift
 done
 
-awk -b -O 'NR%2==0'
+# Check options validity
+
+[ -z "$HDRSFILE" ] || [ -n "$FORCE" ] || [ ! -e "$HDRSFILE" ] || err_exit "file exists (use --force to overwrite): '$HDRSFILE'"
+
+# Do the work
+
+awk -b -O -v F="$HDRSFILE" '
+    NR%2==1 && F != "" { print > F }    # odd lines are headers: keep them only when -w was given
+    NR%2==0' "$@"
 
diff --git a/uf-dress b/uf-dress
new file mode 100755
index 0000000..c737b96
--- /dev/null
+++ b/uf-dress
@@ -0,0 +1,78 @@
+#!/bin/sh
+#
+# uf-dress - Turn bare sequence data into an unfasta file.
+# Copyright (C) 2016 Marco van Zwetselaar
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+#
+# Created on 2016-01-25
+
+# Function to exit this script with an error message on stderr
+err_exit() {
+    echo "$(basename "$0"): $*" >&2
+    exit 1
+}
+
+# Function to show usage information and exit
+usage_exit() {
+    echo
+    echo "Usage: $(basename "$0") [OPTIONS] [FILE] ..."
+    echo
+    echo " Insert headers to turn bare sequence FILE(s) into valid unfasta. If no FILE or"
+    echo " FILE is '-' read from standard input. Write unfasta to standard output."
+    echo
+    echo " Options"
+    echo " -r HDRSFILE Optional file to read header lines from. If not specified then"
+    echo " default headers 'lcl|NUM' will be generated."
+    echo
+    echo " The HDRSFILE could be a file previously written by 'uf-bare', or output from"
+    echo " the 'uf-hdrs' filter. Tip: use bash process substitution to avoid temp files:"
+    echo
+    echo ' $ uf file.fna | uf-bare | ..processing.. | uf-dress -r <(uf file.fna | uf-hdrs)'
+    echo
+    exit ${1:-1}
+}
+
+# Parse options
+
+unset HDRSFILE
+
+while [ $# -ne 0 -a "$(expr "$1" : '\(.\).*')" = "-" ]; do
+    case $1 in
+    -r) shift
+        [ $# -ge 1 ] || usage_exit
+        HDRSFILE="$1"
+        ;;
+    --help)
+        usage_exit 0
+        ;;
+    *) usage_exit
+        ;;
+    esac
+    shift
+done
+
+# Check options validity
+
+[ -z "$HDRSFILE" ] || [ -r "$HDRSFILE" ] || err_exit "cannot read file: '$HDRSFILE'"
+
+# Do the work
+
+awk -b -O -v F="$HDRSFILE" -v P="$(basename "$0")" '{
+    if (F == "") HDR = ">lcl|" NR    # no -r option: generate a numbered header
+    else if ((getline HDR < F) <= 0) { print P ": no header for sequence " NR " in: " F > "/dev/stderr"; exit 1 }
+    print HDR; print                 # header line first, then the sequence line itself
+  }
+  END { if (F != "") close (F) }' "$@"
+