From 32841e159d8558bd8ac3a0125e8704a58578c2cb Mon Sep 17 00:00:00 2001 From: Marco van Zwetselaar Date: Mon, 28 Dec 2020 10:47:33 +0300 Subject: [PATCH] [all] explicitly use GNU awk as gawk --- README.md | 2 +- uf | 4 ++-- uf-bare | 2 +- uf-circut | 4 ++-- uf-cut | 4 ++-- uf-dress | 2 +- uf-drop | 4 ++-- uf-freqs | 2 +- uf-hash | 2 +- uf-headers | 2 +- uf-map | 6 +++--- uf-rc | 2 +- uf-select | 6 +++--- uf-sort | 4 ++-- uf-take | 4 ++-- uf-valid | 2 +- 16 files changed, 26 insertions(+), 26 deletions(-) diff --git a/README.md b/README.md index c5a091b..536bbf8 100644 --- a/README.md +++ b/README.md @@ -86,7 +86,7 @@ Find [Unfasta on GitHub](https://github.com/zwets/unfasta). |`uf-take`| Take the initial N elements from a sequence, or take elements until N are left | |`uf-valid`| Validate an unfasta stream against its allowed alphabet and NCBI conventions | -Each has a `-h|--help` option for usage instructions. All depend only on `awk` and a POSIX shell, except `uf-random` which requires `bash` for its `RANDOM` extension. +Each has a `-h|--help` option for usage instructions. All depend only on GNU awk (`gawk`) and a POSIX shell, except `uf-random` which requires `bash` for its `RANDOM` extension. ## Design principles diff --git a/uf b/uf index df63149..770bbc3 100755 --- a/uf +++ b/uf @@ -78,11 +78,11 @@ while [ $# -ne 0 -a "$(expr "$1" : '\(.\)..*')" = "-" ]; do done if [ -n "$REVERT" ]; then - awk -b -O -v WIDTH=80 -v L=$LOWER -v U=$UPPER ' + gawk -bO -v WIDTH=80 -v L=$LOWER -v U=$UPPER ' /^>/ /^[^>]/ { for (i=1;i<=length();i+=WIDTH) { S=substr($0,i,WIDTH); print (L ? tolower(S) : U ? toupper(S) : S) } }' "$@" else - awk -b -O -v L=$LOWER -v U=$UPPER 'BEGIN { ORS="" } + gawk -bO -v L=$LOWER -v U=$UPPER 'BEGIN { ORS="" } /^>/ { if (NR!=1) print "\n"; print $0 "\n" } /^[^>]/ { gsub(/[[:space:]]/,""); print (L ? tolower($0) : U ? toupper($0) : $0) } END { print "\n" }' "$@" diff --git a/uf-bare b/uf-bare index f32824d..83b4e84 100755 --- a/uf-bare +++ b/uf-bare @@ -70,7 +70,7 @@ done # Do the work -awk -b -O -v F="$HDRSFILE" -v P="$(basename "$0")" ' +gawk -bO -v F="$HDRSFILE" -v P="$(basename "$0")" ' NR%2==1 && substr($0,1,1) != ">" { print P ": warning: discarding line that does not look like a FASTA header: " $0 > "/dev/stderr" } NR%2==1 && F { print > F } NR%2==0' "$@" diff --git a/uf-circut b/uf-circut index 18d73e1..a5a6b21 100755 --- a/uf-circut +++ b/uf-circut @@ -114,9 +114,9 @@ LHS="$(expr "$CUT_SPEC" : '\(.*\)[/:~].*')" OP="$(expr "$CUT_SPEC" : '.*\([/:~]\).*')" RHS="$(expr "$CUT_SPEC" : '.*[/:~]\(.*\)')" -# Delegate to awk +# Delegate to gawk -awk -b -O -v P="$(basename "$0")" -v FROM=$LHS -v MID=$LHS -v UPTO=$RHS -v LEN=$RHS -v DIST=$RHS -v OP="$OP" -v Z=$ZERO -v Q=$QUIET -v M=$MARK ' +gawk -bO -v P="$(basename "$0")" -v FROM=$LHS -v MID=$LHS -v UPTO=$RHS -v LEN=$RHS -v DIST=$RHS -v OP="$OP" -v Z=$ZERO -v Q=$QUIET -v M=$MARK ' function bump0(p1,p2) { return p1 < 0 && p2 >= 0 ? 1 : 0 } # return 1 if from p1 to p2 crosses 0 BEGIN { diff --git a/uf-cut b/uf-cut index 438ce1a..e87c0d6 100755 --- a/uf-cut +++ b/uf-cut @@ -115,8 +115,8 @@ LHS="$(expr "$CUT_SPEC" : '\(.*\)[/:~].*')" OP="$(expr "$CUT_SPEC" : '.*\([/:~]\).*')" RHS="$(expr "$CUT_SPEC" : '.*[/:~]\(.*\)')" -# Delegate to awk -awk -b -O -v P="$(basename "$0")" -v FROM=$LHS -v MID=$LHS -v UPTO=$RHS -v LEN=$RHS -v DIST=$RHS -v OP="$OP" -v C=$CLIP -v Z=$ZERO -v Q=$QUIET -v M=$MARK ' +# Delegate to gawk +gawk -bO -v P="$(basename "$0")" -v FROM=$LHS -v MID=$LHS -v UPTO=$RHS -v LEN=$RHS -v DIST=$RHS -v OP="$OP" -v C=$CLIP -v Z=$ZERO -v Q=$QUIET -v M=$MARK ' function abs(n) { return n < 0 ? -n : n } function cross0(p1,p2) { return p1 < 0 && p2 >= 0 ? 1 : 0 } # return 1 if from p1 to p2 crosses 0 function error_out(s) { print P ": error: " s > "/dev/stderr"; exit 1 } diff --git a/uf-dress b/uf-dress index ac850f0..aed2e54 100755 --- a/uf-dress +++ b/uf-dress @@ -73,7 +73,7 @@ done # Do the work -awk -b -O -v F="$HDRSFILE" '{ +gawk -bO -v F="$HDRSFILE" '{ HDR = "" if (F) getline HDR < F if (!HDR) HDR = ">lcl|" NR " Dummy header " NR diff --git a/uf-drop b/uf-drop index 315bc3d..3f86ec3 100755 --- a/uf-drop +++ b/uf-drop @@ -70,9 +70,9 @@ done NUM=$1 shift -# Delegate to awk +# Delegate to gawk -awk -b -O -v P="$(basename "$0")" -v N=$NUM -v M=$MARK -v B=$BUT ' +gawk -bO -v P="$(basename "$0")" -v N=$NUM -v M=$MARK -v B=$BUT ' NR%2==1 { print $0 (M ? " (uf:drop:" (B?"but:":"") N ")" : "") } NR%2==0 { print B ? substr ($0,length($0)-N+1) : substr ($0,N+1) } ' "$@" diff --git a/uf-freqs b/uf-freqs index 96ae6aa..7e03adb 100755 --- a/uf-freqs +++ b/uf-freqs @@ -67,7 +67,7 @@ while [ $# -ne 0 -a "$(expr "$1" : '\(.\)..*')" = "-" ]; do shift done -awk -b -O -v P="$(basename "$0")" -v BARE=$BARE -v LENGTH_ONLY=$LENGTH_ONLY -v TOTALS=$TOTALS ' +gawk -bO -v P="$(basename "$0")" -v BARE=$BARE -v LENGTH_ONLY=$LENGTH_ONLY -v TOTALS=$TOTALS ' BEGIN { PROCINFO["sorted_in"] = "@ind_str_asc" } NR%2==1 && !BARE NR%2==0 { diff --git a/uf-hash b/uf-hash index e7de112..47f612f 100755 --- a/uf-hash +++ b/uf-hash @@ -78,6 +78,6 @@ FILE="-" # Do the work -awk -b -O 'NR % 2 == 0' "$FILE" | $HASH_PGM | cut -d' ' -f1 +gawk -bO 'NR % 2 == 0' "$FILE" | $HASH_PGM | cut -d' ' -f1 # vim: sts=4:sw=4:et:si:ai diff --git a/uf-headers b/uf-headers index 2806072..b9235a5 100755 --- a/uf-headers +++ b/uf-headers @@ -48,6 +48,6 @@ while [ $# -ne 0 -a "$(expr "$1" : '\(.\)..*')" = "-" ]; do shift done -awk -b -O 'NR%2==1' "$@" +gawk -bO 'NR%2==1' "$@" # vim: sts=4:sw=4:et:si:ai diff --git a/uf-map b/uf-map index 9be9074..ffe2162 100755 --- a/uf-map +++ b/uf-map @@ -74,10 +74,10 @@ shift # Making this work produced two nice false starts: a race and a deadlock -# FAIL 1: using pipe inside awk (use the awk pipe feature) +# FAIL 1: using pipe inside gawk # Fails because of race: external OPERATION is buffering its output, # so lines do not come out in the right order, not even with flush. -# awk -b -O -v OPER="sh -c '$OPERATION'" ' +# gawk -bO -v OPER="sh -c '$OPERATION'" ' # NR%2==1 { print } # NR%2==0 { print | OPER } # { fflush() }' "$@" @@ -100,7 +100,7 @@ mkfifo "$FIFO" # The pipeline splits the lines between FIFO and OPERATION, then paste merges hem if [ -z "$TITLES" ]; then LHS="$FIFO"; RHS="-"; else LHS="-"; RHS="$FIFO"; fi -awk -b -O -v FIFO="$FIFO" -v ONE="${TITLES:-0}" ' +gawk -bO -v FIFO="$FIFO" -v ONE="${TITLES:-0}" ' (NR+ONE)%2==1 { print > FIFO }; (NR+ONE)%2==0' "$@" \ | $OPERATION \ diff --git a/uf-rc b/uf-rc index d08aaae..779efdb 100755 --- a/uf-rc +++ b/uf-rc @@ -84,7 +84,7 @@ done # Do the work -awk -b -O -v P="$(basename "$0")" -v C=$COMPLEMENT -v R=$REVERSE -v M=$MARK ' +gawk -bO -v P="$(basename "$0")" -v C=$COMPLEMENT -v R=$REVERSE -v M=$MARK ' NR%2==1 { print $0 (M ? " (uf:" (R?"reverse":"") (C?"complement":"") ")" : "") } NR%2==0 { if (R) for (i = length($0); i >= 1; --i) print_maybe_comp() diff --git a/uf-select b/uf-select index 6cdbf8f..1c688f9 100755 --- a/uf-select +++ b/uf-select @@ -110,16 +110,16 @@ FILE="${1:--}" while [ -n "$FILE" ]; do if [ -n "$NTH" ]; then - awk -b -v NTH=$NTH ' + gawk -bO -v NTH=$NTH ' BEGIN { split(NTH,lines,/,/) } { for (x in lines) if (int((NR+1)/2) == lines[x]) print } ' "$FILE" elif [ -n "$SEQID" ]; then - awk -b -v SEQID="$SEQID" ' + gawk -bO -v SEQID="$SEQID" ' NR % 2 == 1 && (P=index($1,SEQID)) != 0 { C=substr($1,P+length(SEQID),1); if (C=="" || C==" " || C=="|") { print; getline; print } } ' "$FILE" elif [ -n "$REGEX" ]; then - awk -b "$(printf 'NR %% 2 == 1 && /%s/ { print; getline; print }\n' "$REGEX")" "$FILE" + gawk -bO "$(printf 'NR %% 2 == 1 && /%s/ { print; getline; print }\n' "$REGEX")" "$FILE" else usage_exit fi diff --git a/uf-sort b/uf-sort index 9d23c21..9f588f0 100755 --- a/uf-sort +++ b/uf-sort @@ -66,7 +66,7 @@ done # Then we sort, and then we unpack everything again. # Pre-process into single records -awk -b -O -v OFS='\t' ' +gawk -bO -v OFS='\t' ' NR % 2 == 1 { HDR = $0; } NR % 2 == 0 { print length(), $0, HDR } ' "$@" | @@ -74,6 +74,6 @@ awk -b -O -v OFS='\t' ' #LC_ALL=C sort $REVERSE --buffer-size=1G --key='1rn,2' -t ' ' - | LC_ALL=C sort $REVERSE --key='1rn,2' -t "$(printf '\t')" - | # And unpack again - let's hope there are no tabs in headers ... -awk -b -O -F '\t' '{ print $3; print $2; }' +gawk -bO -F '\t' '{ print $3; print $2; }' # vim: sts=4:sw=4:et:si:ai diff --git a/uf-take b/uf-take index 4762738..37606ab 100755 --- a/uf-take +++ b/uf-take @@ -69,9 +69,9 @@ done NUM=$1 shift -# Delegate to awk +# Delegate to gawk -awk -b -O -v P="$(basename "$0")" -v N=$NUM -v M=$MARK -v B=$BUT ' +gawk -bO -v P="$(basename "$0")" -v N=$NUM -v M=$MARK -v B=$BUT ' NR%2==1 { print $0 (M ? " (uf:take:" (B?"but:":"") N ")" : "") } NR%2==0 { print B ? substr ($0,1,length($0)-N) : substr ($0,1,N) } ' "$@" diff --git a/uf-valid b/uf-valid index f5fc222..0b43e99 100755 --- a/uf-valid +++ b/uf-valid @@ -158,7 +158,7 @@ done # Do the work -awk -b -O -v P="$(basename "$0")" -v WRONG="[^$ALLOW]" -v H=$VAL_HEADERS -v Q=$QUIET -v K=$KEEP_ERRORS -v S=$STOP_ON_ERR -v IGNORECASE=$IGNORE_CASE '{ +gawk -bO -v P="$(basename "$0")" -v WRONG="[^$ALLOW]" -v H=$VAL_HEADERS -v Q=$QUIET -v K=$KEEP_ERRORS -v S=$STOP_ON_ERR -v IGNORECASE=$IGNORE_CASE '{ ERR = 0 HDR = $0 if ( HDR !~ /^>/ ) {