From 32841e159d8558bd8ac3a0125e8704a58578c2cb Mon Sep 17 00:00:00 2001
From: Marco van Zwetselaar <io@zwets.it>
Date: Mon, 28 Dec 2020 10:47:33 +0300
Subject: [PATCH] [all] explicitly use GNU awk as gawk

---
 README.md  | 2 +-
 uf         | 4 ++--
 uf-bare    | 2 +-
 uf-circut  | 4 ++--
 uf-cut     | 4 ++--
 uf-dress   | 2 +-
 uf-drop    | 4 ++--
 uf-freqs   | 2 +-
 uf-hash    | 2 +-
 uf-headers | 2 +-
 uf-map     | 6 +++---
 uf-rc      | 2 +-
 uf-select  | 6 +++---
 uf-sort    | 4 ++--
 uf-take    | 4 ++--
 uf-valid   | 2 +-
 16 files changed, 26 insertions(+), 26 deletions(-)

diff --git a/README.md b/README.md
index c5a091b..536bbf8 100644
--- a/README.md
+++ b/README.md
@@ -86,7 +86,7 @@ Find [Unfasta on GitHub](https://github.com/zwets/unfasta).
 |`uf-take`| Take the initial N elements from a sequence, or take elements until N are left |
 |`uf-valid`| Validate an unfasta stream against its allowed alphabet and NCBI conventions |
 
-Each has a `-h|--help` option for usage instructions.  All depend only on `awk` and a POSIX shell, except `uf-random` which requires `bash` for its `RANDOM` extension.
+Each has a `-h|--help` option for usage instructions.  All depend only on GNU awk (`gawk`) and a POSIX shell, except `uf-random`, which requires `bash` for its `RANDOM` extension.
 
 
 ## Design principles
diff --git a/uf b/uf
index df63149..770bbc3 100755
--- a/uf
+++ b/uf
@@ -78,11 +78,11 @@ while [ $# -ne 0 -a "$(expr "$1" : '\(.\)..*')" = "-" ]; do
 done
 
 if [ -n "$REVERT" ]; then
-    awk -b -O -v WIDTH=80 -v L=$LOWER -v U=$UPPER '
+    gawk -bO -v WIDTH=80 -v L=$LOWER -v U=$UPPER '
     /^>/ 
     /^[^>]/ { for (i=1;i<=length();i+=WIDTH) { S=substr($0,i,WIDTH); print (L ? tolower(S) : U ? toupper(S) : S) } }' "$@"
 else
-    awk -b -O -v L=$LOWER -v U=$UPPER 'BEGIN { ORS="" }
+    gawk -bO -v L=$LOWER -v U=$UPPER 'BEGIN { ORS="" }
     /^>/    { if (NR!=1) print "\n"; print $0 "\n" }
     /^[^>]/ { gsub(/[[:space:]]/,""); print (L ? tolower($0) : U ? toupper($0) : $0) }
     END { print "\n" }' "$@"
diff --git a/uf-bare b/uf-bare
index f32824d..83b4e84 100755
--- a/uf-bare
+++ b/uf-bare
@@ -70,7 +70,7 @@ done
 
 # Do the work
 
-awk -b -O -v F="$HDRSFILE" -v P="$(basename "$0")" '
+gawk -bO -v F="$HDRSFILE" -v P="$(basename "$0")" '
     NR%2==1 && substr($0,1,1) != ">" { print P ": warning: discarding line that does not look like a FASTA header: " $0 > "/dev/stderr" }
     NR%2==1 && F { print > F }
     NR%2==0' "$@"
diff --git a/uf-circut b/uf-circut
index 18d73e1..a5a6b21 100755
--- a/uf-circut
+++ b/uf-circut
@@ -114,9 +114,9 @@ LHS="$(expr "$CUT_SPEC" : '\(.*\)[/:~].*')"
 OP="$(expr  "$CUT_SPEC" : '.*\([/:~]\).*')"
 RHS="$(expr "$CUT_SPEC" : '.*[/:~]\(.*\)')"
 
-# Delegate to awk
+# Delegate to gawk
 
-awk -b -O -v P="$(basename "$0")" -v FROM=$LHS -v MID=$LHS -v UPTO=$RHS -v LEN=$RHS -v DIST=$RHS -v OP="$OP" -v Z=$ZERO -v Q=$QUIET -v M=$MARK '
+gawk -bO -v P="$(basename "$0")" -v FROM=$LHS -v MID=$LHS -v UPTO=$RHS -v LEN=$RHS -v DIST=$RHS -v OP="$OP" -v Z=$ZERO -v Q=$QUIET -v M=$MARK '
 function bump0(p1,p2) { return p1 < 0 && p2 >= 0 ? 1 : 0 }    # return 1 if from p1 to p2 crosses 0
 
 BEGIN { 
diff --git a/uf-cut b/uf-cut
index 438ce1a..e87c0d6 100755
--- a/uf-cut
+++ b/uf-cut
@@ -115,8 +115,8 @@ LHS="$(expr "$CUT_SPEC" : '\(.*\)[/:~].*')"
 OP="$(expr  "$CUT_SPEC" : '.*\([/:~]\).*')"
 RHS="$(expr "$CUT_SPEC" : '.*[/:~]\(.*\)')"
 
-# Delegate to awk
-awk -b -O -v P="$(basename "$0")" -v FROM=$LHS -v MID=$LHS -v UPTO=$RHS -v LEN=$RHS -v DIST=$RHS -v OP="$OP" -v C=$CLIP -v Z=$ZERO -v Q=$QUIET -v M=$MARK '
+# Delegate to gawk
+gawk -bO -v P="$(basename "$0")" -v FROM=$LHS -v MID=$LHS -v UPTO=$RHS -v LEN=$RHS -v DIST=$RHS -v OP="$OP" -v C=$CLIP -v Z=$ZERO -v Q=$QUIET -v M=$MARK '
 function abs(n) { return n < 0 ? -n : n }
 function cross0(p1,p2) { return p1 < 0 && p2 >= 0 ? 1 : 0 }      # return 1 if from p1 to p2 crosses 0
 function error_out(s) { print P ": error: " s > "/dev/stderr"; exit 1 }
diff --git a/uf-dress b/uf-dress
index ac850f0..aed2e54 100755
--- a/uf-dress
+++ b/uf-dress
@@ -73,7 +73,7 @@ done
 
 # Do the work
 
-awk -b -O -v F="$HDRSFILE" '{
+gawk -bO -v F="$HDRSFILE" '{
     HDR = ""
     if (F) getline HDR < F
     if (!HDR) HDR = ">lcl|" NR " Dummy header " NR
diff --git a/uf-drop b/uf-drop
index 315bc3d..3f86ec3 100755
--- a/uf-drop
+++ b/uf-drop
@@ -70,9 +70,9 @@ done
 NUM=$1
 shift
 
-# Delegate to awk
+# Delegate to gawk
 
-awk -b -O -v P="$(basename "$0")" -v N=$NUM -v M=$MARK -v B=$BUT '
+gawk -bO -v P="$(basename "$0")" -v N=$NUM -v M=$MARK -v B=$BUT '
     NR%2==1 { print $0 (M ? " (uf:drop:" (B?"but:":"") N ")" : "") }
     NR%2==0 { print B ? substr ($0,length($0)-N+1) : substr ($0,N+1) }
     ' "$@"
diff --git a/uf-freqs b/uf-freqs
index 96ae6aa..7e03adb 100755
--- a/uf-freqs
+++ b/uf-freqs
@@ -67,7 +67,7 @@ while [ $# -ne 0 -a "$(expr "$1" : '\(.\)..*')" = "-" ]; do
     shift
 done
 
-awk -b -O -v P="$(basename "$0")" -v BARE=$BARE -v LENGTH_ONLY=$LENGTH_ONLY -v TOTALS=$TOTALS '
+gawk -bO -v P="$(basename "$0")" -v BARE=$BARE -v LENGTH_ONLY=$LENGTH_ONLY -v TOTALS=$TOTALS '
 BEGIN { PROCINFO["sorted_in"] = "@ind_str_asc" }
 NR%2==1 && !BARE 
 NR%2==0 {
diff --git a/uf-hash b/uf-hash
index e7de112..47f612f 100755
--- a/uf-hash
+++ b/uf-hash
@@ -78,6 +78,6 @@ FILE="-"
 
 # Do the work
 
-awk -b -O 'NR % 2 == 0' "$FILE" | $HASH_PGM | cut -d' ' -f1
+gawk -bO 'NR % 2 == 0' "$FILE" | $HASH_PGM | cut -d' ' -f1
 
 # vim: sts=4:sw=4:et:si:ai
diff --git a/uf-headers b/uf-headers
index 2806072..b9235a5 100755
--- a/uf-headers
+++ b/uf-headers
@@ -48,6 +48,6 @@ while [ $# -ne 0 -a "$(expr "$1" : '\(.\)..*')" = "-" ]; do
     shift
 done
 
-awk -b -O 'NR%2==1' "$@"
+gawk -bO 'NR%2==1' "$@"
 
 # vim: sts=4:sw=4:et:si:ai
diff --git a/uf-map b/uf-map
index 9be9074..ffe2162 100755
--- a/uf-map
+++ b/uf-map
@@ -74,10 +74,10 @@ shift
 
 # Making this work produced two nice false starts: a race and a deadlock
 
-# FAIL 1: using pipe inside awk (use the awk pipe feature)
+# FAIL 1: using pipe inside gawk
 # Fails because of race: external OPERATION is buffering its output,
 # so lines do not come out in the right order, not even with flush.
-# awk -b -O -v OPER="sh -c '$OPERATION'" '
+# gawk -bO -v OPER="sh -c '$OPERATION'" '
 #    NR%2==1 { print }
 #    NR%2==0 { print | OPER }
 #    { fflush() }' "$@"
@@ -100,7 +100,7 @@ mkfifo "$FIFO"
 # The pipeline splits the lines between FIFO and OPERATION, then paste merges hem
 if [ -z "$TITLES" ]; then LHS="$FIFO"; RHS="-"; else LHS="-"; RHS="$FIFO"; fi
 
-awk -b -O -v FIFO="$FIFO" -v ONE="${TITLES:-0}" '
+gawk -bO -v FIFO="$FIFO" -v ONE="${TITLES:-0}" '
         (NR+ONE)%2==1 { print > FIFO }; 
         (NR+ONE)%2==0' "$@" \
     | $OPERATION \
diff --git a/uf-rc b/uf-rc
index d08aaae..779efdb 100755
--- a/uf-rc
+++ b/uf-rc
@@ -84,7 +84,7 @@ done
 
 # Do the work
 
-awk -b -O -v P="$(basename "$0")" -v C=$COMPLEMENT -v R=$REVERSE -v M=$MARK '
+gawk -bO -v P="$(basename "$0")" -v C=$COMPLEMENT -v R=$REVERSE -v M=$MARK '
     NR%2==1 { print $0 (M ? " (uf:" (R?"reverse":"") (C?"complement":"") ")" : "") }
     NR%2==0 {
         if (R) for (i = length($0); i >= 1; --i) print_maybe_comp()
diff --git a/uf-select b/uf-select
index 6cdbf8f..1c688f9 100755
--- a/uf-select
+++ b/uf-select
@@ -110,16 +110,16 @@ FILE="${1:--}"
 
 while [ -n "$FILE" ]; do
     if [ -n "$NTH" ]; then
-        awk -b -v NTH=$NTH '
+        gawk -bO -v NTH=$NTH '
         BEGIN { split(NTH,lines,/,/) } 
         { for (x in lines) if (int((NR+1)/2) == lines[x]) print }
         ' "$FILE"
     elif [ -n "$SEQID" ]; then
-        awk -b -v SEQID="$SEQID" '
+        gawk -bO -v SEQID="$SEQID" '
             NR % 2 == 1 && (P=index($1,SEQID)) != 0 { C=substr($1,P+length(SEQID),1); if (C=="" || C==" " || C=="|") { print; getline; print } }
         ' "$FILE"
     elif [ -n "$REGEX" ]; then
-        awk -b "$(printf 'NR %% 2 == 1 && /%s/ { print; getline; print }\n' "$REGEX")" "$FILE"
+        gawk -bO "$(printf 'NR %% 2 == 1 && /%s/ { print; getline; print }\n' "$REGEX")" "$FILE"
     else
         usage_exit
     fi
diff --git a/uf-sort b/uf-sort
index 9d23c21..9f588f0 100755
--- a/uf-sort
+++ b/uf-sort
@@ -66,7 +66,7 @@ done
 # Then we sort, and then we unpack everything again.
 
 # Pre-process into single records
-awk -b -O -v OFS='\t' '
+gawk -bO -v OFS='\t' '
     NR % 2 == 1 { HDR = $0; }
     NR % 2 == 0 { print length(), $0, HDR }
     ' "$@" |
@@ -74,6 +74,6 @@ awk -b -O -v OFS='\t' '
 #LC_ALL=C sort $REVERSE --buffer-size=1G --key='1rn,2' -t '    ' - |
 LC_ALL=C sort $REVERSE --key='1rn,2' -t "$(printf '\t')" - |
 # And unpack again - let's hope there are no tabs in headers ...
-awk -b -O -F '\t' '{ print $3; print $2; }'
+gawk -bO -F '\t' '{ print $3; print $2; }'
 
 # vim: sts=4:sw=4:et:si:ai
diff --git a/uf-take b/uf-take
index 4762738..37606ab 100755
--- a/uf-take
+++ b/uf-take
@@ -69,9 +69,9 @@ done
 NUM=$1
 shift
 
-# Delegate to awk
+# Delegate to gawk
 
-awk -b -O -v P="$(basename "$0")" -v N=$NUM -v M=$MARK -v B=$BUT '
+gawk -bO -v P="$(basename "$0")" -v N=$NUM -v M=$MARK -v B=$BUT '
     NR%2==1 { print $0 (M ? " (uf:take:" (B?"but:":"") N ")" : "") }
     NR%2==0 { print B ? substr ($0,1,length($0)-N) : substr ($0,1,N) }
     ' "$@"
diff --git a/uf-valid b/uf-valid
index f5fc222..0b43e99 100755
--- a/uf-valid
+++ b/uf-valid
@@ -158,7 +158,7 @@ done
 
 # Do the work
 
-awk -b -O -v P="$(basename "$0")" -v WRONG="[^$ALLOW]" -v H=$VAL_HEADERS -v Q=$QUIET -v K=$KEEP_ERRORS -v S=$STOP_ON_ERR -v IGNORECASE=$IGNORE_CASE '{
+gawk -bO -v P="$(basename "$0")" -v WRONG="[^$ALLOW]" -v H=$VAL_HEADERS -v Q=$QUIET -v K=$KEEP_ERRORS -v S=$STOP_ON_ERR -v IGNORECASE=$IGNORE_CASE '{
     ERR = 0
     HDR = $0
     if ( HDR !~ /^>/ ) {