Skip to content

Commit

Permalink
[all] explicitly use GNU awk as gawk
Browse files Browse the repository at this point in the history
  • Loading branch information
zwets committed Dec 28, 2020
1 parent 52903c1 commit 32841e1
Show file tree
Hide file tree
Showing 16 changed files with 26 additions and 26 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ Find [Unfasta on GitHub](https://github.com/zwets/unfasta).
|`uf-take`| Take the initial N elements from a sequence, or take elements until N are left |
|`uf-valid`| Validate an unfasta stream against its allowed alphabet and NCBI conventions |

Each has a `-h|--help` option for usage instructions. All depend only on `awk` and a POSIX shell, except `uf-random` which requires `bash` for its `RANDOM` extension.
Each has a `-h|--help` option for usage instructions. All depend only on GNU awk (`gawk`) and a POSIX shell, except `uf-random` which requires `bash` for its `RANDOM` extension.


## Design principles
Expand Down
4 changes: 2 additions & 2 deletions uf
Original file line number Diff line number Diff line change
Expand Up @@ -78,11 +78,11 @@ while [ $# -ne 0 -a "$(expr "$1" : '\(.\)..*')" = "-" ]; do
done

if [ -n "$REVERT" ]; then
awk -b -O -v WIDTH=80 -v L=$LOWER -v U=$UPPER '
gawk -bO -v WIDTH=80 -v L=$LOWER -v U=$UPPER '
/^>/
/^[^>]/ { for (i=1;i<=length();i+=WIDTH) { S=substr($0,i,WIDTH); print (L ? tolower(S) : U ? toupper(S) : S) } }' "$@"
else
awk -b -O -v L=$LOWER -v U=$UPPER 'BEGIN { ORS="" }
gawk -bO -v L=$LOWER -v U=$UPPER 'BEGIN { ORS="" }
/^>/ { if (NR!=1) print "\n"; print $0 "\n" }
/^[^>]/ { gsub(/[[:space:]]/,""); print (L ? tolower($0) : U ? toupper($0) : $0) }
END { print "\n" }' "$@"
Expand Down
2 changes: 1 addition & 1 deletion uf-bare
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ done

# Do the work

awk -b -O -v F="$HDRSFILE" -v P="$(basename "$0")" '
gawk -bO -v F="$HDRSFILE" -v P="$(basename "$0")" '
NR%2==1 && substr($0,1,1) != ">" { print P ": warning: discarding line that does not look like a FASTA header: " $0 > "/dev/stderr" }
NR%2==1 && F { print > F }
NR%2==0' "$@"
Expand Down
4 changes: 2 additions & 2 deletions uf-circut
Original file line number Diff line number Diff line change
Expand Up @@ -114,9 +114,9 @@ LHS="$(expr "$CUT_SPEC" : '\(.*\)[/:~].*')"
OP="$(expr "$CUT_SPEC" : '.*\([/:~]\).*')"
RHS="$(expr "$CUT_SPEC" : '.*[/:~]\(.*\)')"

# Delegate to awk
# Delegate to gawk

awk -b -O -v P="$(basename "$0")" -v FROM=$LHS -v MID=$LHS -v UPTO=$RHS -v LEN=$RHS -v DIST=$RHS -v OP="$OP" -v Z=$ZERO -v Q=$QUIET -v M=$MARK '
gawk -bO -v P="$(basename "$0")" -v FROM=$LHS -v MID=$LHS -v UPTO=$RHS -v LEN=$RHS -v DIST=$RHS -v OP="$OP" -v Z=$ZERO -v Q=$QUIET -v M=$MARK '
function bump0(p1,p2) { return p1 < 0 && p2 >= 0 ? 1 : 0 } # return 1 if from p1 to p2 crosses 0
BEGIN {
Expand Down
4 changes: 2 additions & 2 deletions uf-cut
Original file line number Diff line number Diff line change
Expand Up @@ -115,8 +115,8 @@ LHS="$(expr "$CUT_SPEC" : '\(.*\)[/:~].*')"
OP="$(expr "$CUT_SPEC" : '.*\([/:~]\).*')"
RHS="$(expr "$CUT_SPEC" : '.*[/:~]\(.*\)')"

# Delegate to awk
awk -b -O -v P="$(basename "$0")" -v FROM=$LHS -v MID=$LHS -v UPTO=$RHS -v LEN=$RHS -v DIST=$RHS -v OP="$OP" -v C=$CLIP -v Z=$ZERO -v Q=$QUIET -v M=$MARK '
# Delegate to gawk
gawk -bO -v P="$(basename "$0")" -v FROM=$LHS -v MID=$LHS -v UPTO=$RHS -v LEN=$RHS -v DIST=$RHS -v OP="$OP" -v C=$CLIP -v Z=$ZERO -v Q=$QUIET -v M=$MARK '
function abs(n) { return n < 0 ? -n : n }
function cross0(p1,p2) { return p1 < 0 && p2 >= 0 ? 1 : 0 } # return 1 if from p1 to p2 crosses 0
function error_out(s) { print P ": error: " s > "/dev/stderr"; exit 1 }
Expand Down
2 changes: 1 addition & 1 deletion uf-dress
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ done

# Do the work

awk -b -O -v F="$HDRSFILE" '{
gawk -bO -v F="$HDRSFILE" '{
HDR = ""
if (F) getline HDR < F
if (!HDR) HDR = ">lcl|" NR " Dummy header " NR
Expand Down
4 changes: 2 additions & 2 deletions uf-drop
Original file line number Diff line number Diff line change
Expand Up @@ -70,9 +70,9 @@ done
NUM=$1
shift

# Delegate to awk
# Delegate to gawk

awk -b -O -v P="$(basename "$0")" -v N=$NUM -v M=$MARK -v B=$BUT '
gawk -bO -v P="$(basename "$0")" -v N=$NUM -v M=$MARK -v B=$BUT '
NR%2==1 { print $0 (M ? " (uf:drop:" (B?"but:":"") N ")" : "") }
NR%2==0 { print B ? substr ($0,length($0)-N+1) : substr ($0,N+1) }
' "$@"
Expand Down
2 changes: 1 addition & 1 deletion uf-freqs
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ while [ $# -ne 0 -a "$(expr "$1" : '\(.\)..*')" = "-" ]; do
shift
done

awk -b -O -v P="$(basename "$0")" -v BARE=$BARE -v LENGTH_ONLY=$LENGTH_ONLY -v TOTALS=$TOTALS '
gawk -bO -v P="$(basename "$0")" -v BARE=$BARE -v LENGTH_ONLY=$LENGTH_ONLY -v TOTALS=$TOTALS '
BEGIN { PROCINFO["sorted_in"] = "@ind_str_asc" }
NR%2==1 && !BARE
NR%2==0 {
Expand Down
2 changes: 1 addition & 1 deletion uf-hash
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,6 @@ FILE="-"

# Do the work

awk -b -O 'NR % 2 == 0' "$FILE" | $HASH_PGM | cut -d' ' -f1
gawk -bO 'NR % 2 == 0' "$FILE" | $HASH_PGM | cut -d' ' -f1

# vim: sts=4:sw=4:et:si:ai
2 changes: 1 addition & 1 deletion uf-headers
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,6 @@ while [ $# -ne 0 -a "$(expr "$1" : '\(.\)..*')" = "-" ]; do
shift
done

awk -b -O 'NR%2==1' "$@"
gawk -bO 'NR%2==1' "$@"

# vim: sts=4:sw=4:et:si:ai
6 changes: 3 additions & 3 deletions uf-map
Original file line number Diff line number Diff line change
Expand Up @@ -74,10 +74,10 @@ shift

# Making this work produced two nice false starts: a race and a deadlock

# FAIL 1: using pipe inside awk (use the awk pipe feature)
# FAIL 1: using pipe inside gawk
# Fails because of race: external OPERATION is buffering its output,
# so lines do not come out in the right order, not even with flush.
# awk -b -O -v OPER="sh -c '$OPERATION'" '
# gawk -bO -v OPER="sh -c '$OPERATION'" '
# NR%2==1 { print }
# NR%2==0 { print | OPER }
# { fflush() }' "$@"
Expand All @@ -100,7 +100,7 @@ mkfifo "$FIFO"
# The pipeline splits the lines between FIFO and OPERATION, then paste merges them
if [ -z "$TITLES" ]; then LHS="$FIFO"; RHS="-"; else LHS="-"; RHS="$FIFO"; fi

awk -b -O -v FIFO="$FIFO" -v ONE="${TITLES:-0}" '
gawk -bO -v FIFO="$FIFO" -v ONE="${TITLES:-0}" '
(NR+ONE)%2==1 { print > FIFO };
(NR+ONE)%2==0' "$@" \
| $OPERATION \
Expand Down
2 changes: 1 addition & 1 deletion uf-rc
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ done

# Do the work

awk -b -O -v P="$(basename "$0")" -v C=$COMPLEMENT -v R=$REVERSE -v M=$MARK '
gawk -bO -v P="$(basename "$0")" -v C=$COMPLEMENT -v R=$REVERSE -v M=$MARK '
NR%2==1 { print $0 (M ? " (uf:" (R?"reverse":"") (C?"complement":"") ")" : "") }
NR%2==0 {
if (R) for (i = length($0); i >= 1; --i) print_maybe_comp()
Expand Down
6 changes: 3 additions & 3 deletions uf-select
Original file line number Diff line number Diff line change
Expand Up @@ -110,16 +110,16 @@ FILE="${1:--}"

while [ -n "$FILE" ]; do
if [ -n "$NTH" ]; then
awk -b -v NTH=$NTH '
gawk -bO -v NTH=$NTH '
BEGIN { split(NTH,lines,/,/) }
{ for (x in lines) if (int((NR+1)/2) == lines[x]) print }
' "$FILE"
elif [ -n "$SEQID" ]; then
awk -b -v SEQID="$SEQID" '
gawk -bO -v SEQID="$SEQID" '
NR % 2 == 1 && (P=index($1,SEQID)) != 0 { C=substr($1,P+length(SEQID),1); if (C=="" || C==" " || C=="|") { print; getline; print } }
' "$FILE"
elif [ -n "$REGEX" ]; then
awk -b "$(printf 'NR %% 2 == 1 && /%s/ { print; getline; print }\n' "$REGEX")" "$FILE"
gawk -bO "$(printf 'NR %% 2 == 1 && /%s/ { print; getline; print }\n' "$REGEX")" "$FILE"
else
usage_exit
fi
Expand Down
4 changes: 2 additions & 2 deletions uf-sort
Original file line number Diff line number Diff line change
Expand Up @@ -66,14 +66,14 @@ done
# Then we sort, and then we unpack everything again.

# Pre-process into single records
awk -b -O -v OFS='\t' '
gawk -bO -v OFS='\t' '
NR % 2 == 1 { HDR = $0; }
NR % 2 == 0 { print length(), $0, HDR }
' "$@" |
# Sort in order
#LC_ALL=C sort $REVERSE --buffer-size=1G --key='1rn,2' -t ' ' - |
LC_ALL=C sort $REVERSE --key='1rn,2' -t "$(printf '\t')" - |
# And unpack again - let's hope there are no tabs in headers ...
awk -b -O -F '\t' '{ print $3; print $2; }'
gawk -bO -F '\t' '{ print $3; print $2; }'

# vim: sts=4:sw=4:et:si:ai
4 changes: 2 additions & 2 deletions uf-take
Original file line number Diff line number Diff line change
Expand Up @@ -69,9 +69,9 @@ done
NUM=$1
shift

# Delegate to awk
# Delegate to gawk

awk -b -O -v P="$(basename "$0")" -v N=$NUM -v M=$MARK -v B=$BUT '
gawk -bO -v P="$(basename "$0")" -v N=$NUM -v M=$MARK -v B=$BUT '
NR%2==1 { print $0 (M ? " (uf:take:" (B?"but:":"") N ")" : "") }
NR%2==0 { print B ? substr ($0,1,length($0)-N) : substr ($0,1,N) }
' "$@"
Expand Down
2 changes: 1 addition & 1 deletion uf-valid
Original file line number Diff line number Diff line change
Expand Up @@ -158,7 +158,7 @@ done

# Do the work

awk -b -O -v P="$(basename "$0")" -v WRONG="[^$ALLOW]" -v H=$VAL_HEADERS -v Q=$QUIET -v K=$KEEP_ERRORS -v S=$STOP_ON_ERR -v IGNORECASE=$IGNORE_CASE '{
gawk -bO -v P="$(basename "$0")" -v WRONG="[^$ALLOW]" -v H=$VAL_HEADERS -v Q=$QUIET -v K=$KEEP_ERRORS -v S=$STOP_ON_ERR -v IGNORECASE=$IGNORE_CASE '{
ERR = 0
HDR = $0
if ( HDR !~ /^>/ ) {
Expand Down

0 comments on commit 32841e1

Please sign in to comment.