From 1bd115de47a9c5ab1106498ea7237b611090c629 Mon Sep 17 00:00:00 2001 From: WATANABE Yuki Date: Tue, 19 Nov 2024 01:05:11 +0900 Subject: [PATCH] WIP --- NEWS | 5 ++ NEWS.ja | 4 ++ exec.c | 37 +++++++---- parser.c | 63 ++++++++++++++++--- parser.h | 10 ++- tests/case-p.tst | 21 +++++++ tests/case-y.tst | 144 ++++++++++++++++++++++++++++++++++++++++++- tests/cmdprint-y.tst | 23 +++++++ 8 files changed, 281 insertions(+), 26 deletions(-) diff --git a/NEWS b/NEWS index 0ecc16c7..674f28ee 100644 --- a/NEWS +++ b/NEWS @@ -10,6 +10,11 @@ directory names for the first operand even before the user enters a slash. - Improved POSIX.1-2024 support: + - Case command items can now be terminated by `;&` instead of `;;` + to force the shell to execute the next item. + - The non-standard terminators `;|` and `;;&` are also supported + to resume pattern matching with the next item unless in the + POSIXly-correct mode. - After the `bg` built-in resumed a job, the `!` special parameter expands to the process ID of the job. - An interactive shell no longer exits on an error in the `exec` diff --git a/NEWS.ja b/NEWS.ja index 60d6be2a..a8355b05 100644 --- a/NEWS.ja +++ b/NEWS.ja @@ -9,6 +9,10 @@ - [行編集] . 
組込みコマンドの最初の引数の補完で、スラッシュを入力する 前からディレクトリ名を補完候補として出すようにした - POSIX.1-2024 のサポートを強化: + - `case` コマンドの分岐を `;;` の代わりに `;&` で区切ることで次の + 分岐も実行させることができるようになった + - 非標準の拡張として `;|` もしくは `;;&` で区切ることで次の分岐 + からパターンマッチングを再開させることもできる - `bg` 組込みでジョブを再開した後は `!` 特殊パラメータはジョブの プロセス ID に展開されるようになった - POSIX 準拠モードであっても、対話シェルが `exec` 組込みで失敗した diff --git a/exec.c b/exec.c index f8b16eda..cd127467 100644 --- a/exec.c +++ b/exec.c @@ -1395,28 +1395,41 @@ void exec_case(const command_T *c, bool finally_exit) if (word == NULL) goto fail; + bool match = false, emptycmd = true; + for (const caseitem_T *ci = c->c_casitems; ci != NULL; ci = ci->next) { - for (void **pats = ci->ci_patterns; *pats != NULL; pats++) { + for (void **pats = ci->ci_patterns; !match && *pats != NULL; pats++) { wchar_t *pattern = expand_single(*pats, TT_SINGLE, Q_WORD, ES_QUOTED); if (pattern == NULL) goto fail; - bool match = match_pattern(word, pattern); + match = match_pattern(word, pattern); free(pattern); - if (match) { - if (ci->ci_commands != NULL) { - exec_and_or_lists(ci->ci_commands, finally_exit); - goto done; - } else { - goto success; - } - } + } + if (!match) + continue; + + exec_and_or_lists( + ci->ci_commands, + finally_exit && (ci->next == NULL || ci->ci_cont == CC_BREAK)); + emptycmd = (ci->ci_commands == NULL); + + switch (ci->ci_cont) { + case CC_BREAK: + goto done; + case CC_FALLTHRU: + match = true; + break; + case CC_CONTINUE: + match = false; + break; } } -success: - laststatus = Exit_SUCCESS; + done: + if (emptycmd) + laststatus = Exit_SUCCESS; free(word); if (finally_exit) exit_shell(); diff --git a/parser.c b/parser.c index ae4cb660..f1345222 100644 --- a/parser.c +++ b/parser.c @@ -293,6 +293,7 @@ typedef enum tokentype_T { /* operators */ TT_NEWLINE, TT_AMP, TT_AMPAMP, TT_LPAREN, TT_RPAREN, TT_SEMICOLON, TT_DOUBLE_SEMICOLON, + TT_SEMICOLONAMP, TT_SEMICOLONPIPE, TT_DOUBLE_SEMICOLON_AMP, TT_PIPE, TT_PIPEPIPE, TT_LESS, TT_LESSLESS, TT_LESSAMP, TT_LESSLESSDASH, TT_LESSLESSLESS, TT_LESSGREATER, 
TT_LESSLPAREN, TT_GREATER, TT_GREATERGREATER, TT_GREATERGREATERPIPE, TT_GREATERPIPE, TT_GREATERAMP, @@ -526,6 +527,9 @@ bool is_closing_tokentype(tokentype_T tt) case TT_DO: case TT_DONE: case TT_DOUBLE_SEMICOLON: + case TT_DOUBLE_SEMICOLON_AMP: + case TT_SEMICOLONAMP: + case TT_SEMICOLONPIPE: case TT_ESAC: return true; default: @@ -881,6 +885,9 @@ const char *get_errmsg_unexpected_tokentype(tokentype_T tokentype) case TT_RBRACE: return Ngt("encountered `%ls' without a matching `{'"); case TT_DOUBLE_SEMICOLON: + case TT_DOUBLE_SEMICOLON_AMP: + case TT_SEMICOLONAMP: + case TT_SEMICOLONPIPE: return Ngt("`%ls' is used outside `case'"); case TT_BANG: return Ngt("`%ls' cannot be used as a command name"); @@ -1050,11 +1057,19 @@ void next_token(parsestate_T *ps) case L')': ps->tokentype = TT_RPAREN; index++; break; case L';': maybe_line_continuations(ps, ++index); - if (ps->src.contents[index] == L';') { - ps->tokentype = TT_DOUBLE_SEMICOLON; - index++; - } else { - ps->tokentype = TT_SEMICOLON; + switch (ps->src.contents[index]) { + default: ps->tokentype = TT_SEMICOLON; break; + case L'&': ps->tokentype = TT_SEMICOLONAMP; index++; break; + case L'|': ps->tokentype = TT_SEMICOLONPIPE; index++; break; + case L';': + maybe_line_continuations(ps, ++index); + if (ps->src.contents[index] == L'&') { + ps->tokentype = TT_DOUBLE_SEMICOLON_AMP; + index++; + } else { + ps->tokentype = TT_DOUBLE_SEMICOLON; + } + break; } break; case L'&': @@ -2673,11 +2688,27 @@ caseitem_T *parse_case_list(parsestate_T *ps) ci->ci_patterns = parse_case_patterns(ps); ci->ci_commands = parse_compound_list(ps); /* `ci_commands' may be NULL unlike for and while commands */ - if (ps->tokentype == TT_DOUBLE_SEMICOLON) - next_token(ps); - else - break; + switch (ps->tokentype) { + case TT_DOUBLE_SEMICOLON: + ci->ci_cont = CC_BREAK; + break; + case TT_SEMICOLONAMP: + ci->ci_cont = CC_FALLTHRU; + break; + case TT_SEMICOLONPIPE: + case TT_DOUBLE_SEMICOLON_AMP: + ci->ci_cont = CC_CONTINUE; + if 
(posixly_correct) + serror(ps, Ngt("The ;| or ;;& operator is not supported " + "in the POSIXly-correct mode")); + break; + default: + ci->ci_cont = CC_BREAK; + goto done; + } + next_token(ps); } while (!ps->error); +done: return first; } @@ -3389,6 +3420,8 @@ static void print_caseitems( struct print *restrict pr, const caseitem_T *restrict caseitems, unsigned indent) __attribute__((nonnull(1))); +static const wchar_t *case_item_terminator(casecont_T cc) + __attribute__((const)); #if YASH_ENABLE_DOUBLE_BRACKET static void print_double_bracket( struct print *restrict pr, const command_T *restrict c, unsigned indent) @@ -3722,13 +3755,23 @@ void print_caseitems(struct print *restrict pr, const caseitem_T *restrict ci, } print_indent(pr, indent + 1); - wb_cat(&pr->buffer, L";;"); + wb_cat(&pr->buffer, case_item_terminator(ci->ci_cont)); print_space_or_newline(pr); ci = ci->next; } } +const wchar_t *case_item_terminator(casecont_T cc) +{ + switch (cc) { + case CC_BREAK: return L";;"; + case CC_FALLTHRU: return L";&"; + case CC_CONTINUE: return L";|"; + } + assert(false); +} + #if YASH_ENABLE_DOUBLE_BRACKET void print_double_bracket( diff --git a/parser.h b/parser.h index a5f9982d..555fd599 100644 --- a/parser.h +++ b/parser.h @@ -1,6 +1,6 @@ /* Yash: yet another shell */ /* parser.h: syntax parser */ -/* (C) 2007-2018 magicant */ +/* (C) 2007-2024 magicant */ /* This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -131,11 +131,19 @@ typedef struct ifcommand_T { } ifcommand_T; /* For an "else" clause, `next' and `ic_condition' are NULL. 
*/ +/* type of a case item terminator symbol */ +typedef enum { + CC_BREAK, // ;; + CC_FALLTHRU, // ;& + CC_CONTINUE, // ;| aka ;;& +} casecont_T; + /* patterns and commands of a case command */ typedef struct caseitem_T { struct caseitem_T *next; void **ci_patterns; /* patterns to do matching */ struct and_or_T *ci_commands; /* commands executed if match succeeds */ + casecont_T ci_cont; /* terminator symbol type */ } caseitem_T; /* `ci_patterns' is a NULL-terminated array of pointers to `wordunit_T' that are * cast to `void *'. */ diff --git a/tests/case-p.tst b/tests/case-p.tst index 4386588a..edfca44d 100644 --- a/tests/case-p.tst +++ b/tests/case-p.tst @@ -200,6 +200,27 @@ case $(echo 2; exit 2) in esac __IN__ +test_oE -e 42 'executing item after ;&' +case 1 in + 0) echo not reached 0;; + 1) echo matched 1;& + 2) echo matched 2; (exit 42);& +esac +__IN__ +matched 1 +matched 2 +__OUT__ + +test_oE 'exit status after empty ;& in case command' +(exit 1) +case i in + i) ;& + j) echo $? +esac +__IN__ +1 +__OUT__ + test_oE 'patterns can be preceded by (' case a in (a) echo matched 1;; diff --git a/tests/case-y.tst b/tests/case-y.tst index 653c742b..68360c88 100644 --- a/tests/case-y.tst +++ b/tests/case-y.tst @@ -23,12 +23,90 @@ case $(echo 2; exit 2) in esac __IN__ -test_oE 'reserved word esac as pattern (preceded by parenthesis, +o POSIX)' -case esac in (esac) echo matched;; esac +# The behavior is unspecified in POSIX, but many existing shells seem to behave +# this way (with the notable exception of ksh). 
+test_OE -e 0 'exit status of case command with ;& followed by empty item' +case i in + i) (exit 1);& + j) ;; +esac __IN__ -matched + +test_oE -e 42 'pattern matching after ;| (+o posix)' +case 1 in + 0) echo not reached 0;; + 1) echo matched 1; (exit 12);| + 2) echo not reached 2;; + 1) echo matched 2 $?; (exit 42);| + 2) echo not reached 3;; +esac +__IN__ +matched 1 +matched 2 12 +__OUT__ + +test_oE -e 42 'pattern matching after ;;& (+o posix)' +case 1 in + 0) echo not reached 0;; + 1) echo matched 1; (exit 12);;& + 2) echo not reached 2;; + 1) echo matched 2 $?; (exit 42);;& + 2) echo not reached 3;; +esac +__IN__ +matched 1 +matched 2 12 +__OUT__ + +( +posix="true" + +test_Oe -e 2 'pattern matching after ;| (-o posix)' +case 1 in + 0) echo not reached 0;; + 1) echo matched 1; (exit 12);| + 2) echo not reached 2;; + 1) echo matched 2 $?; (exit 42);| + 2) echo not reached 3;; +esac +__IN__ +syntax error: The ;| or ;;& operator is not supported in the POSIXly-correct mode +syntax error: `esac' is missing +__ERR__ +#' +#` + +test_Oe -e 2 'pattern matching after ;;& (-o posix)' +case 1 in + 0) echo not reached 0;; + 1) echo matched 1; (exit 12);;& + 2) echo not reached 2;; + 1) echo matched 2 $?; (exit 42);;& + 2) echo not reached 3;; +esac +__IN__ +syntax error: The ;| or ;;& operator is not supported in the POSIXly-correct mode +syntax error: `esac' is missing +__ERR__ +#' +#` + +) + +# Existing shells disagree on the behavior of this case. +test_oE 'exit status in case command with subject containing command substitution' +case $(echo 1; exit 42) in + 1) echo $? +esac +__IN__ +0 __OUT__ +# Many existing shells behave this way (with the notable exception of ksh). 
+test_OE -e 0 'exit status of case command with subject containing command substitution' +case $(echo 1; exit 42) in esac +__IN__ + test_Oe -e 2 'in without case' in __IN__ @@ -57,6 +135,66 @@ __ERR__ #' #` +test_Oe -e 2 ';& outside case (at beginning of line)' +;& +__IN__ +syntax error: `;&' is used outside `case' +__ERR__ +#' +#` +#' +#` + +test_Oe -e 2 ';& outside case (after simple command)' +echo foo;& +__IN__ +syntax error: `;&' is used outside `case' +__ERR__ +#' +#` +#' +#` + +test_Oe -e 2 ';| outside case (at beginning of line)' +;| +__IN__ +syntax error: `;|' is used outside `case' +__ERR__ +#' +#` +#' +#` + +test_Oe -e 2 ';| outside case (after simple command)' +echo foo;| +__IN__ +syntax error: `;|' is used outside `case' +__ERR__ +#' +#` +#' +#` + +test_Oe -e 2 ';;& outside case (at beginning of line)' +;;& +__IN__ +syntax error: `;;&' is used outside `case' +__ERR__ +#' +#` +#' +#` + +test_Oe -e 2 ';;& outside case (after simple command)' +echo foo;;& +__IN__ +syntax error: `;;&' is used outside `case' +__ERR__ +#' +#` +#' +#` + test_Oe -e 2 'esac without case' esac __IN__ diff --git a/tests/cmdprint-y.tst b/tests/cmdprint-y.tst index f0f7a0b1..b89523f2 100644 --- a/tests/cmdprint-y.tst +++ b/tests/cmdprint-y.tst @@ -2,6 +2,8 @@ mkfifo fifo +# Commands used in testcase_single must perform exactly one "cat fifo" so that +# the background job finishes after "jobs" prints the job status. testcase_single() { testcase "$@" 3<<__IN__ 5<&- 4<<__OUT__ $(cat <&3) & @@ -414,6 +416,27 @@ case i in esac __OUT__ +test_single 'case command, terminators, single line' +case 1 in (1) cat fifo;& (2) ;| (3) ./oops&;;& esac +__IN__ +case 1 in (1) cat fifo ;& (2) ;| (3) ./oops& ;| esac +__OUT__ + +test_multi 'case command, terminators, multi-line' +case 1 in (1) cat fifo;& (2) ;| (3) ./oops&;;& esac +__IN__ +case 1 in + (1) + cat fifo + ;& + (2) + ;| + (3) + ./oops& + ;| +esac +__OUT__ + ( if ! testee -c 'command -v [[' >/dev/null; then skip="true"