From db1d539d499afd04234f19dd63b2029713261791 Mon Sep 17 00:00:00 2001 From: Johnothan King Date: Thu, 2 Jul 2020 10:40:15 -0700 Subject: [PATCH] Fix ERE repetition expressions in [[ ... =~ ERE{x,y} ]] (#54) Regular expressions that combine a repetition expression with a parenthesized sub-expression throw a garbled syntax error: $ [[ AATAAT =~ (AAT){2} ]] ksh: syntax error: `~(E)(AAT){2} ]] :'%Cred%h%Creseksh: syntax error: `~(E)(AAT){2} ]] :'%Cred%h%Creseksh: syntax' unexpected The syntax error occurs because ksh is not fully accounting for '=~' when it runs into a curly bracket. This fix disables the syntax error when the operator is '=~' and adds handling for '(str){x}' (to allow for more than one sub-expression). This bugfix and the regression tests for it were backported from ksh93v- 2014-12-24-beta. src/cmd/ksh93/sh/lex.c: - Do not trigger a syntax error for '{x}' when the operator is '=~' and add handling for multiple parentheses when combined with '{x}'. src/cmd/ksh93/tests/bracket.sh: - Add two tests from ksh93v- to test sub-expressions combined with the '{x}' quantifier. --- NEWS | 3 +++ src/cmd/ksh93/sh/lex.c | 8 +++++++- src/cmd/ksh93/tests/bracket.sh | 5 +++++ 3 files changed, 15 insertions(+), 1 deletion(-) diff --git a/NEWS b/NEWS index 9ab5949a6..bf83cd421 100644 --- a/NEWS +++ b/NEWS @@ -8,6 +8,9 @@ Any uppercase BUG_* names are modernish shell bug IDs. - 'read -u' will no longer crash with a memory fault when given an out of range or negative file descriptor. +- The '=~' operator no longer raises an error if a regular expression + combines the '{x}' quantifier with a sub-expression. 
+ 2020-06-28: - Variables created with 'typeset -RF' no longer cause a memory fault diff --git a/src/cmd/ksh93/sh/lex.c b/src/cmd/ksh93/sh/lex.c index b6c504daf..916561b57 100644 --- a/src/cmd/ksh93/sh/lex.c +++ b/src/cmd/ksh93/sh/lex.c @@ -388,6 +388,12 @@ int sh_lex(Lex_t* lp) switch(n) { case S_BREAK: + if(lp->lex.incase>TEST_RE && mode==ST_NORM && c==LPAREN) + { + pushlevel(lp,RPAREN,mode); + mode = ST_NESTED; + continue; + } fcseek(-LEN); goto breakloop; case S_EOF: @@ -1163,7 +1169,7 @@ int sh_lex(Lex_t* lp) } if(mode==ST_NONE) return(0); - if(c!=n) + if(c!=n && lp->lex.incase<TEST_RE) { lp->token = c; sh_syntax(lp); diff --git a/src/cmd/ksh93/tests/bracket.sh b/src/cmd/ksh93/tests/bracket.sh index 7447aa5f5..5b7745fe2 100755 --- a/src/cmd/ksh93/tests/bracket.sh +++ b/src/cmd/ksh93/tests/bracket.sh @@ -388,5 +388,10 @@ var=$(echo begin; exec >/dev/tty; [ -t 1 ] && test -t 1 && [[ -t 1 ]]) \ test 123 -eq 123x 2>/dev/null [[ $? -ge 2 ]] || err_exit 'test builtin should return value greater than 1 on error' +# ====== +# The '=~' operator should work with curly brackets +$SHELL -c '[[ AATAAT =~ (AAT){2} ]]' || err_exit '[[ AATAAT =~ (AAT){2} ]] does not match' +$SHELL -c '[[ AATAATCCCAATAAT =~ (AAT){2}CCC(AAT){2} ]]' || err_exit '[[ AATAATCCCAATAAT =~ (AAT){2}CCC(AAT){2} ]] does not match' + # ====== exit $((Errors<125?Errors:125))