1
0
Fork 0
mirror of git://git.code.sf.net/p/cdesktopenv/code synced 2025-03-09 15:50:02 +00:00

Fix multiple bugs in .sh.match (#455)

This commit backports all of the relevant .sh.match bugfixes from
ksh93v-. Most of the .sh.match rewrite is from versions 2012-08-24
and 2012-10-04, with patches from later releases of 93v- and
ksh2020 also applied. Note that there are still some remaining bugs
in .sh.match, although now the total count of .sh.match bugs should
be less than before.

These are the relevant changes in the ksh93v- changelog that were
backported:
12-08-07  .sh.match no longer gets set for patterns in PS4 during
          set -x.
12-08-10  Rewrote .sh.match expansions fixing several bugs and
          improving performance.
12-08-22  .sh.match now handles subpatterns that had no matches with
          ${var//pattern} correctly.
12-08-21  A bug in setting .sh.match after ${var//pattern/string}
          when string is empty has been fixed.
12-08-21  A bug in setting .sh.match after [[ string == pattern ]]
          has been fixed.
12-08-31  A bug that could cause a core dump after
          typeset -m var=.sh.match has been fixed.
12-09-10  Fixed a bug in typeset -m when .sh.match is being renamed.
12-09-07  Fixed a bug in .sh.match code that could cause the shell
          to quietly
13-02-21  The 12-01-16 bug fix prevented .sh.match from being used
          in the replacement string. The previous code was restored
          and a different fix which prevented .sh.match from being
          computed for nested replacement has been used instead.
13-05-28  Fixed two bugs for typeset -c and typeset -m for variable
          .sh.match.

Changes:
- The SHOPT_2DMATCH option has been removed. This was already the
  default behavior previously, and now it's documented in the man
  page.
- init.c: Backported the sh_setmatch() rewrite from 93v- 2012-08-24
  and 2012-10-04.
- Backported the libast 93v- strngrpmatch() function, as the
  .sh.match rewrite requires this API.
- Backported the sh_match regression tests from ksh93v-, with many
  other sh_match tests backported from ksh2020. Much of the sh_match
  script is based on code from Roland Mainz:
  https://marc.info/?l=ast-developers&m=134606574109162&w=2
  https://marc.info/?l=ast-developers&m=134490505607093
- tests/{substring,treemove}.sh: Backported other relevant .sh.match
  fixes, with tests added to the substring and treemove test scripts.
- tests/types.sh: One of the (now reverted) memory leak bugfixes
  introduced a CI test failure in this script, so for that test the
  error message has been improved.
- string/strmatch.c: The original ksh93v- code for the strngrpmatch()
  changes introduced a crash that could occur because strlen would
  be used on a null pointer. This has been fixed by avoiding strlen
  if the string is null.

One nice side effect of these changes is a considerable performance
improvement in the shbench[1] gsub benchmark (results from 20
iterations with CCFLAGS=-Os):
--------------------------------------------------
name      /tmp/ksh-current     /tmp/ksh-matchfixes
--------------------------------------------------
gsub.ksh  0.883 [0.822-0.959]  0.457 [0.442-0.505]
--------------------------------------------------

Despite all of the many fixes and improvements in the backported
93v- .sh.match code, there are a few remaining bugs:

- .sh.match is printed with a default [0] subscript (see also
  https://github.com/ksh93/ksh/issues/308#issuecomment-1025016088):
     $ arch/*/bin/ksh -c 'echo ${!.sh.match}'
       .sh.match[0]
  This bug appears to have been introduced by the changes from
  ksh93v- 2012-08-24.
- The wrong variable name is given for 'parameter not set' errors
  (from https://marc.info/?l=ast-developers&m=134489094602596):
     $ arch/*/bin/ksh -u
     $ x=1234
     $ true "${x//~(X)([012])|([345])/}"
     $ compound co
     $ typeset -m co.array=.sh.match
     $ printf "%q\n" "${co.array[2][0]}"
     arch/linux.i386-64/bin/ksh: co.array[2][(null)]: parameter not set
- .sh.match leaks out of subshells. Further information and a
  reproducer can be found here:
  https://marc.info/?l=ast-developers&m=136292897330187

[1]: https://github.com/ksh-community/shbench
This commit is contained in:
Johnothan King 2022-02-08 16:01:40 -08:00 committed by Martijn Dekker
parent 232b7bff30
commit f38494ea1d
24 changed files with 1355 additions and 143 deletions

View file

@ -1,8 +1,8 @@
iff AST_API
ver ast 20220201
ver ast 20220208
api ast 20120528 regexec regnexec regrexec regsubexec strgrpmatch
api ast 20120528 regexec regnexec regrexec regsubexec
api ast 20120411 cmdopen

View file

@ -2,7 +2,7 @@
* *
* This software is part of the ast package *
* Copyright (c) 1985-2011 AT&T Intellectual Property *
* Copyright (c) 2020-2021 Contributors to ksh 93u+m *
* Copyright (c) 2020-2022 Contributors to ksh 93u+m *
* and is licensed under the *
* Eclipse Public License, Version 1.0 *
* by AT&T Intellectual Property *
@ -322,6 +322,8 @@ main()
printf("#define strgid _ast_strgid\n");
printf("#undef strgrpmatch\n");
printf("#define strgrpmatch _ast_strgrpmatch\n");
printf("#undef strngrpmatch\n");
printf("#define strngrpmatch _ast_strngrpmatch\n");
printf("#undef strhash\n");
printf("#define strhash _ast_strhash\n");
printf("#undef strkey\n");

View file

@ -169,11 +169,12 @@ typedef struct
* strgrpmatch() flags
*/
#define STR_MAXIMAL 01 /* maximal match */
#define STR_LEFT 02 /* implicit left anchor */
#define STR_RIGHT 04 /* implicit right anchor */
#define STR_ICASE 010 /* ignore case */
#define STR_GROUP 020 /* (|&) inside [@|&](...) only */
#define STR_MAXIMAL 0x01 /* maximal match */
#define STR_LEFT 0x02 /* implicit left anchor */
#define STR_RIGHT 0x04 /* implicit right anchor */
#define STR_ICASE 0x08 /* ignore case */
#define STR_GROUP 0x10 /* (|&) inside [@|&](...) only */
#define STR_INT 0x20 /* int* match array */
/*
* fmtquote() flags
@ -371,8 +372,8 @@ extern int strexp(char*, int);
extern long streval(const char*, char**, long(*)(const char*, char**));
extern long strexpr(const char*, char**, long(*)(const char*, char**, void*), void*);
extern int strgid(const char*);
extern int strgrpmatch(const char*, const char*, int*, int, int);
extern int strgrpmatch_20120528(const char*, const char*, ssize_t*, int, int);
extern int strgrpmatch(const char*, const char*, ssize_t*, int, int);
extern int strngrpmatch(const char*, size_t, const char*, ssize_t*, int, int);
extern unsigned int strhash(const char*);
extern void* strlook(const void*, size_t, const char*);
extern int strmatch(const char*, const char*);

View file

@ -124,7 +124,7 @@ Returns the error message text corresponding to the
\fIerrno\fP.
.Ss "char* strerror(int \fIerrno\fP)"
Equivalent to fmterror(\fIerrno\fP).
.Ss "int strgrpmatch(const char* \fIstring\fP, const char* \fIpattern\fP, int* \fIsub\fP, int \fInsub\fP, int \fIflags\fP)"
.Ss "int strgrpmatch(const char* \fIstring\fP, const char* \fIpattern\fP, ssize_t* \fIsub\fP, int \fInsub\fP, int \fIflags\fP)"
Matches the null-terminated \fIstring\fP against the null-terminated
.BR ksh (1)
augmented \fIpattern\fP.
@ -148,8 +148,6 @@ Ignore case.
.Tp
\f3STR_GROUP\fP:
(|&) inside [@|*|+{n,m}](...) only.
.Ss "int strmatch(const char* \fIstring\fP, const char* \fIpattern\fP, int* \fIsub\fP, int \fInsub\fP, int \fIflags\fP)"
Equivalent to strgrpmatch(\fIstring\fP,\fIpattern\fP,0,0,STR_MAXIMAL|STR_LEFT|STR_RIGHT).
.SH "SEE ALSO"
.BR ast (3),
.BR ccode (3),

View file

@ -2,7 +2,7 @@
* *
* This software is part of the ast package *
* Copyright (c) 1985-2012 AT&T Intellectual Property *
* Copyright (c) 2020-2021 Contributors to ksh 93u+m *
* Copyright (c) 2020-2022 Contributors to ksh 93u+m *
* and is licensed under the *
* Eclipse Public License, Version 1.0 *
* by AT&T Intellectual Property *
@ -64,8 +64,6 @@ static struct State_s
int nmatch;
} matchstate;
#define STR_INT 040000
/*
* subgroup match
* 0 returned if no match
@ -77,7 +75,7 @@ static struct State_s
*/
int
strgrpmatch(const char* b, const char* p, ssize_t* sub, int n, register int flags)
strngrpmatch(const char* b, size_t z, const char* p, ssize_t* sub, int n, register int flags)
{
register regex_t* re;
register ssize_t* end;
@ -140,7 +138,7 @@ strgrpmatch(const char* b, const char* p, ssize_t* sub, int n, register int flag
return 0;
matchstate.nmatch = n;
}
if (regexec(re, b, n, matchstate.match, reflags & ~(REG_MINIMAL|REG_SHELL_GROUP|REG_LEFT|REG_RIGHT|REG_ICASE)))
if (regnexec(re, b, z, n, matchstate.match, reflags & ~(REG_MINIMAL|REG_SHELL_GROUP|REG_LEFT|REG_RIGHT|REG_ICASE)))
return 0;
if (!sub || n <= 0)
return 1;
@ -176,7 +174,7 @@ strgrpmatch(const char* b, const char* p, ssize_t* sub, int n, register int flag
int
strmatch(const char* s, const char* p)
{
return strgrpmatch(s, p, NiL, 0, STR_MAXIMAL|STR_LEFT|STR_RIGHT);
return strngrpmatch(s, s ? strlen(s) : 0, p, NiL, 0, STR_MAXIMAL|STR_LEFT|STR_RIGHT);
}
/*
@ -192,7 +190,7 @@ strsubmatch(const char* s, const char* p, int flags)
{
ssize_t match[2];
return strgrpmatch(s, p, match, 1, (flags ? STR_MAXIMAL : 0)|STR_LEFT) ? (char*)s + match[1] : (char*)0;
return strngrpmatch(s, s ? strlen(s) : 0, p, match, 1, (flags ? STR_MAXIMAL : 0)|STR_LEFT) ? (char*)s + match[1] : (char*)0;
}
#undef strgrpmatch
@ -201,7 +199,7 @@ strsubmatch(const char* s, const char* p, int flags)
#endif
int
strgrpmatch(const char* b, const char* p, int* sub, int n, int flags)
strgrpmatch(const char* b, const char* p, ssize_t* sub, int n, int flags)
{
return strgrpmatch_20120528(b, p, (ssize_t*)sub, n, flags|STR_INT);
return strngrpmatch(b, b ? strlen(b) : 0, p, sub, n, flags|STR_INT);
}