From f38494ea1dd17a8e785cca0b524adb46db71bc08 Mon Sep 17 00:00:00 2001 From: Johnothan King Date: Tue, 8 Feb 2022 16:01:40 -0800 Subject: [PATCH] Fix multiple bugs in .sh.match (#455) This commit backports all of the relevant .sh.match bugfixes from ksh93v-. Most of the .sh.match rewrite is from versions 2012-08-24 and 2012-10-04, with patches from later releases of 93v- and ksh2020 also applied. Note that there are still some remaining bugs in .sh.match, although now the total count of .sh.match bugs should be less than before. These are the relevant changes in the ksh93v- changelog that were backported: 12-08-07 .sh.match no longer gets set for patterns in PS4 during set -x. 12-08-10 Rewrote .sh.match expansions fixing several bugs and improving performance. 12-08-22 .sh.match now handles subpatterns that had no matches with ${var//pattern} correctly. 12-08-21 A bug in setting .sh.match after ${var//pattern/string} when string is empty has been fixed. 12-08-21 A bug in setting .sh.match after [[ string == pattern ]] has been fixed. 12-08-31 A bug that could cause a core dump after typeset -m var=.sh.match has been fixed. 12-09-10 Fixed a bug in typeset -m the .sh.match is being renamed. 12-09-07 Fixed a bug in .sh.match code that coud cause the shell to quitely 13-02-21 The 12-01-16 bug fix prevented .sh.match from being used in the replacement string. The previous code was restored and a different fix which prevented .sh.match from being computed for nested replacement has been used instead. 13-05-28 Fixed two bug for typeset -c and typeset -m for variable .sh.match. Changes: - The SHOPT_2DMATCH option has been removed. This was already the default behavior previously, and now it's documented in the man page. - init.c: Backported the sh_setmatch() rewrite from 93v- 2012-08-24 and 2012-10-04. - Backported the libast 93v- strngrpmatch() function, as the .sh.match rewrite requires this API. 
- Backported the sh_match regression tests from ksh93v-, with many other sh_match tests backported from ksh2020. Much of the sh_match script is based on code from Roland Mainz: https://marc.info/?l=ast-developers&m=134606574109162&w=2 https://marc.info/?l=ast-developers&m=134490505607093 - tests/{substring,treemove}.sh: Backported other relevant .sh.match fixes, with tests added to the substring and treemove test scripts. - tests/types.sh: One of the (now reverted) memory leak bugfixes introduced a CI test failure in this script, so for that test the error message has been improved. - string/strmatch.c: The original ksh93v- code for the strngrpmatch() changes introduced a crash that could occur because strlen would be used on a null pointer. This has been fixed by avoiding strlen if the string is null. One nice side effect of these changes is a considerable performance improvement in the shbench[1] gsub benchmark (results from 20 iterations with CCFLAGS=-Os): -------------------------------------------------- name /tmp/ksh-current /tmp/ksh-matchfixes -------------------------------------------------- gsub.ksh 0.883 [0.822-0.959] 0.457 [0.442-0.505] -------------------------------------------------- Despite all of the many fixes and improvements in the backported 93v- .sh.match code, there are a few remaining bugs: - .sh.match is printed with a default [0] subscript (see also https://github.com/ksh93/ksh/issues/308#issuecomment-1025016088): $ arch/*/bin/ksh -c 'echo ${!.sh.match}' .sh.match[0] This bug appears to have been introduced by the changes from ksh93v- 2012-08-24. - The wrong variable name is given for 'parameter not set' errors (from https://marc.info/?l=ast-developers&m=134489094602596): $ arch/*/bin/ksh -u $ x=1234 $ true "${x//~(X)([012])|([345])/}" $ compound co $ typeset -m co.array=.sh.match $ printf "%q\n" "${co.array[2][0]}" arch/linux.i386-64/bin/ksh: co.array[2][(null)]: parameter not set - .sh.match leaks out of subshells. 
Further information and a reproducer can be found here: https://marc.info/?l=ast-developers&m=136292897330187 [1]: https://github.com/ksh-community/shbench --- .github/workflows/ci.yml | 2 +- NEWS | 3 + src/cmd/ksh93/README | 2 - src/cmd/ksh93/SHOPT.sh | 1 - src/cmd/ksh93/bltins/test.c | 6 +- src/cmd/ksh93/include/defs.h | 4 +- src/cmd/ksh93/include/shell.h | 1 + src/cmd/ksh93/sh.1 | 7 + src/cmd/ksh93/sh/array.c | 6 +- src/cmd/ksh93/sh/fault.c | 1 + src/cmd/ksh93/sh/init.c | 191 ++++-- src/cmd/ksh93/sh/macro.c | 79 ++- src/cmd/ksh93/sh/name.c | 26 +- src/cmd/ksh93/sh/nvtree.c | 5 +- src/cmd/ksh93/sh/xec.c | 2 + src/cmd/ksh93/tests/sh_match.sh | 1076 ++++++++++++++++++++++++++++++ src/cmd/ksh93/tests/substring.sh | 21 +- src/cmd/ksh93/tests/treemove.sh | 19 +- src/cmd/ksh93/tests/types.sh | 3 +- src/lib/libast/features/api | 4 +- src/lib/libast/features/map.c | 4 +- src/lib/libast/include/ast.h | 15 +- src/lib/libast/man/astsa.3 | 4 +- src/lib/libast/string/strmatch.c | 16 +- 24 files changed, 1355 insertions(+), 143 deletions(-) create mode 100755 src/cmd/ksh93/tests/sh_match.sh diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 303a4ad3f..aebe8e859 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -25,7 +25,7 @@ jobs: LANG=ja_JP.SJIS script -q -e -c "bin/shtests --locale --nocompile" && : disable most SHOPTs, rebuild ksh && sed --regexp-extended --in-place=.orig \ - '/^SHOPT (2DMATCH|AUDIT|BGX|BRACEPAT|DEVFD|DYNAMIC|EDPREDICT|ESH|FIXEDARRAY|HISTEXPAND|MULTIBYTE|NAMESPACE|OPTIMIZE|SPAWN|STATS|SUID_EXEC|VSH)=/ s/=1?/=0/' \ + '/^SHOPT (AUDIT|BGX|BRACEPAT|DEVFD|DYNAMIC|EDPREDICT|ESH|FIXEDARRAY|HISTEXPAND|MULTIBYTE|NAMESPACE|OPTIMIZE|SPAWN|STATS|SUID_EXEC|VSH)=/ s/=1?/=0/' \ src/cmd/ksh93/SHOPT.sh && bin/package make && : default regression tests with SHOPTs disabled && diff --git a/NEWS b/NEWS index d219b2300..2f1ec7b5b 100644 --- a/NEWS +++ b/NEWS @@ -27,6 +27,9 @@ Any uppercase BUG_* names are modernish shell bug IDs. 
associative array when read back in by the shell. Elements that are sparse indexed arrays are now prefixed with "typeset -a". +- The rewritten .sh.match code from ksh93v- has been backported to ksh93u+m, + fixing many bugs and improving performance by a considerable amount. + 2022-02-05: - Fixed: for indexed arrays, given an unset array member a[i] with i > 0, diff --git a/src/cmd/ksh93/README b/src/cmd/ksh93/README index 44ff60aa2..1d4dcf003 100644 --- a/src/cmd/ksh93/README +++ b/src/cmd/ksh93/README @@ -20,8 +20,6 @@ options where no feature probe is available, probe is the same as off. The options have the following defaults and meanings: - 2DMATCH on Two-dimensional ${.sh.match} for ${var//pat/str}. - ACCT off Shell accounting. Noted by "L" in the version string when enabled. See README-AUDIT.md. diff --git a/src/cmd/ksh93/SHOPT.sh b/src/cmd/ksh93/SHOPT.sh index b842d3459..68e35c654 100644 --- a/src/cmd/ksh93/SHOPT.sh +++ b/src/cmd/ksh93/SHOPT.sh @@ -5,7 +5,6 @@ # For a more complete description of the options, see src/cmd/ksh93/README. 
# -SHOPT 2DMATCH=1 # two dimensional ${.sh.match} for ${var//pat/str} SHOPT ACCT=0 # accounting SHOPT ACCTFILE=0 # per-user accounting info SHOPT AUDIT=1 # enable auditing per SHOPT_AUDITFILE diff --git a/src/cmd/ksh93/bltins/test.c b/src/cmd/ksh93/bltins/test.c index e1d1c9895..db465ba12 100644 --- a/src/cmd/ksh93/bltins/test.c +++ b/src/cmd/ksh93/bltins/test.c @@ -83,7 +83,7 @@ static int e3(struct test*); static int test_strmatch(const char *str, const char *pat) { - regoff_t match[2*(MATCH_MAX+1)],n; + int match[2*(MATCH_MAX+1)],n; register int c, m=0; register const char *cp=pat; while(c = *cp++) @@ -99,9 +99,9 @@ static int test_strmatch(const char *str, const char *pat) match[0] = 0; if(m > elementsof(match)/2) m = elementsof(match)/2; - n = strgrpmatch(str, pat, match, m, STR_GROUP|STR_MAXIMAL|STR_LEFT|STR_RIGHT); + n = strgrpmatch(str, pat, (ssize_t*)match, m, STR_GROUP|STR_MAXIMAL|STR_LEFT|STR_RIGHT|STR_INT); if(m==0 && n==1) - match[1] = strlen(str); + match[1] = (int)strlen(str); if(n) sh_setmatch(str, -1, n, match, 0); return(n); diff --git a/src/cmd/ksh93/include/defs.h b/src/cmd/ksh93/include/defs.h index 1a577ad11..fb19ca009 100644 --- a/src/cmd/ksh93/include/defs.h +++ b/src/cmd/ksh93/include/defs.h @@ -29,8 +29,8 @@ #define defs_h_defined #include -#if !defined(AST_VERSION) || AST_VERSION < 20220201 -#error libast version 20220201 or later is required +#if !defined(AST_VERSION) || AST_VERSION < 20220208 +#error libast version 20220208 or later is required #endif #if !_lib_fork #error In 2021, ksh joined the 21st century and started requiring fork(2). 
diff --git a/src/cmd/ksh93/include/shell.h b/src/cmd/ksh93/include/shell.h index 3de9a7209..fde61894d 100644 --- a/src/cmd/ksh93/include/shell.h +++ b/src/cmd/ksh93/include/shell.h @@ -327,6 +327,7 @@ struct Shell_s char instance; /* in set_instance */ char decomma; /* decimal_point=',' */ char redir0; /* redirect of 0 */ + char intrace; /* set when trace expands PS4 */ char *readscript; /* set before reading a script */ int subdup; /* bitmask for dups of 1 */ int *inpipe; /* input pipe pointer */ diff --git a/src/cmd/ksh93/sh.1 b/src/cmd/ksh93/sh.1 index 8c812a1fc..93ef1ad46 100644 --- a/src/cmd/ksh93/sh.1 +++ b/src/cmd/ksh93/sh.1 @@ -1695,6 +1695,13 @@ element stores the complete match and the element stores the .IR i\^ -th submatch. +For +.B // +the array is two dimensional with the first subscript indicating the +most recent match and subpattern match and the second script indicating +which match with +.B 0 +representing the first match. The .B .sh.match variable diff --git a/src/cmd/ksh93/sh/array.c b/src/cmd/ksh93/sh/array.c index 4281c1eba..97f4d8e64 100644 --- a/src/cmd/ksh93/sh/array.c +++ b/src/cmd/ksh93/sh/array.c @@ -1176,7 +1176,7 @@ Namval_t *nv_putsub(Namval_t *np,register char *sp,register long mode) if(!ap || !ap->header.fun) #endif /* SHOPT_FIXEDARRAY */ { - if(sp) + if(sp && sp!=Empty) { if(ap && ap->xp && !strmatch(sp,"+([0-9])")) { @@ -1185,7 +1185,11 @@ Namval_t *nv_putsub(Namval_t *np,register char *sp,register long mode) size = nv_getnum(mp); } else + { + Dt_t *root = sh.last_root; size = (int)sh_arith((char*)sp); + sh.last_root = root; + } } if(size <0 && ap) size += array_maxindex(np); diff --git a/src/cmd/ksh93/sh/fault.c b/src/cmd/ksh93/sh/fault.c index 5a0087971..56f7ede7b 100644 --- a/src/cmd/ksh93/sh/fault.c +++ b/src/cmd/ksh93/sh/fault.c @@ -585,6 +585,7 @@ void sh_exit(register int xno) if(!pp) sh_done(sig); sh.arithrecursion = 0; + sh.intrace = 0; sh.prefix = 0; #if SHOPT_TYPEDEF sh.mktype = 0; diff --git a/src/cmd/ksh93/sh/init.c 
b/src/cmd/ksh93/sh/init.c index 560c69159..3622bbd28 100644 --- a/src/cmd/ksh93/sh/init.c +++ b/src/cmd/ksh93/sh/init.c @@ -176,10 +176,13 @@ struct match const char *v; char *val; char *rval[2]; - regoff_t *match; - char node[NV_MINSZ+sizeof(char*)+sizeof(Dtlink_t)]; - regoff_t first; + int *match; + char *nodes; + char *names; + int first; int vsize; + int vlen; + int msize; int nmatch; int index; int lastsub[2]; @@ -775,76 +778,118 @@ static void put_lastarg(Namval_t* np,const char *val,int flags,Namfun_t *fp) np->nvenv = 0; } +static void match2d(struct match *mp) +{ + Namval_t *np; + int i; + Namarr_t *ap; + nv_disc(SH_MATCHNOD, &mp->hdr, NV_POP); + np = nv_namptr(mp->nodes, 0); + for(i=0; i < mp->nmatch; i++) + { + np->nvname = mp->names + 3 * i; + if(i > 9) + { + *np->nvname = '0' + i / 10; + np->nvname[1] = '0' + (i % 10); + } + else + *np->nvname = '0' + i; + nv_putsub(np, (char*)0, 1); + nv_putsub(np, (char*)0, 0); + nv_putsub(SH_MATCHNOD, (char*)0, i); + nv_arraychild(SH_MATCHNOD, np, 0); + np = nv_namptr(np + 1, 0); + } + if(ap = nv_arrayptr(SH_MATCHNOD)) + ap->nelem = mp->nmatch; +} + /* * store the most recent value for use in .sh.match * treat .sh.match as a two dimensional array */ -void sh_setmatch(const char *v, int vsize, int nmatch, regoff_t match[],int index) +void sh_setmatch(const char *v, int vsize, int nmatch, int match[], int index) { - struct match *mp = &((Init_t*)sh.init_context)->SH_MATCH_init; - Namval_t *np = (Namval_t*)(&(mp->node[0])); - register int i,n,x; - unsigned int savesub = sh.subshell; + Init_t *ip = sh.init_context; + struct match *mp = &ip->SH_MATCH_init; + register int i,n,x, savesub=sh.subshell; Namarr_t *ap = nv_arrayptr(SH_MATCHNOD); - Namarr_t *ap_save = ap; - /* do not crash if .sh.match is unset */ - if(!ap) + Namval_t *np; + if(sh.intrace) return; sh.subshell = 0; -#if !SHOPT_2DMATCH - index = 0; -#else - if(index==0) -#endif /* !SHOPT_2DMATCH */ + if(index<0) { - if(ap->hdr.next != &mp->hdr) + np = 
nv_namptr(mp->nodes,0); + if(mp->index==0) + match2d(mp); + for(i=0; i < mp->nmatch; i++) { - free((void*)ap); - ap = nv_arrayptr(np); - SH_MATCHNOD->nvfun = &ap->hdr; - } - if(ap) - { - ap->nelem &= ~ARRAY_SCAN; - i = array_elem(ap); - ap->nelem++; - while(--i>= 0) + nv_disc(np,&mp->hdr,NV_LAST); + nv_putsub(np,(char*)0,mp->index); + for(x=mp->index; x >=0; x--) { - nv_putsub(SH_MATCHNOD, (char*)0,i); + n = i + x*mp->nmatch; + if(mp->match[2*n+1]>mp->match[2*n]) + nv_putsub(np,Empty,ARRAY_ADD|x); + } + if((ap=nv_arrayptr(np)) && array_elem(ap)==0) + { + nv_putsub(SH_MATCHNOD,(char*)0,i); _nv_unset(SH_MATCHNOD,NV_RDONLY); } - ap->nelem--; + np = nv_namptr(np+1,0); + } + sh.subshell = savesub; + return; + } + mp->index = index; + if(index==0) + { + if(mp->nodes) + { + np = nv_namptr(mp->nodes,0); + for(i=0; i < mp->nmatch; i++) + { + if(np->nvfun && np->nvfun != &mp->hdr) + { + free((void*)np->nvfun); + np->nvfun = 0; + } + np = nv_namptr(np+1,0); + } + free((void*)mp->nodes); + mp->nodes = 0; + } + mp->vlen = 0; + if(ap && ap->hdr.next != &mp->hdr) + free((void*)ap); + SH_MATCHNOD->nvalue.cp = 0; + SH_MATCHNOD->nvfun = 0; + if(!(mp->nmatch=nmatch) && !v) + { + sh.subshell = savesub; + return; + } + mp->nodes = sh_calloc(mp->nmatch*(NV_MINSZ+sizeof(void*)+3),1); + mp->names = mp->nodes + mp->nmatch*(NV_MINSZ+sizeof(void*)); + np = nv_namptr(mp->nodes,0); + nv_disc(SH_MATCHNOD,&mp->hdr,NV_LAST); + for(i=nmatch; --i>=0;) + { + if(match[2*i]>=0) + nv_putsub(SH_MATCHNOD,Empty,ARRAY_ADD|i); } - if(!nv_hasdisc(SH_MATCHNOD,mp->hdr.disc)) - nv_disc(SH_MATCHNOD,&mp->hdr,NV_LAST); - if(nmatch) - nv_putsub(SH_MATCHNOD, NIL(char*), (nmatch-1)|ARRAY_FILL|ARRAY_SETSUB); - ap_save->nelem = mp->nmatch = nmatch; mp->v = v; mp->first = match[0]; } -#if SHOPT_2DMATCH else { if(index==1) - { - np->nvalue.cp = Empty; - np->nvfun = SH_MATCHNOD->nvfun; - nv_onattr(np,NV_NOFREE|NV_ARRAY); - SH_MATCHNOD->nvfun = 0; - for(i=0; i < mp->nmatch; i++) - { - nv_putsub(SH_MATCHNOD, (char*)0, i); - 
nv_arraychild(SH_MATCHNOD, np,0); - } - ap_save->nelem = mp->nmatch; - } - ap = nv_arrayptr(np); - nv_putsub(np, NIL(char*), index|ARRAY_FILL|ARRAY_SETSUB); + match2d(mp); } -#endif /* SHOPT_2DMATCH */ sh.subshell = savesub; - index *= 2*mp->nmatch; if(mp->nmatch) { for(n=mp->first+(mp->v-v),vsize=0,i=0; i < 2*nmatch; i++) @@ -852,33 +897,39 @@ void sh_setmatch(const char *v, int vsize, int nmatch, regoff_t match[],int inde if(match[i]>=0 && (match[i] - n) > vsize) vsize = match[i] -n; } + index *= 2*mp->nmatch; i = (index+2*mp->nmatch)*sizeof(match[0]); - if((i+vsize) >= mp->vsize) + if(i >= mp->msize) + { + if(mp->msize) + mp->match = (int*)sh_realloc(mp->match,2*i); + else + mp->match = (int*)sh_malloc(2*i); + mp->msize = 2*i; + } + if(vsize >= mp->vsize) { if(mp->vsize) - mp->match = (int*)sh_realloc(mp->match,i+vsize+1); + mp->val = (char*)sh_realloc(mp->val,x=2*vsize); else - mp->match = (int*)sh_malloc(i+vsize+1); - mp->vsize = i+vsize+1; + mp->val = (char*)sh_malloc(x=vsize+1); + mp->vsize = x; } - mp->val = ((char*)mp->match)+i; memcpy(mp->match+index,match,nmatch*2*sizeof(match[0])); - for(x=0,i=0; i < 2*nmatch; i++) + for(i=0; i < 2*nmatch; i++) { if(match[i]>=0) mp->match[index+i] -= n; - else - x=1; - } - ap_save->nelem -= x; while(i < 2*mp->nmatch) mp->match[index+i++] = -1; - memcpy(mp->val,v+n,vsize); - mp->val[vsize] = 0; + if(index==0) + v+= mp->first; + memcpy(mp->val+mp->vlen,v,vsize-mp->vlen); + mp->val[mp->vlen=vsize] = 0; mp->lastsub[0] = mp->lastsub[1] = -1; } -} +} static char* get_match(register Namval_t* np, Namfun_t *fp) { @@ -886,6 +937,8 @@ static char* get_match(register Namval_t* np, Namfun_t *fp) int sub,sub2=0,n,i =!mp->index; char *val; sub = nv_aindex(SH_MATCHNOD); + if(sub<0) + sub = 0; if(np!=SH_MATCHNOD) sub2 = nv_aindex(np); if(sub>=mp->nmatch) @@ -915,7 +968,15 @@ static char* get_match(register Namval_t* np, Namfun_t *fp) return(mp->rval[i]); } -static const Namdisc_t SH_MATCH_disc = { sizeof(struct match), 0, get_match }; 
+static char *name_match(Namval_t *np, Namfun_t *fp) +{ + int sub = nv_aindex(SH_MATCHNOD); + sfprintf(sh.strbuf,".sh.match[%d]",sub); + return(sfstruse(sh.strbuf)); +} + +static const Namdisc_t SH_MATCH_disc = { sizeof(struct match), 0, get_match, + 0,0,0,0,name_match }; static char* get_version(register Namval_t* np, Namfun_t *fp) { diff --git a/src/cmd/ksh93/sh/macro.c b/src/cmd/ksh93/sh/macro.c index 72e1ad6dc..b27794d9b 100644 --- a/src/cmd/ksh93/sh/macro.c +++ b/src/cmd/ksh93/sh/macro.c @@ -46,11 +46,6 @@ #include "national.h" #include "streval.h" -#undef STR_GROUP -#ifndef STR_GROUP -# define STR_GROUP 0 -#endif - #if _WINIX static int Skip; #endif /* _WINIX */ @@ -74,6 +69,7 @@ typedef struct _mac_ char arith; /* set for ((...)) */ char arrayok; /* $x[] ok for arrays */ char subcopy; /* set when copying subscript */ + char macsub; /* set to 1 when running mac_substitute */ int dotdot; /* set for .. in subscript */ void *nvwalk; /* for name space walking */ } Mac_t; @@ -96,7 +92,7 @@ typedef struct _mac_ #define M_TYPE 8 /* ${@var} */ static noreturn void mac_error(Namval_t*); -static int substring(const char*, const char*, int[], int); +static int substring(const char*, size_t, const char*, int[], int); static void copyto(Mac_t*, int, int); static void comsubst(Mac_t*, Shnode_t*, int); static int varsub(Mac_t*); @@ -862,7 +858,22 @@ done: static void mac_substitute(Mac_t *mp, register char *cp,char *str,register int subexp[],int subsize) { register int c,n; - register char *first=cp; + register char *first=fcseek(0); + char *ptr; + Mac_t savemac; + n = stktell(sh.stk); + savemac = *mp; + mp->pattern = 3; + mp->split = 0; + mp->macsub++; + fcsopen(cp); + copyto(mp,0,0); + sfputc(sh.stk,0); + ptr = cp = sh_strdup(stkptr(sh.stk,n)); + stkseek(sh.stk,n); + *mp = savemac; + fcsopen(first); + first = cp; while(1) { while((c= *cp++) && c!=ESCAPE); @@ -890,6 +901,7 @@ static void mac_substitute(Mac_t *mp, register char *cp,char *str,register int s } 
if(n=cp-first-1) mac_copy(mp,first,n); + free(ptr); } #if SHOPT_FILESCAN @@ -1362,7 +1374,7 @@ retry1: ap = nv_arrayptr(np=nq); if(ap) { - nv_putsub(np,v,ARRAY_SCAN); + np = nv_putsub(np,v,ARRAY_SCAN); v = stkptr(stkp,mp->dotdot); dolmax =1; if(array_assoc(ap)) @@ -1796,22 +1808,7 @@ retry1: } pattern = sh_strdup(argp); if((type=='/' || c=='/') && (repstr = mac_getstring(pattern))) - { - Mac_t savemac; - char *first = fcseek(0); - int n = stktell(stkp); - savemac = *mp; - fcsopen(repstr); - mp->pattern = 3; - mp->split = 0; - copyto(mp,0,0); - sfputc(stkp,0); - repstr = sh_strdup(stkptr(stkp,n)); replen = strlen(repstr); - stkseek(stkp,n); - *mp = savemac; - fcsopen(first); - } if(v || c=='/' && offset>=0) stkseek(stkp,offset); } @@ -1822,29 +1819,30 @@ retry2: if(v && (!nulflg || *v ) && c!='+') { int ofs_size = 0; - regoff_t match[2*(MATCH_MAX+1)]; - int nmatch, nmatch_prev, vsize_last; - char *vlast = NIL(char*); + int match[2*(MATCH_MAX+1)],index; + int nmatch, nmatch_prev, vsize_last, tsize; + char *vlast = NIL(char*), *oldv; while(1) { if(!v) v= ""; if(c=='/' || c=='#' || c== '%') { - int index = 0; flag = (type || c=='/')?(STR_GROUP|STR_MAXIMAL):STR_GROUP; if(c!='/') flag |= STR_LEFT; - nmatch = 0; + index = nmatch = 0; + tsize = (int)strlen(v); while(1) { - vsize = strlen(v); + vsize = tsize; + oldv = v; nmatch_prev = nmatch; if(c=='%') - nmatch=substring(v,pattern,match,flag&STR_MAXIMAL); + nmatch=substring(v,tsize,pattern,match,flag&STR_MAXIMAL); else - nmatch=strgrpmatch(v,pattern,match,elementsof(match)/2,flag); - if(nmatch && replen>0) + nmatch=strngrpmatch(v,vsize,pattern,(ssize_t*)match,elementsof(match)/2,flag|STR_INT); + if(nmatch && repstr && !mp->macsub) sh_setmatch(v,vsize,nmatch,match,index++); if(nmatch) { @@ -1871,13 +1869,16 @@ retry2: mac_copy(mp,v,1); v++; } + tsize -= v-oldv; continue; } vsize = -1; break; } - if(replen==0) + if(!mp->macsub && (!repstr || (nmatch==0 && index==0))) sh_setmatch(vlast,vsize_last,nmatch,match,index++); + 
if(!mp->macsub && index>0 && c=='/' && type) + sh_setmatch(0,0,nmatch,0,-1); } if(vsize) mac_copy(mp,v,vsize>0?vsize:strlen(v)); @@ -2044,8 +2045,6 @@ retry2: nv_close(np); if(pattern) free(pattern); - if(repstr) - free(repstr); if(idx) free(idx); return(1); @@ -2534,27 +2533,27 @@ static void endfield(register Mac_t *mp,int split) * Finds the right substring of STRING using the expression PAT * the longest substring is found when FLAG is set. */ -static int substring(register const char *string,const char *pat,int match[], int flag) +static int substring(register const char *string,size_t len,const char *pat,int match[], int flag) { register const char *sp=string; - register int size,len,nmatch,n; + register int size,nmatch,n; int smatch[2*(MATCH_MAX+1)]; if(flag) { - if(n=strgrpmatch(sp,pat,smatch,elementsof(smatch)/2,STR_RIGHT|STR_MAXIMAL)) + if(n=strngrpmatch(sp,len,pat,(ssize_t*)smatch,elementsof(smatch)/2,STR_RIGHT|STR_MAXIMAL|STR_INT)) { memcpy(match,smatch,n*2*sizeof(smatch[0])); return(n); } return(0); } - size = len = strlen(sp); + size = (int)len; sp += size; while(sp>=string) { if(mbwide()) sp = lastchar(string,sp); - if(n=strgrpmatch(sp,pat,smatch,elementsof(smatch)/2,STR_RIGHT|STR_LEFT|STR_MAXIMAL)) + if(n=strgrpmatch(sp,pat,(ssize_t*)smatch,elementsof(smatch)/2,STR_RIGHT|STR_LEFT|STR_MAXIMAL|STR_INT)) { nmatch = n; memcpy(match,smatch,n*2*sizeof(smatch[0])); diff --git a/src/cmd/ksh93/sh/name.c b/src/cmd/ksh93/sh/name.c index 30c300e31..3035a7432 100644 --- a/src/cmd/ksh93/sh/name.c +++ b/src/cmd/ksh93/sh/name.c @@ -3355,7 +3355,28 @@ int nv_rename(register Namval_t *np, int flags) } else mp = np; - nv_clone(nr,mp,(flags&NV_MOVE)|NV_COMVAR); + if(nr==SH_MATCHNOD) + { + Sfio_t *iop; + Dt_t *save_root = sh.var_tree; + int trace = sh_isoption(SH_XTRACE); + sfprintf(sh.strbuf,"typeset -a %s=",nv_name(mp)); + nv_outnode(nr,sh.strbuf,-1,0); + sfwrite(sh.strbuf,")\n",2); + cp = sfstruse(sh.strbuf); + iop = sfopen((Sfio_t*)0,cp,"s"); + if(trace) + 
sh_offoption(SH_XTRACE); + sh.var_tree = last_root; + sh_eval(iop,SH_READEVAL); + sh.var_tree = save_root; + if(trace) + sh_onoption(SH_XTRACE); + if(flags&NV_MOVE) + sh_setmatch(0,0,0,0,0); + } + else + nv_clone(nr,mp,(flags&NV_MOVE)|NV_COMVAR); mp->nvenv = nvenv; if(flags&NV_MOVE) { @@ -3632,6 +3653,8 @@ char *nv_name(register Namval_t *np) #endif /* SHOPT_NAMESPACE */ return(np->nvname); } + if(!np->nvname) + goto skip; #if SHOPT_FIXEDARRAY ap = nv_arrayptr(np); #endif /* SHOPT_FIXEDARRAY */ @@ -3651,6 +3674,7 @@ char *nv_name(register Namval_t *np) sh.last_table = nv_parent(np); else if(!nv_isref(np)) { + skip: for(fp= np->nvfun ; fp; fp=fp->next) if(fp->disc && fp->disc->namef) { diff --git a/src/cmd/ksh93/sh/nvtree.c b/src/cmd/ksh93/sh/nvtree.c index befd8fd74..449f92c9f 100644 --- a/src/cmd/ksh93/sh/nvtree.c +++ b/src/cmd/ksh93/sh/nvtree.c @@ -670,11 +670,12 @@ void nv_outnode(Namval_t *np, Sfio_t* out, int indent, int special) static void outval(char *name, const char *vname, struct Walk *wp) { register Namval_t *np, *nq=0, *last_table=sh.last_table; - register Namfun_t *fp; + register Namfun_t *fp; int isarray=0, special=0,mode=0; + Dt_t *root = wp->root?wp->root:sh.var_base; if(*name!='.' 
|| vname[strlen(vname)-1]==']') mode = NV_ARRAY; - if(!(np=nv_open(vname,wp->root,mode|NV_VARNAME|NV_NOADD|NV_NOASSIGN|NV_NOFAIL|wp->noscope))) + if(!(np=nv_open(vname,root,mode|NV_VARNAME|NV_NOADD|NV_NOASSIGN|NV_NOFAIL|wp->noscope))) { sh.last_table = last_table; return; diff --git a/src/cmd/ksh93/sh/xec.c b/src/cmd/ksh93/sh/xec.c index aeb7c3cf1..287e5da61 100644 --- a/src/cmd/ksh93/sh/xec.c +++ b/src/cmd/ksh93/sh/xec.c @@ -2816,9 +2816,11 @@ int sh_trace(register char *argv[], register int nl) cp = "+ "; else { + sh.intrace = 1; sh_offoption(SH_XTRACE); cp = sh_mactry(cp); sh_onoption(SH_XTRACE); + sh.intrace = 0; } if(*cp) sfputr(sfstderr,cp,-1); diff --git a/src/cmd/ksh93/tests/sh_match.sh b/src/cmd/ksh93/tests/sh_match.sh new file mode 100755 index 000000000..ced31ea34 --- /dev/null +++ b/src/cmd/ksh93/tests/sh_match.sh @@ -0,0 +1,1076 @@ +######################################################################## +# # +# This software is part of the ast package # +# Copyright (c) 1982-2012 AT&T Intellectual Property # +# Copyright (c) 2012 Roland Mainz # +# Copyright (c) 2020-2022 Contributors to ksh 93u+m # +# and is licensed under the # +# Eclipse Public License, Version 1.0 # +# by AT&T Intellectual Property # +# # +# A copy of the License is available at # +# http://www.eclipse.org/org/documents/epl-v10.html # +# (with md5 checksum b35adb5213ca9657e911e9befb180842) # +# # +# Information and Software Systems Research # +# AT&T Research # +# Florham Park NJ # +# # +# David Korn # +# Roland Mainz # +# # +######################################################################## + +# +# This test module tests the .sh.match pattern matching facility +# + +. "${SHTESTS_COMMON:-${0%/*}/_common}" + +# ===== +# Start with basic character class matching tests backported from ksh2020. This +# is primarily to verify that the underlying AST regex code is working as +# expected before moving on to more complex tests. 
+[[ 1 =~ [[:digit:]] ]] || err_exit 'pattern [[:digit:]] broken' +[[ x =~ [[:digit:]] ]] && err_exit 'pattern [[:digit:]] broken' +[[ 5 =~ [[:alpha:]] ]] && err_exit 'pattern [[:alpha:]] broken' +[[ z =~ [[:alpha:]] ]] || err_exit 'pattern [[:alpha:]] broken' +[[ 3 =~ [[:alnum:]] ]] || err_exit 'pattern [[:alnum:]] broken' +[[ y =~ [[:alnum:]] ]] || err_exit 'pattern [[:alnum:]] broken' +[[ / =~ [[:alnum:]] ]] && err_exit 'pattern [[:alnum:]] broken' +[[ 3 =~ [[:lower:]] ]] && err_exit 'pattern [[:lower:]] broken' +[[ y =~ [[:lower:]] ]] || err_exit 'pattern [[:lower:]] broken' +[[ B =~ [[:lower:]] ]] && err_exit 'pattern [[:lower:]] broken' +[[ 3 =~ [[:upper:]] ]] && err_exit 'pattern [[:upper:]] broken' +[[ y =~ [[:upper:]] ]] && err_exit 'pattern [[:upper:]] broken' +[[ B =~ [[:upper:]] ]] || err_exit 'pattern [[:upper:]] broken' +[[ 7 =~ [[:word:]] ]] || err_exit 'pattern [[:word:]] broken' +[[ x =~ [[:word:]] ]] || err_exit 'pattern [[:word:]] broken' +[[ _ =~ [[:word:]] ]] || err_exit 'pattern [[:word:]] broken' +[[ + =~ [[:word:]] ]] && err_exit 'pattern [[:word:]] broken' +[[ . =~ [[:space:]] ]] && err_exit 'pattern [[:space:]] broken' +[[ X =~ [[:space:]] ]] && err_exit 'pattern [[:space:]] broken' +[[ ' ' =~ [[:space:]] ]] || err_exit 'pattern [[:space:]] broken' +[[ $'\t' =~ [[:space:]] ]] || err_exit 'pattern [[:space:]] broken' +[[ $'\v' =~ [[:space:]] ]] || err_exit 'pattern [[:space:]] broken' +[[ $'\f' =~ [[:space:]] ]] || err_exit 'pattern [[:space:]] broken' +[[ $'\n' =~ [[:space:]] ]] || err_exit 'pattern [[:space:]] broken' +[[ . 
=~ [[:blank:]] ]] && err_exit 'pattern [[:blank:]] broken' +[[ X =~ [[:blank:]] ]] && err_exit 'pattern [[:blank:]] broken' +[[ ' ' =~ [[:blank:]] ]] || err_exit 'pattern [[:blank:]] broken' +[[ $'\t' =~ [[:blank:]] ]] || err_exit 'pattern [[:blank:]] broken' +[[ $'\v' =~ [[:blank:]] ]] && err_exit 'pattern [[:blank:]] broken' +[[ ' ' =~ [[:space:]] ]] || err_exit 'pattern [[:space:]] broken' +[[ $'\t' =~ [[:space:]] ]] || err_exit 'pattern [[:space:]] broken' +[[ $'\v' =~ [[:space:]] ]] || err_exit 'pattern [[:space:]] broken' +[[ $'\f' =~ [[:space:]] ]] || err_exit 'pattern [[:space:]] broken' +[[ $'\n' =~ [[:space:]] ]] || err_exit 'pattern [[:space:]] broken' +[[ . =~ [[:blank:]] ]] && err_exit 'pattern [[:blank:]] broken' +[[ X =~ [[:blank:]] ]] && err_exit 'pattern [[:blank:]] broken' +[[ ' ' =~ [[:blank:]] ]] || err_exit 'pattern [[:blank:]] broken' +[[ $'\t' =~ [[:blank:]] ]] || err_exit 'pattern [[:blank:]] broken' +[[ $'\v' =~ [[:blank:]] ]] && err_exit 'pattern [[:blank:]] broken' +[[ $'\f' =~ [[:blank:]] ]] && err_exit 'pattern [[:blank:]] broken' +[[ $'\n' =~ [[:blank:]] ]] && err_exit 'pattern [[:blank:]] broken' +[[ Z =~ [[:print:]] ]] || err_exit 'pattern [[:print:]] broken' +[[ ' ' =~ [[:print:]] ]] || err_exit 'pattern [[:print:]] broken' +[[ $'\cg' =~ [[:print:]] ]] && err_exit 'pattern [[:print:]] broken' +[[ Z =~ [[:cntrl:]] ]] && err_exit 'pattern [[:cntrl:]] broken' +[[ ' ' =~ [[:cntrl:]] ]] && err_exit 'pattern [[:cntrl:]] broken' +[[ $'\cg' =~ [[:cntrl:]] ]] || err_exit 'pattern [[:cntrl:]] broken' +[[ \$ =~ [[:graph:]] ]] || err_exit 'pattern [[:graph:]] broken' +[[ ' ' =~ [[:graph:]] ]] && err_exit 'pattern [[:graph:]] broken' +[[ \$ =~ [[:punct:]] ]] || err_exit 'pattern [[:punct:]] broken' +[[ / =~ [[:punct:]] ]] || err_exit 'pattern [[:punct:]] broken' +[[ ' ' =~ [[:punct:]] ]] && err_exit 'pattern [[:punct:]] broken' +[[ x =~ [[:punct:]] ]] && err_exit 'pattern [[:punct:]] broken' +[[ ' ' =~ [[:xdigit:]] ]] && err_exit 'pattern 
[[:xdigit:]] broken' +[[ x =~ [[:xdigit:]] ]] && err_exit 'pattern [[:xdigit:]] broken' +[[ 0 =~ [[:xdigit:]] ]] || err_exit 'pattern [[:xdigit:]] broken' +[[ 9 =~ [[:xdigit:]] ]] || err_exit 'pattern [[:xdigit:]] broken' +[[ A =~ [[:xdigit:]] ]] || err_exit 'pattern [[:xdigit:]] broken' +[[ a =~ [[:xdigit:]] ]] || err_exit 'pattern [[:xdigit:]] broken' +[[ F =~ [[:xdigit:]] ]] || err_exit 'pattern [[:xdigit:]] broken' +[[ f =~ [[:xdigit:]] ]] || err_exit 'pattern [[:xdigit:]] broken' +[[ G =~ [[:xdigit:]] ]] && err_exit 'pattern [[:xdigit:]] broken' +[[ g =~ [[:xdigit:]] ]] && err_exit 'pattern [[:xdigit:]] broken' + +[[ 3 =~ \w ]] || err_exit 'pattern \w broken' +[[ y =~ \w ]] || err_exit 'pattern \w broken' +[[ / =~ \w ]] && err_exit 'pattern \w broken' +[[ 3 =~ \W ]] && err_exit 'pattern \w broken' +[[ y =~ \W ]] && err_exit 'pattern \w broken' +[[ / =~ \W ]] || err_exit 'pattern \w broken' +[[ . =~ \s ]] && err_exit 'pattern \s broken' +[[ X =~ \s ]] && err_exit 'pattern \s broken' +[[ ' ' =~ \s ]] || err_exit 'pattern \s broken' +[[ $'\t' =~ \s ]] || err_exit 'pattern \s broken' +[[ $'\v' =~ \s ]] || err_exit 'pattern \s broken' +[[ $'\f' =~ \s ]] || err_exit 'pattern \s broken' +[[ $'\n' =~ \s ]] || err_exit 'pattern \s broken' +[[ x =~ \d ]] && err_exit 'pattern \d broken' +[[ 9 =~ \d ]] || err_exit 'pattern \d broken' +[[ x =~ \D ]] || err_exit 'pattern \D broken' +[[ 9 =~ \D ]] && err_exit 'pattern \D broken' +[[ 7 =~ \b ]] || err_exit 'pattern \b broken' +[[ x =~ \b ]] || err_exit 'pattern \b broken' +[[ _ =~ \b ]] || err_exit 'pattern \b broken' +[[ + =~ \b ]] || err_exit 'pattern \b broken' +[[ 'x y ' =~ .\b.\b ]] || err_exit 'pattern \b broken' +[[ ' xy ' =~ .\b.\b ]] && err_exit 'pattern \b broken' +[[ 7 =~ \B ]] && err_exit 'pattern \B broken' +[[ x =~ \B ]] && err_exit 'pattern \B broken' +[[ _ =~ \B ]] && err_exit 'pattern \B broken' +[[ + =~ \B ]] || err_exit 'pattern \B broken' + +# ====== +# Tests backported from ksh93v- +function 
test_xmlfragment1 +{ + typeset -r testscript='test1_script.sh' +cat >"${testscript}" <<-TEST1SCRIPT + # memory safeguards to prevent out-of-control memory consumption + ulimit -M \$(( 1024 * 1024 )) + ulimit -v \$(( 1024 * 1024 )) + ulimit -d \$(( 1024 * 1024 )) + + # input text + xmltext="\$( < "\$1" )" + + print -f "%d characters to process...\\n" "\${#xmltext}" + + # + # parse the XML data + # + typeset dummy + function parse_xmltext + { + typeset xmltext="\$2" + nameref ar="\$1" + + # fixme: + # - We want to enforce standard conformance - does ~(Exp) or ~(Ex-p) does that ? + dummy="\${xmltext//~(Ex-p)(?: + ()+?| # xml comments + (<[:_[:alnum:]-]+ + (?: # attributes + [[:space:]]+ + (?: # four different types of name=value syntax + (?:[:_[:alnum:]-]+=[^\\"\\'[:space:]]+?)| #x='foo=bar huz=123' + (?:[:_[:alnum:]-]+=\\"[^\\"]*?\\")| #x='foo="ba=r o" huz=123' + (?:[:_[:alnum:]-]+=\\'[^\\']*?\\')| #x="foox huz=123" + (?:[:_[:alnum:]-]+) #x="foox huz=123" + ) + )* + [[:space:]]* + \\/? # start tags which are end tags, too (like ) + >)+?| # xml start tags + (<\\/[:_[:alnum:]-]+>)+?| # xml end tags + ([^<]+) # xml text + )/D}" + + # copy ".sh.match" to array "ar" + integer i j + for i in "\${!.sh.match[@]}" ; do + for j in "\${!.sh.match[i][@]}" ; do + [[ -v .sh.match[i][j] ]] && ar[i][j]="\${.sh.match[i][j]}" + done + done + + return 0 + } + + function rebuild_xml_and_verify + { + nameref ar="\$1" + typeset xtext="\$2" # xml text + + # + # rebuild the original text from "ar" (copy of ".sh.match") + # and compare it to the content of "xtext" + # + tmpfile=rebuild_xml_and_verify.\$\$ + + { + # rebuild the original text, based on our matches + nameref nodes_all=ar[0] # contains all matches + nameref nodes_comments=ar[1] # contains only XML comment matches + nameref nodes_start_tags=ar[2] # contains only XML start tag matches + nameref nodes_end_tags=ar[3] # contains only XML end tag matches + nameref nodes_text=ar[4] # contains only XML text matches + integer i + for (( 
i = 0 ; i < \${#nodes_all[@]} ; i++ )) ; do + [[ -v nodes_comments[i] ]] && printf '%s' "\${nodes_comments[i]}" + [[ -v nodes_start_tags[i] ]] && printf '%s' "\${nodes_start_tags[i]}" + [[ -v nodes_end_tags[i] ]] && printf '%s' "\${nodes_end_tags[i]}" + [[ -v nodes_text[i] ]] && printf '%s' "\${nodes_text[i]}" + done + printf '\\n' + } >"\${tmpfile}" + + diff -u <( printf '%s\\n' "\${xtext}") "\${tmpfile}" + if cmp <( printf '%s\\n' "\${xtext}") "\${tmpfile}" ; then + printf "#input and output OK (%d characters).\\n" "\$(wc -m <"\${tmpfile}")" + else + printf "#difference between input and output found.\\n" + fi + + rm -f "\${tmpfile}" + return 0 + } + + # main + set -o nounset + + typeset -a xar + parse_xmltext xar "\$xmltext" + rebuild_xml_and_verify xar "\$xmltext" +TEST1SCRIPT + +cat >'testfile1.xml' <<-EOF + + + &dhtitle; + &dhpackage; + &dhrelease; + &dhdate; + + + XXXX + YYYYYYYYYYYY + Wrote this example manpage for the "SunOS Man Page Howto", available at or . +
+ mailmail@YYYYYYYYYYYY.xxx +
+
+ + &dhfirstname; + &dhsurname; + Rewrote and extended the example manpage in DocBook XML for the Zebras distribution. +
+ &dhemail; +
+
+
+ + 1995 + 1996 + 1997 + 1998 + 1999 + 2000 + 2001 + 2002 + 2003 + XXXX YYYYYYYYYYYY + + + 2006 + &dhusername; + + + The Howto containing this example, was offered under the following conditions: + Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: + + + Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. + + + Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. + + + THIS SOFTWARE IS PROVIDED BY THE AUTHOR "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +
+ + &dhucpackage; + &dhsection; + + + &dhpackage; + frobnicate the bar library + + + + &dhpackage; + + + + + + + + + + + + + + + this + + + + + + + + this + that + + + file(s) + + + &dhpackage; + + + + + + + + + + + + + + + + + + + DESCRIPTION + &dhpackage; frobnicates the bar library by tweaking internal symbol tables. By default it parses all baz segments and rearranges them in reverse order by time for the xyzzy1 linker to find them. The symdef entry is then compressed using the WBG (Whiz-Bang-Gizmo) algorithm. All files are processed in the order specified. + + + OPTIONS + + + + + + + Do not write busy to stdout while processing. + + + + + + + Use the alternate system wide config-file instead of the /etc/foo.conf. This overrides any FOOCONF environment variable. + + + + + + In addition to the baz segments, also parse the blurfl3 headers. + + + + + + Recursive mode. Operates as fast as lightning at the expense of a megabyte of virtual memory. + + + + + + FILES + + + /etc/foo.conf + + The system-wide configuration file. See foo.conf5 for further details. + + + + \${HOME}/.foo.conf + + The per-user configuration file. See foo.conf5 for further details. + + + + + + ENVIRONMENT + + + FOOCONF + + The full pathname for an alternate system wide configuration file foo.conf5 (see also ). Overridden by the option. + + + + + + DIAGNOSTICS + The following diagnostics may be issued on stderr: + + + Bad magic number. + + The input file does not look like an archive file. + + + + Old style baz segments. + + &dhpackage; can only handle new style baz segments. COBOL object libraries are not supported in this version. + + + + The following return codes can be used in scripts: + + Errorcode + Errortext + Diagnostic + + 0 + Program exited normally. + No error. Program ran successfully. + + + 1 + Bad magic number. + The input file does not look like an archive file. + + + 2 + Old style baz segments. + &dhpackage; can only handle new style baz segments. 
COBOL object libraries are not supported in this version. + + + + + + BUGS + The command name should have been chosen more carefully to reflect its purpose. + The upstreams BTS can be found at . + + + SEE ALSO + + + bar + 1 + , + foo + 1 + , + foo.conf + 5 + , + xyzzy + 1 + + The programs are documented fully by The Rise and Fall of a Fooish Bar available via the Info system. + +
+EOF + +# Note: Standalone '>' is valid XML text +printf "%s" $'

>

a text
More [TEXT].

' >'testfile2.xml' + + compound -r -a tests=( + ( + file='testfile1.xml' + expected_output=$'9764 characters to process...\n#input and output OK (9765 characters).' + ) + ( + file='testfile2.xml' + expected_output=$'201 characters to process...\n#input and output OK (202 characters).' + ) + ) + compound out=( typeset stdout stderr ; integer res ) + integer i + typeset expected_output + typeset testname + + for (( i=0 ; i < ${#tests[@]} ; i++ )) ; do + nameref tst=tests[i] + testname="${0}/${i}/${tst.file}" + expected_output="${tst.expected_output}" + + out.stderr="${ { out.stdout="${ ${SHELL} -o nounset "${testscript}" "${tst.file}" ; (( out.res=$? )) ; }" ; } 2>&1 ; }" + + [[ "${out.stdout}" == "${expected_output}" ]] || err_exit "${testname}: Expected stdout==${ printf '%q\n' "${expected_output}" ;}, got ${ printf '%q\n' "${out.stdout}" ; }" + [[ "${out.stderr}" == '' ]] || err_exit "${testname}: Expected empty stderr, got ${ printf '%q\n' "${out.stderr}" ; }" + (( out.res == 0 )) || err_exit "${testname}: Unexpected exit code ${out.res}" + done + + rm "${testscript}" + rm 'testfile1.xml' + rm 'testfile2.xml' + + return 0 +} + +# test whether the [[ -v .sh.match[x][y] ]] operator works, try1 +function test_testop_v1 +{ + compound out=( typeset stdout stderr ; integer res ) + integer i + typeset testname + typeset expected_output + + compound -r -a tests=( + ( + cmd='s="aaa bbb 333 ccc 555" ; s="${s//~(E)([[:alpha:]]+)|([[:digit:]]+)/NOP}" ; [[ -v .sh.match[2][3] ]] || print "OK"' + expected_output='OK' + ) + ( + cmd='s="aaa bbb 333 ccc 555" ; s="${s//~(E)([[:alpha:]]+)|([[:digit:]]+)/NOP}" ; integer i=2 j=3 ; [[ -v .sh.match[$i][$j] ]] || print "OK"' + expected_output='OK' + ) + ( + cmd='s="aaa bbb 333 ccc 555" ; s="${s//~(E)([[:alpha:]]+)|([[:digit:]]+)/NOP}" ; integer i=2 j=3 ; [[ -v .sh.match[i][j] ]] || print "OK"' + expected_output='OK' + ) + ) + + for (( i=0 ; i < ${#tests[@]} ; i++ )) ; do + nameref tst=tests[i] + testname="${0}/${i}/${tst.cmd}" + 
expected_output="${tst.expected_output}" + + out.stderr="${ { out.stdout="${ ${SHELL} -o nounset -c "${tst.cmd}" ; (( out.res=$? )) ; }" ; } 2>&1 ; }" + + [[ "${out.stdout}" == "${expected_output}" ]] || err_exit "${testname}: Expected stdout==${ printf '%q\n' "${expected_output}" ;}, got ${ printf '%q\n' "${out.stdout}" ; }" + [[ "${out.stderr}" == '' ]] || err_exit "${testname}: Expected empty stderr, got ${ printf '%q\n' "${out.stderr}" ; }" + (( out.res == 0 )) || err_exit "${testname}: Unexpected exit code ${out.res}" + done + + return 0 +} + +# test whether the [[ -v .sh.match[x][y] ]] operator works, try2 +function test_testop_v2 +{ + compound out=( typeset stdout stderr ; integer res ) + integer i + integer j + integer k + typeset testname + typeset cmd + + compound -r -a tests=( + ( + cmd='s="aaa bbb 333 ccc 555" ; s="${s//~(E)([[:alpha:]]+)|([[:digit:]]+)/NOP}"' + integer y=6 + expected_output_1d=$'[0]\n[1]\n[2]' + expected_output_2d=$'[0][0]\n[0][1]\n[0][2]\n[0][3]\n[0][4]\n[1][0]\n[1][1]\n[1][3]\n[2][2]\n[2][4]' + ) + # FIXME: Add more hideous horror tests here + ) + + for (( i=0 ; i < ${#tests[@]} ; i++ )) ; do + nameref tst=tests[i] + + # + # test first dimension, by plain number + # + cmd="${tst.cmd}" + for (( j=0 ; j < tst.y ; j++ )) ; do + cmd+="; $( printf "[[ -v .sh.match[%d] ]] && print '[%d]'\n" j j )" + done + cmd+='; true' + + testname="${0}/${i}/plain_number_index_1d/${cmd}" + + out.stderr="${ { out.stdout="${ ${SHELL} -o nounset -c "${cmd}" ; (( out.res=$? 
)) ; }" ; } 2>&1 ; }" + + [[ "${out.stdout}" == "${tst.expected_output_1d}" ]] || err_exit "${testname}: Expected stdout==${ printf '%q\n' "${tst.expected_output_1d}" ;}, got ${ printf '%q\n' "${out.stdout}" ; }" + [[ "${out.stderr}" == '' ]] || err_exit "${testname}: Expected empty stderr, got ${ printf '%q\n' "${out.stderr}" ; }" + (( out.res == 0 )) || err_exit "${testname}: Unexpected exit code ${out.res}" + + + # + # test second dimension, by plain number + # + cmd="${tst.cmd}" + for (( j=0 ; j < tst.y ; j++ )) ; do + for (( k=0 ; k < tst.y ; k++ )) ; do + cmd+="; $( printf "[[ -v .sh.match[%d][%d] ]] && print '[%d][%d]'\n" j k j k )" + done + done + cmd+='; true' + + testname="${0}/${i}/plain_number_index_2d/${cmd}" + + out.stderr="${ { out.stdout="${ ${SHELL} -o nounset -c "${cmd}" ; (( out.res=$? )) ; }" ; } 2>&1 ; }" + + [[ "${out.stdout}" == "${tst.expected_output_2d}" ]] || err_exit "${testname}: Expected stdout==${ printf '%q\n' "${tst.expected_output_2d}" ;}, got ${ printf '%q\n' "${out.stdout}" ; }" + [[ "${out.stderr}" == '' ]] || err_exit "${testname}: Expected empty stderr, got ${ printf '%q\n' "${out.stderr}" ; }" + (( out.res == 0 )) || err_exit "${testname}: Unexpected exit code ${out.res}" + + # + # test first dimension, by variable index + # + cmd="${tst.cmd} ; integer i" + for (( j=0 ; j < tst.y ; j++ )) ; do + cmd+="; $( printf "(( i=%d )) ; [[ -v .sh.match[i] ]] && print '[%d]'\n" j j )" + done + cmd+='; true' + + testname="${0}/${i}/variable_index_1d/${cmd}" + + out.stderr="${ { out.stdout="${ ${SHELL} -o nounset -c "${cmd}" ; (( out.res=$? 
)) ; }" ; } 2>&1 ; }" + + [[ "${out.stdout}" == "${tst.expected_output_1d}" ]] || err_exit "${testname}: Expected stdout==${ printf '%q\n' "${tst.expected_output_1d}" ;}, got ${ printf '%q\n' "${out.stdout}" ; }" + [[ "${out.stderr}" == '' ]] || err_exit "${testname}: Expected empty stderr, got ${ printf '%q\n' "${out.stderr}" ; }" + (( out.res == 0 )) || err_exit "${testname}: Unexpected exit code ${out.res}" + + + # + # test second dimension, by variable index + # + cmd="${tst.cmd} ; integer i j" + for (( j=0 ; j < tst.y ; j++ )) ; do + for (( k=0 ; k < tst.y ; k++ )) ; do + cmd+="; $( printf "(( i=%d , j=%d )) ; [[ -v .sh.match[i][j] ]] && print '[%d][%d]'\n" j k j k )" + done + done + cmd+='; true' + + testname="${0}/${i}/variable_index_2d/${cmd}" + + out.stderr="${ { out.stdout="${ ${SHELL} -o nounset -c "${cmd}" ; (( out.res=$? )) ; }" ; } 2>&1 ; }" + + [[ "${out.stdout}" == "${tst.expected_output_2d}" ]] || err_exit "${testname}: Expected stdout==${ printf '%q\n' "${tst.expected_output_2d}" ;}, got ${ printf '%q\n' "${out.stdout}" ; }" + [[ "${out.stderr}" == '' ]] || err_exit "${testname}: Expected empty stderr, got ${ printf '%q\n' "${out.stderr}" ; }" + (( out.res == 0 )) || err_exit "${testname}: Unexpected exit code ${out.res}" + + done + + return 0 +} + +# test whether ${#.sh.match[0][@]} returns the right number of elements +function test_num_elements1 +{ + compound out=( typeset stdout stderr ; integer res ) + integer i + typeset testname + typeset expected_output + + compound -r -a tests=( + ( + cmd='s="a1a2a3" ; d="${s//~(E)([[:alpha:]])|([[:digit:]])/dummy}" ; printf "num=%d\n" "${#.sh.match[0][@]}"' + expected_output='num=6' + ) + ( + cmd='s="ababab" ; d="${s//~(E)([[:alpha:]])|([[:digit:]])/dummy}" ; printf "num=%d\n" "${#.sh.match[0][@]}"' + expected_output='num=6' + ) + ( + cmd='s="123456" ; d="${s//~(E)([[:alpha:]])|([[:digit:]])/dummy}" ; printf "num=%d\n" "${#.sh.match[0][@]}"' + expected_output='num=6' + ) + ) + + for (( i=0 ; i < 
${#tests[@]} ; i++ )) ; do + nameref tst=tests[i] + testname="${0}/${i}/${tst.cmd}" + expected_output="${tst.expected_output}" + + out.stderr="${ { out.stdout="${ ${SHELL} -o nounset -c "${tst.cmd}" ; (( out.res=$? )) ; }" ; } 2>&1 ; }" + + [[ "${out.stdout}" == "${expected_output}" ]] || err_exit "${testname}: Expected stdout==${ printf '%q\n' "${expected_output}" ; }, got ${ printf '%q\n' "${out.stdout}" ; }" + [[ "${out.stderr}" == '' ]] || err_exit "${testname}: Expected empty stderr, got ${ printf '%q\n' "${out.stderr}" ; }" + (( out.res == 0 )) || err_exit "${testname}: Unexpected exit code ${out.res}" + done + + return 0 +} + +# dgk's test which checks whether typeset -m (rename variable) works for .sh.match +function test_shmatch_varmove_dgk1 +{ + typeset out + # we use an array of $'...\n' here to get correct line numbers + typeset -r -a script=( + $'set -o nounset\n' + $'x=1234\n' + $'compound co\n' + $': "${x//~(X)([012])|([345])/ }"\n' + $'x="$(print -v .sh.match)"\n' + $'typeset -m co.array=.sh.match\n' + $'y="$(print -v co.array)"\n' + $'[[ "$y" == "$x" ]] && print "MATCH"\n' + +# fixme: this currently outputs as ${co.array[2][(null)]}, which isn't correct +# # added later by gisburn +# $'printf "%s" "${co.array[2][1]}"' + ) + + out="$(${SHELL} -c "${script[*]}" 2>&1 ; print -- "$?")" + + [[ "${out}" == $'MATCH\n0' ]] || err_exit "${0}: typeset -m of .sh.match to variable not working, expected 'MATCH', got ${ printf '%q\n' "${out}" ; }" + + return 0 +} + +function test_nomatch_dgk1 +{ +cat >'testscript1.sh' <<'EOF' + integer j k + compound c + compound -a c.attrs + + attrdata=$' x=\'1\' y=\'2\' z="3" end="world"' + dummy="${attrdata//~(Ex-p)(?: + [[:space:]]+ + ( # four different types of name=value syntax + (?:([:_[:alnum:]-]+)=([^\"\'[:space:]]+?))| #x='foo=bar huz=123' + (?:([:_[:alnum:]-]+)=\"([^\"]*?)\")| #x='foo="ba=r o" huz=123' + (?:([:_[:alnum:]-]+)=\'([^\']*?)\')| #x="foox huz=123" + (?:([:_[:alnum:]-]+)) #x="foox huz=123" + ) + )/D}" + for 
(( j=0 ; j < ${#.sh.match[0][@]} ; j++ )) + do + if [[ -v .sh.match[2][j] && -v .sh.match[3][j] ]] + then c.attrs+=( name="${.sh.match[2][j]}" value="${.sh.match[3][j]}" ) + fi + if [[ -v .sh.match[4][j] && -v .sh.match[5][j] ]] + then c.attrs+=( name="${.sh.match[4][j]}" value="${.sh.match[5][j]}" ) + fi + if [[ -v .sh.match[6][j] && -v .sh.match[7][j] ]] ; then + c.attrs+=( name="${.sh.match[6][j]}" value="${.sh.match[7][j]}" ) + fi + done + print -v c +EOF + expect='( + typeset -a attrs=( + [0]=( + name=x + value=1 + ) + [1]=( + name=y + value=2 + ) + [2]=( + name=z + value=3 + ) + [3]=( + name=end + value=world + ) + ) +)' + compound out=( typeset stdout stderr ; integer res ) + typeset testname + + # plain + testname="${0}/plain" + out.stderr="${ { out.stdout="${ ${SHELL} -o nounset 'testscript1.sh' ; (( out.res=$? )) ; }" ; } 2>&1 ; }" + + [[ "${out.stdout}" == "${expect}" ]] || err_exit "${testname}: Expected stdout==${ printf '%q\n' "${expect}" ; }, got ${ printf '%q\n' "${out.stdout}" ; }" + [[ "${out.stderr}" == '' ]] || err_exit "${testname}: Expected empty stderr, got ${ printf '%q\n' "${out.stderr}" ; }" + (( out.res == 0 )) || err_exit "${testname}: Unexpected exit code ${out.res}" + + # compiled + testname="${0}/compiled" + out.stderr="${ { out.stdout="${ ${SHCOMP} -n 'testscript1.sh' 'testscript1.shbin' ; ${SHELL} -o nounset 'testscript1.shbin' ; (( out.res=$? 
)) ; }" ; } 2>&1 ; }" + + [[ "${out.stdout}" == "${expect}" ]] || err_exit "${testname}: Expected stdout==${ printf '%q\n' "${expect}" ; }, got ${ printf '%q\n' "${out.stdout}" ; }" + [[ "${out.stderr}" == '' ]] || err_exit "${testname}: Expected empty stderr, got ${ printf '%q\n' "${out.stderr}" ; }" + (( out.res == 0 )) || err_exit "${testname}: Unexpected exit code ${out.res}" + + rm 'testscript1.sh' 'testscript1.shbin' + + return 0 +} + +function test_sh_match_varmove2 +{ +cat >'testscript1.sh' <&1 ; }" + + [[ "${out.stdout}" == "${tst.output}" ]] || err_exit "${testname}: Expected stdout==${ printf '%q\n' "${tst.output}" ; }, got ${ printf '%q\n' "${out.stdout}" ; }" + [[ "${out.stderr}" == '' ]] || err_exit "${testname}: Expected empty stderr, got ${ printf '%q\n' "${out.stderr}" ; }" + (( out.res == 0 )) || err_exit "${testname}: Unexpected exit code ${out.res}" + (( numtests++ )) + + # compiled + testname="${0}/${i}/${mode}/compiled" + out.stderr="${ { out.stdout="${ ${SHELL} 'testscript1.shbin' ${mode} "${tst.attrstr}" ; (( out.res=$? 
)) ; }" ; } 2>&1 ; }" + + [[ "${out.stdout}" == "${tst.output}" ]] || err_exit "${testname}: Expected stdout==${ printf '%q\n' "${tst.output}" ; }, got ${ printf '%q\n' "${out.stdout}" ; }" + [[ "${out.stderr}" == '' ]] || err_exit "${testname}: Expected empty stderr, got ${ printf '%q\n' "${out.stderr}" ; }" + (( out.res == 0 )) || err_exit "${testname}: Unexpected exit code ${out.res}" + (( numtests++ )) + done + done + + rm 'testscript1.sh' 'testscript1.shbin' + + # safeguard against malfunctions in the test chain + (( numtests == 40 )) || err_exit "${0}: Internal test script error, expected numtests == 40, got ${numtests}" + + return 0 +} + +# run tests +test_xmlfragment1 +test_testop_v1 +test_testop_v2 +test_num_elements1 +test_shmatch_varmove_dgk1 +test_sh_match_varmove2 +test_nomatch_dgk1 + +# ====== +set +u +x=1234 +compound co +: "${x//~(X)([012])|([345])/ }" +x=$(print -v .sh.match) +typeset -m co.array=.sh.match +y=$(print -v co.array) +[[ $y == "$x" ]] || err_exit 'typeset -m of .sh.match to variable not working' + +# ====== +# https://github.com/ksh93/ksh/issues/308 +exp='typeset -a .sh.match=((1 2 3 4) (1 2) ([2]=3 [3]=4) ) +typeset -a .sh.match[1]=(1 2) +typeset -a .sh.match[2]=([2]=3 [3]=4) +3 2 2 +2 3' +got=$("$SHELL" -c ' + x=1234 + true ${x//~(X)([012])|([345])/ } + typeset -p .sh.match .sh.match[1] .sh.match[2] + echo ${#.sh.match[@]} ${#.sh.match[1][@]} ${#.sh.match[2][@]} + echo ${!.sh.match[2][@]}; +') +[[ $exp == "$got" ]] || err_exit "listing .sh.match indexed array results doesn't work correctly" \ + "(expected $(printf %q "$exp"), got $(printf %q "$got"))" + +# https://marc.info/?l=ast-developers&m=134604855504311&w=2 +nummatches=$tmp/nummatches.sh +cat > "$nummatches" << 'EOF' +attrdata=$' aname=avalue ' + +dummy="${attrdata//~(Ex-p)(?: +[[:space:]]+ +( # four different types of name=value syntax + (?:([:_[:alnum:]-]+)=([^\"\'[:space:]]+?))| + (?:([:_[:alnum:]-]+)=\"([^\"]*?)\")| + (?:([:_[:alnum:]-]+)=\'([^\']*?)\')| + (?:([:_[:alnum:]-]+)) +) 
+)/D}" + +print -v .sh.match +print "Nummatches=${#.sh.match[0][@]}" +EOF +exp=$'( + ( + \' aname=a\' + ) + ( + aname\\=a + ) + ( + aname + ) + ( + a + ) +) +Nummatches=1' +got=$("$SHELL" "$nummatches") +[[ $exp == "$got" ]] || err_exit "Nummatches should be one" \ + "(expected $(printf %q "$exp"), got $(printf %q "$got"))" + +# https://marc.info/?l=ast-developers&m=134490505607093 +if ((SHOPT_NAMESPACE)); then + type_nameref=$tmp/ksh93v_typeset_T_nameref_fails001.sh + cat > "$type_nameref" << 'EOF' + namespace xmlfragmentparser + { + typeset -T parser_t=( + typeset -a data # "raw" data from .sh.match + compound -a context # parsed tag data + + function build_context + { + typeset dummy + typeset attrdata # data after "" "" ) + xd.build_context + print "$xd" + return 0 + } + + # main + set -o nounset + main +EOF + exp="( + typeset -a data=( + $'' + $'' + ) + typeset -C -a context=( + [0]=( + typeset -a attrs=( + [0]=( + name=x + value=1 + ) + [1]=( + name=y + value=2 + ) + ) + tagname=foo + ) + [1]=( + typeset -a attrs=( + [0]=( + name=a + value=1 + ) + [1]=( + name=b + value=2 + ) + ) + tagname=bar + ) + ) +)" + got=$("$SHELL" "$type_nameref") + [[ $exp == "$got" ]] || err_exit "Compound variable \$context is not printed with 'print -v'." 
\ + $'Diff follows:\n'"$(diff -u <(print -r -- "$exp") <(print -r -- "$got") | sed $'s/^/\t| /')" +fi + +# ====== +exit $((Errors<125?Errors:125)) diff --git a/src/cmd/ksh93/tests/substring.sh b/src/cmd/ksh93/tests/substring.sh index 737721ef5..1b64e143b 100755 --- a/src/cmd/ksh93/tests/substring.sh +++ b/src/cmd/ksh93/tests/substring.sh @@ -252,7 +252,7 @@ then err_exit '${var//+(\S)/Q} not workding' fi var=$($SHELL -c 'v=/vin:/usr/vin r=vin; : ${v//vin/${r//v/b}};typeset -p .sh.match') 2> /dev/null -((SHOPT_2DMATCH)) && exp='typeset -a .sh.match=((vin vin) )' || exp='typeset -a .sh.match=(vin)' +exp='typeset -a .sh.match=((vin vin) )' [[ $var == "$exp" ]] || err_exit '.sh.match not correct when replacement pattern contains a substring match' \ "(expected $(printf %q "$exp"), got $(printf %q "$var"))" @@ -683,5 +683,24 @@ Errors=$? # Test for a crash after unsetting ${.sh.match} then matching a pattern $SHELL -c 'unset .sh.match; [[ bar == ba* ]]' || err_exit 'crash after unsetting .sh.match then trying to match a pattern' +# Tests for ${.sh.match} backported from ksh93v- +unset v d +v=abbbc +d="${v/~(E)b{2,4}/dummy}" +[[ ${.sh.match} == bbb ]] || err_exit '.sh.match wrong after ${s/~(E)b{2,4}/dummy}' +[[ $d == adummyc ]] || err_exit '${s/~(E)b{2,4}/dummy} not working' + +x=1234 +: "${x//~(X)([012])|([345])/}" +[[ ${.sh.match[1][600..602]} ]] && err_exit '${.sh.match[0][600..602]} is not the empty string' + +: "${x//~(X)([012])|([345])/}" +x=$(print -v .sh.match) +compound co +typeset -m co.array=.sh.match +[[ $x == "$(print -v co.array)" ]] || err_exit 'typeset -m for .sh.match to compound variable not working (1)' +: "${x//~(X)([345])|([012])/}" +[[ $x == "$(print -v co.array)" ]] || err_exit 'typeset -m for .sh.match to compound variable not working (2)' + # ====== exit $((Errors<125?Errors:125)) diff --git a/src/cmd/ksh93/tests/treemove.sh b/src/cmd/ksh93/tests/treemove.sh index d14573bb1..48a5831a4 100755 --- a/src/cmd/ksh93/tests/treemove.sh +++ 
b/src/cmd/ksh93/tests/treemove.sh @@ -2,7 +2,7 @@ # # # This software is part of the ast package # # Copyright (c) 1982-2011 AT&T Intellectual Property # -# Copyright (c) 2020-2021 Contributors to ksh 93u+m # +# Copyright (c) 2020-2022 Contributors to ksh 93u+m # # and is licensed under the # # Eclipse Public License, Version 1.0 # # by AT&T Intellectual Property # @@ -152,4 +152,21 @@ then if [[ $(kill -l $exitval) == SEGV ]] else err_exit 'typeset -m "c.board[1][i]=el" gives wrong value' fi fi + +function f2 +{ + nameref mar=$1 exp=$2 + typeset dummy x="-1a2b3c4d9u" + dummy="${x//~(E)([[:digit:]])|([[:alpha:]])/D}" + exp=${ print -v .sh.match;} + typeset -m "mar=.sh.match" +} +function f1 +{ + typeset matchar exp + f2 matchar exp + [[ ${ print -v matchar;} == "$exp" ]] || err_exit 'moving .sh.match to a function local variable using a name reference fails' +} +f1 + exit $((Errors<125?Errors:125)) diff --git a/src/cmd/ksh93/tests/types.sh b/src/cmd/ksh93/tests/types.sh index a9616044f..abfd1c7da 100755 --- a/src/cmd/ksh93/tests/types.sh +++ b/src/cmd/ksh93/tests/types.sh @@ -552,7 +552,8 @@ compound -a b.ca b_t b.ca[4].b exp='typeset -C b=(typeset -C -a ca=( [4]=(b_t b=(a_t b=(a=hello))));)' got=$(typeset -p b) -[[ $got == "$exp" ]] || err_exit 'typeset -p of nested type not correct' +[[ $got == "$exp" ]] || err_exit 'typeset -p of nested type not correct' \ + "(expected $(printf %q "$exp"), got $(printf %q "$got"))" typeset -T u_t=( integer dummy diff --git a/src/lib/libast/features/api b/src/lib/libast/features/api index edaf10f05..1daf15f43 100644 --- a/src/lib/libast/features/api +++ b/src/lib/libast/features/api @@ -1,8 +1,8 @@ iff AST_API -ver ast 20220201 +ver ast 20220208 -api ast 20120528 regexec regnexec regrexec regsubexec strgrpmatch +api ast 20120528 regexec regnexec regrexec regsubexec api ast 20120411 cmdopen diff --git a/src/lib/libast/features/map.c b/src/lib/libast/features/map.c index aa654da81..c1efe7885 100644 --- 
a/src/lib/libast/features/map.c +++ b/src/lib/libast/features/map.c @@ -2,7 +2,7 @@ * * * This software is part of the ast package * * Copyright (c) 1985-2011 AT&T Intellectual Property * -* Copyright (c) 2020-2021 Contributors to ksh 93u+m * +* Copyright (c) 2020-2022 Contributors to ksh 93u+m * * and is licensed under the * * Eclipse Public License, Version 1.0 * * by AT&T Intellectual Property * @@ -322,6 +322,8 @@ main() printf("#define strgid _ast_strgid\n"); printf("#undef strgrpmatch\n"); printf("#define strgrpmatch _ast_strgrpmatch\n"); + printf("#undef strngrpmatch\n"); + printf("#define strngrpmatch _ast_strngrpmatch\n"); printf("#undef strhash\n"); printf("#define strhash _ast_strhash\n"); printf("#undef strkey\n"); diff --git a/src/lib/libast/include/ast.h b/src/lib/libast/include/ast.h index e1348665e..c435c3730 100644 --- a/src/lib/libast/include/ast.h +++ b/src/lib/libast/include/ast.h @@ -169,11 +169,12 @@ typedef struct * strgrpmatch() flags */ -#define STR_MAXIMAL 01 /* maximal match */ -#define STR_LEFT 02 /* implicit left anchor */ -#define STR_RIGHT 04 /* implicit right anchor */ -#define STR_ICASE 010 /* ignore case */ -#define STR_GROUP 020 /* (|&) inside [@|&](...) only */ +#define STR_MAXIMAL 0x01 /* maximal match */ +#define STR_LEFT 0x02 /* implicit left anchor */ +#define STR_RIGHT 0x04 /* implicit right anchor */ +#define STR_ICASE 0x08 /* ignore case */ +#define STR_GROUP 0x10 /* (|&) inside [@|&](...) 
only */ +#define STR_INT 0x20 /* int* match array */ /* * fmtquote() flags @@ -371,8 +372,8 @@ extern int strexp(char*, int); extern long streval(const char*, char**, long(*)(const char*, char**)); extern long strexpr(const char*, char**, long(*)(const char*, char**, void*), void*); extern int strgid(const char*); -extern int strgrpmatch(const char*, const char*, int*, int, int); -extern int strgrpmatch_20120528(const char*, const char*, ssize_t*, int, int); +extern int strgrpmatch(const char*, const char*, ssize_t*, int, int); +extern int strngrpmatch(const char*, size_t, const char*, ssize_t*, int, int); extern unsigned int strhash(const char*); extern void* strlook(const void*, size_t, const char*); extern int strmatch(const char*, const char*); diff --git a/src/lib/libast/man/astsa.3 b/src/lib/libast/man/astsa.3 index 450a7ec5f..cf5ff6620 100644 --- a/src/lib/libast/man/astsa.3 +++ b/src/lib/libast/man/astsa.3 @@ -124,7 +124,7 @@ Returns the error message text corresponding to the \fIerrno\fP. .Ss "char* strerror(int \fIerrno\fP)" Equivalent to fmterror(\fIerrno\fP). -.Ss "int strgrpmatch(const char* \fIstring\fP, const char* \fIpattern\fP, int* \fIsub\fP, int \fInsub\fP, int \fIflags\fP)" +.Ss "int strgrpmatch(const char* \fIstring\fP, const char* \fIpattern\fP, ssize_t* \fIsub\fP, int \fInsub\fP, int \fIflags\fP)" Matches the null-terminated \fIstring\fP against the null-terminated .BR ksh (1) augmented \fIpattern\fP. @@ -148,8 +148,6 @@ Ignore case. .Tp \f3STR_GROUP\fP: (|&) inside [@|*|+{n,m}](...) only. -.Ss "int strmatch(const char* \fIstring\fP, const char* \fIpattern\fP, int* \fIsub\fP, int \fInsub\fP, int \fIflags\fP)" -Equivalent to strgrpmatch(\fIstring\fP,\fIpattern\fP,0,0,STR_MAXIMAL|STR_LEFT|STR_RIGHT). 
.SH "SEE ALSO" .BR ast (3), .BR ccode (3), diff --git a/src/lib/libast/string/strmatch.c b/src/lib/libast/string/strmatch.c index 18c52e3e6..c4af30823 100644 --- a/src/lib/libast/string/strmatch.c +++ b/src/lib/libast/string/strmatch.c @@ -2,7 +2,7 @@ * * * This software is part of the ast package * * Copyright (c) 1985-2012 AT&T Intellectual Property * -* Copyright (c) 2020-2021 Contributors to ksh 93u+m * +* Copyright (c) 2020-2022 Contributors to ksh 93u+m * * and is licensed under the * * Eclipse Public License, Version 1.0 * * by AT&T Intellectual Property * @@ -64,8 +64,6 @@ static struct State_s int nmatch; } matchstate; -#define STR_INT 040000 - /* * subgroup match * 0 returned if no match @@ -77,7 +75,7 @@ static struct State_s */ int -strgrpmatch(const char* b, const char* p, ssize_t* sub, int n, register int flags) +strngrpmatch(const char* b, size_t z, const char* p, ssize_t* sub, int n, register int flags) { register regex_t* re; register ssize_t* end; @@ -140,7 +138,7 @@ strgrpmatch(const char* b, const char* p, ssize_t* sub, int n, register int flag return 0; matchstate.nmatch = n; } - if (regexec(re, b, n, matchstate.match, reflags & ~(REG_MINIMAL|REG_SHELL_GROUP|REG_LEFT|REG_RIGHT|REG_ICASE))) + if (regnexec(re, b, z, n, matchstate.match, reflags & ~(REG_MINIMAL|REG_SHELL_GROUP|REG_LEFT|REG_RIGHT|REG_ICASE))) return 0; if (!sub || n <= 0) return 1; @@ -176,7 +174,7 @@ strgrpmatch(const char* b, const char* p, ssize_t* sub, int n, register int flag int strmatch(const char* s, const char* p) { - return strgrpmatch(s, p, NiL, 0, STR_MAXIMAL|STR_LEFT|STR_RIGHT); + return strngrpmatch(s, s ? strlen(s) : 0, p, NiL, 0, STR_MAXIMAL|STR_LEFT|STR_RIGHT); } /* @@ -192,7 +190,7 @@ strsubmatch(const char* s, const char* p, int flags) { ssize_t match[2]; - return strgrpmatch(s, p, match, 1, (flags ? STR_MAXIMAL : 0)|STR_LEFT) ? (char*)s + match[1] : (char*)0; + return strngrpmatch(s, s ? strlen(s) : 0, p, match, 1, (flags ? STR_MAXIMAL : 0)|STR_LEFT) ? 
(char*)s + match[1] : (char*)0; } #undef strgrpmatch @@ -201,7 +199,7 @@ strsubmatch(const char* s, const char* p, int flags) #endif int -strgrpmatch(const char* b, const char* p, int* sub, int n, int flags) +strgrpmatch(const char* b, const char* p, ssize_t* sub, int n, int flags) { - return strgrpmatch_20120528(b, p, (ssize_t*)sub, n, flags|STR_INT); + return strngrpmatch(b, b ? strlen(b) : 0, p, sub, n, flags|STR_INT); }