1
0
Fork 0
mirror of git://git.code.sf.net/p/cdesktopenv/code synced 2025-03-09 15:50:02 +00:00

libast: regex: backport robustness improvements from 93v- beta

There are two main changes:

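1. The regex code now creates and uses its own stack (env->mst)
   instead of using the shared standard stack (stkstd). That seems
   likely to be a good thing, presumably because regex allocations
   can then no longer interfere with other code that uses stkstd.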

2. Missing mbinit() calls were inserted. The 93v- code uses a
   completely different multibyte character API, so these calls
   needed to be translated back to the older API. Since mbinit() is
   no longer a no-op as of 300cd199, these calls now stop things
   from breaking if a previous operation was interrupted
   mid-character.

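I think there might be a couple of off-by-one errors fixed as well,
as there are two instances of this change:

-		while ((index += skip[buf[index]]) < mid);
+		while (index < mid)
+			index += skip[buf[index]];

The old loop always read buf[index] and advanced index at least once
before comparing it against mid, even if index had already reached
mid; the new loop checks the bound before each read.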
This commit is contained in:
Martijn Dekker 2021-12-15 00:41:46 +01:00
parent 7c30a59e25
commit 1aa8f771d8
8 changed files with 89 additions and 60 deletions

View file

@ -1,7 +1,7 @@
/***********************************************************************
* *
* This software is part of the ast package *
* Copyright (c) 1985-2011 AT&T Intellectual Property *
* Copyright (c) 1985-2013 AT&T Intellectual Property *
* Copyright (c) 2020-2021 Contributors to ksh 93u+m *
* and is licensed under the *
* Eclipse Public License, Version 1.0 *
@ -70,7 +70,7 @@ detrie(Trie_node_t* x, Sfio_t* sp, char* b, char* p, char* e, int delimiter)
}
static int
decomp(register Rex_t* e, Sfio_t* sp, int type, int delimiter, regflags_t flags)
decomp(register Rex_t* e, Rex_t* parent, Sfio_t* sp, int type, int delimiter, regflags_t flags)
{
Rex_t* q;
unsigned char* s;
@ -92,10 +92,10 @@ decomp(register Rex_t* e, Sfio_t* sp, int type, int delimiter, regflags_t flags)
switch (e->type)
{
case REX_ALT:
if (decomp(e->re.group.expr.binary.left, sp, type, delimiter, flags))
if (decomp(e->re.group.expr.binary.left, e, sp, type, delimiter, flags))
return 1;
sfputc(sp, '|');
if (e->re.group.expr.binary.right && decomp(e->re.group.expr.binary.right, sp, type, delimiter, flags))
if (e->re.group.expr.binary.right && decomp(e->re.group.expr.binary.right, e, sp, type, delimiter, flags))
return 1;
break;
case REX_BACK:
@ -141,11 +141,13 @@ decomp(register Rex_t* e, Sfio_t* sp, int type, int delimiter, regflags_t flags)
sfputc(sp, '?');
else
c = 0;
if (e->re.group.expr.rex && e->re.group.expr.rex->type == REX_GROUP)
c = 0;
}
switch (e->type)
{
case REX_REP:
if (decomp(e->re.group.expr.rex, sp, type, delimiter, flags))
if (decomp(e->re.group.expr.rex, e, sp, type, delimiter, flags))
return 1;
break;
case REX_CLASS:
@ -322,7 +324,7 @@ decomp(register Rex_t* e, Sfio_t* sp, int type, int delimiter, regflags_t flags)
case REX_NEG:
if (type >= SRE)
sfprintf(sp, "!(");
if (decomp(e->re.group.expr.rex, sp, type, delimiter, flags))
if (decomp(e->re.group.expr.rex, e, sp, type, delimiter, flags))
return 1;
if (type >= SRE)
sfputc(sp, ')');
@ -330,17 +332,17 @@ decomp(register Rex_t* e, Sfio_t* sp, int type, int delimiter, regflags_t flags)
sfputc(sp, '!');
break;
case REX_CONJ:
if (decomp(e->re.group.expr.binary.left, sp, type, delimiter, flags))
if (decomp(e->re.group.expr.binary.left, e, sp, type, delimiter, flags))
return 1;
sfputc(sp, '&');
if (decomp(e->re.group.expr.binary.right, sp, type, delimiter, flags))
if (decomp(e->re.group.expr.binary.right, e, sp, type, delimiter, flags))
return 1;
break;
case REX_GROUP:
if (type >= SRE)
if (type >= SRE && parent->type != REX_REP)
sfputc(sp, '@');
meta(sp, '(', type, 1, delimiter);
if (decomp(e->re.group.expr.rex, sp, type, delimiter, flags))
if (decomp(e->re.group.expr.rex, e, sp, type, delimiter, flags))
return 1;
meta(sp, ')', type, 1, delimiter);
break;
@ -350,22 +352,22 @@ decomp(register Rex_t* e, Sfio_t* sp, int type, int delimiter, regflags_t flags)
case REX_GROUP_BEHIND_NOT:
meta(sp, '(', type, 1, delimiter);
sfputc(sp, '?');
if (decomp(e->re.group.expr.rex, sp, type, delimiter, flags))
if (decomp(e->re.group.expr.rex, e, sp, type, delimiter, flags))
return 1;
meta(sp, ')', type, 1, delimiter);
break;
case REX_GROUP_COND:
meta(sp, '(', type, 1, delimiter);
sfputc(sp, '?');
if (e->re.group.expr.binary.left && decomp(e->re.group.expr.binary.left, sp, type, delimiter, flags))
if (e->re.group.expr.binary.left && decomp(e->re.group.expr.binary.left, e, sp, type, delimiter, flags))
return 1;
if (q = e->re.group.expr.binary.right)
{
sfputc(sp, ':');
if (q->re.group.expr.binary.left && decomp(q->re.group.expr.binary.left, sp, type, delimiter, flags))
if (q->re.group.expr.binary.left && decomp(q->re.group.expr.binary.left, q, sp, type, delimiter, flags))
return 1;
sfputc(sp, ':');
if (q->re.group.expr.binary.right && decomp(q->re.group.expr.binary.right, sp, type, delimiter, flags))
if (q->re.group.expr.binary.right && decomp(q->re.group.expr.binary.right, q, sp, type, delimiter, flags))
return 1;
}
meta(sp, ')', type, 1, delimiter);
@ -373,7 +375,7 @@ decomp(register Rex_t* e, Sfio_t* sp, int type, int delimiter, regflags_t flags)
case REX_GROUP_CUT:
meta(sp, '(', type, 1, delimiter);
sfputc(sp, '?');
if (decomp(e->re.group.expr.rex, sp, type, delimiter, flags))
if (decomp(e->re.group.expr.rex, e, sp, type, delimiter, flags))
return 1;
meta(sp, ')', type, 1, delimiter);
break;
@ -430,7 +432,7 @@ regdecomp(regex_t* p, regflags_t flags, char* buf, size_t n)
}
else
delimiter = -1;
if (decomp(p->env->rex, sp, type, delimiter, flags))
if (decomp(p->env->rex, p->env->rex, sp, type, delimiter, flags))
r = 0;
else
{