1
0
Fork 0
mirror of git://git.code.sf.net/p/cdesktopenv/code synced 2025-03-09 15:50:02 +00:00
cde/src/lib/libast/regex/regrexec.c
Martijn Dekker de511cfbc2 libast: regex: re-backport robustness improvements from 93v- beta
That intermittent regression test failure in types.sh seems to be
gone. So let's reimport the regex changes into the 1.0 branch to
subject them to wider testing and make sure any failures stay gone.
(re: 48568476, 38aab428, 1aa8f771)

[Original commit message from 1aa8f771 follows]

There are two main changes:

1. The regex code now creates and uses its own stack (env->mst)
   instead of using the shared standard stack (stkstd). That seems
   likely to be a good thing.

2. Missing mbinit() calls were inserted. The 93v- code uses a
   completely different multibyte characters API, so these needed
   to be translated back to the older API. But, as mbinit() is no
   longer a no-op as of 300cd199, these calls do stop things from
   breaking if a previous operation is interrupted mid-character.

I think there might be a couple of off-by-one errors fixed as well,
as there are two instances of this change:

-               while ((index += skip[buf[index]]) < mid);
+               while (index < mid)
+                       index += skip[buf[index]];
2021-12-28 22:24:41 +00:00

182 lines
4.9 KiB
C

/***********************************************************************
* *
* This software is part of the ast package *
* Copyright (c) 1985-2013 AT&T Intellectual Property *
* Copyright (c) 2020-2021 Contributors to ksh 93u+m *
* and is licensed under the *
* Eclipse Public License, Version 1.0 *
* by AT&T Intellectual Property *
* *
* A copy of the License is available at *
* http://www.eclipse.org/org/documents/epl-v10.html *
* (with md5 checksum b35adb5213ca9657e911e9befb180842) *
* *
* Information and Software Systems Research *
* AT&T Research *
* Florham Park NJ *
* *
* Glenn Fowler <gsf@research.att.com> *
* David Korn <dgk@research.att.com> *
* Phong Vo <kpv@research.att.com> *
* *
***********************************************************************/
/*
* POSIX regex record executor
* multiple record sized-buffer interface
*/
#include "reglib.h"
/*
* call regnexec() on records selected by Boyer-Moore
*
* The len bytes at s are treated as records separated by the byte sep.
* Candidate records are located with the skip/mask/fail tables of the
* REX_BM node at p->env->rex. Each candidate is accepted either directly
* (when the node is marked complete and nmatch is 0) or when regnexec()
* returns 0 for it.
*
* Without REG_INVERT in flags, (*record)(handle, start, length) is called
* for each accepted record. With REG_INVERT it is called instead for the
* records that lie before each accepted record and were not yet reported,
* and for whatever remains once the scan is exhausted.
*
* nmatch, match and flags are passed through to regnexec().
*
* Returns REG_BADPAT if s, p or p->env is null or the node is not REX_BM;
* otherwise the first nonzero value returned by record, a regnexec()
* result other than 0 or REG_NOMATCH, or 0 when the scan runs to the end.
* env->rex is reassigned while scanning and restored before returning
* through done.
*/
int
regrexec_20120528(const regex_t* p, const char* s, size_t len, size_t nmatch, regmatch_t* match, regflags_t flags, int sep, void* handle, regrecord_t record)
{
register unsigned char* buf;
register unsigned char* beg;
register unsigned char* l;
register unsigned char* r;
register unsigned char* x;
register size_t* skip;
register size_t* fail;
register Bm_mask_t** mask;
register size_t index;
register ssize_t n;
unsigned char* end;
size_t mid;
int complete;
int exactlen;
int leftlen;
int rightlen;
int inv;
Bm_mask_t m;
Env_t* env;
Rex_t* e;
if (!s || !p || !(env = p->env) || (e = env->rex)->type != REX_BM)
return REG_BADPAT;
inv = (flags & REG_INVERT) != 0;
/* buf is the fixed buffer base; beg tracks the first byte not yet reported in invert mode */
buf = beg = (unsigned char*)s;
end = buf + len;
/* the skip loop only runs while index < mid, i.e. up to re.bm.right bytes short of the end (0 if the buffer is shorter than that) */
mid = (len < e->re.bm.right) ? 0 : (len - e->re.bm.right);
skip = e->re.bm.skip;
fail = e->re.bm.fail;
mask = e->re.bm.mask;
/* the regnexec() call is skipped only when the node is complete and no sub-matches are requested */
complete = e->re.bm.complete && !nmatch;
exactlen = e->re.bm.size;
leftlen = e->re.bm.left + exactlen;
rightlen = exactlen + e->re.bm.right;
/*
* index starts at the un-incremented leftlen; leftlen is then one larger
* so that (r - buf) + leftlen below lands at the same offset into the
* record that starts just past the separator at r
*/
index = leftlen++;
for (;;)
{
/* skip phase: every buf[index] read here has index < mid <= len */
while (index < mid)
index += skip[buf[index]];
/*
* NOTE(review): HIT is defined outside this file; presumably skip[]
* entries for bytes that can end a match include HIT so the loop above
* exits with index >= HIT -- confirm in reglib.h
*/
if (index < HIT)
goto impossible;
index -= HIT;
/*
* mask phase: walk backward from index, ANDing one mask entry per
* position; reaching position 0 with m still nonzero is a candidate.
* n is post-decremented, so it is -1 on arrival at possible
*/
m = mask[n = exactlen - 1][buf[index]];
do
{
if (!n--)
goto possible;
} while (m &= mask[n][buf[--index]]);
/* mask test failed at position n + 1: advance by the fail table and rescan while inside the buffer */
if ((index += fail[n + 1]) < len)
continue;
impossible:
/* no further candidates; in invert mode everything from beg to the end of the buffer still has to be reported */
if (inv)
{
l = r = buf + len;
goto invert;
}
n = 0;
goto done;
possible:
/* l..r initially brackets the exactlen candidate bytes starting at buf + index */
r = (l = buf + index) + exactlen;
/* move l back to the byte after the previous separator, or to beg */
while (l > beg)
if (*--l == sep)
{
l++;
break;
}
/* fewer than leftlen bytes from the record start to the end of the candidate */
if ((r - l) < leftlen)
goto spanned;
/* move r forward to the next separator, or to the end of the buffer */
while (r < end && *r != sep)
r++;
/* fewer than rightlen bytes from the candidate start to the record end */
if ((r - (buf + index)) < rightlen)
goto spanned;
/*
* unless complete, env->rex is set to e for records longer than 128
* bytes and to e->next otherwise, then regnexec() is run on the record;
* if the assigned node is null, regnexec() is not called and n keeps
* its previous value (-1 from the mask loop)
*/
if (complete || (env->rex = ((r - l) > 128) ? e : e->next) && !(n = regnexec(p, (char*)l, r - l, nmatch, match, flags)))
{
if (inv)
{
/* also entered from impossible with l == r == buf + len */
invert:
/* report each separator-delimited record in [beg, l) */
x = beg;
while (beg < l)
{
while (x < l && *x != sep)
x++;
if (n = (*record)(handle, (char*)beg, x - beg))
goto done;
beg = ++x;
}
}
else if (n = (*record)(handle, (char*)l, r - l))
goto done;
/* restart the scan in the record after r; stop if that position is past the buffer */
if ((index = (r - buf) + leftlen) >= len)
{
/* in invert mode the bytes after the separator at r, if any, are reported as one final record */
n = (inv && (++r - buf) < len) ? (*record)(handle, (char*)r, (buf + len) - r): 0;
goto done;
}
beg = r + 1;
}
else if (n != REG_NOMATCH)
/* any result other than 0 or REG_NOMATCH ends the scan and is returned */
goto done;
else
{
spanned:
/* candidate rejected: step past it and keep scanning unless that reaches mid */
if ((index += exactlen) >= mid)
goto impossible;
}
}
done:
/* undo the env->rex reassignment made before calling regnexec() */
env->rex = e;
return n;
}
/*
* 20120528: regoff_t changed from int to ssize_t
*/
#if defined(__EXPORT__)
#define extern __EXPORT__
#endif
#undef regrexec
#if _map_libc
#define regrexec _ast_regrexec
#endif
/*
 * Compatibility entry point taking the old match type (oldregmatch_t).
 *
 * With a null oldmatch the scan runs with no sub-match reporting
 * (nmatch forced to 0). Otherwise a temporary regmatch_t array of nmatch
 * elements is obtained, the scan runs on it, and on a 0 result each
 * rm_so/rm_eo pair is copied into oldmatch.
 *
 * Returns -1 if the temporary array cannot be obtained, otherwise the
 * result of regrexec_20120528().
 */
extern int
regrexec(const regex_t* p, const char* s, size_t len, size_t nmatch, oldregmatch_t* oldmatch, regflags_t flags, int sep, void* handle, regrecord_t record)
{
	regmatch_t*	tmp;
	ssize_t		k;
	int		status;

	/* caller wants no match offsets: delegate directly */
	if (!oldmatch)
		return regrexec_20120528(p, s, len, 0, NiL, flags, sep, handle, record);

	tmp = oldof(0, regmatch_t, nmatch, 0);
	if (!tmp)
		return -1;

	status = regrexec_20120528(p, s, len, nmatch, tmp, flags, sep, handle, record);
	if (!status)
	{
		/* copy the offsets back only on a 0 result */
		for (k = 0; k < nmatch; k++)
		{
			oldmatch[k].rm_so = tmp[k].rm_so;
			oldmatch[k].rm_eo = tmp[k].rm_eo;
		}
	}
	free(tmp);
	return status;
}