mirror of
git://git.code.sf.net/p/cdesktopenv/code
synced 2025-03-09 15:50:02 +00:00
That intermittent regression test failure in types.sh seems to be gone. So let's reimport the regex changes into the 1.0 branch to subject them to wider testing and make sure any failures stay gone. (re: 48568476, 38aab428, 1aa8f771) [Original commit message from 1aa8f771 follows] There are two main changes: 1. The regex code now creates and uses its own stack (env->mst) instead of using the shared standard stack (stkstd). That seems likely to be a good thing. 2. Missing mbinit() calls were inserted. The 93v- code uses a completely different multibyte characters API, so these needed to be translated back to the older API. But, as mbinit() is no longer a no-op as of 300cd199, these calls do stop things from breaking if a previous operation is interrupted mid-character. I think there might be a couple of off-by-one errors fixed as well, as there are two instances of this change: - while ((index += skip[buf[index]]) < mid); + while (index < mid) + index += skip[buf[index]];
182 lines
4.9 KiB
C
182 lines
4.9 KiB
C
/***********************************************************************
|
|
* *
|
|
* This software is part of the ast package *
|
|
* Copyright (c) 1985-2013 AT&T Intellectual Property *
|
|
* Copyright (c) 2020-2021 Contributors to ksh 93u+m *
|
|
* and is licensed under the *
|
|
* Eclipse Public License, Version 1.0 *
|
|
* by AT&T Intellectual Property *
|
|
* *
|
|
* A copy of the License is available at *
|
|
* http://www.eclipse.org/org/documents/epl-v10.html *
|
|
* (with md5 checksum b35adb5213ca9657e911e9befb180842) *
|
|
* *
|
|
* Information and Software Systems Research *
|
|
* AT&T Research *
|
|
* Florham Park NJ *
|
|
* *
|
|
* Glenn Fowler <gsf@research.att.com> *
|
|
* David Korn <dgk@research.att.com> *
|
|
* Phong Vo <kpv@research.att.com> *
|
|
* *
|
|
***********************************************************************/
|
|
|
|
/*
|
|
* POSIX regex record executor
|
|
* multiple record sized-buffer interface
|
|
*/
|
|
|
|
#include "reglib.h"
|
|
|
|
/*
|
|
* call regnexec() on records selected by Boyer-Moore
|
|
*/
|
|
|
|
int
|
|
regrexec_20120528(const regex_t* p, const char* s, size_t len, size_t nmatch, regmatch_t* match, regflags_t flags, int sep, void* handle, regrecord_t record)
|
|
{
|
|
register unsigned char* buf;
|
|
register unsigned char* beg;
|
|
register unsigned char* l;
|
|
register unsigned char* r;
|
|
register unsigned char* x;
|
|
register size_t* skip;
|
|
register size_t* fail;
|
|
register Bm_mask_t** mask;
|
|
register size_t index;
|
|
register ssize_t n;
|
|
unsigned char* end;
|
|
size_t mid;
|
|
int complete;
|
|
int exactlen;
|
|
int leftlen;
|
|
int rightlen;
|
|
int inv;
|
|
Bm_mask_t m;
|
|
Env_t* env;
|
|
Rex_t* e;
|
|
|
|
if (!s || !p || !(env = p->env) || (e = env->rex)->type != REX_BM)
|
|
return REG_BADPAT;
|
|
inv = (flags & REG_INVERT) != 0;
|
|
buf = beg = (unsigned char*)s;
|
|
end = buf + len;
|
|
mid = (len < e->re.bm.right) ? 0 : (len - e->re.bm.right);
|
|
skip = e->re.bm.skip;
|
|
fail = e->re.bm.fail;
|
|
mask = e->re.bm.mask;
|
|
complete = e->re.bm.complete && !nmatch;
|
|
exactlen = e->re.bm.size;
|
|
leftlen = e->re.bm.left + exactlen;
|
|
rightlen = exactlen + e->re.bm.right;
|
|
index = leftlen++;
|
|
for (;;)
|
|
{
|
|
while (index < mid)
|
|
index += skip[buf[index]];
|
|
if (index < HIT)
|
|
goto impossible;
|
|
index -= HIT;
|
|
m = mask[n = exactlen - 1][buf[index]];
|
|
do
|
|
{
|
|
if (!n--)
|
|
goto possible;
|
|
} while (m &= mask[n][buf[--index]]);
|
|
if ((index += fail[n + 1]) < len)
|
|
continue;
|
|
impossible:
|
|
if (inv)
|
|
{
|
|
l = r = buf + len;
|
|
goto invert;
|
|
}
|
|
n = 0;
|
|
goto done;
|
|
possible:
|
|
r = (l = buf + index) + exactlen;
|
|
while (l > beg)
|
|
if (*--l == sep)
|
|
{
|
|
l++;
|
|
break;
|
|
}
|
|
if ((r - l) < leftlen)
|
|
goto spanned;
|
|
while (r < end && *r != sep)
|
|
r++;
|
|
if ((r - (buf + index)) < rightlen)
|
|
goto spanned;
|
|
if (complete || (env->rex = ((r - l) > 128) ? e : e->next) && !(n = regnexec(p, (char*)l, r - l, nmatch, match, flags)))
|
|
{
|
|
if (inv)
|
|
{
|
|
invert:
|
|
x = beg;
|
|
while (beg < l)
|
|
{
|
|
while (x < l && *x != sep)
|
|
x++;
|
|
if (n = (*record)(handle, (char*)beg, x - beg))
|
|
goto done;
|
|
beg = ++x;
|
|
}
|
|
}
|
|
else if (n = (*record)(handle, (char*)l, r - l))
|
|
goto done;
|
|
if ((index = (r - buf) + leftlen) >= len)
|
|
{
|
|
n = (inv && (++r - buf) < len) ? (*record)(handle, (char*)r, (buf + len) - r): 0;
|
|
goto done;
|
|
}
|
|
beg = r + 1;
|
|
}
|
|
else if (n != REG_NOMATCH)
|
|
goto done;
|
|
else
|
|
{
|
|
spanned:
|
|
if ((index += exactlen) >= mid)
|
|
goto impossible;
|
|
}
|
|
}
|
|
done:
|
|
env->rex = e;
|
|
return n;
|
|
}
|
|
|
|
/*
|
|
* 20120528: regoff_t changed from int to ssize_t
|
|
*/
|
|
|
|
#if defined(__EXPORT__)
|
|
#define extern __EXPORT__
|
|
#endif
|
|
|
|
#undef regrexec
|
|
#if _map_libc
|
|
#define regrexec _ast_regrexec
|
|
#endif
|
|
|
|
extern int
|
|
regrexec(const regex_t* p, const char* s, size_t len, size_t nmatch, oldregmatch_t* oldmatch, regflags_t flags, int sep, void* handle, regrecord_t record)
|
|
{
|
|
if (oldmatch)
|
|
{
|
|
regmatch_t* match;
|
|
ssize_t i;
|
|
int r;
|
|
|
|
if (!(match = oldof(0, regmatch_t, nmatch, 0)))
|
|
return -1;
|
|
if (!(r = regrexec_20120528(p, s, len, nmatch, match, flags, sep, handle, record)))
|
|
for (i = 0; i < nmatch; i++)
|
|
{
|
|
oldmatch[i].rm_so = match[i].rm_so;
|
|
oldmatch[i].rm_eo = match[i].rm_eo;
|
|
}
|
|
free(match);
|
|
return r;
|
|
}
|
|
return regrexec_20120528(p, s, len, 0, NiL, flags, sep, handle, record);
|
|
}
|