1
0
Fork 0
mirror of git://git.code.sf.net/p/cdesktopenv/code synced 2025-03-09 15:50:02 +00:00
cde/src/lib/libast/regex/regcoll.c
Martijn Dekker 1aa8f771d8 libast: regex: backport robustness improvements from 93v- beta
There are two main changes:

1. The regex code now creates and uses its own stack (env->mst)
   instead of using the shared standard stack (stkstd). That seems
   likely to be a good thing.

2. Missing mbinit() calls were inserted. The 93v- code uses a
   completely different multibyte characters API, so these needed
   to be translated back to the older API. But, as mbinit() is no
   longer a no-op as of 300cd199, these calls do stop things from
   breaking if a previous operation is interrupted mid-character.

I think there might be a couple of off-by-one errors fixed as well,
as there are two instances of this change:

-		while ((index += skip[buf[index]]) < mid);
+		while (index < mid)
+			index += skip[buf[index]];
2021-12-15 00:50:59 +01:00

122 lines
3.3 KiB
C

/***********************************************************************
* *
* This software is part of the ast package *
* Copyright (c) 1985-2012 AT&T Intellectual Property *
* Copyright (c) 2020-2021 Contributors to ksh 93u+m *
* and is licensed under the *
* Eclipse Public License, Version 1.0 *
* by AT&T Intellectual Property *
* *
* A copy of the License is available at *
* http://www.eclipse.org/org/documents/epl-v10.html *
* (with md5 checksum b35adb5213ca9657e911e9befb180842) *
* *
* Information and Software Systems Research *
* AT&T Research *
* Florham Park NJ *
* *
* Glenn Fowler <gsf@research.att.com> *
* David Korn <dgk@research.att.com> *
* Phong Vo <kpv@research.att.com> *
* *
***********************************************************************/
#pragma prototyped
/*
* regex collation symbol support
*/
#include "reglib.h"
/*
 * parse the body of a bracket-expression collating symbol [.xxx.]
 * or equivalence class [=xxx=]
 *
 *	s	points at the delimiter ('.' or '=') just after the opening [
 *	e	if non-null, receives the position in s where parsing ended:
 *		just past the closing ] on success, otherwise the point at
 *		which scanning gave up
 *	buf	receives the symbol text; size is its capacity in bytes
 *	wc	if non-null, receives the wide character value of a single
 *		character symbol, or 0 for a multi-character collating element
 *
 * the return value is:
 *	-1	syntax error / invalid collating element
 *	>=0	number of bytes of s occupied by the symbol text; buf holds
 *		the symbol and is 0-terminated when there is room for it
 */

int
regcollate(register const char* s, char** e, char* buf, size_t size, wchar_t* wc)
{
	register int		ch;
	register char*		dst;
	register char*		lim;
	const char*		start;
	int			n;
	int			len;
	int			delim;
	wchar_t			wide;
	char			key[256];
	char			probe[sizeof(key)];

	/* the buffer must be able to hold something besides a terminator */
	if (size < 2)
		goto nope;

	/* only '.' and '=' introduce a symbol */
	delim = *s;
	if (delim != '.' && delim != '=')
		goto nope;

	/* step over the delimiter; an empty symbol ([..] or [==]) is invalid */
	s++;
	if (!*s)
		goto nope;
	if (*s == delim && s[1] == ']')
		goto nope;

	/* decode the first character; mbchar() moves s past it */
	start = s;
	mbinit();
	wide = mbchar(s);
	len = s - start;
	if (len > 1)
	{
		/* a multibyte character must be followed directly by delim ] */
		if (*s++ != delim)
			goto oops;
		if (*s++ != ']')
			goto oops;
		goto done;
	}
	if (*s == delim && s[1] == ']')
	{
		/* exactly one character between the delimiters */
		s += 2;
		goto done;
	}

	/*
	 * longer symbol: rescan from the start, copying into buf
	 * a doubled delimiter stands for one literal delimiter character
	 * two bytes of buf are held back, so copying stops at size-2 bytes
	 */

	dst = buf;
	lim = buf + size - 2;
	s = start;
	for (;;)
	{
		ch = *s++;
		if (!ch)
			goto oops;
		if (ch == delim)
		{
			ch = *s++;
			if (!ch)
				goto oops;
			if (ch == ']')
				break;
			if (ch != delim)
				goto oops;
		}
		if (dst < lim)
			*dst++ = ch;
	}
	len = s - start - 2;
	wide = 0;

	/* copy area filled up: skip validation and hand back the raw text */
	if (dst >= lim)
		goto done;
	*dst = 0;

	/*
	 * compare the transformed symbol against a transformed string of
	 * '0' characters of the same raw length; the symbol is rejected
	 * unless its mbxfrm() result is the smaller of the two
	 * NOTE(review): presumably mbxfrm() returns a key length in the
	 * manner of strxfrm() -- confirm against the ast headers
	 */

	n = 0;
	while (n < len && n < sizeof(probe) - 1)
		probe[n++] = '0';
	probe[n] = 0;
	if (mbxfrm(key, buf, sizeof(key)) >= mbxfrm(key, probe, sizeof(key)))
		goto nope;
	start = (const char*)buf;
 done:
	/* unless buf already holds the text, copy it in when it fits */
	if ((char*)start != buf && len <= size)
	{
		memcpy(buf, start, len);
		if (len < size)
			buf[len] = 0;
	}
	if (wc)
		*wc = wide;
	if (e)
		*e = (char*)s;
	return len;
 oops:
	/* back up so that *e refers to the offending character */
	s--;
 nope:
	if (e)
		*e = (char*)s;
	return -1;
}