1
0
Fork 0
mirror of git://git.code.sf.net/p/cdesktopenv/code synced 2025-03-09 15:50:02 +00:00

doc2sdl: use POSIX regex functions.

This commit is contained in:
Liang Chang 2021-12-21 01:53:53 +08:00
parent 73078fb103
commit 1e9b4660ad
28 changed files with 31 additions and 2371 deletions

View file

@ -1,11 +1,12 @@
MAINTAINERCLEANFILES = Makefile.in MAINTAINERCLEANFILES = Makefile.in
pkglibexec_PROGRAMS = instant include ../doc2sdl.am
instant_CFLAGS = -I../lib/tptregexp $(DT_INCDIR) doc2sdllibexec_PROGRAMS = instant
instant_LDADD = $(DTCLIENTLIBS) $(XTOOLLIB) -L../lib/tptregexp \ instant_CFLAGS = $(DT_INCDIR)
-ltptregexp
instant_LDADD = $(DTCLIENTLIBS) $(XTOOLLIB)
instant_CFLAGS += @TCL_INCLUDE_SPEC@ instant_CFLAGS += @TCL_INCLUDE_SPEC@
instant_LDADD += @TCL_LIB_SPEC@ instant_LDADD += @TCL_LIB_SPEC@

View file

@ -76,7 +76,6 @@ static char *RCSid =
#include <sys/types.h> #include <sys/types.h>
#include <errno.h> #include <errno.h>
#include <tptregexp.h>
#include "general.h" #include "general.h"
#include "translate.h" #include "translate.h"

View file

@ -67,7 +67,6 @@ static char *RCSid =
#include <sys/types.h> #include <sys/types.h>
#include <errno.h> #include <errno.h>
#include <tptregexp.h>
#include "general.h" #include "general.h"
#include "translate.h" #include "translate.h"
@ -262,7 +261,7 @@ RememberTransSpec(
if (do_regex) { if (do_regex) {
t->depth = MAX_DEPTH; t->depth = MAX_DEPTH;
if (!(t->context_re=tpt_regcomp(t->context))) { if (regcomp(&t->context_re, t->context, 0)) {
fprintf(stderr, "Regex error in Context: %s\n", t->context); fprintf(stderr, "Regex error in Context: %s\n", t->context);
} }
} }
@ -300,16 +299,15 @@ RememberTransSpec(
else { /* value not found */ else { /* value not found */
t->attpair[i].val = "."; t->attpair[i].val = ".";
} }
if (!(t->attpair[i].rex=tpt_regcomp(t->attpair[i].val))) { if (regcomp(&t->attpair[i].rex, t->attpair[i].val, 0)) {
fprintf(stderr, "Regex error in AttValue: %s %s\n", fprintf(stderr, "Regex error in AttValue: %s %s\n",
t->attpair[i].name, t->attpair[i].val); t->attpair[i].name, t->attpair[i].val);
} }
} }
/* Compile regular expression for content */ /* Compile regular expression for content */
t->content_re = 0;
if (t->content) { if (t->content) {
if (!(t->content_re=tpt_regcomp(t->content))) if (regcomp(&t->content_re, t->content, 0))
fprintf(stderr, "Regex error in Content: %s\n", fprintf(stderr, "Regex error in Content: %s\n",
t->content); t->content);
} }

View file

@ -64,7 +64,6 @@ static char *RCSid =
#include <sys/types.h> #include <sys/types.h>
#include <errno.h> #include <errno.h>
#include <tptregexp.h>
#include "general.h" #include "general.h"
#define STORAGE #define STORAGE
#include "translate.h" #include "translate.h"
@ -137,8 +136,15 @@ DoTranslate(
* program is normally done at this point anyway. */ * program is normally done at this point anyway. */
for (t=TrSpecs; t; ) { for (t=TrSpecs; t; ) {
tn = t->next; tn = t->next;
regfree(&t->context_re);
regfree(&t->content_re);
for (int i = 0; i < t->nattpairs; ++i) regfree(&t->attpair[i].rex);
/* free the contents of t here ... */ /* free the contents of t here ... */
(void)free((void* )t); (void)free((void* )t);
t = tn; t = tn;
} }
TrSpecs = 0; TrSpecs = 0;
@ -268,7 +274,7 @@ ExpandVariables(
if ( modifier && *modifier == 'l' ) { if ( modifier && *modifier == 'l' ) {
while (*s) { while (*s) {
*op = tolower(*s); *op = tolower(*s);
op++, *s++; op++, s++;
} }
} else } else
while (*s) *op++ = *s++; while (*s) *op++ = *s++;
@ -470,19 +476,15 @@ FindTrans(
if (t->context) { /* no context specified -> a match */ if (t->context) { /* no context specified -> a match */
FindContext(e, t->depth, context); FindContext(e, t->depth, context);
/* If reg expr set, do regex compare; else just string compare. */ if (regexec(&t->context_re, context, 0, NULL, 0)) continue;
if (t->context_re) {
if (! tpt_regexec(t->context_re, context)) continue;
}
else {
/* Is depth of spec deeper than element's depth? */
if (t->depth > e->depth) continue;
/* See if context of element matches "context" of transpec */ /* Is depth of spec deeper than element's depth? */
match = ( (t->context[0] == context[0]) && if (t->depth > e->depth) continue;
/* See if context of element matches "context" of transpec */
match = ( (t->context[0] == context[0]) &&
!strcmp(t->context, context) ); !strcmp(t->context, context) );
if (!match) continue; if (!match) continue;
}
} }
/* Check attributes. Loop through list, comparing each. */ /* Check attributes. Loop through list, comparing each. */
@ -492,7 +494,8 @@ FindTrans(
match = 0; match = 0;
break; break;
} }
if (!tpt_regexec(t->attpair[a].rex, atval)) match = 0; if (regexec(&t->attpair[a].rex, atval, 0, NULL, 0))
match = 0;
} }
if (!match) continue; if (!match) continue;
} }
@ -551,7 +554,7 @@ FindTrans(
/* check content */ /* check content */
if (t->content) { /* no att specified -> a match */ if (t->content) { /* no att specified -> a match */
for (match=0,i=0; i<e->ndcont; i++) { for (match=0,i=0; i<e->ndcont; i++) {
if (tpt_regexec(t->content_re, e->dcont[i])) { if (!regexec(&t->content_re, e->dcont[i], 0, NULL, 0)) {
match = 1; match = 1;
break; break;
} }

View file

@ -48,6 +48,8 @@
* ________________________________________________________________________ * ________________________________________________________________________
*/ */
#include <regex.h>
#ifdef STORAGE #ifdef STORAGE
#ifndef lint #ifndef lint
static char *tr_h_RCSid = static char *tr_h_RCSid =
@ -73,7 +75,7 @@ typedef enum { CONT_CONTINUE, CONT_BREAK } ContParse_t;
typedef struct { typedef struct {
char *name; /* attribute name string */ char *name; /* attribute name string */
char *val; /* attribute value string */ char *val; /* attribute value string */
regexp *rex; /* attribute value reg expr (compiled) */ regex_t rex; /* attribute value reg expr (compiled) */
} AttPair_t; } AttPair_t;
typedef struct _Trans { typedef struct _Trans {
@ -81,14 +83,14 @@ typedef struct _Trans {
char *gi; /* element name of tag under consideration */ char *gi; /* element name of tag under consideration */
char **gilist; /* list of element names (multiple gi's) */ char **gilist; /* list of element names (multiple gi's) */
char *context; /* context in tree - looking depth levels up */ char *context; /* context in tree - looking depth levels up */
regexp *context_re; /* tree hierarchy looking depth levels up */ regex_t context_re; /* tree hierarchy looking depth levels up */
int depth; /* number of levels to look up the tree */ int depth; /* number of levels to look up the tree */
AttPair_t *attpair; /* attr name-value pairs */ AttPair_t *attpair; /* attr name-value pairs */
int nattpairs; /* number of name-value pairs */ int nattpairs; /* number of name-value pairs */
char *parent; /* GI has this element as parent */ char *parent; /* GI has this element as parent */
int nth_child; /* GI is Nth child of this of parent element */ int nth_child; /* GI is Nth child of this of parent element */
char *content; /* element has this string in content */ char *content; /* element has this string in content */
regexp *content_re; /* content reg expr (compiled) */ regex_t content_re; /* content reg expr (compiled) */
char *pattrset; /* is this attr set (any value) in parent? */ char *pattrset; /* is this attr set (any value) in parent? */
char *var_name; /* variable name */ char *var_name; /* variable name */
char *var_value; /* variable value */ char *var_value; /* variable value */

View file

@ -61,7 +61,6 @@ static char *RCSid =
#include <sys/types.h> #include <sys/types.h>
#include <errno.h> #include <errno.h>
#include <tptregexp.h>
#include "general.h" #include "general.h"
#include "translate.h" #include "translate.h"

View file

@ -1,3 +0,0 @@
MAINTAINERCLEANFILES = Makefile.in
SUBDIRS = tptregexp

View file

@ -1,5 +0,0 @@
MAINTAINERCLEANFILES = Makefile.in
noinst_LIBRARIES = libtptregexp.a
libtptregexp_a_SOURCES = regexp.c regsub.c regerror.c

View file

@ -1,101 +0,0 @@
/* $XConsortium: README /main/2 1996/07/15 14:09:54 drk $ */
#
# Copyright (c) 1994
# Open Software Foundation, Inc.
#
# Permission is hereby granted to use, copy, modify and freely distribute
# the software in this file and its documentation for any purpose without
# fee, provided that the above copyright notice appears in all copies and
# that both the copyright notice and this permission notice appear in
# supporting documentation. Further, provided that the name of Open
# Software Foundation, Inc. ("OSF") not be used in advertising or
# publicity pertaining to distribution of the software without prior
# written permission from OSF. OSF makes no representations about the
# suitability of this software for any purpose. It is provided "as is"
# without express or implied warranty.
#
This is a nearly-public-domain reimplementation of the V8 regexp(3) package.
It gives C programs the ability to use egrep-style regular expressions, and
does it in a much cleaner fashion than the analogous routines in SysV.
Copyright (c) 1986 by University of Toronto.
Written by Henry Spencer. Not derived from licensed software.
Permission is granted to anyone to use this software for any
purpose on any computer system, and to redistribute it freely,
subject to the following restrictions:
1. The author is not responsible for the consequences of use of
this software, no matter how awful, even if they arise
from defects in it.
2. The origin of this software must not be misrepresented, either
by explicit claim or by omission.
3. Altered versions must be plainly marked as such, and must not
be misrepresented as being the original software.
Barring a couple of small items in the BUGS list, this implementation is
believed 100% compatible with V8. It should even be binary-compatible,
sort of, since the only fields in a "struct regexp" that other people have
any business touching are declared in exactly the same way at the same
location in the struct (the beginning).
This implementation is *NOT* AT&T/Bell code, and is not derived from licensed
software, even though U of T is a V8 licensee. This software is based on
a V8 manual page sent to me by Dennis Ritchie (the manual page enclosed
here is a complete rewrite and hence is not covered by AT&T copyright).
The software was nearly complete at the time of arrival of our V8 tape.
I haven't even looked at V8 yet, although a friend elsewhere at U of T has
been kind enough to run a few test programs using the V8 regexp(3) to resolve
a few fine points. I admit to some familiarity with regular-expression
implementations of the past, but the only one that this code traces any
ancestry to is the one published in Kernighan & Plauger (from which this
one draws ideas but not code).
Simplistically: put this stuff into a source directory, copy regexp.h into
/usr/include, inspect Makefile for compilation options that need changing
to suit your local environment, and then do "make r". This compiles the
regexp(3) functions, compiles a test program, and runs a large set of
regression tests. If there are no complaints, then put regexp.o, regsub.o,
and regerror.o into your C library, and regexp.3 into your manual-pages
directory.
Note that if you don't put regexp.h into /usr/include *before* compiling,
you'll have to add "-I." to CFLAGS before compiling.
The files are:
Makefile instructions to make everything
regexp.3 manual page
regexp.h header file, for /usr/include
regexp.c source for regcomp() and regexec()
regsub.c source for regsub()
regerror.c source for default regerror()
regmagic.h internal header file
try.c source for test program
timer.c source for timing program
tests test list for try and timer
This implementation uses nondeterministic automata rather than the
deterministic ones found in some other implementations, which makes it
simpler, smaller, and faster at compiling regular expressions, but slower
at executing them. In theory, anyway. This implementation does employ
some special-case optimizations to make the simpler cases (which do make
up the bulk of regular expressions actually used) run quickly. In general,
if you want blazing speed you're in the wrong place. Replacing the insides
of egrep with this stuff is probably a mistake; if you want your own egrep
you're going to have to do a lot more work. But if you want to use regular
expressions a little bit in something else, you're in luck. Note that many
existing text editors use nondeterministic regular-expression implementations,
so you're in good company.
This stuff should be pretty portable, given appropriate option settings.
If your chars have fewer than 8 bits, you're going to have to change the
internal representation of the automaton, although knowledge of the details
of this is fairly localized. There are no "reserved" char values except for
NUL, and no special significance is attached to the top bit of chars.
The string(3) functions are used a fair bit, on the grounds that they are
probably faster than coding the operations in line. Some attempts at code
tuning have been made, but this is invariably a bit machine-specific.

View file

@ -1,37 +0,0 @@
/* $XConsortium: README.osf /main/2 1996/07/15 14:10:16 drk $ */
#
# Copyright (c) 1994
# Open Software Foundation, Inc.
#
# Permission is hereby granted to use, copy, modify and freely distribute
# the software in this file and its documentation for any purpose without
# fee, provided that the above copyright notice appears in all copies and
# that both the copyright notice and this permission notice appear in
# supporting documentation. Further, provided that the name of Open
# Software Foundation, Inc. ("OSF") not be used in advertising or
# publicity pertaining to distribution of the software without prior
# written permission from OSF. OSF makes no representations about the
# suitability of this software for any purpose. It is provided "as is"
# without express or implied warranty.
#
OSF did not write this code.
Changes made:
Renamed "regexp.h" to "tptregexp.h" to avoid conflicts with systems that
have a /usr/include/regexp.h. Changed the *.c files in the appropriate
places.
Renamed these routines:
regcomp() -> tpt_regcomp()
regexec() -> tpt_regexec()
regsub() -> tpt_regsub()
regerror() -> tpt_regerror()
This is to avoid conflicts in the standard C library.
In Makefile - wrote "all" and "install" targets. Changed "regexp.h"
to "tptregexp.h", as appropriate.
In regexp.c - added #include <string.h> to keep compiler happy.

View file

@ -1,37 +0,0 @@
/*
* CDE - Common Desktop Environment
*
* Copyright (c) 1993-2012, The Open Group. All rights reserved.
*
* These libraries and programs are free software; you can
* redistribute them and/or modify them under the terms of the GNU
* Lesser General Public License as published by the Free Software
* Foundation; either version 2 of the License, or (at your option)
* any later version.
*
* These libraries and programs are distributed in the hope that
* they will be useful, but WITHOUT ANY WARRANTY; without even the
* implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
* PURPOSE. See the GNU Lesser General Public License for more
* details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with these libraries and programs; if not, write
* to the Free Software Foundation, Inc., 51 Franklin Street, Fifth
* Floor, Boston, MA 02110-1301 USA
*/
/* $XConsortium: regerror.c /main/3 1996/06/19 17:13:36 drk $ */
#include <stdio.h>
#include <stdlib.h>
/*
 * tpt_regerror - default error handler for the tpt_regexp routines.
 *
 * s is the diagnostic text supplied by the caller.  When ERRAVAIL is
 * defined the message is handed to an external error() routine; otherwise
 * it is written to stderr as a complete, newline-terminated line and the
 * process exits with status 1.
 */
void
tpt_regerror(char *s)
{
#ifdef ERRAVAIL
    error("tpt_regexp: %s", s);
#else
    /* Terminate the line so the diagnostic does not run into whatever
     * is printed next on the terminal. */
    fprintf(stderr, "tpt_regexp(3): %s\n", s);
    exit(1);
#endif
    /* NOTREACHED */
}

View file

@ -1,180 +0,0 @@
.\" $XConsortium: regexp.3 /main/2 1996/10/29 15:06:28 drk $
.TH REGEXP 3 local
.DA 30 Nov 1985
.SH NAME
regcomp, regexec, regsub, regerror \- regular expression handler
.SH SYNOPSIS
.ft B
.nf
#include <regexp.h>
regexp *regcomp(exp)
char *exp;
int regexec(prog, string)
regexp *prog;
char *string;
regsub(prog, source, dest)
regexp *prog;
char *source;
char *dest;
regerror(msg)
char *msg;
.SH DESCRIPTION
These functions implement
.IR egrep (1)-style
regular expressions and supporting facilities.
.PP
.I Regcomp
compiles a regular expression into a structure of type
.IR regexp ,
and returns a pointer to it.
The space has been allocated using
.IR malloc (3)
and may be released by
.IR free .
.PP
.I Regexec
matches a NUL-terminated \fIstring\fR against the compiled regular expression
in \fIprog\fR.
It returns 1 for success and 0 for failure, and adjusts the contents of
\fIprog\fR's \fIstartp\fR and \fIendp\fR (see below) accordingly.
.PP
The members of a
.I regexp
structure include at least the following (not necessarily in order):
.PP
.RS
char *startp[NSUBEXP];
.br
char *endp[NSUBEXP];
.RE
.PP
where
.I NSUBEXP
is defined (as 10) in the header file.
Once a successful \fIregexec\fR has been done using the \fIregexp\fR,
each \fIstartp\fR-\fIendp\fR pair describes one substring
within the \fIstring\fR,
with the \fIstartp\fR pointing to the first character of the substring and
the \fIendp\fR pointing to the first character following the substring.
The 0th substring is the substring of \fIstring\fR that matched the whole
regular expression.
The others are those substrings that matched parenthesized expressions
within the regular expression, with parenthesized expressions numbered
in left-to-right order of their opening parentheses.
.PP
.I Regsub
copies \fIsource\fR to \fIdest\fR, making substitutions according to the
most recent \fIregexec\fR performed using \fIprog\fR.
Each instance of `&' in \fIsource\fR is replaced by the substring
indicated by \fIstartp\fR[\fI0\fR] and
\fIendp\fR[\fI0\fR].
Each instance of `\e\fIn\fR', where \fIn\fR is a digit, is replaced by
the substring indicated by
\fIstartp\fR[\fIn\fR] and
\fIendp\fR[\fIn\fR].
To get a literal `&' or `\e\fIn\fR' into \fIdest\fR, prefix it with `\e';
to get a literal `\e' preceding `&' or `\e\fIn\fR', prefix it with
another `\e'.
.PP
.I Regerror
is called whenever an error is detected in \fIregcomp\fR, \fIregexec\fR,
or \fIregsub\fR.
The default \fIregerror\fR writes the string \fImsg\fR,
with a suitable indicator of origin,
on the standard
error output
and invokes \fIexit\fR(2).
.I Regerror
can be replaced by the user if other actions are desirable.
.SH "REGULAR EXPRESSION SYNTAX"
A regular expression is zero or more \fIbranches\fR, separated by `|'.
It matches anything that matches one of the branches.
.PP
A branch is zero or more \fIpieces\fR, concatenated.
It matches a match for the first, followed by a match for the second, etc.
.PP
A piece is an \fIatom\fR possibly followed by `*', `+', or `?'.
An atom followed by `*' matches a sequence of 0 or more matches of the atom.
An atom followed by `+' matches a sequence of 1 or more matches of the atom.
An atom followed by `?' matches a match of the atom, or the null string.
.PP
An atom is a regular expression in parentheses (matching a match for the
regular expression), a \fIrange\fR (see below), `.'
(matching any single character), `^' (matching the null string at the
beginning of the input string), `$' (matching the null string at the
end of the input string), a `\e' followed by a single character (matching
that character), or a single character with no other significance
(matching that character).
.PP
A \fIrange\fR is a sequence of characters enclosed in `[]'.
It normally matches any single character from the sequence.
If the sequence begins with `^',
it matches any single character \fInot\fR from the rest of the sequence.
If two characters in the sequence are separated by `\-', this is shorthand
for the full list of ASCII characters between them
(e.g. `[0-9]' matches any decimal digit).
To include a literal `]' in the sequence, make it the first character
(following a possible `^').
To include a literal `\-', make it the first or last character.
.SH AMBIGUITY
If a regular expression could match two different parts of the input string,
it will match the one which begins earliest.
If both begin in the same place but match different lengths, or match
the same length in different ways, life gets messier, as follows.
.PP
In general, the possibilities in a list of branches are considered in
left-to-right order, the possibilities for `*', `+', and `?' are
considered longest-first, nested constructs are considered from the
outermost in, and concatenated constructs are considered leftmost-first.
The match that will be chosen is the one that uses the earliest
possibility in the first choice that has to be made.
If there is more than one choice, the next will be made in the same manner
(earliest possibility) subject to the decision on the first choice.
And so forth.
.PP
For example, `(ab|a)b*c' could match `abc' in one of two ways.
The first choice is between `ab' and `a'; since `ab' is earlier, and does
lead to a successful overall match, it is chosen.
Since the `b' is already spoken for,
the `b*' must match its last possibility\(emthe empty string\(emsince
it must respect the earlier choice.
.PP
In the particular case where no `|'s are present and there is only one
`*', `+', or `?', the net effect is that the longest possible
match will be chosen.
So `ab*', presented with `xabbbby', will match `abbbb'.
Note that if `ab*' is tried against `xabyabbbz', it
will match `ab' just after `x', due to the begins-earliest rule.
(In effect, the decision on where to start the match is the first choice
to be made, hence subsequent choices must respect it even if this leads them
to less-preferred alternatives.)
.SH SEE ALSO
egrep(1), expr(1)
.SH DIAGNOSTICS
\fIRegcomp\fR returns NULL for a failure
(\fIregerror\fR permitting),
where failures are syntax errors, exceeding implementation limits,
or applying `+' or `*' to a possibly-null operand.
.SH HISTORY
Both code and manual page were
written at U of T.
They are intended to be compatible with the Bell V8 \fIregexp\fR(3),
but are not derived from Bell code.
.SH BUGS
Empty branches and empty regular expressions are not portable to V8.
.PP
The restriction against
applying `*' or `+' to a possibly-null operand is an artifact of the
simplistic implementation.
.PP
Does not support \fIegrep\fR's newline-separated branches;
neither does the V8 \fIregexp\fR(3), though.
.PP
Due to emphasis on
compactness and simplicity,
it's not strikingly fast.
It does give special attention to handling simple cases quickly.

File diff suppressed because it is too large Load diff

View file

@ -1,28 +0,0 @@
/*
* CDE - Common Desktop Environment
*
* Copyright (c) 1993-2012, The Open Group. All rights reserved.
*
* These libraries and programs are free software; you can
* redistribute them and/or modify them under the terms of the GNU
* Lesser General Public License as published by the Free Software
* Foundation; either version 2 of the License, or (at your option)
* any later version.
*
* These libraries and programs are distributed in the hope that
* they will be useful, but WITHOUT ANY WARRANTY; without even the
* implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
* PURPOSE. See the GNU Lesser General Public License for more
* details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with these libraries and programs; if not, write
* to the Free Software Foundation, Inc., 51 Franklin Street, Fifth
* Floor, Boston, MA 02110-1301 USA
*/
/* $XConsortium: regmagic.h /main/3 1996/06/19 17:13:41 drk $ */
/*
* The first byte of the regexp internal "program" is actually this magic
* number; the start node begins in the second byte.
*/
#define MAGIC 0234

View file

@ -1,101 +0,0 @@
/*
* CDE - Common Desktop Environment
*
* Copyright (c) 1993-2012, The Open Group. All rights reserved.
*
* These libraries and programs are free software; you can
* redistribute them and/or modify them under the terms of the GNU
* Lesser General Public License as published by the Free Software
* Foundation; either version 2 of the License, or (at your option)
* any later version.
*
* These libraries and programs are distributed in the hope that
* they will be useful, but WITHOUT ANY WARRANTY; without even the
* implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
* PURPOSE. See the GNU Lesser General Public License for more
* details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with these libraries and programs; if not, write
* to the Free Software Foundation, Inc., 51 Franklin Street, Fifth
* Floor, Boston, MA 02110-1301 USA
*/
/* $XConsortium: regsub.c /main/3 1996/06/19 17:13:46 drk $ */
/*
* regsub
*
* Copyright (c) 1986 by University of Toronto.
* Written by Henry Spencer. Not derived from licensed software.
*
* Permission is granted to anyone to use this software for any
* purpose on any computer system, and to redistribute it freely,
* subject to the following restrictions:
*
* 1. The author is not responsible for the consequences of use of
* this software, no matter how awful, even if they arise
* from defects in it.
*
* 2. The origin of this software must not be misrepresented, either
* by explicit claim or by omission.
*
* 3. Altered versions must be plainly marked as such, and must not
* be misrepresented as being the original software.
*/
#include <stdio.h>
#include <string.h>
#include <tptregexp.h>
#include "regmagic.h"
/*
 * UCHARAT(p) - read the char that p points at and yield it as an int.
 * By default the pointer is cast to unsigned char * before the read;
 * when CHARBITS is defined the value is masked with CHARBITS instead.
 */
#ifndef CHARBITS
#define UCHARAT(p) ((int)*(unsigned char *)(p))
#else
#define UCHARAT(p) ((int)*(p)&CHARBITS)
#endif
/*
- regsub - perform substitutions after a regexp match
*/
/*
 * Copy `source' into `dest', expanding match references taken from `prog':
 *   &        -> the text between prog->startp[0] and prog->endp[0]
 *   \N       -> the text between prog->startp[N] and prog->endp[N] (N = 0..9)
 *   \\ , \&  -> a literal backslash / ampersand
 * A reference whose startp or endp entry is NULL expands to nothing.
 * Problems are reported through tpt_regerror().  No length is checked for
 * `dest'; the caller must supply a buffer large enough for the result.
 */
void
tpt_regsub(regexp *prog, char *source, char *dest)
{
    char *in;
    char *out;

    if (prog == NULL || source == NULL || dest == NULL) {
        tpt_regerror("NULL parm to regsub");
        return;
    }
    if (UCHARAT(prog->program) != MAGIC) {
        tpt_regerror("damaged regexp fed to regsub");
        return;
    }

    out = dest;
    for (in = source; *in != '\0'; ) {
        char ch = *in++;
        int group;
        int span;

        if (ch == '&') {
            group = 0;
        } else if (ch == '\\' && *in >= '0' && *in <= '9') {
            group = *in++ - '0';
        } else {
            /* Ordinary character; a backslash only escapes '\\' or '&'. */
            if (ch == '\\' && (*in == '\\' || *in == '&'))
                ch = *in++;
            *out++ = ch;
            continue;
        }

        /* Reference to a group that has no recorded span: emit nothing. */
        if (prog->startp[group] == NULL || prog->endp[group] == NULL)
            continue;

        span = prog->endp[group] - prog->startp[group];
        (void) strncpy(out, prog->startp[group], span);
        out += span;
        /* strncpy pads with NULs once it meets one inside the span, so a
         * NUL in the last copied position means the span was damaged. */
        if (span != 0 && out[-1] == '\0') {
            tpt_regerror("damaged match string");
            return;
        }
    }
    *out = '\0';
}

View file

@ -1,127 +0,0 @@
abc abc y & abc
abc xbc n - -
abc axc n - -
abc abx n - -
abc xabcy y & abc
abc ababc y & abc
ab*c abc y & abc
ab*bc abc y & abc
ab*bc abbc y & abbc
ab*bc abbbbc y & abbbbc
ab+bc abbc y & abbc
ab+bc abc n - -
ab+bc abq n - -
ab+bc abbbbc y & abbbbc
ab?bc abbc y & abbc
ab?bc abc y & abc
ab?bc abbbbc n - -
ab?c abc y & abc
^abc$ abc y & abc
^abc$ abcc n - -
^abc abcc y & abc
^abc$ aabc n - -
abc$ aabc y & abc
^ abc y &
$ abc y &
a.c abc y & abc
a.c axc y & axc
a.*c axyzc y & axyzc
a.*c axyzd n - -
a[bc]d abc n - -
a[bc]d abd y & abd
a[b-d]e abd n - -
a[b-d]e ace y & ace
a[b-d] aac y & ac
a[-b] a- y & a-
a[b-] a- y & a-
a[b-a] - c - -
a[]b - c - -
a[ - c - -
a] a] y & a]
a[]]b a]b y & a]b
a[^bc]d aed y & aed
a[^bc]d abd n - -
a[^-b]c adc y & adc
a[^-b]c a-c n - -
a[^]b]c a]c n - -
a[^]b]c adc y & adc
ab|cd abc y & ab
ab|cd abcd y & ab
()ef def y &-\1 ef-
()* - c - -
*a - c - -
^* - c - -
$* - c - -
(*)b - c - -
$b b n - -
a\ - c - -
a\(b a(b y &-\1 a(b-
a\(*b ab y & ab
a\(*b a((b y & a((b
a\\b a\b y & a\b
abc) - c - -
(abc - c - -
((a)) abc y &-\1-\2 a-a-a
(a)b(c) abc y &-\1-\2 abc-a-c
a+b+c aabbabc y & abc
a** - c - -
a*? - c - -
(a*)* - c - -
(a*)+ - c - -
(a|)* - c - -
(a*|b)* - c - -
(a+|b)* ab y &-\1 ab-b
(a+|b)+ ab y &-\1 ab-b
(a+|b)? ab y &-\1 a-a
[^ab]* cde y & cde
(^)* - c - -
(ab|)* - c - -
)( - c - -
abc y &
abc n - -
a* y &
([abc])*d abbbcd y &-\1 abbbcd-c
([abc])*bcd abcd y &-\1 abcd-a
a|b|c|d|e e y & e
(a|b|c|d|e)f ef y &-\1 ef-e
((a*|b))* - c - -
abcd*efg abcdefg y & abcdefg
ab* xabyabbbz y & ab
ab* xayabbbz y & a
(ab|cd)e abcde y &-\1 cde-cd
[abhgefdc]ij hij y & hij
^(ab|cd)e abcde n x\1y xy
(abc|)ef abcdef y &-\1 ef-
(a|b)c*d abcd y &-\1 bcd-b
(ab|ab*)bc abc y &-\1 abc-a
a([bc]*)c* abc y &-\1 abc-bc
a([bc]*)(c*d) abcd y &-\1-\2 abcd-bc-d
a([bc]+)(c*d) abcd y &-\1-\2 abcd-bc-d
a([bc]*)(c+d) abcd y &-\1-\2 abcd-b-cd
a[bcd]*dcdcde adcdcde y & adcdcde
a[bcd]+dcdcde adcdcde n - -
(ab|a)b*c abc y &-\1 abc-ab
((a)(b)c)(d) abcd y \1-\2-\3-\4 abc-a-b-d
[a-zA-Z_][a-zA-Z0-9_]* alpha y & alpha
^a(bc+|b[eh])g|.h$ abh y &-\1 bh-
(bc+d$|ef*g.|h?i(j|k)) effgz y &-\1-\2 effgz-effgz-
(bc+d$|ef*g.|h?i(j|k)) ij y &-\1-\2 ij-ij-j
(bc+d$|ef*g.|h?i(j|k)) effg n - -
(bc+d$|ef*g.|h?i(j|k)) bcdd n - -
(bc+d$|ef*g.|h?i(j|k)) reffgz y &-\1-\2 effgz-effgz-
((((((((((a)))))))))) - c - -
(((((((((a))))))))) a y & a
multiple words of text uh-uh n - -
multiple words multiple words, yeah y & multiple words
(.*)c(.*) abcde y &-\1-\2 abcde-ab-de
\((.*), (.*)\) (a, b) y (\2, \1) (b, a)
[k] ab n - -
abcd abcd y &-\&-\\& abcd-&-\abcd
a(bc)d abcd y \1-\\1-\\\1 bc-\1-\bc
[ -~]* abc y & abc
[ -~ -~]* abc y & abc
[ -~ -~ -~]* abc y & abc
[ -~ -~ -~ -~]* abc y & abc
[ -~ -~ -~ -~ -~]* abc y & abc
[ -~ -~ -~ -~ -~ -~]* abc y & abc
[ -~ -~ -~ -~ -~ -~ -~]* abc y & abc

View file

@ -1,205 +0,0 @@
/*
* CDE - Common Desktop Environment
*
* Copyright (c) 1993-2012, The Open Group. All rights reserved.
*
* These libraries and programs are free software; you can
* redistribute them and/or modify them under the terms of the GNU
* Lesser General Public License as published by the Free Software
* Foundation; either version 2 of the License, or (at your option)
* any later version.
*
* These libraries and programs are distributed in the hope that
* they will be useful, but WITHOUT ANY WARRANTY; without even the
* implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
* PURPOSE. See the GNU Lesser General Public License for more
* details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with these libraries and programs; if not, write
* to the Free Software Foundation, Inc., 51 Franklin Street, Fifth
* Floor, Boston, MA 02110-1301 USA
*/
/* $XConsortium: timer.c /main/3 1996/06/19 17:13:50 drk $ */
/*
* Simple timing program for regcomp().
*
* Copyright (c) 1986 by University of Toronto.
* Written by Henry Spencer. Not derived from licensed software.
*
* Permission is granted to anyone to use this software for any
* purpose on any computer system, and to redistribute it freely,
* subject to the following restrictions:
*
* 1. The author is not responsible for the consequences of use of
* this software, no matter how awful, even if they arise
* from defects in it.
*
* 2. The origin of this software must not be misrepresented, either
* by explicit claim or by omission.
*
* 3. Altered versions must be plainly marked as such, and must not
* be misrepresented as being the original software.
*
* Usage: timer ncomp nexec nsub
* or
* timer ncomp nexec nsub regexp string [ answer [ sub ] ]
*
* The second form is for timing repetitions of a single test case.
* The first form's test data is a compiled-in copy of the "tests" file.
* Ncomp, nexec, nsub are how many times to do each regcomp, regexec,
* and regsub. The way to time an operation individually is to do something
* like "timer 1 50 1".
*/
#include <stdio.h>
/*
 * One test case, as consumed by try():
 *   re  - regular expression handed to tpt_regcomp()
 *   str - string matched with tpt_regexec()
 *   ans - expected outcome; first char 'c' = compile must fail,
 *         'n' = match must fail, anything else = must compile and match
 *   src - substitution template handed to tpt_regsub()
 *   dst - expected tpt_regsub() result
 * tests[] is filled from timer.t.h and closed by an all-NULL entry, which
 * multiple() uses as its end marker.
 */
struct try {
char *re, *str, *ans, *src, *dst;
} tests[] = {
#include "timer.t.h"
{ NULL, NULL, NULL, NULL, NULL }
};
#include <tptregexp.h>
int errreport = 0; /* Report errors via errseen? */
char *errseen = NULL; /* Error message. */
char *progname;
/* ARGSUSED */
/*
 * Timing driver entry point.
 *
 * argv[1..3] give the repetition counts for compile, exec and substitute;
 * each defaults to 1 when fewer than three arguments are supplied.  When a
 * regexp and a string follow (argv[4], argv[5]) that single case is timed,
 * with an optional expected answer (argv[6], default "y") and substitution
 * source (argv[7]); otherwise the whole built-in test table is run.
 * Always exits with status 0.
 */
int
main(int argc, char *argv[])
{
    int ncomp = 1;
    int nexec = 1;
    int nsub = 1;
    struct try one;

    if (argc >= 4) {
        ncomp = atoi(argv[1]);
        nexec = atoi(argv[2]);
        nsub = atoi(argv[3]);
    }
    progname = argv[0];

    if (argc > 5) {
        one.re = argv[4];
        one.str = argv[5];
        one.ans = (argc > 6) ? argv[6] : "y";
        if (argc > 7) {
            one.src = argv[7];
            one.dst = "xxx";
        } else {
            one.src = "x";
            one.dst = "x";
        }
        /* Single-case mode: collect errors in errseen, don't abort. */
        errreport = 1;
        try(one, ncomp, nexec, nsub);
    } else {
        multiple(ncomp, nexec, nsub);
    }
    exit(0);
}
/*
 * Error hook called with a diagnostic message `s'.  While errreport is
 * set the message is only remembered in errseen so the caller can inspect
 * it; otherwise it is passed on to error().
 */
void
tpt_regerror(char *s)
{
    if (!errreport) {
        error(s, "");
        return;
    }
    errseen = s;
}
#ifndef ERRAVAIL
/*
 * Fallback error reporter, compiled only when ERRAVAIL is not defined.
 * Writes "regexp: ", then s1 interpreted as a printf format with s2 as its
 * single argument, then a newline, all to stderr, and exits with status 1.
 */
error(s1, s2)
char *s1;
char *s2;
{
	fputs("regexp: ", stderr);
	fprintf(stderr, s1, s2);
	fputc('\n', stderr);
	exit(1);
}
#endif
/* Index of the test case being run; bumped by multiple(), printed by complain(). */
int lineno = 0;
/*
 * Run every entry of the compiled-in tests[] table through try(), passing
 * along the repeat counts ncomp, nexec and nsub.  Switches tpt_regerror()
 * into recording mode (errreport = 1) and advances lineno once per entry so
 * that complain() can identify the failing case.
 */
multiple(ncomp, nexec, nsub)
int ncomp, nexec, nsub;
{
	int i;

	errreport = 1;
	/* The table ends at the sentinel entry whose re is NULL. */
	for (i = 0; tests[i].re != NULL; i++) {
		lineno++;
		try(tests[i], ncomp, nexec, nsub);
	}
}
/*
 * Run one test case, repeating each phase the requested number of times.
 *
 *   fields  the test case (regexp, subject string, expected answer,
 *           substitution source, expected substitution result)
 *   ncomp   how many times to compile fields.re in total
 *   nexec   how many times to match fields.str in total
 *   nsub    how many times to run the substitution
 *
 * The first compile and the first match double as the correctness check
 * against fields.ans ('c' = compile must fail, 'n' = match must fail).
 * Any mismatch is reported through complain() and ends the test early.
 */
try(fields, ncomp, nexec, nsub)
struct try fields;
int ncomp, nexec, nsub;
{
	regexp *r;
	char dbuf[BUFSIZ];
	int i;

	errseen = NULL;
	r = tpt_regcomp(fields.re);
	if (r == NULL) {
		if (*fields.ans != 'c')
			complain("tpt_regcomp failure in `%s'", fields.re);
		return;
	}
	if (*fields.ans == 'c') {
		complain("unexpected tpt_regcomp success in `%s'", fields.re);
		goto done;
	}

	/* The compile above already counts as one of the ncomp rounds. */
	for (i = ncomp-1; i > 0; i--) {
		free((char *)r);
		r = tpt_regcomp(fields.re);
	}

	if (!tpt_regexec(r, fields.str)) {
		/* Name the regexp in the message; the format expects it. */
		if (*fields.ans != 'n')
			complain("tpt_regexec failure in `%s'", fields.re);
		goto done;
	}
	if (*fields.ans == 'n') {
		complain("unexpected tpt_regexec success", "");
		goto done;
	}

	/* The match above already counts as one of the nexec rounds. */
	for (i = nexec-1; i > 0; i--)
		(void) tpt_regexec(r, fields.str);

	errseen = NULL;
	for (i = nsub; i > 0; i--)
		tpt_regsub(r, fields.src, dbuf);
	if (errseen != NULL) {
		complain("tpt_regsub complaint", "");
		goto done;
	}
	/*
	 * dbuf only holds a result if at least one substitution ran; with
	 * nsub <= 0 it is uninitialized and must not be read.
	 */
	if (nsub > 0 && strcmp(dbuf, fields.dst) != 0)
		complain("tpt_regsub result `%s' wrong", dbuf);

done:
	free((char *)r);
}
complain(s1, s2)
char *s1;
char *s2;
{
fprintf(stderr, "try: %d: ", lineno);
fprintf(stderr, s1, s2);
fprintf(stderr, " (%s)\n", (errseen != NULL) ? errseen : "");
}

View file

@ -1,44 +0,0 @@
/*
* CDE - Common Desktop Environment
*
* Copyright (c) 1993-2012, The Open Group. All rights reserved.
*
* These libraries and programs are free software; you can
* redistribute them and/or modify them under the terms of the GNU
* Lesser General Public License as published by the Free Software
* Foundation; either version 2 of the License, or (at your option)
* any later version.
*
* These libraries and programs are distributed in the hope that
* they will be useful, but WITHOUT ANY WARRANTY; without even the
* implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
* PURPOSE. See the GNU Lesser General Public License for more
* details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with these libraries and programs; if not, write
* to the Free Software Foundation, Inc., 51 Franklin Street, Fifth
* Floor, Boston, MA 02110-1301 USA
*/
/* $XConsortium: tptregexp.h /main/3 1996/06/19 17:13:54 drk $ */
/*
* Definitions etc. for regexp(3) routines.
*
* Caveat: this is V8 regexp(3) [actually, a reimplementation thereof],
* not the System V one.
*/
/* Number of slots in the startp[] and endp[] arrays of a regexp. */
#define NSUBEXP 10
/*
 * Compiled regular expression, the type returned by tpt_regcomp().
 */
typedef struct regexp {
/*
 * Per-subexpression pointer pairs.  Presumably the start and end of the
 * text matched by each subexpression, filled in by tpt_regexec() -- confirm
 * against the implementation.
 */
char *startp[NSUBEXP];
char *endp[NSUBEXP];
char regstart; /* Internal use only. */
char reganch; /* Internal use only. */
char *regmust; /* Internal use only. */
int regmlen; /* Internal use only. */
/*
 * NOTE(review): declared with a single element; presumably the structure is
 * over-allocated so the compiled program extends past it -- confirm in
 * tpt_regcomp().  Do not change the declared size without checking every
 * place a regexp object is defined or allocated.
 */
char program[1]; /* Unwarranted chumminess with compiler. */
} regexp;
/*
 * Entry points of the regexp routines.  They are declared without
 * prototypes, so the compiler does not check argument types at call sites.
 */
extern regexp *tpt_regcomp(); /* returns a regexp pointer */
extern int tpt_regexec(); /* returns an int result */
extern void tpt_regsub(); /* no return value */
extern void tpt_regerror(); /* error hook; no return value */

View file

@ -1,261 +0,0 @@
/*
* CDE - Common Desktop Environment
*
* Copyright (c) 1993-2012, The Open Group. All rights reserved.
*
* These libraries and programs are free software; you can
* redistribute them and/or modify them under the terms of the GNU
* Lesser General Public License as published by the Free Software
* Foundation; either version 2 of the License, or (at your option)
* any later version.
*
* These libraries and programs are distributed in the hope that
* they will be useful, but WITHOUT ANY WARRANTY; without even the
* implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
* PURPOSE. See the GNU Lesser General Public License for more
* details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with these libraries and programs; if not, write
* to the Free Software Foundation, Inc., 51 Franklin Street, Fifth
* Floor, Boston, MA 02110-1301 USA
*/
/* $XConsortium: try.c /main/3 1996/06/19 17:13:58 drk $ */
/*
* Simple test program for regexp(3) stuff. Knows about debugging hooks.
*
* Copyright (c) 1986 by University of Toronto.
* Written by Henry Spencer. Not derived from licensed software.
*
* Permission is granted to anyone to use this software for any
* purpose on any computer system, and to redistribute it freely,
* subject to the following restrictions:
*
* 1. The author is not responsible for the consequences of use of
* this software, no matter how awful, even if they arise
* from defects in it.
*
* 2. The origin of this software must not be misrepresented, either
* by explicit claim or by omission.
*
* 3. Altered versions must be plainly marked as such, and must not
* be misrepresented as being the original software.
*
* Usage: try re [string [output [-]]]
* The re is compiled and dumped, regexeced against the string, the result
* is applied to output using regsub(). The - triggers a running narrative
* from regexec(). Dumping and narrative don't happen unless DEBUG.
*
* If there are no arguments, stdin is assumed to be a stream of lines with
* five fields: a r.e., a string to match it against, a result code, a
* source string for regsub, and the proper result. Result codes are 'c'
* for compile failure, 'y' for match success, 'n' for match failure.
* Field separator is tab.
*/
#include <stdio.h>
#include <tptregexp.h>
/* With ERRAVAIL, main() sets progname from mkprogname(argv[0]). */
#ifdef ERRAVAIL
char *progname;
extern char *mkprogname();
#endif
/* With DEBUG, main() increments regnarrate when more than four arguments are given. */
#ifdef DEBUG
extern int regnarrate;
#endif
/* Receives the tpt_regsub() output printed by main(). */
char buf[BUFSIZ];
int errreport = 0; /* Report errors via errseen? */
char *errseen = NULL; /* Error message. */
int status = 0; /* Exit status. */
/*
 * Entry point of the test program.
 *
 * With no arguments, runs the batch tests in multiple() and exits with the
 * accumulated status.  Otherwise argv[1] is compiled as a regexp; if argv[2]
 * is present it is matched and the result code is printed, followed by
 * " \N" for every subexpression slot N whose start and end pointers are both
 * set; if argv[3] is present it is run through tpt_regsub() and the
 * resulting text is printed on its own line.
 */
/* ARGSUSED */
main(argc, argv)
int argc;
char *argv[];
{
	regexp *compiled;
	int matched;
	int sub;

#ifdef ERRAVAIL
	progname = mkprogname(argv[0]);
#endif

	if (argc == 1) {
		multiple();
		exit(status);
	}

	compiled = tpt_regcomp(argv[1]);
	if (!compiled)
		error("tpt_regcomp failure", "");
#ifdef DEBUG
	regdump(compiled);
	if (argc > 4)
		regnarrate++;
#endif

	if (argc > 2) {
		matched = tpt_regexec(compiled, argv[2]);
		printf("%d", matched);
		/* Slot 0 is skipped; only slots 1..NSUBEXP-1 are listed. */
		for (sub = 1; sub < NSUBEXP; sub++) {
			if (compiled->startp[sub] == NULL || compiled->endp[sub] == NULL)
				continue;
			printf(" \\%d", sub);
		}
		putchar('\n');
	}

	if (argc > 3) {
		tpt_regsub(compiled, argv[3], buf);
		puts(buf);
	}

	exit(status);
}
/*
 * Error hook taking a message string s (presumably invoked by the tpt_reg
 * routines when they detect a problem -- confirm against the library).
 * When errreport is nonzero the message is only remembered in errseen;
 * otherwise it is handed to error().
 */
void
tpt_regerror(s)
char *s;
{
	if (!errreport) {
		error(s, "");
		return;
	}
	errseen = s;
}
#ifndef ERRAVAIL
/*
 * Fallback error reporter, compiled only when ERRAVAIL is not defined.
 * Writes "regexp: ", then s1 interpreted as a printf format with s2 as its
 * single argument, then a newline, all to stderr, and exits with status 1.
 */
error(s1, s2)
char *s1;
char *s2;
{
	fputs("regexp: ", stderr);
	fprintf(stderr, s1, s2);
	fputc('\n', stderr);
	exit(1);
}
#endif
/*
 * Input line number of the test being run, printed by complain().
 * multiple() also sets it to 9990..9998 to label its built-in checks.
 */
int lineno;
/*
 * Zero-filled regexp that multiple() passes to tpt_regexec() and
 * tpt_regsub() to check that a nonsense regexp draws a complaint.
 */
regexp badregexp; /* Implicit init to 0. */
/*
 * Batch mode: read test cases from stdin, one per line, and run each
 * through try().  A line holds up to five tab-separated fields (regexp,
 * subject string, result code, substitution source, expected result);
 * a line with fewer than five fields is a format error and ends the program
 * with status 1.
 *
 * After the input is exhausted, a fixed series of internal checks verifies
 * that the regexp routines complain (via tpt_regerror(), observed through
 * errseen) when handed NULL or nonsense arguments.  These checks use the
 * pseudo line numbers 9990..9998 so complain() can identify them.
 */
multiple()
{
	char rbuf[BUFSIZ];
	char *field[5];
	char *scan;
	char *nl;
	int i;
	regexp *r;
	extern char *strchr();

	errreport = 1;
	lineno = 0;
	while (fgets(rbuf, sizeof(rbuf), stdin) != NULL) {
		/*
		 * Dispense with \n, but only if one is actually there: the
		 * last line of the input (or an overlong line) may lack it,
		 * and blindly chopping the final byte would then corrupt the
		 * last field.
		 */
		nl = strchr(rbuf, '\n');
		if (nl != NULL)
			*nl = '\0';
		lineno++;

		/* Split the line in place into five tab-separated fields. */
		scan = rbuf;
		for (i = 0; i < 5; i++) {
			field[i] = scan;
			if (field[i] == NULL) {
				complain("bad testfile format", "");
				exit(1);
			}
			scan = strchr(scan, '\t');
			if (scan != NULL)
				*scan++ = '\0';
		}
		try(field);
	}

	/* And finish up with some internal testing... */
	lineno = 9990;
	errseen = NULL;
	if (tpt_regcomp((char *)NULL) != NULL || errseen == NULL)
		complain("tpt_regcomp(NULL) doesn't complain", "");

	lineno = 9991;
	errseen = NULL;
	if (tpt_regexec((regexp *)NULL, "foo") || errseen == NULL)
		complain("tpt_regexec(NULL, ...) doesn't complain", "");

	lineno = 9992;
	r = tpt_regcomp("foo");
	if (r == NULL) {
		complain("tpt_regcomp(\"foo\") fails", "");
		return;
	}

	lineno = 9993;
	errseen = NULL;
	if (tpt_regexec(r, (char *)NULL) || errseen == NULL)
		complain("tpt_regexec(..., NULL) doesn't complain", "");

	lineno = 9994;
	errseen = NULL;
	tpt_regsub((regexp *)NULL, "foo", rbuf);
	if (errseen == NULL)
		complain("tpt_regsub(NULL, ..., ...) doesn't complain", "");

	lineno = 9995;
	errseen = NULL;
	tpt_regsub(r, (char *)NULL, rbuf);
	if (errseen == NULL)
		complain("tpt_regsub(..., NULL, ...) doesn't complain", "");

	lineno = 9996;
	errseen = NULL;
	tpt_regsub(r, "foo", (char *)NULL);
	if (errseen == NULL)
		complain("tpt_regsub(..., ..., NULL) doesn't complain", "");

	lineno = 9997;
	errseen = NULL;
	if (tpt_regexec(&badregexp, "foo") || errseen == NULL)
		complain("tpt_regexec(nonsense, ...) doesn't complain", "");

	lineno = 9998;
	errseen = NULL;
	tpt_regsub(&badregexp, "foo", rbuf);
	if (errseen == NULL)
		complain("tpt_regsub(nonsense, ..., ...) doesn't complain", "");
}
/*
 * Run one test case and report any deviation through complain().
 *
 * fields points at five strings:
 *   fields[0]  regular expression
 *   fields[1]  string to match it against
 *   fields[2]  result code: 'c' compile must fail, 'n' match must fail,
 *              anything else means the match must succeed
 *   fields[3]  source string for tpt_regsub()
 *   fields[4]  expected tpt_regsub() result
 */
try(fields)
char **fields;
{
	regexp *r;
	char dbuf[BUFSIZ];

	errseen = NULL;
	r = tpt_regcomp(fields[0]);
	if (r == NULL) {
		if (*fields[2] != 'c')
			complain("tpt_regcomp failure in `%s'", fields[0]);
		return;
	}
	if (*fields[2] == 'c') {
		complain("unexpected tpt_regcomp success in `%s'", fields[0]);
		goto done;
	}

	if (!tpt_regexec(r, fields[1])) {
		/* Name the regexp in the message; the format expects it. */
		if (*fields[2] != 'n')
			complain("tpt_regexec failure in `%s'", fields[0]);
		goto done;
	}
	if (*fields[2] == 'n') {
		complain("unexpected tpt_regexec success", "");
		goto done;
	}

	/* Only complaints raised by the substitution itself matter from here on. */
	errseen = NULL;
	tpt_regsub(r, fields[3], dbuf);
	if (errseen != NULL) {
		complain("tpt_regsub complaint", "");
		goto done;
	}
	if (strcmp(dbuf, fields[4]) != 0)
		complain("tpt_regsub result `%s' wrong", dbuf);

done:
	free((char *)r);
}
complain(s1, s2)
char *s1;
char *s2;
{
fprintf(stderr, "try: %d: ", lineno);
fprintf(stderr, s1, s2);
fprintf(stderr, " (%s)\n", (errseen != NULL) ? errseen : "");
status = 1;
}