diff --git a/cde/programs/dtdocbook/instant/Makefile.am b/cde/programs/dtdocbook/doc2sdl/instant/Makefile.am similarity index 56% rename from cde/programs/dtdocbook/instant/Makefile.am rename to cde/programs/dtdocbook/doc2sdl/instant/Makefile.am index e53b2cec7..cfa827161 100644 --- a/cde/programs/dtdocbook/instant/Makefile.am +++ b/cde/programs/dtdocbook/doc2sdl/instant/Makefile.am @@ -1,11 +1,12 @@ MAINTAINERCLEANFILES = Makefile.in -pkglibexec_PROGRAMS = instant +include ../doc2sdl.am -instant_CFLAGS = -I../lib/tptregexp $(DT_INCDIR) +doc2sdllibexec_PROGRAMS = instant -instant_LDADD = $(DTCLIENTLIBS) $(XTOOLLIB) -L../lib/tptregexp \ - -ltptregexp +instant_CFLAGS = $(DT_INCDIR) + +instant_LDADD = $(DTCLIENTLIBS) $(XTOOLLIB) instant_CFLAGS += @TCL_INCLUDE_SPEC@ instant_LDADD += @TCL_LIB_SPEC@ diff --git a/cde/programs/dtdocbook/instant/README b/cde/programs/dtdocbook/doc2sdl/instant/README similarity index 100% rename from cde/programs/dtdocbook/instant/README rename to cde/programs/dtdocbook/doc2sdl/instant/README diff --git a/cde/programs/dtdocbook/instant/TODO b/cde/programs/dtdocbook/doc2sdl/instant/TODO similarity index 100% rename from cde/programs/dtdocbook/instant/TODO rename to cde/programs/dtdocbook/doc2sdl/instant/TODO diff --git a/cde/programs/dtdocbook/instant/browse.c b/cde/programs/dtdocbook/doc2sdl/instant/browse.c similarity index 100% rename from cde/programs/dtdocbook/instant/browse.c rename to cde/programs/dtdocbook/doc2sdl/instant/browse.c diff --git a/cde/programs/dtdocbook/instant/general.h b/cde/programs/dtdocbook/doc2sdl/instant/general.h similarity index 100% rename from cde/programs/dtdocbook/instant/general.h rename to cde/programs/dtdocbook/doc2sdl/instant/general.h diff --git a/cde/programs/dtdocbook/instant/hyper.c b/cde/programs/dtdocbook/doc2sdl/instant/hyper.c similarity index 100% rename from cde/programs/dtdocbook/instant/hyper.c rename to cde/programs/dtdocbook/doc2sdl/instant/hyper.c diff --git a/cde/programs/dtdocbook/instant/info.c b/cde/programs/dtdocbook/doc2sdl/instant/info.c similarity index 100% rename from cde/programs/dtdocbook/instant/info.c rename to cde/programs/dtdocbook/doc2sdl/instant/info.c diff --git a/cde/programs/dtdocbook/instant/main.c b/cde/programs/dtdocbook/doc2sdl/instant/main.c similarity index 100% rename from cde/programs/dtdocbook/instant/main.c rename to cde/programs/dtdocbook/doc2sdl/instant/main.c diff --git a/cde/programs/dtdocbook/instant/sgmls.notes b/cde/programs/dtdocbook/doc2sdl/instant/sgmls.notes similarity index 100% rename from cde/programs/dtdocbook/instant/sgmls.notes rename to cde/programs/dtdocbook/doc2sdl/instant/sgmls.notes diff --git a/cde/programs/dtdocbook/instant/tables.c b/cde/programs/dtdocbook/doc2sdl/instant/tables.c similarity index 99% rename from cde/programs/dtdocbook/instant/tables.c rename to cde/programs/dtdocbook/doc2sdl/instant/tables.c index 533a99c88..3653d806d 100644 --- a/cde/programs/dtdocbook/instant/tables.c +++ b/cde/programs/dtdocbook/doc2sdl/instant/tables.c @@ -76,7 +76,6 @@ static char *RCSid = #include #include -#include #include "general.h" #include "translate.h" diff --git a/cde/programs/dtdocbook/instant/traninit.c b/cde/programs/dtdocbook/doc2sdl/instant/traninit.c similarity index 98% rename from cde/programs/dtdocbook/instant/traninit.c rename to cde/programs/dtdocbook/doc2sdl/instant/traninit.c index 70fd85941..5cf744a68 100644 --- a/cde/programs/dtdocbook/instant/traninit.c +++ b/cde/programs/dtdocbook/doc2sdl/instant/traninit.c @@ -67,7 +67,6 @@ static char *RCSid = #include #include -#include #include "general.h" #include "translate.h" @@ -262,7 +261,7 @@ RememberTransSpec( if (do_regex) { t->depth = MAX_DEPTH; - if (!(t->context_re=tpt_regcomp(t->context))) { + if (regcomp(&t->context_re, t->context, 0)) { fprintf(stderr, "Regex error in Context: %s\n", t->context); } } @@ -300,16 +299,15 @@ RememberTransSpec( else { /* value not found */ t->attpair[i].val = "."; } - if (!(t->attpair[i].rex=tpt_regcomp(t->attpair[i].val))) { + if (regcomp(&t->attpair[i].rex, t->attpair[i].val, 0)) { fprintf(stderr, "Regex error in AttValue: %s %s\n", t->attpair[i].name, t->attpair[i].val); } } /* Compile regular expression for content */ - t->content_re = 0; if (t->content) { - if (!(t->content_re=tpt_regcomp(t->content))) + if (regcomp(&t->content_re, t->content, 0)) fprintf(stderr, "Regex error in Content: %s\n", t->content); } diff --git a/cde/programs/dtdocbook/instant/translate.c b/cde/programs/dtdocbook/doc2sdl/instant/translate.c similarity index 97% rename from cde/programs/dtdocbook/instant/translate.c rename to cde/programs/dtdocbook/doc2sdl/instant/translate.c index 225145752..1d11245f9 100644 --- a/cde/programs/dtdocbook/instant/translate.c +++ b/cde/programs/dtdocbook/doc2sdl/instant/translate.c @@ -64,7 +64,6 @@ static char *RCSid = #include #include -#include #include "general.h" #define STORAGE #include "translate.h" @@ -137,8 +136,15 @@ DoTranslate( * program is normally done at this point anyway. */ for (t=TrSpecs; t; ) { tn = t->next; + + regfree(&t->context_re); + regfree(&t->content_re); + + for (int i = 0; i < t->nattpairs; ++i) regfree(&t->attpair[i].rex); + /* free the contents of t here ... */ (void)free((void* )t); + t = tn; } TrSpecs = 0; @@ -268,7 +274,7 @@ ExpandVariables( if ( modifier && *modifier == 'l' ) { while (*s) { *op = tolower(*s); - op++, *s++; + op++, s++; } } else while (*s) *op++ = *s++; @@ -470,19 +476,15 @@ FindTrans( if (t->context) { /* no context specified -> a match */ FindContext(e, t->depth, context); - /* If reg expr set, do regex compare; else just string compare. */ - if (t->context_re) { - if (! tpt_regexec(t->context_re, context)) continue; - } - else { - /* Is depth of spec deeper than element's depth? */ - if (t->depth > e->depth) continue; + if (regexec(&t->context_re, context, 0, NULL, 0)) continue; - /* See if context of element matches "context" of transpec */ - match = ( (t->context[0] == context[0]) && + /* Is depth of spec deeper than element's depth? */ + if (t->depth > e->depth) continue; + + /* See if context of element matches "context" of transpec */ + match = ( (t->context[0] == context[0]) && !strcmp(t->context, context) ); - if (!match) continue; - } + if (!match) continue; } /* Check attributes. Loop through list, comparing each. */ @@ -492,7 +494,8 @@ FindTrans( match = 0; break; } - if (!tpt_regexec(t->attpair[a].rex, atval)) match = 0; + if (regexec(&t->attpair[a].rex, atval, 0, NULL, 0)) + match = 0; } if (!match) continue; } @@ -551,7 +554,7 @@ FindTrans( /* check content */ if (t->content) { /* no att specified -> a match */ for (match=0,i=0; indcont; i++) { - if (tpt_regexec(t->content_re, e->dcont[i])) { + if (!regexec(&t->content_re, e->dcont[i], 0, NULL, 0)) { match = 1; break; } diff --git a/cde/programs/dtdocbook/instant/translate.h b/cde/programs/dtdocbook/doc2sdl/instant/translate.h similarity index 96% rename from cde/programs/dtdocbook/instant/translate.h rename to cde/programs/dtdocbook/doc2sdl/instant/translate.h index 49e4019db..9968e2552 100644 --- a/cde/programs/dtdocbook/instant/translate.h +++ b/cde/programs/dtdocbook/doc2sdl/instant/translate.h @@ -48,6 +48,8 @@ * ________________________________________________________________________ */ +#include + #ifdef STORAGE #ifndef lint static char *tr_h_RCSid = @@ -73,7 +75,7 @@ typedef enum { CONT_CONTINUE, CONT_BREAK } ContParse_t; typedef struct { char *name; /* attribute name string */ char *val; /* attribute value string */ - regexp *rex; /* attribute value reg expr (compiled) */ + regex_t rex; /* attribute value reg expr (compiled) */ } AttPair_t; typedef struct _Trans { @@ -81,14 +83,14 @@ typedef struct _Trans { char *gi; /* element name of tag under consideration */ char **gilist; /* list of element names (multiple gi's) */ char *context; /* context in tree - looking depth levels up */ - regexp *context_re; /* tree hierarchy looking depth levels up */ + regex_t context_re; /* tree hierarchy looking depth levels up */ int depth; /* number of levels to look up the tree */ AttPair_t *attpair; /* attr name-value pairs */ int nattpairs; /* number of name-value pairs */ char *parent; /* GI has this element as parent */ int nth_child; /* GI is Nth child of this of parent element */ char *content; /* element has this string in content */ - regexp *content_re; /* content reg expr (compiled) */ + regex_t content_re; /* content reg expr (compiled) */ char *pattrset; /* is this attr set (any value) in parent? */ char *var_name; /* variable name */ char *var_value; /* variable value */ diff --git a/cde/programs/dtdocbook/instant/tranvar.c b/cde/programs/dtdocbook/doc2sdl/instant/tranvar.c similarity index 99% rename from cde/programs/dtdocbook/instant/tranvar.c rename to cde/programs/dtdocbook/doc2sdl/instant/tranvar.c index f2c23f5e2..3e050d8a0 100644 --- a/cde/programs/dtdocbook/instant/tranvar.c +++ b/cde/programs/dtdocbook/doc2sdl/instant/tranvar.c @@ -61,7 +61,6 @@ static char *RCSid = #include #include -#include #include "general.h" #include "translate.h" diff --git a/cde/programs/dtdocbook/instant/util.c b/cde/programs/dtdocbook/doc2sdl/instant/util.c similarity index 100% rename from cde/programs/dtdocbook/instant/util.c rename to cde/programs/dtdocbook/doc2sdl/instant/util.c diff --git a/cde/programs/dtdocbook/lib/Makefile.am b/cde/programs/dtdocbook/lib/Makefile.am deleted file mode 100644 index 156ab94bf..000000000 --- a/cde/programs/dtdocbook/lib/Makefile.am +++ /dev/null @@ -1,3 +0,0 @@ -MAINTAINERCLEANFILES = Makefile.in - -SUBDIRS = tptregexp diff --git a/cde/programs/dtdocbook/lib/tptregexp/Makefile.am b/cde/programs/dtdocbook/lib/tptregexp/Makefile.am deleted file mode 100644 index 130bf8b5c..000000000 --- a/cde/programs/dtdocbook/lib/tptregexp/Makefile.am +++ /dev/null @@ -1,5 +0,0 @@ -MAINTAINERCLEANFILES = Makefile.in - -noinst_LIBRARIES = libtptregexp.a - -libtptregexp_a_SOURCES = regexp.c regsub.c regerror.c diff --git a/cde/programs/dtdocbook/lib/tptregexp/README b/cde/programs/dtdocbook/lib/tptregexp/README deleted file mode 100644 index 0ee1943a4..000000000 --- a/cde/programs/dtdocbook/lib/tptregexp/README +++ /dev/null @@ -1,101 +0,0 @@ -/* $XConsortium: README /main/2 1996/07/15 14:09:54 drk $ */ -# -# Copyright (c) 1994 -# Open Software Foundation, Inc. -# -# Permission is hereby granted to use, copy, modify and freely distribute -# the software in this file and its documentation for any purpose without -# fee, provided that the above copyright notice appears in all copies and -# that both the copyright notice and this permission notice appear in -# supporting documentation. Further, provided that the name of Open -# Software Foundation, Inc. ("OSF") not be used in advertising or -# publicity pertaining to distribution of the software without prior -# written permission from OSF. OSF makes no representations about the -# suitability of this software for any purpose. It is provided "as is" -# without express or implied warranty. -# - -This is a nearly-public-domain reimplementation of the V8 regexp(3) package. -It gives C programs the ability to use egrep-style regular expressions, and -does it in a much cleaner fashion than the analogous routines in SysV. - - Copyright (c) 1986 by University of Toronto. - Written by Henry Spencer. Not derived from licensed software. - - Permission is granted to anyone to use this software for any - purpose on any computer system, and to redistribute it freely, - subject to the following restrictions: - - 1. The author is not responsible for the consequences of use of - this software, no matter how awful, even if they arise - from defects in it. - - 2. The origin of this software must not be misrepresented, either - by explicit claim or by omission. - - 3. Altered versions must be plainly marked as such, and must not - be misrepresented as being the original software. - -Barring a couple of small items in the BUGS list, this implementation is -believed 100% compatible with V8. It should even be binary-compatible, -sort of, since the only fields in a "struct regexp" that other people have -any business touching are declared in exactly the same way at the same -location in the struct (the beginning). - -This implementation is *NOT* AT&T/Bell code, and is not derived from licensed -software. Even though U of T is a V8 licensee. This software is based on -a V8 manual page sent to me by Dennis Ritchie (the manual page enclosed -here is a complete rewrite and hence is not covered by AT&T copyright). -The software was nearly complete at the time of arrival of our V8 tape. -I haven't even looked at V8 yet, although a friend elsewhere at U of T has -been kind enough to run a few test programs using the V8 regexp(3) to resolve -a few fine points. I admit to some familiarity with regular-expression -implementations of the past, but the only one that this code traces any -ancestry to is the one published in Kernighan & Plauger (from which this -one draws ideas but not code). - -Simplistically: put this stuff into a source directory, copy regexp.h into -/usr/include, inspect Makefile for compilation options that need changing -to suit your local environment, and then do "make r". This compiles the -regexp(3) functions, compiles a test program, and runs a large set of -regression tests. If there are no complaints, then put regexp.o, regsub.o, -and regerror.o into your C library, and regexp.3 into your manual-pages -directory. - -Note that if you don't put regexp.h into /usr/include *before* compiling, -you'll have to add "-I." to CFLAGS before compiling. - -The files are: - -Makefile instructions to make everything -regexp.3 manual page -regexp.h header file, for /usr/include -regexp.c source for regcomp() and regexec() -regsub.c source for regsub() -regerror.c source for default regerror() -regmagic.h internal header file -try.c source for test program -timer.c source for timing program -tests test list for try and timer - -This implementation uses nondeterministic automata rather than the -deterministic ones found in some other implementations, which makes it -simpler, smaller, and faster at compiling regular expressions, but slower -at executing them. In theory, anyway. This implementation does employ -some special-case optimizations to make the simpler cases (which do make -up the bulk of regular expressions actually used) run quickly. In general, -if you want blazing speed you're in the wrong place. Replacing the insides -of egrep with this stuff is probably a mistake; if you want your own egrep -you're going to have to do a lot more work. But if you want to use regular -expressions a little bit in something else, you're in luck. Note that many -existing text editors use nondeterministic regular-expression implementations, -so you're in good company. - -This stuff should be pretty portable, given appropriate option settings. -If your chars have less than 8 bits, you're going to have to change the -internal representation of the automaton, although knowledge of the details -of this is fairly localized. There are no "reserved" char values except for -NUL, and no special significance is attached to the top bit of chars. -The string(3) functions are used a fair bit, on the grounds that they are -probably faster than coding the operations in line. Some attempts at code -tuning have been made, but this is invariably a bit machine-specific. diff --git a/cde/programs/dtdocbook/lib/tptregexp/README.osf b/cde/programs/dtdocbook/lib/tptregexp/README.osf deleted file mode 100644 index 95759f786..000000000 --- a/cde/programs/dtdocbook/lib/tptregexp/README.osf +++ /dev/null @@ -1,37 +0,0 @@ -/* $XConsortium: README.osf /main/2 1996/07/15 14:10:16 drk $ */ -# -# Copyright (c) 1994 -# Open Software Foundation, Inc. -# -# Permission is hereby granted to use, copy, modify and freely distribute -# the software in this file and its documentation for any purpose without -# fee, provided that the above copyright notice appears in all copies and -# that both the copyright notice and this permission notice appear in -# supporting documentation. Further, provided that the name of Open -# Software Foundation, Inc. ("OSF") not be used in advertising or -# publicity pertaining to distribution of the software without prior -# written permission from OSF. OSF makes no representations about the -# suitability of this software for any purpose. It is provided "as is" -# without express or implied warranty. -# - -OSF did not write this code. - -Changes made: - - Renamed "regexp.h" to "tptregexp.h" to avoid conflicts with systems that - have a /usr/include/regexp.h. Changed the *.c files in the appropriate - places. - - Renamed these routines: - regcomp() -> tpt_regcomp() - regexec() -> tpt_regexec() - regsub() -> tpt_regsub() - regerror() -> tpt_regerror() - This is to avoid conflicts in the standard C library. - - In Makefile - wrote "all" and "install" targets. Changed "regexp.h" - to "tptregexp.h", as appropriate. - - In regexp.c - added #include to keep compiler happy. - diff --git a/cde/programs/dtdocbook/lib/tptregexp/regerror.c b/cde/programs/dtdocbook/lib/tptregexp/regerror.c deleted file mode 100644 index f6c5bab09..000000000 --- a/cde/programs/dtdocbook/lib/tptregexp/regerror.c +++ /dev/null @@ -1,37 +0,0 @@ -/* - * CDE - Common Desktop Environment - * - * Copyright (c) 1993-2012, The Open Group. All rights reserved. - * - * These libraries and programs are free software; you can - * redistribute them and/or modify them under the terms of the GNU - * Lesser General Public License as published by the Free Software - * Foundation; either version 2 of the License, or (at your option) - * any later version. - * - * These libraries and programs are distributed in the hope that - * they will be useful, but WITHOUT ANY WARRANTY; without even the - * implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR - * PURPOSE. See the GNU Lesser General Public License for more - * details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with these libraries and programs; if not, write - * to the Free Software Foundation, Inc., 51 Franklin Street, Fifth - * Floor, Boston, MA 02110-1301 USA - */ -/* $XConsortium: regerror.c /main/3 1996/06/19 17:13:36 drk $ */ -#include -#include - -void -tpt_regerror(char *s) -{ -#ifdef ERRAVAIL - error("tpt_regexp: %s", s); -#else - fprintf(stderr, "tpt_regexp(3): %s", s); - exit(1); -#endif - /* NOTREACHED */ -} diff --git a/cde/programs/dtdocbook/lib/tptregexp/regexp.3 b/cde/programs/dtdocbook/lib/tptregexp/regexp.3 deleted file mode 100644 index 24d94ae57..000000000 --- a/cde/programs/dtdocbook/lib/tptregexp/regexp.3 +++ /dev/null @@ -1,180 +0,0 @@ -.\" $XConsortium: regexp.3 /main/2 1996/10/29 15:06:28 drk $ -.TH REGEXP 3 local -.DA 30 Nov 1985 -.SH NAME -regcomp, regexec, regsub, regerror \- regular expression handler -.SH SYNOPSIS -.ft B -.nf -#include - -regexp *regcomp(exp) -char *exp; - -int regexec(prog, string) -regexp *prog; -char *string; - -regsub(prog, source, dest) -regexp *prog; -char *source; -char *dest; - -regerror(msg) -char *msg; -.SH DESCRIPTION -These functions implement -.IR egrep (1)-style -regular expressions and supporting facilities. -.PP -.I Regcomp -compiles a regular expression into a structure of type -.IR regexp , -and returns a pointer to it. -The space has been allocated using -.IR malloc (3) -and may be released by -.IR free . -.PP -.I Regexec -matches a NUL-terminated \fIstring\fR against the compiled regular expression -in \fIprog\fR. -It returns 1 for success and 0 for failure, and adjusts the contents of -\fIprog\fR's \fIstartp\fR and \fIendp\fR (see below) accordingly. -.PP -The members of a -.I regexp -structure include at least the following (not necessarily in order): -.PP -.RS -char *startp[NSUBEXP]; -.br -char *endp[NSUBEXP]; -.RE -.PP -where -.I NSUBEXP -is defined (as 10) in the header file. -Once a successful \fIregexec\fR has been done using the \fIregexp\fR, -each \fIstartp\fR-\fIendp\fR pair describes one substring -within the \fIstring\fR, -with the \fIstartp\fR pointing to the first character of the substring and -the \fIendp\fR pointing to the first character following the substring. -The 0th substring is the substring of \fIstring\fR that matched the whole -regular expression. -The others are those substrings that matched parenthesized expressions -within the regular expression, with parenthesized expressions numbered -in left-to-right order of their opening parentheses. -.PP -.I Regsub -copies \fIsource\fR to \fIdest\fR, making substitutions according to the -most recent \fIregexec\fR performed using \fIprog\fR. -Each instance of `&' in \fIsource\fR is replaced by the substring -indicated by \fIstartp\fR[\fI0\fR] and -\fIendp\fR[\fI0\fR]. -Each instance of `\e\fIn\fR', where \fIn\fR is a digit, is replaced by -the substring indicated by -\fIstartp\fR[\fIn\fR] and -\fIendp\fR[\fIn\fR]. -To get a literal `&' or `\e\fIn\fR' into \fIdest\fR, prefix it with `\e'; -to get a literal `\e' preceding `&' or `\e\fIn\fR', prefix it with -another `\e'. -.PP -.I Regerror -is called whenever an error is detected in \fIregcomp\fR, \fIregexec\fR, -or \fIregsub\fR. -The default \fIregerror\fR writes the string \fImsg\fR, -with a suitable indicator of origin, -on the standard -error output -and invokes \fIexit\fR(2). -.I Regerror -can be replaced by the user if other actions are desirable. -.SH "REGULAR EXPRESSION SYNTAX" -A regular expression is zero or more \fIbranches\fR, separated by `|'. -It matches anything that matches one of the branches. -.PP -A branch is zero or more \fIpieces\fR, concatenated. -It matches a match for the first, followed by a match for the second, etc. -.PP -A piece is an \fIatom\fR possibly followed by `*', `+', or `?'. -An atom followed by `*' matches a sequence of 0 or more matches of the atom. -An atom followed by `+' matches a sequence of 1 or more matches of the atom. -An atom followed by `?' matches a match of the atom, or the null string. -.PP -An atom is a regular expression in parentheses (matching a match for the -regular expression), a \fIrange\fR (see below), `.' -(matching any single character), `^' (matching the null string at the -beginning of the input string), `$' (matching the null string at the -end of the input string), a `\e' followed by a single character (matching -that character), or a single character with no other significance -(matching that character). -.PP -A \fIrange\fR is a sequence of characters enclosed in `[]'. -It normally matches any single character from the sequence. -If the sequence begins with `^', -it matches any single character \fInot\fR from the rest of the sequence. -If two characters in the sequence are separated by `\-', this is shorthand -for the full list of ASCII characters between them -(e.g. `[0-9]' matches any decimal digit). -To include a literal `]' in the sequence, make it the first character -(following a possible `^'). -To include a literal `\-', make it the first or last character. -.SH AMBIGUITY -If a regular expression could match two different parts of the input string, -it will match the one which begins earliest. -If both begin in the same place but match different lengths, or match -the same length in different ways, life gets messier, as follows. -.PP -In general, the possibilities in a list of branches are considered in -left-to-right order, the possibilities for `*', `+', and `?' are -considered longest-first, nested constructs are considered from the -outermost in, and concatenated constructs are considered leftmost-first. -The match that will be chosen is the one that uses the earliest -possibility in the first choice that has to be made. -If there is more than one choice, the next will be made in the same manner -(earliest possibility) subject to the decision on the first choice. -And so forth. -.PP -For example, `(ab|a)b*c' could match `abc' in one of two ways. -The first choice is between `ab' and `a'; since `ab' is earlier, and does -lead to a successful overall match, it is chosen. -Since the `b' is already spoken for, -the `b*' must match its last possibility\(emthe empty string\(emsince -it must respect the earlier choice. -.PP -In the particular case where no `|'s are present and there is only one -`*', `+', or `?', the net effect is that the longest possible -match will be chosen. -So `ab*', presented with `xabbbby', will match `abbbb'. -Note that if `ab*' is tried against `xabyabbbz', it -will match `ab' just after `x', due to the begins-earliest rule. -(In effect, the decision on where to start the match is the first choice -to be made, hence subsequent choices must respect it even if this leads them -to less-preferred alternatives.) -.SH SEE ALSO -egrep(1), expr(1) -.SH DIAGNOSTICS -\fIRegcomp\fR returns NULL for a failure -(\fIregerror\fR permitting), -where failures are syntax errors, exceeding implementation limits, -or applying `+' or `*' to a possibly-null operand. -.SH HISTORY -Both code and manual page were -written at U of T. -They are intended to be compatible with the Bell V8 \fIregexp\fR(3), -but are not derived from Bell code. -.SH BUGS -Empty branches and empty regular expressions are not portable to V8. -.PP -The restriction against -applying `*' or `+' to a possibly-null operand is an artifact of the -simplistic implementation. -.PP -Does not support \fIegrep\fR's newline-separated branches; -neither does the V8 \fIregexp\fR(3), though. -.PP -Due to emphasis on -compactness and simplicity, -it's not strikingly fast. -It does give special attention to handling simple cases quickly. diff --git a/cde/programs/dtdocbook/lib/tptregexp/regexp.c b/cde/programs/dtdocbook/lib/tptregexp/regexp.c deleted file mode 100644 index 63a5d7954..000000000 --- a/cde/programs/dtdocbook/lib/tptregexp/regexp.c +++ /dev/null @@ -1,1213 +0,0 @@ -/* - * CDE - Common Desktop Environment - * - * Copyright (c) 1993-2012, The Open Group. All rights reserved. - * - * These libraries and programs are free software; you can - * redistribute them and/or modify them under the terms of the GNU - * Lesser General Public License as published by the Free Software - * Foundation; either version 2 of the License, or (at your option) - * any later version. - * - * These libraries and programs are distributed in the hope that - * they will be useful, but WITHOUT ANY WARRANTY; without even the - * implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR - * PURPOSE. See the GNU Lesser General Public License for more - * details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with these libraries and programs; if not, write - * to the Free Software Foundation, Inc., 51 Franklin Street, Fifth - * Floor, Boston, MA 02110-1301 USA - */ -/* $XConsortium: regexp.c /main/3 1996/05/28 15:37:26 lehors $ */ -/* - * regcomp and regexec -- regsub and regerror are elsewhere - * - * Copyright (c) 1986 by University of Toronto. - * Written by Henry Spencer. Not derived from licensed software. - * - * Permission is granted to anyone to use this software for any - * purpose on any computer system, and to redistribute it freely, - * subject to the following restrictions: - * - * 1. The author is not responsible for the consequences of use of - * this software, no matter how awful, even if they arise - * from defects in it. - * - * 2. The origin of this software must not be misrepresented, either - * by explicit claim or by omission. - * - * 3. Altered versions must be plainly marked as such, and must not - * be misrepresented as being the original software. - * - * Beware that some of this code is subtly aware of the way operator - * precedence is structured in regular expressions. Serious changes in - * regular-expression syntax might require a total rethink. - */ -#include -#include -#include -#include -#include "regmagic.h" - -/* - * The "internal use only" fields in regexp.h are present to pass info from - * compile to execute that permits the execute phase to run lots faster on - * simple cases. They are: - * - * regstart char that must begin a match; '\0' if none obvious - * reganch is the match anchored (at beginning-of-line only)? - * regmust string (pointer into program) that match must include, or NULL - * regmlen length of regmust string - * - * Regstart and reganch permit very fast decisions on suitable starting points - * for a match, cutting down the work a lot. Regmust permits fast rejection - * of lines that cannot possibly match. The regmust tests are costly enough - * that regcomp() supplies a regmust only if the r.e. contains something - * potentially expensive (at present, the only such thing detected is * or + - * at the start of the r.e., which can involve a lot of backup). Regmlen is - * supplied because the test in regexec() needs it and regcomp() is computing - * it anyway. - */ - -/* - * Structure for regexp "program". This is essentially a linear encoding - * of a nondeterministic finite-state machine (aka syntax charts or - * "railroad normal form" in parsing technology). Each node is an opcode - * plus a "next" pointer, possibly plus an operand. "Next" pointers of - * all nodes except BRANCH implement concatenation; a "next" pointer with - * a BRANCH on both ends of it is connecting two alternatives. (Here we - * have one of the subtle syntax dependencies: an individual BRANCH (as - * opposed to a collection of them) is never concatenated with anything - * because of operator precedence.) The operand of some types of node is - * a literal string; for others, it is a node leading into a sub-FSM. In - * particular, the operand of a BRANCH node is the first node of the branch. - * (NB this is *not* a tree structure: the tail of the branch connects - * to the thing following the set of BRANCHes.) The opcodes are: - */ - -/* definition number opnd? meaning */ -#define END 0 /* no End of program. */ -#define BOL 1 /* no Match "" at beginning of line. */ -#define EOL 2 /* no Match "" at end of line. */ -#define ANY 3 /* no Match any one character. */ -#define ANYOF 4 /* str Match any character in this string. */ -#define ANYBUT 5 /* str Match any character not in this string. */ -#define BRANCH 6 /* node Match this alternative, or the next... */ -#define BACK 7 /* no Match "", "next" ptr points backward. */ -#define EXACTLY 8 /* str Match this string. */ -#define NOTHING 9 /* no Match empty string. */ -#define STAR 10 /* node Match this (simple) thing 0 or more times. */ -#define PLUS 11 /* node Match this (simple) thing 1 or more times. */ -#define OPEN 20 /* no Mark this point in input as start of #n. */ - /* OPEN+1 is number 1, etc. */ -#define CLOSE 30 /* no Analogous to OPEN. */ - -/* - * Opcode notes: - * - * BRANCH The set of branches constituting a single choice are hooked - * together with their "next" pointers, since precedence prevents - * anything being concatenated to any individual branch. The - * "next" pointer of the last BRANCH in a choice points to the - * thing following the whole choice. This is also where the - * final "next" pointer of each individual branch points; each - * branch starts with the operand node of a BRANCH node. - * - * BACK Normal "next" pointers all implicitly point forward; BACK - * exists to make loop structures possible. - * - * STAR,PLUS '?', and complex '*' and '+', are implemented as circular - * BRANCH structures using BACK. Simple cases (one character - * per match) are implemented with STAR and PLUS for speed - * and to minimize recursive plunges. - * - * OPEN,CLOSE ...are numbered at compile time. - */ - -/* - * A node is one char of opcode followed by two chars of "next" pointer. - * "Next" pointers are stored as two 8-bit pieces, high order first. The - * value is a positive offset from the opcode of the node containing it. - * An operand, if any, simply follows the node. (Note that much of the - * code generation knows about this implicit relationship.) - * - * Using two bytes for the "next" pointer is vast overkill for most things, - * but allows patterns to get big without disasters. - */ -#define OP(p) (*(p)) -#define NEXT(p) (((*((p)+1)&0377)<<8) + (*((p)+2)&0377)) - -#define OPERAND(p) ((p) + 3) - -/* - * See regmagic.h for one further detail of program structure. - */ - - -/* - * Utility definitions. - */ -#ifndef CHARBITS -#define UCHARAT(p) ((int)*(unsigned char *)(p)) -#else -#define UCHARAT(p) ((int)*(p)&CHARBITS) -#endif - -#define FAIL(m) { tpt_regerror(m); return(NULL); } -#define ISMULT(c) ((c) == '*' || (c) == '+' || (c) == '?') -#define META "^$.[()|?+*\\" - -/* - * Flags to be passed up and down. - */ -#define HASWIDTH 01 /* Known never to match null string. */ -#define SIMPLE 02 /* Simple enough to be STAR/PLUS operand. */ -#define SPSTART 04 /* Starts with * or +. */ -#define WORST 0 /* Worst case. */ - -/* - * Global work variables for regcomp(). - */ -static char *regparse; /* Input-scan pointer. */ -static int regnpar; /* () count. */ -static char regdummy; -static char *regcode; /* Code-emit pointer; ®dummy = don't. */ -static long regsize; /* Code size. */ - -/* - * Forward declarations for regcomp()'s friends. - */ -#ifndef STATIC -#define STATIC static -#endif -STATIC char *reg(); -STATIC char *regbranch(); -STATIC char *regpiece(); -STATIC char *regatom(); -STATIC char *regnode(char op); -STATIC char *regnext(); -STATIC void regc(char b); -STATIC void reginsert(char op, char *opnd); -STATIC void regtail(); -STATIC void regoptail(); -#ifdef STRCSPN -STATIC int strcspn(); -#endif - -/* - - regcomp - compile a regular expression into internal code - * - * We can't allocate space until we know how big the compiled form will be, - * but we can't compile it (and thus know how big it is) until we've got a - * place to put the code. So we cheat: we compile it twice, once with code - * generation turned off and size counting turned on, and once "for real". - * This also means that we don't allocate space until we are sure that the - * thing really will compile successfully, and we never have to move the - * code and thus invalidate pointers into it. (Note that it has to be in - * one piece because free() must be able to free it all.) - * - * Beware that the optimization-preparation code in here knows about some - * of the structure of the compiled regexp. - */ -regexp * -tpt_regcomp(char *exp) -{ - regexp *r; - char *scan; - char *longest; - int len; - int flags; - - if (exp == NULL) - FAIL("NULL argument"); - - /* First pass: determine size, legality. */ - regparse = exp; - regnpar = 1; - regsize = 0L; - regcode = ®dummy; - regc(MAGIC); - if (reg(0, &flags) == NULL) - return(NULL); - - /* Small enough for pointer-storage convention? */ - if (regsize >= 32767L) /* Probably could be 65535L. */ - FAIL("regexp too big"); - - /* Allocate space. */ - r = (regexp *)malloc(sizeof(regexp) + (unsigned)regsize); - if (r == NULL) - FAIL("out of space"); - - /* Second pass: emit code. */ - regparse = exp; - regnpar = 1; - regcode = r->program; - regc(MAGIC); - if (reg(0, &flags) == NULL) - return(NULL); - - /* Dig out information for optimizations. */ - r->regstart = '\0'; /* Worst-case defaults. */ - r->reganch = 0; - r->regmust = NULL; - r->regmlen = 0; - scan = r->program+1; /* First BRANCH. */ - if (OP(regnext(scan)) == END) { /* Only one top-level choice. */ - scan = OPERAND(scan); - - /* Starting-point info. */ - if (OP(scan) == EXACTLY) - r->regstart = *OPERAND(scan); - else if (OP(scan) == BOL) - r->reganch++; - - /* - * If there's something expensive in the r.e., find the - * longest literal string that must appear and make it the - * regmust. Resolve ties in favor of later strings, since - * the regstart check works with the beginning of the r.e. - * and avoiding duplication strengthens checking. Not a - * strong reason, but sufficient in the absence of others. - */ - if (flags&SPSTART) { - longest = NULL; - len = 0; - for (; scan != NULL; scan = regnext(scan)) - if (OP(scan) == EXACTLY && strlen(OPERAND(scan)) >= len) { - longest = OPERAND(scan); - len = strlen(OPERAND(scan)); - } - r->regmust = longest; - r->regmlen = len; - } - } - - return(r); -} - -/* - - reg - regular expression, i.e. main body or parenthesized thing - * - * Caller must absorb opening parenthesis. - * - * Combining parenthesis handling with the base level of regular expression - * is a trifle forced, but the need to tie the tails of the branches to what - * follows makes it hard to avoid. - */ -static char * -reg( -int paren, /* Parenthesized? */ -int *flagp -) -{ - char *ret; - char *br; - char *ender; - int parno; - int flags; - - *flagp = HASWIDTH; /* Tentatively. */ - - /* Make an OPEN node, if parenthesized. */ - if (paren) { - if (regnpar >= NSUBEXP) - FAIL("too many ()"); - parno = regnpar; - regnpar++; - ret = regnode(OPEN+parno); - } else - ret = NULL; - - /* Pick up the branches, linking them together. */ - br = regbranch(&flags); - if (br == NULL) - return(NULL); - if (ret != NULL) - regtail(ret, br); /* OPEN -> first. */ - else - ret = br; - if (!(flags&HASWIDTH)) - *flagp &= ~HASWIDTH; - *flagp |= flags&SPSTART; - while (*regparse == '|') { - regparse++; - br = regbranch(&flags); - if (br == NULL) - return(NULL); - regtail(ret, br); /* BRANCH -> BRANCH. */ - if (!(flags&HASWIDTH)) - *flagp &= ~HASWIDTH; - *flagp |= flags&SPSTART; - } - - /* Make a closing node, and hook it on the end. */ - ender = regnode((paren) ? CLOSE+parno : END); - regtail(ret, ender); - - /* Hook the tails of the branches to the closing node. */ - for (br = ret; br != NULL; br = regnext(br)) - regoptail(br, ender); - - /* Check for proper termination. */ - if (paren && *regparse++ != ')') { - FAIL("unmatched ()"); - } else if (!paren && *regparse != '\0') { - if (*regparse == ')') { - FAIL("unmatched ()"); - } else - FAIL("junk on end"); /* "Can't happen". */ - /* NOTREACHED */ - } - - return(ret); -} - -/* - - regbranch - one alternative of an | operator - * - * Implements the concatenation operator. - */ -static char * -regbranch(int *flagp) -{ - char *ret; - char *chain; - char *latest; - int flags; - - *flagp = WORST; /* Tentatively. */ - - ret = regnode(BRANCH); - chain = NULL; - while (*regparse != '\0' && *regparse != '|' && *regparse != ')') { - latest = regpiece(&flags); - if (latest == NULL) - return(NULL); - *flagp |= flags&HASWIDTH; - if (chain == NULL) /* First piece. */ - *flagp |= flags&SPSTART; - else - regtail(chain, latest); - chain = latest; - } - if (chain == NULL) /* Loop ran zero times. */ - (void) regnode(NOTHING); - - return(ret); -} - -/* - - regpiece - something followed by possible [*+?] - * - * Note that the branching code sequences used for ? and the general cases - * of * and + are somewhat optimized: they use the same NOTHING node as - * both the endmarker for their branch list and the body of the last branch. - * It might seem that this node could be dispensed with entirely, but the - * endmarker role is not redundant. - */ -static char * -regpiece(int *flagp) -{ - char *ret; - char op; - char *next; - int flags; - - ret = regatom(&flags); - if (ret == NULL) - return(NULL); - - op = *regparse; - if (!ISMULT(op)) { - *flagp = flags; - return(ret); - } - - if (!(flags&HASWIDTH) && op != '?') - FAIL("*+ operand could be empty"); - *flagp = (op != '+') ? (WORST|SPSTART) : (WORST|HASWIDTH); - - if (op == '*' && (flags&SIMPLE)) - reginsert(STAR, ret); - else if (op == '*') { - /* Emit x* as (x&|), where & means "self". */ - reginsert(BRANCH, ret); /* Either x */ - regoptail(ret, regnode(BACK)); /* and loop */ - regoptail(ret, ret); /* back */ - regtail(ret, regnode(BRANCH)); /* or */ - regtail(ret, regnode(NOTHING)); /* null. */ - } else if (op == '+' && (flags&SIMPLE)) - reginsert(PLUS, ret); - else if (op == '+') { - /* Emit x+ as x(&|), where & means "self". */ - next = regnode(BRANCH); /* Either */ - regtail(ret, next); - regtail(regnode(BACK), ret); /* loop back */ - regtail(next, regnode(BRANCH)); /* or */ - regtail(ret, regnode(NOTHING)); /* null. */ - } else if (op == '?') { - /* Emit x? as (x|) */ - reginsert(BRANCH, ret); /* Either x */ - regtail(ret, regnode(BRANCH)); /* or */ - next = regnode(NOTHING); /* null. */ - regtail(ret, next); - regoptail(ret, next); - } - regparse++; - if (ISMULT(*regparse)) - FAIL("nested *?+"); - - return(ret); -} - -/* - - regatom - the lowest level - * - * Optimization: gobbles an entire sequence of ordinary characters so that - * it can turn them into a single node, which is smaller to store and - * faster to run. Backslashed characters are exceptions, each becoming a - * separate node; the code is simpler that way and it's not worth fixing. - */ -static char * -regatom(int *flagp) -{ - char *ret; - int flags; - - *flagp = WORST; /* Tentatively. */ - - switch (*regparse++) { - case '^': - ret = regnode(BOL); - break; - case '$': - ret = regnode(EOL); - break; - case '.': - ret = regnode(ANY); - *flagp |= HASWIDTH|SIMPLE; - break; - case '[': { - int class; - int classend; - - if (*regparse == '^') { /* Complement of range. */ - ret = regnode(ANYBUT); - regparse++; - } else - ret = regnode(ANYOF); - if (*regparse == ']' || *regparse == '-') - regc(*regparse++); - while (*regparse != '\0' && *regparse != ']') { - if (*regparse == '-') { - regparse++; - if (*regparse == ']' || *regparse == '\0') - regc('-'); - else { - class = UCHARAT(regparse-2)+1; - classend = UCHARAT(regparse); - if (class > classend+1) - FAIL("invalid [] range"); - for (; class <= classend; class++) - regc(class); - regparse++; - } - } else - regc(*regparse++); - } - regc('\0'); - if (*regparse != ']') - FAIL("unmatched []"); - regparse++; - *flagp |= HASWIDTH|SIMPLE; - } - break; - case '(': - ret = reg(1, &flags); - if (ret == NULL) - return(NULL); - *flagp |= flags&(HASWIDTH|SPSTART); - break; - case '\0': - case '|': - case ')': - FAIL("internal urp"); /* Supposed to be caught earlier. */ - break; - case '?': - case '+': - case '*': - FAIL("?+* follows nothing"); - break; - case '\\': - if (*regparse == '\0') - FAIL("trailing \\"); - ret = regnode(EXACTLY); - regc(*regparse++); - regc('\0'); - *flagp |= HASWIDTH|SIMPLE; - break; - default: { - int len; - char ender; - - regparse--; - len = strcspn(regparse, META); - if (len <= 0) - FAIL("internal disaster"); - ender = *(regparse+len); - if (len > 1 && ISMULT(ender)) - len--; /* Back off clear of ?+* operand. */ - *flagp |= HASWIDTH; - if (len == 1) - *flagp |= SIMPLE; - ret = regnode(EXACTLY); - while (len > 0) { - regc(*regparse++); - len--; - } - regc('\0'); - } - break; - } - - return(ret); -} - -/* - - regnode - emit a node - */ -static char * /* Location. */ -regnode(char op) -{ - char *ret; - char *ptr; - - ret = regcode; - if (ret == ®dummy) { - regsize += 3; - return(ret); - } - - ptr = ret; - *ptr++ = op; - *ptr++ = '\0'; /* Null "next" pointer. */ - *ptr++ = '\0'; - regcode = ptr; - - return(ret); -} - -/* - - regc - emit (if appropriate) a byte of code - */ -static void -regc(char b) -{ - if (regcode != ®dummy) - *regcode++ = b; - else - regsize++; -} - -/* - - reginsert - insert an operator in front of already-emitted operand - * - * Means relocating the operand. - */ -static void -reginsert(char op, char *opnd) -{ - char *src; - char *dst; - char *place; - - if (regcode == ®dummy) { - regsize += 3; - return; - } - - src = regcode; - regcode += 3; - dst = regcode; - while (src > opnd) - *--dst = *--src; - - place = opnd; /* Op node, where operand used to be. */ - *place++ = op; - *place++ = '\0'; - *place++ = '\0'; -} - -/* - - regtail - set the next-pointer at the end of a node chain - */ -static void -regtail(char *p, char *val) -{ - char *scan; - char *temp; - int offset; - - if (p == ®dummy) - return; - - /* Find last node. */ - scan = p; - for (;;) { - temp = regnext(scan); - if (temp == NULL) - break; - scan = temp; - } - - if (OP(scan) == BACK) - offset = scan - val; - else - offset = val - scan; - *(scan+1) = (offset>>8)&0377; - *(scan+2) = offset&0377; -} - -/* - - regoptail - regtail on operand of first argument; nop if operandless - */ -static void -regoptail(char *p, char *val) -{ - /* "Operandless" and "op != BRANCH" are synonymous in practice. */ - if (p == NULL || p == ®dummy || OP(p) != BRANCH) - return; - regtail(OPERAND(p), val); -} - -/* - * regexec and friends - */ - -/* - * Global work variables for regexec(). - */ -static char *reginput; /* String-input pointer. */ -static char *regbol; /* Beginning of input, for ^ check. */ -static char **regstartp; /* Pointer to startp array. */ -static char **regendp; /* Ditto for endp. */ - -/* - * Forwards. - */ -STATIC int regtry(); -STATIC int regmatch(); -STATIC int regrepeat(); - -#ifdef DEBUG -int regnarrate = 0; -void regdump(); -STATIC char *regprop(); -#endif - -/* - - regexec - match a regexp against a string - */ -int -tpt_regexec(regexp *prog, char *string) -{ - char *s; - - /* Be paranoid... */ - if (prog == NULL || string == NULL) { - tpt_regerror("NULL parameter"); - return(0); - } - - /* Check validity of program. */ - if (UCHARAT(prog->program) != MAGIC) { - tpt_regerror("corrupted program"); - return(0); - } - - /* If there is a "must appear" string, look for it. */ - if (prog->regmust != NULL) { - s = string; - while ((s = strchr(s, prog->regmust[0])) != NULL) { - if (strncmp(s, prog->regmust, prog->regmlen) == 0) - break; /* Found it. */ - s++; - } - if (s == NULL) /* Not present. */ - return(0); - } - - /* Mark beginning of line for ^ . */ - regbol = string; - - /* Simplest case: anchored match need be tried only once. */ - if (prog->reganch) - return(regtry(prog, string)); - - /* Messy cases: unanchored match. */ - s = string; - if (prog->regstart != '\0') - /* We know what char it must start with. */ - while ((s = strchr(s, prog->regstart)) != NULL) { - if (regtry(prog, s)) - return(1); - s++; - } - else - /* We don't -- general case. */ - do { - if (regtry(prog, s)) - return(1); - } while (*s++ != '\0'); - - /* Failure. */ - return(0); -} - -/* - - regtry - try match at specific point - */ -static int /* 0 failure, 1 success */ -regtry(regexp *prog, char *string) -{ - int i; - char **sp; - char **ep; - - reginput = string; - regstartp = prog->startp; - regendp = prog->endp; - - sp = prog->startp; - ep = prog->endp; - for (i = NSUBEXP; i > 0; i--) { - *sp++ = NULL; - *ep++ = NULL; - } - if (regmatch(prog->program + 1)) { - prog->startp[0] = string; - prog->endp[0] = reginput; - return(1); - } else - return(0); -} - -/* - - regmatch - main matching routine - * - * Conceptually the strategy is simple: check to see whether the current - * node matches, call self recursively to see whether the rest matches, - * and then act accordingly. In practice we make some effort to avoid - * recursion, in particular by going through "ordinary" nodes (that don't - * need to know whether the rest of the match failed) by a loop instead of - * by recursion. - */ -static int /* 0 failure, 1 success */ -regmatch(char *prog) -{ - char *scan; /* Current node. */ - char *next; /* Next node. */ - - scan = prog; -#ifdef DEBUG - if (scan != NULL && regnarrate) - fprintf(stderr, "%s(\n", regprop(scan)); -#endif - while (scan != NULL) { -#ifdef DEBUG - if (regnarrate) - fprintf(stderr, "%s...\n", regprop(scan)); -#endif - next = regnext(scan); - - switch (OP(scan)) { - case BOL: - if (reginput != regbol) - return(0); - break; - case EOL: - if (*reginput != '\0') - return(0); - break; - case ANY: - if (*reginput == '\0') - return(0); - reginput++; - break; - case EXACTLY: { - int len; - char *opnd; - - opnd = OPERAND(scan); - /* Inline the first character, for speed. */ - if (*opnd != *reginput) - return(0); - len = strlen(opnd); - if (len > 1 && strncmp(opnd, reginput, len) != 0) - return(0); - reginput += len; - } - break; - case ANYOF: - if (*reginput == '\0' || strchr(OPERAND(scan), *reginput) == NULL) - return(0); - reginput++; - break; - case ANYBUT: - if (*reginput == '\0' || strchr(OPERAND(scan), *reginput) != NULL) - return(0); - reginput++; - break; - case NOTHING: - break; - case BACK: - break; - case OPEN+1: - case OPEN+2: - case OPEN+3: - case OPEN+4: - case OPEN+5: - case OPEN+6: - case OPEN+7: - case OPEN+8: - case OPEN+9: { - int no; - char *save; - - no = OP(scan) - OPEN; - save = reginput; - - if (regmatch(next)) { - /* - * Don't set startp if some later - * invocation of the same parentheses - * already has. - */ - if (regstartp[no] == NULL) - regstartp[no] = save; - return(1); - } else - return(0); - } - break; - case CLOSE+1: - case CLOSE+2: - case CLOSE+3: - case CLOSE+4: - case CLOSE+5: - case CLOSE+6: - case CLOSE+7: - case CLOSE+8: - case CLOSE+9: { - int no; - char *save; - - no = OP(scan) - CLOSE; - save = reginput; - - if (regmatch(next)) { - /* - * Don't set endp if some later - * invocation of the same parentheses - * already has. - */ - if (regendp[no] == NULL) - regendp[no] = save; - return(1); - } else - return(0); - } - break; - case BRANCH: { - char *save; - - if (OP(next) != BRANCH) /* No choice. */ - next = OPERAND(scan); /* Avoid recursion. */ - else { - do { - save = reginput; - if (regmatch(OPERAND(scan))) - return(1); - reginput = save; - scan = regnext(scan); - } while (scan != NULL && OP(scan) == BRANCH); - return(0); - /* NOTREACHED */ - } - } - break; - case STAR: - case PLUS: { - char nextch; - int no; - char *save; - int min; - - /* - * Lookahead to avoid useless match attempts - * when we know what character comes next. - */ - nextch = '\0'; - if (OP(next) == EXACTLY) - nextch = *OPERAND(next); - min = (OP(scan) == STAR) ? 0 : 1; - save = reginput; - no = regrepeat(OPERAND(scan)); - while (no >= min) { - /* If it could work, try it. */ - if (nextch == '\0' || *reginput == nextch) - if (regmatch(next)) - return(1); - /* Couldn't or didn't -- back up. */ - no--; - reginput = save + no; - } - return(0); - } - break; - case END: - return(1); /* Success! */ - break; - default: - tpt_regerror("memory corruption"); - return(0); - break; - } - - scan = next; - } - - /* - * We get here only if there's trouble -- normally "case END" is - * the terminating point. - */ - tpt_regerror("corrupted pointers"); - return(0); -} - -/* - - regrepeat - repeatedly match something simple, report how many - */ -static int -regrepeat(char *p) -{ - int count = 0; - char *scan; - char *opnd; - - scan = reginput; - opnd = OPERAND(p); - switch (OP(p)) { - case ANY: - count = strlen(scan); - scan += count; - break; - case EXACTLY: - while (*opnd == *scan) { - count++; - scan++; - } - break; - case ANYOF: - while (*scan != '\0' && strchr(opnd, *scan) != NULL) { - count++; - scan++; - } - break; - case ANYBUT: - while (*scan != '\0' && strchr(opnd, *scan) == NULL) { - count++; - scan++; - } - break; - default: /* Oh dear. Called inappropriately. */ - tpt_regerror("internal foulup"); - count = 0; /* Best compromise. */ - break; - } - reginput = scan; - - return(count); -} - -/* - - regnext - dig the "next" pointer out of a node - */ -static char * -regnext(char *p) -{ - int offset; - - if (p == ®dummy) - return(NULL); - - offset = NEXT(p); - if (offset == 0) - return(NULL); - - if (OP(p) == BACK) - return(p-offset); - else - return(p+offset); -} - -#ifdef DEBUG - -STATIC char *regprop(); - -/* - - regdump - dump a regexp onto stdout in vaguely comprehensible form - */ -void -regdump(regexp *r) -{ - char *s; - char op = EXACTLY; /* Arbitrary non-END op. */ - char *next; - - - s = r->program + 1; - while (op != END) { /* While that wasn't END last time... */ - op = OP(s); - printf("%2d%s", s-r->program, regprop(s)); /* Where, what. */ - next = regnext(s); - if (next == NULL) /* Next ptr. */ - printf("(0)"); - else - printf("(%d)", (s-r->program)+(next-s)); - s += 3; - if (op == ANYOF || op == ANYBUT || op == EXACTLY) { - /* Literal string, where present. */ - while (*s != '\0') { - putchar(*s); - s++; - } - s++; - } - putchar('\n'); - } - - /* Header fields of interest. */ - if (r->regstart != '\0') - printf("start `%c' ", r->regstart); - if (r->reganch) - printf("anchored "); - if (r->regmust != NULL) - printf("must have \"%s\"", r->regmust); - printf("\n"); -} - -/* - - regprop - printable representation of opcode - */ -static char * -regprop(char *op) -{ - char *p; - static char buf[50]; - - (void) strcpy(buf, ":"); - - switch (OP(op)) { - case BOL: - p = "BOL"; - break; - case EOL: - p = "EOL"; - break; - case ANY: - p = "ANY"; - break; - case ANYOF: - p = "ANYOF"; - break; - case ANYBUT: - p = "ANYBUT"; - break; - case BRANCH: - p = "BRANCH"; - break; - case EXACTLY: - p = "EXACTLY"; - break; - case NOTHING: - p = "NOTHING"; - break; - case BACK: - p = "BACK"; - break; - case END: - p = "END"; - break; - case OPEN+1: - case OPEN+2: - case OPEN+3: - case OPEN+4: - case OPEN+5: - case OPEN+6: - case OPEN+7: - case OPEN+8: - case OPEN+9: - sprintf(buf+strlen(buf), "OPEN%d", OP(op)-OPEN); - p = NULL; - break; - case CLOSE+1: - case CLOSE+2: - case CLOSE+3: - case CLOSE+4: - case CLOSE+5: - case CLOSE+6: - case CLOSE+7: - case CLOSE+8: - case CLOSE+9: - sprintf(buf+strlen(buf), "CLOSE%d", OP(op)-CLOSE); - p = NULL; - break; - case STAR: - p = "STAR"; - break; - case PLUS: - p = "PLUS"; - break; - default: - tpt_regerror("corrupted opcode"); - break; - } - if (p != NULL) - (void) strcat(buf, p); - return(buf); -} -#endif - -/* - * The following is provided for those people who do not have strcspn() in - * their C libraries. They should get off their butts and do something - * about it; at least one public-domain implementation of those (highly - * useful) string routines has been published on Usenet. - */ -#ifdef STRCSPN -/* - * strcspn - find length of initial segment of s1 consisting entirely - * of characters not from s2 - */ - -static int -strcspn(char *s1, char *s2) -{ - char *scan1; - char *scan2; - int count; - - count = 0; - for (scan1 = s1; *scan1 != '\0'; scan1++) { - for (scan2 = s2; *scan2 != '\0';) /* ++ moved down. */ - if (*scan1 == *scan2++) - return(count); - count++; - } - return(count); -} -#endif diff --git a/cde/programs/dtdocbook/lib/tptregexp/regmagic.h b/cde/programs/dtdocbook/lib/tptregexp/regmagic.h deleted file mode 100644 index c2a7ca622..000000000 --- a/cde/programs/dtdocbook/lib/tptregexp/regmagic.h +++ /dev/null @@ -1,28 +0,0 @@ -/* - * CDE - Common Desktop Environment - * - * Copyright (c) 1993-2012, The Open Group. All rights reserved. - * - * These libraries and programs are free software; you can - * redistribute them and/or modify them under the terms of the GNU - * Lesser General Public License as published by the Free Software - * Foundation; either version 2 of the License, or (at your option) - * any later version. - * - * These libraries and programs are distributed in the hope that - * they will be useful, but WITHOUT ANY WARRANTY; without even the - * implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR - * PURPOSE. See the GNU Lesser General Public License for more - * details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with these libraries and programs; if not, write - * to the Free Software Foundation, Inc., 51 Franklin Street, Fifth - * Floor, Boston, MA 02110-1301 USA - */ -/* $XConsortium: regmagic.h /main/3 1996/06/19 17:13:41 drk $ */ -/* - * The first byte of the regexp internal "program" is actually this magic - * number; the start node begins in the second byte. - */ -#define MAGIC 0234 diff --git a/cde/programs/dtdocbook/lib/tptregexp/regsub.c b/cde/programs/dtdocbook/lib/tptregexp/regsub.c deleted file mode 100644 index 3e94a2b2d..000000000 --- a/cde/programs/dtdocbook/lib/tptregexp/regsub.c +++ /dev/null @@ -1,101 +0,0 @@ -/* - * CDE - Common Desktop Environment - * - * Copyright (c) 1993-2012, The Open Group. All rights reserved. - * - * These libraries and programs are free software; you can - * redistribute them and/or modify them under the terms of the GNU - * Lesser General Public License as published by the Free Software - * Foundation; either version 2 of the License, or (at your option) - * any later version. - * - * These libraries and programs are distributed in the hope that - * they will be useful, but WITHOUT ANY WARRANTY; without even the - * implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR - * PURPOSE. See the GNU Lesser General Public License for more - * details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with these libraries and programs; if not, write - * to the Free Software Foundation, Inc., 51 Franklin Street, Fifth - * Floor, Boston, MA 02110-1301 USA - */ -/* $XConsortium: regsub.c /main/3 1996/06/19 17:13:46 drk $ */ -/* - * regsub - * - * Copyright (c) 1986 by University of Toronto. - * Written by Henry Spencer. Not derived from licensed software. - * - * Permission is granted to anyone to use this software for any - * purpose on any computer system, and to redistribute it freely, - * subject to the following restrictions: - * - * 1. The author is not responsible for the consequences of use of - * this software, no matter how awful, even if they arise - * from defects in it. - * - * 2. The origin of this software must not be misrepresented, either - * by explicit claim or by omission. - * - * 3. Altered versions must be plainly marked as such, and must not - * be misrepresented as being the original software. - */ -#include -#include -#include -#include "regmagic.h" - -#ifndef CHARBITS -#define UCHARAT(p) ((int)*(unsigned char *)(p)) -#else -#define UCHARAT(p) ((int)*(p)&CHARBITS) -#endif - -/* - - regsub - perform substitutions after a regexp match - */ -void -tpt_regsub(regexp *prog, char *source, char *dest) -{ - char *src; - char *dst; - char c; - int no; - int len; - - if (prog == NULL || source == NULL || dest == NULL) { - tpt_regerror("NULL parm to regsub"); - return; - } - if (UCHARAT(prog->program) != MAGIC) { - tpt_regerror("damaged regexp fed to regsub"); - return; - } - - src = source; - dst = dest; - while ((c = *src++) != '\0') { - if (c == '&') - no = 0; - else if (c == '\\' && '0' <= *src && *src <= '9') - no = *src++ - '0'; - else - no = -1; - - if (no < 0) { /* Ordinary character. */ - if (c == '\\' && (*src == '\\' || *src == '&')) - c = *src++; - *dst++ = c; - } else if (prog->startp[no] != NULL && prog->endp[no] != NULL) { - len = prog->endp[no] - prog->startp[no]; - (void) strncpy(dst, prog->startp[no], len); - dst += len; - if (len != 0 && *(dst-1) == '\0') { /* strncpy hit NUL. */ - tpt_regerror("damaged match string"); - return; - } - } - } - *dst++ = '\0'; -} diff --git a/cde/programs/dtdocbook/lib/tptregexp/tests b/cde/programs/dtdocbook/lib/tptregexp/tests deleted file mode 100644 index e5bc16d8a..000000000 --- a/cde/programs/dtdocbook/lib/tptregexp/tests +++ /dev/null @@ -1,127 +0,0 @@ -abc abc y & abc -abc xbc n - - -abc axc n - - -abc abx n - - -abc xabcy y & abc -abc ababc y & abc -ab*c abc y & abc -ab*bc abc y & abc -ab*bc abbc y & abbc -ab*bc abbbbc y & abbbbc -ab+bc abbc y & abbc -ab+bc abc n - - -ab+bc abq n - - -ab+bc abbbbc y & abbbbc -ab?bc abbc y & abbc -ab?bc abc y & abc -ab?bc abbbbc n - - -ab?c abc y & abc -^abc$ abc y & abc -^abc$ abcc n - - -^abc abcc y & abc -^abc$ aabc n - - -abc$ aabc y & abc -^ abc y & -$ abc y & -a.c abc y & abc -a.c axc y & axc -a.*c axyzc y & axyzc -a.*c axyzd n - - -a[bc]d abc n - - -a[bc]d abd y & abd -a[b-d]e abd n - - -a[b-d]e ace y & ace -a[b-d] aac y & ac -a[-b] a- y & a- -a[b-] a- y & a- -a[b-a] - c - - -a[]b - c - - -a[ - c - - -a] a] y & a] -a[]]b a]b y & a]b -a[^bc]d aed y & aed -a[^bc]d abd n - - -a[^-b]c adc y & adc -a[^-b]c a-c n - - -a[^]b]c a]c n - - -a[^]b]c adc y & adc -ab|cd abc y & ab -ab|cd abcd y & ab -()ef def y &-\1 ef- -()* - c - - -*a - c - - -^* - c - - -$* - c - - -(*)b - c - - -$b b n - - -a\ - c - - -a\(b a(b y &-\1 a(b- -a\(*b ab y & ab -a\(*b a((b y & a((b -a\\b a\b y & a\b -abc) - c - - -(abc - c - - -((a)) abc y &-\1-\2 a-a-a -(a)b(c) abc y &-\1-\2 abc-a-c -a+b+c aabbabc y & abc -a** - c - - -a*? - c - - -(a*)* - c - - -(a*)+ - c - - -(a|)* - c - - -(a*|b)* - c - - -(a+|b)* ab y &-\1 ab-b -(a+|b)+ ab y &-\1 ab-b -(a+|b)? ab y &-\1 a-a -[^ab]* cde y & cde -(^)* - c - - -(ab|)* - c - - -)( - c - - - abc y & -abc n - - -a* y & -([abc])*d abbbcd y &-\1 abbbcd-c -([abc])*bcd abcd y &-\1 abcd-a -a|b|c|d|e e y & e -(a|b|c|d|e)f ef y &-\1 ef-e -((a*|b))* - c - - -abcd*efg abcdefg y & abcdefg -ab* xabyabbbz y & ab -ab* xayabbbz y & a -(ab|cd)e abcde y &-\1 cde-cd -[abhgefdc]ij hij y & hij -^(ab|cd)e abcde n x\1y xy -(abc|)ef abcdef y &-\1 ef- -(a|b)c*d abcd y &-\1 bcd-b -(ab|ab*)bc abc y &-\1 abc-a -a([bc]*)c* abc y &-\1 abc-bc -a([bc]*)(c*d) abcd y &-\1-\2 abcd-bc-d -a([bc]+)(c*d) abcd y &-\1-\2 abcd-bc-d -a([bc]*)(c+d) abcd y &-\1-\2 abcd-b-cd -a[bcd]*dcdcde adcdcde y & adcdcde -a[bcd]+dcdcde adcdcde n - - -(ab|a)b*c abc y &-\1 abc-ab -((a)(b)c)(d) abcd y \1-\2-\3-\4 abc-a-b-d -[a-zA-Z_][a-zA-Z0-9_]* alpha y & alpha -^a(bc+|b[eh])g|.h$ abh y &-\1 bh- -(bc+d$|ef*g.|h?i(j|k)) effgz y &-\1-\2 effgz-effgz- -(bc+d$|ef*g.|h?i(j|k)) ij y &-\1-\2 ij-ij-j -(bc+d$|ef*g.|h?i(j|k)) effg n - - -(bc+d$|ef*g.|h?i(j|k)) bcdd n - - -(bc+d$|ef*g.|h?i(j|k)) reffgz y &-\1-\2 effgz-effgz- -((((((((((a)))))))))) - c - - -(((((((((a))))))))) a y & a -multiple words of text uh-uh n - - -multiple words multiple words, yeah y & multiple words -(.*)c(.*) abcde y &-\1-\2 abcde-ab-de -\((.*), (.*)\) (a, b) y (\2, \1) (b, a) -[k] ab n - - -abcd abcd y &-\&-\\& abcd-&-\abcd -a(bc)d abcd y \1-\\1-\\\1 bc-\1-\bc -[ -~]* abc y & abc -[ -~ -~]* abc y & abc -[ -~ -~ -~]* abc y & abc -[ -~ -~ -~ -~]* abc y & abc -[ -~ -~ -~ -~ -~]* abc y & abc -[ -~ -~ -~ -~ -~ -~]* abc y & abc -[ -~ -~ -~ -~ -~ -~ -~]* abc y & abc diff --git a/cde/programs/dtdocbook/lib/tptregexp/timer.c b/cde/programs/dtdocbook/lib/tptregexp/timer.c deleted file mode 100644 index bc0ec2d21..000000000 --- a/cde/programs/dtdocbook/lib/tptregexp/timer.c +++ /dev/null @@ -1,205 +0,0 @@ -/* - * CDE - Common Desktop Environment - * - * Copyright (c) 1993-2012, The Open Group. All rights reserved. - * - * These libraries and programs are free software; you can - * redistribute them and/or modify them under the terms of the GNU - * Lesser General Public License as published by the Free Software - * Foundation; either version 2 of the License, or (at your option) - * any later version. - * - * These libraries and programs are distributed in the hope that - * they will be useful, but WITHOUT ANY WARRANTY; without even the - * implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR - * PURPOSE. See the GNU Lesser General Public License for more - * details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with these libraries and programs; if not, write - * to the Free Software Foundation, Inc., 51 Franklin Street, Fifth - * Floor, Boston, MA 02110-1301 USA - */ -/* $XConsortium: timer.c /main/3 1996/06/19 17:13:50 drk $ */ -/* - * Simple timing program for regcomp(). - * - * Copyright (c) 1986 by University of Toronto. - * Written by Henry Spencer. Not derived from licensed software. - * - * Permission is granted to anyone to use this software for any - * purpose on any computer system, and to redistribute it freely, - * subject to the following restrictions: - * - * 1. The author is not responsible for the consequences of use of - * this software, no matter how awful, even if they arise - * from defects in it. - * - * 2. The origin of this software must not be misrepresented, either - * by explicit claim or by omission. - * - * 3. Altered versions must be plainly marked as such, and must not - * be misrepresented as being the original software. - * - * Usage: timer ncomp nexec nsub - * or - * timer ncomp nexec nsub regexp string [ answer [ sub ] ] - * - * The second form is for timing repetitions of a single test case. - * The first form's test data is a compiled-in copy of the "tests" file. - * Ncomp, nexec, nsub are how many times to do each regcomp, regexec, - * and regsub. The way to time an operation individually is to do something - * like "timer 1 50 1". - */ -#include - -struct try { - char *re, *str, *ans, *src, *dst; -} tests[] = { -#include "timer.t.h" -{ NULL, NULL, NULL, NULL, NULL } -}; - -#include - -int errreport = 0; /* Report errors via errseen? */ -char *errseen = NULL; /* Error message. */ - -char *progname; - -/* ARGSUSED */ -main(argc, argv) -int argc; -char *argv[]; -{ - int ncomp, nexec, nsub; - struct try one; - char dummy[512]; - - if (argc < 4) { - ncomp = 1; - nexec = 1; - nsub = 1; - } else { - ncomp = atoi(argv[1]); - nexec = atoi(argv[2]); - nsub = atoi(argv[3]); - } - - progname = argv[0]; - if (argc > 5) { - one.re = argv[4]; - one.str = argv[5]; - if (argc > 6) - one.ans = argv[6]; - else - one.ans = "y"; - if (argc > 7) { - one.src = argv[7]; - one.dst = "xxx"; - } else { - one.src = "x"; - one.dst = "x"; - } - errreport = 1; - try(one, ncomp, nexec, nsub); - } else - multiple(ncomp, nexec, nsub); - exit(0); -} - -void -tpt_regerror(s) -char *s; -{ - if (errreport) - errseen = s; - else - error(s, ""); -} - -#ifndef ERRAVAIL -error(s1, s2) -char *s1; -char *s2; -{ - fprintf(stderr, "regexp: "); - fprintf(stderr, s1, s2); - fprintf(stderr, "\n"); - exit(1); -} -#endif - -int lineno = 0; - -multiple(ncomp, nexec, nsub) -int ncomp, nexec, nsub; -{ - int i; - extern char *strchr(); - - errreport = 1; - for (i = 0; tests[i].re != NULL; i++) { - lineno++; - try(tests[i], ncomp, nexec, nsub); - } -} - -try(fields, ncomp, nexec, nsub) -struct try fields; -int ncomp, nexec, nsub; -{ - regexp *r; - char dbuf[BUFSIZ]; - int i; - - errseen = NULL; - r = tpt_regcomp(fields.re); - if (r == NULL) { - if (*fields.ans != 'c') - complain("tpt_regcomp failure in `%s'", fields.re); - return; - } - if (*fields.ans == 'c') { - complain("unexpected tpt_regcomp success in `%s'", fields.re); - free((char *)r); - return; - } - for (i = ncomp-1; i > 0; i--) { - free((char *)r); - r = tpt_regcomp(fields.re); - } - if (!tpt_regexec(r, fields.str)) { - if (*fields.ans != 'n') - complain("tpt_regexec failure in `%s'", ""); - free((char *)r); - return; - } - if (*fields.ans == 'n') { - complain("unexpected tpt_regexec success", ""); - free((char *)r); - return; - } - for (i = nexec-1; i > 0; i--) - (void) tpt_regexec(r, fields.str); - errseen = NULL; - for (i = nsub; i > 0; i--) - tpt_regsub(r, fields.src, dbuf); - if (errseen != NULL) { - complain("tpt_regsub complaint", ""); - free((char *)r); - return; - } - if (strcmp(dbuf, fields.dst) != 0) - complain("tpt_regsub result `%s' wrong", dbuf); - free((char *)r); -} - -complain(s1, s2) -char *s1; -char *s2; -{ - fprintf(stderr, "try: %d: ", lineno); - fprintf(stderr, s1, s2); - fprintf(stderr, " (%s)\n", (errseen != NULL) ? errseen : ""); -} diff --git a/cde/programs/dtdocbook/lib/tptregexp/tptregexp.h b/cde/programs/dtdocbook/lib/tptregexp/tptregexp.h deleted file mode 100644 index fda28b409..000000000 --- a/cde/programs/dtdocbook/lib/tptregexp/tptregexp.h +++ /dev/null @@ -1,44 +0,0 @@ -/* - * CDE - Common Desktop Environment - * - * Copyright (c) 1993-2012, The Open Group. All rights reserved. - * - * These libraries and programs are free software; you can - * redistribute them and/or modify them under the terms of the GNU - * Lesser General Public License as published by the Free Software - * Foundation; either version 2 of the License, or (at your option) - * any later version. - * - * These libraries and programs are distributed in the hope that - * they will be useful, but WITHOUT ANY WARRANTY; without even the - * implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR - * PURPOSE. See the GNU Lesser General Public License for more - * details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with these libraries and programs; if not, write - * to the Free Software Foundation, Inc., 51 Franklin Street, Fifth - * Floor, Boston, MA 02110-1301 USA - */ -/* $XConsortium: tptregexp.h /main/3 1996/06/19 17:13:54 drk $ */ -/* - * Definitions etc. for regexp(3) routines. - * - * Caveat: this is V8 regexp(3) [actually, a reimplementation thereof], - * not the System V one. - */ -#define NSUBEXP 10 -typedef struct regexp { - char *startp[NSUBEXP]; - char *endp[NSUBEXP]; - char regstart; /* Internal use only. */ - char reganch; /* Internal use only. */ - char *regmust; /* Internal use only. */ - int regmlen; /* Internal use only. */ - char program[1]; /* Unwarranted chumminess with compiler. */ -} regexp; - -extern regexp *tpt_regcomp(); -extern int tpt_regexec(); -extern void tpt_regsub(); -extern void tpt_regerror(); diff --git a/cde/programs/dtdocbook/lib/tptregexp/try.c b/cde/programs/dtdocbook/lib/tptregexp/try.c deleted file mode 100644 index a7dd012c7..000000000 --- a/cde/programs/dtdocbook/lib/tptregexp/try.c +++ /dev/null @@ -1,261 +0,0 @@ -/* - * CDE - Common Desktop Environment - * - * Copyright (c) 1993-2012, The Open Group. All rights reserved. - * - * These libraries and programs are free software; you can - * redistribute them and/or modify them under the terms of the GNU - * Lesser General Public License as published by the Free Software - * Foundation; either version 2 of the License, or (at your option) - * any later version. - * - * These libraries and programs are distributed in the hope that - * they will be useful, but WITHOUT ANY WARRANTY; without even the - * implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR - * PURPOSE. See the GNU Lesser General Public License for more - * details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with these libraries and programs; if not, write - * to the Free Software Foundation, Inc., 51 Franklin Street, Fifth - * Floor, Boston, MA 02110-1301 USA - */ -/* $XConsortium: try.c /main/3 1996/06/19 17:13:58 drk $ */ -/* - * Simple test program for regexp(3) stuff. Knows about debugging hooks. - * - * Copyright (c) 1986 by University of Toronto. - * Written by Henry Spencer. Not derived from licensed software. - * - * Permission is granted to anyone to use this software for any - * purpose on any computer system, and to redistribute it freely, - * subject to the following restrictions: - * - * 1. The author is not responsible for the consequences of use of - * this software, no matter how awful, even if they arise - * from defects in it. - * - * 2. The origin of this software must not be misrepresented, either - * by explicit claim or by omission. - * - * 3. Altered versions must be plainly marked as such, and must not - * be misrepresented as being the original software. - * - * Usage: try re [string [output [-]]] - * The re is compiled and dumped, regexeced against the string, the result - * is applied to output using regsub(). The - triggers a running narrative - * from regexec(). Dumping and narrative don't happen unless DEBUG. - * - * If there are no arguments, stdin is assumed to be a stream of lines with - * five fields: a r.e., a string to match it against, a result code, a - * source string for regsub, and the proper result. Result codes are 'c' - * for compile failure, 'y' for match success, 'n' for match failure. - * Field separator is tab. - */ -#include -#include - -#ifdef ERRAVAIL -char *progname; -extern char *mkprogname(); -#endif - -#ifdef DEBUG -extern int regnarrate; -#endif - -char buf[BUFSIZ]; - -int errreport = 0; /* Report errors via errseen? */ -char *errseen = NULL; /* Error message. */ -int status = 0; /* Exit status. */ - -/* ARGSUSED */ -main(argc, argv) -int argc; -char *argv[]; -{ - regexp *r; - int i; - -#ifdef ERRAVAIL - progname = mkprogname(argv[0]); -#endif - - if (argc == 1) { - multiple(); - exit(status); - } - - r = tpt_regcomp(argv[1]); - if (r == NULL) - error("tpt_regcomp failure", ""); -#ifdef DEBUG - regdump(r); - if (argc > 4) - regnarrate++; -#endif - if (argc > 2) { - i = tpt_regexec(r, argv[2]); - printf("%d", i); - for (i = 1; i < NSUBEXP; i++) - if (r->startp[i] != NULL && r->endp[i] != NULL) - printf(" \\%d", i); - printf("\n"); - } - if (argc > 3) { - tpt_regsub(r, argv[3], buf); - printf("%s\n", buf); - } - exit(status); -} - -void -tpt_regerror(s) -char *s; -{ - if (errreport) - errseen = s; - else - error(s, ""); -} - -#ifndef ERRAVAIL -error(s1, s2) -char *s1; -char *s2; -{ - fprintf(stderr, "regexp: "); - fprintf(stderr, s1, s2); - fprintf(stderr, "\n"); - exit(1); -} -#endif - -int lineno; - -regexp badregexp; /* Implicit init to 0. */ - -multiple() -{ - char rbuf[BUFSIZ]; - char *field[5]; - char *scan; - int i; - regexp *r; - extern char *strchr(); - - errreport = 1; - lineno = 0; - while (fgets(rbuf, sizeof(rbuf), stdin) != NULL) { - rbuf[strlen(rbuf)-1] = '\0'; /* Dispense with \n. */ - lineno++; - scan = rbuf; - for (i = 0; i < 5; i++) { - field[i] = scan; - if (field[i] == NULL) { - complain("bad testfile format", ""); - exit(1); - } - scan = strchr(scan, '\t'); - if (scan != NULL) - *scan++ = '\0'; - } - try(field); - } - - /* And finish up with some internal testing... */ - lineno = 9990; - errseen = NULL; - if (tpt_regcomp((char *)NULL) != NULL || errseen == NULL) - complain("tpt_regcomp(NULL) doesn't complain", ""); - lineno = 9991; - errseen = NULL; - if (tpt_regexec((regexp *)NULL, "foo") || errseen == NULL) - complain("tpt_regexec(NULL, ...) doesn't complain", ""); - lineno = 9992; - r = tpt_regcomp("foo"); - if (r == NULL) { - complain("tpt_regcomp(\"foo\") fails", ""); - return; - } - lineno = 9993; - errseen = NULL; - if (tpt_regexec(r, (char *)NULL) || errseen == NULL) - complain("tpt_regexec(..., NULL) doesn't complain", ""); - lineno = 9994; - errseen = NULL; - tpt_regsub((regexp *)NULL, "foo", rbuf); - if (errseen == NULL) - complain("tpt_regsub(NULL, ..., ...) doesn't complain", ""); - lineno = 9995; - errseen = NULL; - tpt_regsub(r, (char *)NULL, rbuf); - if (errseen == NULL) - complain("tpt_regsub(..., NULL, ...) doesn't complain", ""); - lineno = 9996; - errseen = NULL; - tpt_regsub(r, "foo", (char *)NULL); - if (errseen == NULL) - complain("tpt_regsub(..., ..., NULL) doesn't complain", ""); - lineno = 9997; - errseen = NULL; - if (tpt_regexec(&badregexp, "foo") || errseen == NULL) - complain("tpt_regexec(nonsense, ...) doesn't complain", ""); - lineno = 9998; - errseen = NULL; - tpt_regsub(&badregexp, "foo", rbuf); - if (errseen == NULL) - complain("tpt_regsub(nonsense, ..., ...) doesn't complain", ""); -} - -try(fields) -char **fields; -{ - regexp *r; - char dbuf[BUFSIZ]; - - errseen = NULL; - r = tpt_regcomp(fields[0]); - if (r == NULL) { - if (*fields[2] != 'c') - complain("tpt_regcomp failure in `%s'", fields[0]); - return; - } - if (*fields[2] == 'c') { - complain("unexpected tpt_regcomp success in `%s'", fields[0]); - free((char *)r); - return; - } - if (!tpt_regexec(r, fields[1])) { - if (*fields[2] != 'n') - complain("tpt_regexec failure in `%s'", ""); - free((char *)r); - return; - } - if (*fields[2] == 'n') { - complain("unexpected tpt_regexec success", ""); - free((char *)r); - return; - } - errseen = NULL; - tpt_regsub(r, fields[3], dbuf); - if (errseen != NULL) { - complain("tpt_regsub complaint", ""); - free((char *)r); - return; - } - if (strcmp(dbuf, fields[4]) != 0) - complain("tpt_regsub result `%s' wrong", dbuf); - free((char *)r); -} - -complain(s1, s2) -char *s1; -char *s2; -{ - fprintf(stderr, "try: %d: ", lineno); - fprintf(stderr, s1, s2); - fprintf(stderr, " (%s)\n", (errseen != NULL) ? errseen : ""); - status = 1; -}