mirror of
git://git.code.sf.net/p/cdesktopenv/code
synced 2025-03-09 15:50:02 +00:00
536 lines
15 KiB
C
536 lines
15 KiB
C
/*
|
|
* CDE - Common Desktop Environment
|
|
*
|
|
* Copyright (c) 1993-2012, The Open Group. All rights reserved.
|
|
*
|
|
* These libraries and programs are free software; you can
|
|
* redistribute them and/or modify them under the terms of the GNU
|
|
* Lesser General Public License as published by the Free Software
|
|
* Foundation; either version 2 of the License, or (at your option)
|
|
* any later version.
|
|
*
|
|
* These libraries and programs are distributed in the hope that
|
|
* they will be useful, but WITHOUT ANY WARRANTY; without even the
|
|
* implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
|
|
* PURPOSE. See the GNU Lesser General Public License for more
|
|
* details.
|
|
*
|
|
* You should have received a copy of the GNU Lesser General Public
|
|
* License along with these libraries and programs; if not, write
|
|
* to the Free Software Foundation, Inc., 51 Franklin Street, Fifth
|
|
* Floor, Boston, MA 02110-1301 USA
|
|
*/
|
|
/* $XConsortium: parseMode.C /main/1 1996/07/29 17:09:27 cde-hp $ */
|
|
// Copyright (c) 1994 James Clark
|
|
// See the file COPYING for copying permission.
|
|
|
|
#include "splib.h"
|
|
#include "Parser.h"
|
|
#include "ParserMessages.h"
|
|
#include "MessageArg.h"
|
|
#include "TokenMessageArg.h"
|
|
#include "ModeInfo.h"
|
|
#include "Partition.h"
|
|
#include "SrInfo.h"
|
|
#include "Vector.h"
|
|
#include "ISetIter.h"
|
|
#include "token.h"
|
|
#include "TrieBuilder.h"
|
|
#include "macros.h"
|
|
|
|
#ifdef SP_NAMESPACE
|
|
namespace SP_NAMESPACE {
|
|
#endif
|
|
|
|
enum {
|
|
modeUsedInSd = 01,
|
|
modeUsedInProlog = 02,
|
|
modeUsedInInstance = 04,
|
|
modeUsesSr = 010
|
|
};
|
|
|
|
static struct {
|
|
Mode mode;
|
|
unsigned flags;
|
|
} modeTable[] = {
|
|
{ grpMode, modeUsedInProlog|modeUsedInInstance },
|
|
{ alitMode, modeUsedInProlog|modeUsedInInstance },
|
|
{ alitaMode, modeUsedInProlog|modeUsedInInstance },
|
|
{ aliteMode, modeUsedInProlog|modeUsedInInstance },
|
|
{ talitMode, modeUsedInProlog|modeUsedInInstance },
|
|
{ talitaMode, modeUsedInProlog|modeUsedInInstance },
|
|
{ taliteMode, modeUsedInProlog|modeUsedInInstance },
|
|
{ mdMode, modeUsedInProlog|modeUsedInInstance|modeUsedInSd },
|
|
{ mdMinusMode, modeUsedInProlog },
|
|
{ mdPeroMode, modeUsedInProlog },
|
|
{ comMode, modeUsedInProlog|modeUsedInInstance },
|
|
{ sdcomMode, modeUsedInSd },
|
|
{ piMode, modeUsedInProlog|modeUsedInInstance },
|
|
{ refMode, modeUsedInProlog|modeUsedInInstance|modeUsedInSd },
|
|
{ imsMode, modeUsedInProlog|modeUsedInInstance },
|
|
{ cmsMode, modeUsedInProlog|modeUsedInInstance },
|
|
{ rcmsMode, modeUsedInProlog|modeUsedInInstance },
|
|
{ proMode, modeUsedInProlog },
|
|
{ dsMode, modeUsedInProlog },
|
|
{ dsiMode, modeUsedInProlog },
|
|
{ plitMode, modeUsedInProlog },
|
|
{ plitaMode, modeUsedInProlog },
|
|
{ pliteMode, modeUsedInProlog },
|
|
{ sdplitMode, modeUsedInSd },
|
|
{ sdplitaMode, modeUsedInSd },
|
|
{ grpsufMode, modeUsedInProlog },
|
|
{ mlitMode, modeUsedInProlog|modeUsedInSd },
|
|
{ mlitaMode, modeUsedInProlog|modeUsedInSd },
|
|
{ asMode, modeUsedInProlog },
|
|
{ slitMode, modeUsedInProlog },
|
|
{ slitaMode, modeUsedInProlog },
|
|
{ cconMode, modeUsedInInstance },
|
|
{ rcconMode, modeUsedInInstance },
|
|
{ cconnetMode, modeUsedInInstance },
|
|
{ rcconnetMode, modeUsedInInstance },
|
|
{ rcconeMode, modeUsedInInstance },
|
|
{ tagMode, modeUsedInInstance },
|
|
{ econMode, modeUsedInInstance|modeUsesSr },
|
|
{ mconMode, modeUsedInInstance|modeUsesSr },
|
|
{ econnetMode, modeUsedInInstance|modeUsesSr },
|
|
{ mconnetMode, modeUsedInInstance|modeUsesSr },
|
|
};
|
|
|
|
void Parser::compileSdModes()
|
|
{
|
|
Mode modes[nModes];
|
|
int n = 0;
|
|
for (size_t i = 0; i < SIZEOF(modeTable); i++)
|
|
if (modeTable[i].flags & modeUsedInSd)
|
|
modes[n++] = modeTable[i].mode;
|
|
compileModes(modes, n, 0);
|
|
}
|
|
|
|
void Parser::compilePrologModes()
|
|
{
|
|
Boolean scopeInstance = sd().scopeInstance();
|
|
Boolean haveSr = syntax().hasShortrefs();
|
|
Mode modes[nModes];
|
|
int n = 0;
|
|
for (size_t i = 0; i < SIZEOF(modeTable); i++) {
|
|
if (scopeInstance) {
|
|
if (modeTable[i].flags & modeUsedInProlog)
|
|
modes[n++] = modeTable[i].mode;
|
|
}
|
|
else if (haveSr) {
|
|
if ((modeTable[i].flags & (modeUsedInInstance|modeUsedInProlog))
|
|
&& !(modeTable[i].flags & modeUsesSr))
|
|
modes[n++] = modeTable[i].mode;
|
|
}
|
|
else {
|
|
if (modeTable[i].flags & (modeUsedInInstance|modeUsedInProlog))
|
|
modes[n++] = modeTable[i].mode;
|
|
}
|
|
}
|
|
compileModes(modes, n, 0);
|
|
}
|
|
|
|
void Parser::compileInstanceModes()
|
|
{
|
|
Boolean scopeInstance = sd().scopeInstance();
|
|
compileNormalMap();
|
|
if (!scopeInstance && !syntax().hasShortrefs())
|
|
return;
|
|
Mode modes[nModes];
|
|
int n = 0;
|
|
for (size_t i = 0; i < SIZEOF(modeTable); i++) {
|
|
if (scopeInstance) {
|
|
if (modeTable[i].flags & modeUsedInInstance)
|
|
modes[n++] = modeTable[i].mode;
|
|
}
|
|
else {
|
|
if (modeTable[i].flags & modeUsesSr)
|
|
modes[n++] = modeTable[i].mode;
|
|
}
|
|
}
|
|
compileModes(modes, n, ¤tDtd());
|
|
}
|
|
|
|
void Parser::compileModes(const Mode *modes,
|
|
int n,
|
|
const Dtd *dtd)
|
|
{
|
|
PackedBoolean sets[Syntax::nSet];
|
|
PackedBoolean delims[Syntax::nDelimGeneral];
|
|
PackedBoolean functions[3];
|
|
int i;
|
|
Boolean includesShortref = 0;
|
|
for (i = 0; i < Syntax::nSet; i++)
|
|
sets[i] = 0;
|
|
for (i = 0; i < Syntax::nDelimGeneral; i++)
|
|
delims[i] = 0;
|
|
for (i = 0; i < 3; i++)
|
|
functions[i] = 0;
|
|
|
|
for (i = 0; i < n; i++) {
|
|
ModeInfo iter(modes[i], sd());
|
|
TokenInfo ti;
|
|
while (iter.nextToken(&ti)) {
|
|
switch (ti.type) {
|
|
case TokenInfo::delimType:
|
|
delims[ti.delim1] = 1;
|
|
break;
|
|
case TokenInfo::delimDelimType:
|
|
delims[ti.delim1] = 1;
|
|
delims[ti.delim2] = 1;
|
|
break;
|
|
case TokenInfo::delimSetType:
|
|
delims[ti.delim1] = 1;
|
|
// fall through
|
|
case TokenInfo::setType:
|
|
sets[ti.set] = 1;
|
|
break;
|
|
case TokenInfo::functionType:
|
|
functions[ti.function] = 1;
|
|
break;
|
|
}
|
|
}
|
|
if (!includesShortref && iter.includesShortref())
|
|
includesShortref = 1;
|
|
}
|
|
|
|
ISet<Char> chars;
|
|
|
|
for (i = 0; i < 3; i++)
|
|
if (functions[i])
|
|
chars.add(syntax().standardFunction(i));
|
|
for (i = 0; i < Syntax::nDelimGeneral; i++)
|
|
if (delims[i]) {
|
|
const StringC &str = syntax().delimGeneral(i);
|
|
for (size_t j = 0; j < str.size(); j++)
|
|
chars.add(str[j]);
|
|
}
|
|
if (includesShortref && dtd) {
|
|
size_t n = dtd->nShortref();
|
|
for (size_t i = 0; i < n; i++) {
|
|
const StringC &delim = dtd->shortref(i);
|
|
size_t len = delim.size();
|
|
for (size_t j = 0; j < len; j++)
|
|
if (delim[j] == sd().execToDoc('B'))
|
|
sets[Syntax::blank] = 1;
|
|
else
|
|
chars.add(delim[j]);
|
|
}
|
|
}
|
|
|
|
const ISet<Char> *csets[Syntax::nSet];
|
|
int usedSets = 0;
|
|
for (i = 0; i < Syntax::nSet; i++)
|
|
if (sets[i])
|
|
csets[usedSets++] = syntax().charSet(i);
|
|
|
|
Partition partition(chars, csets, usedSets, *syntax().generalSubstTable());
|
|
|
|
String<EquivCode> setCodes[Syntax::nSet];
|
|
|
|
int nCodes = 0;
|
|
for (i = 0; i < Syntax::nSet; i++)
|
|
if (sets[i])
|
|
setCodes[i] = partition.setCodes(nCodes++);
|
|
|
|
String<EquivCode> delimCodes[Syntax::nDelimGeneral];
|
|
for (i = 0; i < Syntax::nDelimGeneral; i++)
|
|
if (delims[i]) {
|
|
StringC str = syntax().delimGeneral(i);
|
|
for (size_t j = 0; j < str.size(); j++)
|
|
delimCodes[i] += partition.charCode(str[j]);
|
|
}
|
|
|
|
String<EquivCode> functionCode[3];
|
|
for (i = 0; i < 3; i++)
|
|
if (functions[i])
|
|
functionCode[i] += partition.charCode(syntax().standardFunction(i));
|
|
|
|
Vector<SrInfo> srInfo;
|
|
|
|
int nShortref;
|
|
if (!includesShortref || !dtd)
|
|
nShortref = 0;
|
|
else {
|
|
nShortref = dtd->nShortref();
|
|
srInfo.resize(nShortref);
|
|
|
|
for (i = 0; i < nShortref; i++) {
|
|
const StringC delim = dtd->shortref(i);
|
|
SrInfo *p = &srInfo[i];
|
|
size_t j;
|
|
for (j = 0; j < delim.size(); j++) {
|
|
if (delim[j] == sd().execToDoc('B'))
|
|
break;
|
|
p->chars += partition.charCode(delim[j]);
|
|
}
|
|
if (j < delim.size()) {
|
|
p->bSequenceLength = 1;
|
|
for (++j; j < delim.size(); j++) {
|
|
if (delim[j] != sd().execToDoc('B'))
|
|
break;
|
|
p->bSequenceLength += 1;
|
|
}
|
|
for (; j < delim.size(); j++)
|
|
p->chars2 += partition.charCode(delim[j]);
|
|
}
|
|
else
|
|
p->bSequenceLength = 0;
|
|
}
|
|
}
|
|
|
|
const String<EquivCode> emptyString;
|
|
Boolean multicode = syntax().multicode();
|
|
for (i = 0; i < n; i++) {
|
|
TrieBuilder tb(partition.maxCode() + 1);
|
|
TrieBuilder::TokenVector ambiguities;
|
|
Vector<Token> suppressTokens;
|
|
if (multicode) {
|
|
suppressTokens.assign(partition.maxCode() + 1, 0);
|
|
suppressTokens[partition.eECode()] = tokenEe;
|
|
}
|
|
tb.recognizeEE(partition.eECode(), tokenEe);
|
|
ModeInfo iter(modes[i], sd());
|
|
TokenInfo ti;
|
|
// We try to handle the possibility that some delimiters may be empty;
|
|
// this might happen when compiling recognizers for the SGML declaration.
|
|
while (iter.nextToken(&ti)) {
|
|
switch (ti.type) {
|
|
case TokenInfo::delimType:
|
|
if (delimCodes[ti.delim1].size() > 0)
|
|
tb.recognize(delimCodes[ti.delim1], ti.token,
|
|
ti.priority, ambiguities);
|
|
break;
|
|
case TokenInfo::delimDelimType:
|
|
{
|
|
String<EquivCode> str(delimCodes[ti.delim1]);
|
|
if (str.size() > 0 && delimCodes[ti.delim2].size() > 0) {
|
|
str += delimCodes[ti.delim2];
|
|
tb.recognize(str, ti.token, ti.priority, ambiguities);
|
|
}
|
|
}
|
|
break;
|
|
case TokenInfo::delimSetType:
|
|
if (delimCodes[ti.delim1].size() > 0)
|
|
tb.recognize(delimCodes[ti.delim1], setCodes[ti.set],
|
|
ti.token, ti.priority, ambiguities);
|
|
break;
|
|
case TokenInfo::setType:
|
|
tb.recognize(emptyString, setCodes[ti.set], ti.token,
|
|
ti.priority, ambiguities);
|
|
if (multicode) {
|
|
const String<EquivCode> &equivCodes = setCodes[ti.set];
|
|
for (size_t j = 0; j < equivCodes.size(); j++)
|
|
suppressTokens[equivCodes[j]] = ti.token;
|
|
}
|
|
break;
|
|
case TokenInfo::functionType:
|
|
tb.recognize(functionCode[ti.function], ti.token,
|
|
ti.priority, ambiguities);
|
|
if (multicode)
|
|
suppressTokens[functionCode[ti.function][0]] = ti.token;
|
|
break;
|
|
}
|
|
}
|
|
if (iter.includesShortref()) {
|
|
for (int j = 0; j < nShortref; j++) {
|
|
const SrInfo *p = &srInfo[j];
|
|
if (p->bSequenceLength > 0)
|
|
tb.recognizeB(p->chars, p->bSequenceLength,
|
|
syntax().quantity(Syntax::qBSEQLEN),
|
|
setCodes[Syntax::blank],
|
|
p->chars2, tokenFirstShortref + j,
|
|
ambiguities);
|
|
else
|
|
tb.recognize(p->chars, tokenFirstShortref + j,
|
|
Priority::delim, ambiguities);
|
|
}
|
|
}
|
|
setRecognizer(modes[i],
|
|
(multicode
|
|
? new Recognizer(tb.extractTrie(), partition.map(),
|
|
suppressTokens)
|
|
: new Recognizer(tb.extractTrie(), partition.map())));
|
|
// FIXME give more information
|
|
for (size_t j = 0; j < ambiguities.size(); j += 2)
|
|
message(ParserMessages::lexicalAmbiguity,
|
|
TokenMessageArg(ambiguities[j], modes[i], syntaxPointer(),
|
|
sdPointer()),
|
|
TokenMessageArg(ambiguities[j + 1], modes[i], syntaxPointer(),
|
|
sdPointer()));
|
|
}
|
|
}
|
|
|
|
void Parser::compileNormalMap()
|
|
{
|
|
XcharMap<PackedBoolean> map(0);
|
|
ISetIter<Char> sgmlCharIter(*syntax().charSet(Syntax::sgmlChar));
|
|
Char min, max;
|
|
while (sgmlCharIter.next(min, max))
|
|
map.setRange(min, max, 1);
|
|
ModeInfo iter(mconnetMode, sd());
|
|
TokenInfo ti;
|
|
while (iter.nextToken(&ti)) {
|
|
switch (ti.type) {
|
|
case TokenInfo::delimType:
|
|
case TokenInfo::delimDelimType:
|
|
case TokenInfo::delimSetType:
|
|
{
|
|
Char c = syntax().delimGeneral(ti.delim1)[0];
|
|
map.setChar(c, 0);
|
|
StringC str(syntax().generalSubstTable()->inverse(c));
|
|
for (size_t i = 0; i < str.size(); i++)
|
|
map.setChar(str[i], 0);
|
|
}
|
|
break;
|
|
case TokenInfo::setType:
|
|
if (ti.token != tokenChar) {
|
|
ISetIter<Char> setIter(*syntax().charSet(ti.set));
|
|
Char min, max;
|
|
while (setIter.next(min, max))
|
|
map.setRange(min, max, 0);
|
|
}
|
|
break;
|
|
case TokenInfo::functionType:
|
|
if (ti.token != tokenChar)
|
|
map.setChar(syntax().standardFunction(ti.function), 0);
|
|
break;
|
|
}
|
|
}
|
|
int nShortref = currentDtd().nShortref();
|
|
for (int i = 0; i < nShortref; i++) {
|
|
Char c = currentDtd().shortref(i)[0];
|
|
if (c == sd().execToDoc('B')) {
|
|
ISetIter<Char> setIter(*syntax().charSet(Syntax::blank));
|
|
Char min, max;
|
|
while (setIter.next(min, max))
|
|
map.setRange(min, max, 0);
|
|
}
|
|
else {
|
|
map.setChar(c, 0);
|
|
StringC str(syntax().generalSubstTable()->inverse(c));
|
|
for (size_t j = 0; j < str.size(); j++)
|
|
map.setChar(str[j], 0);
|
|
}
|
|
}
|
|
setNormalMap(map);
|
|
}
|
|
|
|
void Parser::addNeededShortrefs(Dtd &dtd, const Syntax &syntax)
|
|
{
|
|
if (!syntax.hasShortrefs())
|
|
return;
|
|
PackedBoolean delimRelevant[Syntax::nDelimGeneral];
|
|
size_t i;
|
|
for (i = 0; i < Syntax::nDelimGeneral; i++)
|
|
delimRelevant[i] = 0;
|
|
ModeInfo iter(mconnetMode, sd());
|
|
TokenInfo ti;
|
|
while (iter.nextToken(&ti)) {
|
|
switch (ti.type) {
|
|
case TokenInfo::delimType:
|
|
case TokenInfo::delimDelimType:
|
|
case TokenInfo::delimSetType:
|
|
delimRelevant[ti.delim1] = 1;
|
|
break;
|
|
default:
|
|
break;
|
|
}
|
|
}
|
|
|
|
// PIO and NET are the only delimiters that are recognized in con
|
|
// mode without context. If a short reference delimiter is
|
|
// identical to one of these delimiters, then we'll have an
|
|
// ambiguity. We make such a short reference delimiter needed
|
|
// to ensure that this ambiguity is reported.
|
|
if (syntax.isValidShortref(syntax.delimGeneral(Syntax::dPIO)))
|
|
dtd.addNeededShortref(syntax.delimGeneral(Syntax::dPIO));
|
|
if (syntax.isValidShortref(syntax.delimGeneral(Syntax::dNET)))
|
|
dtd.addNeededShortref(syntax.delimGeneral(Syntax::dNET));
|
|
|
|
size_t nShortrefComplex = syntax.nDelimShortrefComplex();
|
|
|
|
// A short reference delimiter is needed if it is used or if it can
|
|
// contains some other shorter delimiter that is either a relevant general
|
|
// delimiter or a shortref delimiter that is used.
|
|
|
|
for (i = 0; i < nShortrefComplex; i++) {
|
|
size_t j;
|
|
for (j = 0; j < Syntax::nDelimGeneral; j++)
|
|
if (delimRelevant[j]
|
|
&& shortrefCanPreemptDelim(syntax.delimShortrefComplex(i),
|
|
syntax.delimGeneral(j),
|
|
0,
|
|
syntax)) {
|
|
dtd.addNeededShortref(syntax.delimShortrefComplex(i));
|
|
break;
|
|
}
|
|
for (j = 0; j < dtd.nShortref(); j++)
|
|
if (shortrefCanPreemptDelim(syntax.delimShortrefComplex(i),
|
|
dtd.shortref(j),
|
|
1,
|
|
syntax)) {
|
|
dtd.addNeededShortref(syntax.delimShortrefComplex(i));
|
|
break;
|
|
}
|
|
}
|
|
|
|
}
|
|
|
|
Boolean Parser::shortrefCanPreemptDelim(const StringC &sr,
|
|
const StringC &d,
|
|
Boolean dIsSr,
|
|
const Syntax &syntax)
|
|
{
|
|
Char letterB = sd().execToDoc('B');
|
|
for (size_t i = 0; i < sr.size(); i++) {
|
|
size_t j = 0;
|
|
size_t k = i;
|
|
for (;;) {
|
|
if (j == d.size())
|
|
return 1;
|
|
if (k >= sr.size())
|
|
break;
|
|
if (sr[k] == letterB) {
|
|
if (dIsSr && d[j] == letterB) {
|
|
j++;
|
|
k++;
|
|
}
|
|
else if (syntax.isB(d[j])) {
|
|
j++;
|
|
k++;
|
|
if (k == sr.size() || sr[k] != letterB) {
|
|
// it was the last B in the sequence
|
|
while (j < d.size() && syntax.isB(d[j]))
|
|
j++;
|
|
}
|
|
}
|
|
else
|
|
break;
|
|
}
|
|
else if (dIsSr && d[j] == letterB) {
|
|
if (syntax.isB(sr[k])) {
|
|
++j;
|
|
++k;
|
|
if (j < d.size() && d[j] != letterB) {
|
|
while (k < sr.size() && syntax.isB(sr[k]))
|
|
k++;
|
|
}
|
|
}
|
|
else
|
|
break;
|
|
}
|
|
else if (d[j] == sr[k]) {
|
|
j++;
|
|
k++;
|
|
}
|
|
else
|
|
break;
|
|
}
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
#ifdef SP_NAMESPACE
|
|
}
|
|
#endif
|