mirror of
git://git.code.sf.net/p/cdesktopenv/code
synced 2025-02-13 11:42:21 +00:00
dtdocbook: Migrate instant to UTF-8.
This commit is contained in:
parent
b816b85575
commit
df7bc69a3f
3 changed files with 71 additions and 333 deletions
|
@ -81,6 +81,7 @@ static char *CopyRt =
|
|||
#include <fcntl.h>
|
||||
#include <unistd.h>
|
||||
#include <locale.h>
|
||||
#include <langinfo.h>
|
||||
#include "LocaleXlate.h"
|
||||
#include "XlationSvc.h"
|
||||
|
||||
|
@ -121,10 +122,6 @@ static int DefaultOutputString(ClientData clientData,
|
|||
Tcl_Interp *interp,
|
||||
int argc,
|
||||
const char *argv[]);
|
||||
static int TclReadLocaleStrings(ClientData clientData,
|
||||
Tcl_Interp *interp,
|
||||
int argc,
|
||||
const char *argv[]);
|
||||
char *GetOutFileBaseName();
|
||||
|
||||
char *
|
||||
|
@ -209,16 +206,6 @@ main(
|
|||
0,
|
||||
0);
|
||||
|
||||
/* Add a function to read a localized set of data from a file.
|
||||
* We'll make sure the munging takes place so we can parse it
|
||||
* in Tcl and any strings we get will output properly when
|
||||
* unmunged. */
|
||||
Tcl_CreateCommand(interpreter,
|
||||
"ReadLocaleStrings",
|
||||
TclReadLocaleStrings,
|
||||
0,
|
||||
0);
|
||||
|
||||
ReadInstance(in_file);
|
||||
|
||||
if (interactive) {
|
||||
|
@ -271,78 +258,6 @@ main(
|
|||
return 0;
|
||||
}
|
||||
|
||||
|
||||
/* Undo the munging done in EscapeI18NChars().
|
||||
*
|
||||
* The parameter may be modified. It is returned for the convenience
|
||||
* of the caller.
|
||||
*
|
||||
* The algorithm here is:
|
||||
*
|
||||
* get the next byte to write;
|
||||
*
|
||||
* if the current byte is the chosen character:
|
||||
*
|
||||
* get the next byte;
|
||||
*
|
||||
* if the current byte is the chosen character:
|
||||
*
|
||||
* get the next byte and zero out the 8th bit;
|
||||
*
|
||||
* if the current byte is an ASCII "1", emit the chosen
|
||||
* character and continue;
|
||||
*
|
||||
* if the current byte is an ASCII "0", emit the chosen
|
||||
* character with the 8th bit turned off and continue;
|
||||
*
|
||||
* it's an internal error if we get here
|
||||
*
|
||||
* emit the current byte with the 8th bit turned off and
|
||||
* continue;
|
||||
*
|
||||
* emit the current byte and continue;
|
||||
*/
|
||||
static char *UnEscapeI18NChars(
|
||||
char *source
|
||||
)
|
||||
{
|
||||
unsigned char c;
|
||||
char *buf;
|
||||
unsigned char *to, *from;
|
||||
|
||||
if (MB_CUR_MAX != 1) {
|
||||
from = (unsigned char*)source;
|
||||
buf = malloc(strlen(source)+1);
|
||||
to = (unsigned char *)buf;
|
||||
while (c = *from++) {
|
||||
if (c == I18N_TRIGGER) {
|
||||
c = *from++;
|
||||
if (c == I18N_TRIGGER) {
|
||||
c = *from++ & ~0x80;
|
||||
if (c == '0') {
|
||||
*to++ = I18N_TRIGGER & ~0x80;
|
||||
} else if (c == '1') {
|
||||
*to++ = I18N_TRIGGER;
|
||||
} else {
|
||||
fprintf(stderr,
|
||||
"Error: Unexpected I18N transformation.\n");
|
||||
exit(1);
|
||||
}
|
||||
} else {
|
||||
*to++ = c & ~0x80;
|
||||
}
|
||||
} else {
|
||||
*to++ = c;
|
||||
}
|
||||
}
|
||||
*to = 0;
|
||||
strcpy(source, buf);
|
||||
free(buf);
|
||||
}
|
||||
return source;
|
||||
}
|
||||
|
||||
|
||||
static int DefaultOutputString(ClientData clientData,
|
||||
Tcl_Interp *interp,
|
||||
int argc,
|
||||
|
@ -375,43 +290,18 @@ static int DefaultOutputString(ClientData clientData,
|
|||
* any characters that will throw Tcl for a loop */
|
||||
*pString++ = '"';
|
||||
while (*pArgv) {
|
||||
if (*pArgv & 0x80)
|
||||
{
|
||||
/* 8-bit data - need to encode since modern Tcl expects
|
||||
* any "binary" (8-bit) data in strings to be proper UTF-8
|
||||
* encoded. We aren't doing that (yet), so convert any
|
||||
* detected 8b characters into a \xNN format.
|
||||
*
|
||||
* This code should be unnecessary when we switch to UTF8.
|
||||
*/
|
||||
char fmt[16];
|
||||
snprintf(fmt, 16, "%02x", (int)*pArgv & 0xff);
|
||||
#if 0
|
||||
fprintf(stderr, "JET: converted 0x%02x to '%s'\n",
|
||||
*pArgv, fmt);
|
||||
#endif
|
||||
/* copy the 4 bytes into the string */
|
||||
*pString++ = '\\';
|
||||
*pString++ = 'x';
|
||||
*pString++ = fmt[0];
|
||||
*pString++ = fmt[1];
|
||||
pArgv++;
|
||||
}
|
||||
else
|
||||
{
|
||||
switch (*pArgv) {
|
||||
case '{':
|
||||
case '}':
|
||||
case '"':
|
||||
case '\'':
|
||||
case '[':
|
||||
case ']':
|
||||
case '$':
|
||||
case '\\':
|
||||
*pString++ = '\\';
|
||||
}
|
||||
*pString++ = *pArgv++;
|
||||
}
|
||||
switch (*pArgv) {
|
||||
case '{':
|
||||
case '}':
|
||||
case '"':
|
||||
case '\'':
|
||||
case '[':
|
||||
case ']':
|
||||
case '$':
|
||||
case '\\':
|
||||
*pString++ = '\\';
|
||||
}
|
||||
*pString++ = *pArgv++;
|
||||
}
|
||||
*pString++ = '"';
|
||||
*pString++ = 0;
|
||||
|
@ -753,187 +643,6 @@ FindEntity(
|
|||
return 0;
|
||||
}
|
||||
|
||||
/* Check multibyte characters for inner bytes that don't have their
|
||||
* 8th bit set - e.g., this may happen in SJIS. Rather than risk
|
||||
* having downstream code mistake that inner byte for an ASCII
|
||||
* character, we'll mung it here and undo the mung when we write the
|
||||
* character out in DefaultOutputString().
|
||||
*
|
||||
* A character buffer may be allocated and returned in this routine.
|
||||
* That buffer must be free'd by the caller if the return value of
|
||||
* this routine is different from its parameter.
|
||||
*
|
||||
* The algorithm here is:
|
||||
*
|
||||
* get a character
|
||||
*
|
||||
* if the length of the current character is 1:
|
||||
*
|
||||
* if the current character has the 8th bit off, emit it
|
||||
* and continue;
|
||||
*
|
||||
* if the current character is the chosen 8-bit
|
||||
* character, emit the chosen character twice and follow
|
||||
* it with the ASCII character "1" or'd with the 8th bit
|
||||
* and continue;
|
||||
*
|
||||
* emit the character and continue;
|
||||
*
|
||||
* if the length of the current character is greater than
|
||||
* one, for each of the bytes in the character:
|
||||
*
|
||||
* if the current byte is the chosen 8-bit character,
|
||||
* emit the chosen character twice and follow it with the
|
||||
* ASCII character "1" or'd with the 8th bit and
|
||||
* continue;
|
||||
*
|
||||
* if the current byte is the chosen character except the
|
||||
* 8th bit is off, emit the chosen character twice
|
||||
* followed by the ASCII character "0" or'd with the 8th
|
||||
* bit and continue;
|
||||
*
|
||||
* if the current byte has the 8th bit set, emit it and
|
||||
* continue;
|
||||
*
|
||||
* emit the chosen character followed by the current byte
|
||||
* or'd with the 8th bit.
|
||||
*/
|
||||
static char *
|
||||
EscapeI18NChars(
|
||||
char *source
|
||||
)
|
||||
{
|
||||
char *retval;
|
||||
unsigned char *from, *to;
|
||||
int len;
|
||||
|
||||
if (MB_CUR_MAX == 1) {
|
||||
return source;
|
||||
} else {
|
||||
/* worst case, the string will expand by a factor of 3 */
|
||||
from = (unsigned char *)source;
|
||||
retval = malloc(3 * strlen(source) + 1);
|
||||
to = (unsigned char *)retval;
|
||||
while (*from) {
|
||||
if ((len = mblen(from, MB_CUR_MAX)) < 0) {
|
||||
fprintf(stderr,
|
||||
"Bad multibyte character '%c' (0x%x) in source file\n",
|
||||
*from,
|
||||
*from);
|
||||
from++;
|
||||
} else if ((len = mblen(from, MB_CUR_MAX)) == 1) {
|
||||
if (*from & 0x80) {
|
||||
if (*from == I18N_TRIGGER) {
|
||||
*to++ = I18N_TRIGGER;
|
||||
*to++ = I18N_TRIGGER;
|
||||
*to++ = '1' | 0x80;
|
||||
from++;
|
||||
} else {
|
||||
*to++ = *from++;
|
||||
}
|
||||
} else {
|
||||
*to++ = *from++;
|
||||
}
|
||||
} else {
|
||||
while (--len >= 0) {
|
||||
if (*from == I18N_TRIGGER) {
|
||||
*to++ = I18N_TRIGGER;
|
||||
*to++ = I18N_TRIGGER;
|
||||
*to++ = '1' | 0x80;
|
||||
from++;
|
||||
} else if (*from == (I18N_TRIGGER & ~0x80)) {
|
||||
*to++ = I18N_TRIGGER;
|
||||
*to++ = I18N_TRIGGER;
|
||||
*to++ = '0' | 0x80;
|
||||
from++;
|
||||
} else if (*from & 0x80) {
|
||||
*to++ = *from++;
|
||||
} else {
|
||||
*to++ = I18N_TRIGGER;
|
||||
*to++ = *from++ | 0x80;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
*to = 0;
|
||||
}
|
||||
return retval;
|
||||
}
|
||||
|
||||
|
||||
static char *
|
||||
ReadLocaleStrings(const char *file_name, int *ret_code) {
|
||||
int fd;
|
||||
char *pBuf;
|
||||
char *i18nBuf;
|
||||
off_t size;
|
||||
struct stat stat_buf;
|
||||
|
||||
fd = open(file_name, O_RDONLY);
|
||||
if (fd == -1) {
|
||||
*ret_code = 1;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
fstat(fd, &stat_buf);
|
||||
size = stat_buf.st_size;
|
||||
pBuf = Tcl_Alloc(size+1);
|
||||
memset(pBuf, 0, size+1);
|
||||
|
||||
if (read(fd, pBuf, size) != size) {
|
||||
*ret_code = 2;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
i18nBuf = EscapeI18NChars(pBuf);
|
||||
if (i18nBuf != pBuf) {
|
||||
pBuf = Tcl_Realloc(pBuf, 1 + strlen(i18nBuf));
|
||||
strcpy(pBuf, i18nBuf);
|
||||
free(i18nBuf);
|
||||
}
|
||||
|
||||
*ret_code = 0;
|
||||
return pBuf;
|
||||
}
|
||||
|
||||
static int TclReadLocaleStrings(ClientData clientData,
|
||||
Tcl_Interp *interp,
|
||||
int argc,
|
||||
const char *argv[]) {
|
||||
char *pBuf;
|
||||
int ret_code;
|
||||
char errorBuf[512];
|
||||
|
||||
if (argc > 2) {
|
||||
Tcl_SetResult(interpreter, "Too many arguments", TCL_VOLATILE);
|
||||
return TCL_ERROR;
|
||||
}
|
||||
if (argc < 2) {
|
||||
Tcl_SetResult(interpreter, "Missing file name", TCL_VOLATILE);
|
||||
return TCL_ERROR;
|
||||
}
|
||||
|
||||
pBuf = ReadLocaleStrings(argv[1], &ret_code);
|
||||
|
||||
if (ret_code != 0) {
|
||||
if (ret_code == 1) {
|
||||
sprintf(errorBuf,
|
||||
"Could not open locale strings file \"%s\" for reading",
|
||||
argv[1]);
|
||||
}
|
||||
if (ret_code == 2) {
|
||||
sprintf(errorBuf,
|
||||
"Error reading locale strings file \"%s\"",
|
||||
argv[1]);
|
||||
}
|
||||
Tcl_SetResult(interpreter, errorBuf, TCL_VOLATILE);
|
||||
return TCL_ERROR;
|
||||
}
|
||||
|
||||
Tcl_SetResult(interpreter, pBuf, TCL_DYNAMIC);
|
||||
return TCL_OK;
|
||||
}
|
||||
|
||||
/* Accumulate lines up to the open tag. Attributes, line number,
|
||||
* entity info, notation info, etc., all come before the open tag.
|
||||
*/
|
||||
|
@ -943,7 +652,6 @@ AccumElemInfo(
|
|||
)
|
||||
{
|
||||
char buf[LINESIZE+1];
|
||||
char *i18nBuf;
|
||||
int c;
|
||||
int i, na;
|
||||
char *cp, *atval;
|
||||
|
@ -972,8 +680,7 @@ AccumElemInfo(
|
|||
while (1) {
|
||||
if ((c = getc(fp)) == EOF) break;
|
||||
fgets(buf, LINESIZE, fp);
|
||||
i18nBuf = EscapeI18NChars(buf);
|
||||
stripNL(i18nBuf);
|
||||
stripNL(buf);
|
||||
switch (c) {
|
||||
case EOF: /* End of input */
|
||||
fprintf(stderr, "Error: Unexpectedly reached end of ESIS.\n");
|
||||
|
@ -981,7 +688,7 @@ AccumElemInfo(
|
|||
break;
|
||||
|
||||
case CMD_OPEN: /* (gi */
|
||||
e->gi = AddElemName(i18nBuf);
|
||||
e->gi = AddElemName(buf);
|
||||
if (na > 0) {
|
||||
Malloc(na, e->atts, Mapping_t);
|
||||
memcpy(e->atts, a, na*sizeof(Mapping_t));
|
||||
|
@ -1002,7 +709,7 @@ AccumElemInfo(
|
|||
|
||||
case CMD_ATT: /* Aname val */
|
||||
i = 3;
|
||||
tok = Split(i18nBuf, &i, 0);
|
||||
tok = Split(buf, &i, 0);
|
||||
if (!strcmp(tok[1], "IMPLIED")) break; /* skip IMPLIED atts. */
|
||||
if (!strcmp(tok[1], "CDATA") || !strcmp(tok[1], "TOKEN") ||
|
||||
!strcmp(tok[1], "ENTITY") ||!strcmp(tok[1], "NOTATION"))
|
||||
|
@ -1021,17 +728,17 @@ AccumElemInfo(
|
|||
/* These lines come in 2 forms: "L123" and "L123 file.sgml".
|
||||
* Filename is given only at 1st occurrence. Remember it.
|
||||
*/
|
||||
if ((cp = strchr(i18nBuf, ' '))) {
|
||||
if ((cp = strchr(buf, ' '))) {
|
||||
cp++;
|
||||
last_file = strdup(cp);
|
||||
}
|
||||
last_lineno = e->lineno = atoi(i18nBuf);
|
||||
last_lineno = e->lineno = atoi(buf);
|
||||
e->infile = last_file;
|
||||
break;
|
||||
|
||||
case CMD_DATA: /* -data */
|
||||
fprintf(stderr, "Error: Data in AccumElemInfo, line %d:\n%c%s\n",
|
||||
e->lineno, c,i18nBuf);
|
||||
e->lineno, c,buf);
|
||||
/*return e;*/
|
||||
exit(1);
|
||||
break;
|
||||
|
@ -1043,23 +750,23 @@ AccumElemInfo(
|
|||
|
||||
case CMD_EXT_ENT: /* Eename typ nname */
|
||||
i = 3;
|
||||
tok = Split(i18nBuf, &i, 0);
|
||||
tok = Split(buf, &i, 0);
|
||||
ent.ename = strdup(tok[0]);
|
||||
ent.type = strdup(tok[1]);
|
||||
ent.nname = strdup(tok[2]);
|
||||
AddEntity(&ent);
|
||||
break;
|
||||
case CMD_INT_ENT: /* Iename typ text */
|
||||
fprintf(stderr, "Error: Got CMD_INT_ENT in AccumElemInfo: %s\n", i18nBuf);
|
||||
fprintf(stderr, "Error: Got CMD_INT_ENT in AccumElemInfo: %s\n", buf);
|
||||
break;
|
||||
case CMD_SYSID: /* ssysid */
|
||||
ent.sysid = strdup(i18nBuf);
|
||||
ent.sysid = strdup(buf);
|
||||
break;
|
||||
case CMD_PUBID: /* ppubid */
|
||||
ent.pubid = strdup(i18nBuf);
|
||||
ent.pubid = strdup(buf);
|
||||
break;
|
||||
case CMD_FILENAME: /* ffilename */
|
||||
ent.fname = strdup(i18nBuf);
|
||||
ent.fname = strdup(buf);
|
||||
break;
|
||||
|
||||
case CMD_CLOSE: /* )gi */
|
||||
|
@ -1072,13 +779,10 @@ AccumElemInfo(
|
|||
case CMD_CONFORM: /* C */
|
||||
default:
|
||||
fprintf(stderr, "Error: Unexpected input in AccumElemInfo, %d:\n%c%s\n",
|
||||
e->lineno, c,i18nBuf);
|
||||
e->lineno, c,buf);
|
||||
exit(1);
|
||||
break;
|
||||
}
|
||||
if (i18nBuf != buf) {
|
||||
free(i18nBuf);
|
||||
}
|
||||
}
|
||||
fprintf(stderr, "Error: End of AccumElemInfo - should not be here: %s\n",
|
||||
e->gi);
|
||||
|
@ -1097,7 +801,7 @@ ReadESIS(
|
|||
int depth
|
||||
)
|
||||
{
|
||||
char *buf, *i18nBuf;
|
||||
char *buf;
|
||||
int i, c, ncont;
|
||||
Element_t *e;
|
||||
Content_t cont[5000] = {0};
|
||||
|
@ -1117,14 +821,10 @@ ReadESIS(
|
|||
|
||||
case CMD_DATA: /* -data */
|
||||
fgets(buf, LINESIZE, fp);
|
||||
i18nBuf = EscapeI18NChars(buf);
|
||||
stripNL(i18nBuf);
|
||||
cont[ncont].ch.data = strdup(i18nBuf);
|
||||
stripNL(buf);
|
||||
cont[ncont].ch.data = strdup(buf);
|
||||
cont[ncont].type = CMD_DATA;
|
||||
ncont++;
|
||||
if (i18nBuf != buf) {
|
||||
free(i18nBuf);
|
||||
}
|
||||
break;
|
||||
|
||||
case CMD_PI: /* ?pi */
|
||||
|
|
|
@ -295,6 +295,9 @@ CallInterpreter(
|
|||
{
|
||||
int result;
|
||||
int recursive;
|
||||
char *tcl_str;
|
||||
Tcl_DString tcl_dstr;
|
||||
Tcl_Encoding tcl_enc;
|
||||
|
||||
#if 0
|
||||
if (ib)
|
||||
|
@ -316,7 +319,11 @@ CallInterpreter(
|
|||
|
||||
ProcesOutputSpec(ib, e, 0, 1);
|
||||
if (!recursive) {
|
||||
result = Tcl_Eval(interpreter, GetOutputBuffer());
|
||||
tcl_enc = Tcl_GetEncoding(NULL, NULL);
|
||||
tcl_str = Tcl_ExternalToUtfDString(
|
||||
tcl_enc, GetOutputBuffer(), -1, &tcl_dstr);
|
||||
result = Tcl_Eval(interpreter, tcl_str);
|
||||
Tcl_DStringFree(&tcl_dstr);
|
||||
ClearOutputBuffer();
|
||||
|
||||
if (result != TCL_OK) {
|
||||
|
|
|
@ -698,10 +698,32 @@ int Putc(
|
|||
)
|
||||
{
|
||||
int result;
|
||||
int j;
|
||||
char *pc;
|
||||
static char commandBuf[] = "OutputString \" ";
|
||||
char *tcl_str;
|
||||
Tcl_DString tcl_dstr;
|
||||
Tcl_Encoding tcl_enc;
|
||||
static int i = 0;
|
||||
static char argBuf[8];
|
||||
static char commandBuf[] = "OutputString \" ";
|
||||
|
||||
if (stream) {
|
||||
argBuf[i++] = c;
|
||||
|
||||
mblen(NULL, 0);
|
||||
|
||||
if (mblen(argBuf, i) == -1) {
|
||||
if (i < MB_CUR_MAX) {
|
||||
return c;
|
||||
}
|
||||
else {
|
||||
i = 0;
|
||||
fprintf(stderr,
|
||||
"An invalid multi-byte character was found in the input.");
|
||||
return EOF;
|
||||
}
|
||||
}
|
||||
|
||||
pc = &(commandBuf[14]);
|
||||
switch (c) { /* escape those things that throw off tcl */
|
||||
case '{':
|
||||
|
@ -714,10 +736,13 @@ int Putc(
|
|||
case '\\':
|
||||
*pc++ = '\\';
|
||||
}
|
||||
*pc++ = c;
|
||||
for (j = 0; j < i; ++j) *pc++ = argBuf[j]; i = 0;
|
||||
*pc++ = '"';
|
||||
*pc++ = 0;
|
||||
result = Tcl_Eval(interpreter, commandBuf);
|
||||
tcl_enc = Tcl_GetEncoding(NULL, NULL);
|
||||
tcl_str = Tcl_ExternalToUtfDString(tcl_enc, commandBuf, -1, &tcl_dstr);
|
||||
result = Tcl_Eval(interpreter, tcl_str);
|
||||
Tcl_DStringFree(&tcl_dstr);
|
||||
|
||||
if (result != TCL_OK) {
|
||||
fprintf(stderr,
|
||||
|
@ -754,6 +779,9 @@ int FPuts(
|
|||
const char *ps;
|
||||
int sLength;
|
||||
int result;
|
||||
char *tcl_str;
|
||||
Tcl_DString tcl_dstr;
|
||||
Tcl_Encoding tcl_enc;
|
||||
|
||||
if ((sLength = strlen(s)) == 0)
|
||||
return 0; /* no need to call CheckOutputBuffer() */
|
||||
|
@ -782,7 +810,10 @@ int FPuts(
|
|||
} while (*ps);
|
||||
*pb++ = '"';
|
||||
*pb = 0;
|
||||
result = Tcl_Eval(interpreter, pBuff);
|
||||
tcl_enc = Tcl_GetEncoding(NULL, NULL);
|
||||
tcl_str = Tcl_ExternalToUtfDString(tcl_enc, pBuff, -1, &tcl_dstr);
|
||||
result = Tcl_Eval(interpreter, tcl_str);
|
||||
Tcl_DStringFree(&tcl_dstr);
|
||||
|
||||
if (result != TCL_OK) {
|
||||
fprintf(stderr,
|
||||
|
|
Loading…
Reference in a new issue