1
0
Fork 0
mirror of git://git.code.sf.net/p/cdesktopenv/code synced 2025-02-13 11:42:21 +00:00

dtdocbook: Migrate instant to UTF-8.

This commit is contained in:
Liang Chang 2021-05-11 00:59:23 +08:00
parent b816b85575
commit df7bc69a3f
3 changed files with 71 additions and 333 deletions

View file

@ -81,6 +81,7 @@ static char *CopyRt =
#include <fcntl.h>
#include <unistd.h>
#include <locale.h>
#include <langinfo.h>
#include "LocaleXlate.h"
#include "XlationSvc.h"
@ -121,10 +122,6 @@ static int DefaultOutputString(ClientData clientData,
Tcl_Interp *interp,
int argc,
const char *argv[]);
static int TclReadLocaleStrings(ClientData clientData,
Tcl_Interp *interp,
int argc,
const char *argv[]);
char *GetOutFileBaseName();
char *
@ -209,16 +206,6 @@ main(
0,
0);
/* Add a function to read a localized set of data from a file.
* We'll make sure the munging takes place so we can parse it
* in Tcl and any strings we get will output properly when
* unmunged. */
Tcl_CreateCommand(interpreter,
"ReadLocaleStrings",
TclReadLocaleStrings,
0,
0);
ReadInstance(in_file);
if (interactive) {
@ -271,78 +258,6 @@ main(
return 0;
}
/* Undo the munging done in EscapeI18NChars().
 *
 * The parameter is modified in place (only when MB_CUR_MAX != 1) and
 * is returned for the convenience of the caller.  Decoding never
 * produces more bytes than it consumes, so no temporary buffer is
 * needed: the write position can never overtake the read position.
 *
 * The algorithm here is:
 *
 *   get the next byte;
 *
 *   if it is not the chosen (trigger) character, emit it and
 *   continue;
 *
 *   otherwise get the byte following the trigger:
 *
 *     if that byte is also the trigger, get one more byte and
 *     zero out its 8th bit:
 *
 *       an ASCII "1" means: emit the trigger character;
 *
 *       an ASCII "0" means: emit the trigger character with the
 *       8th bit turned off;
 *
 *       anything else is an internal error;
 *
 *     if that byte is anything else, emit it with the 8th bit
 *     turned off.
 *
 * An escape sequence that is cut short by the end of the string is
 * treated as the same internal error (the process exits with status
 * 1) instead of reading past the terminating NUL.
 */
static char *UnEscapeI18NChars(
    char *source
)
{
    unsigned char *from, *to;
    unsigned char c;

    /* Nothing was munged in a single-byte locale. */
    if (MB_CUR_MAX == 1) {
        return source;
    }

    from = to = (unsigned char *)source;

    while ((c = *from++) != 0) {
        if (c != I18N_TRIGGER) {
            *to++ = c;
            continue;
        }

        /* Trigger seen: the next byte tells us which escape form it is. */
        c = *from++;
        if (c == 0) {
            /* Trigger was the last byte; stop before walking off the end. */
            fprintf(stderr,
                    "Error: Unexpected I18N transformation.\n");
            exit(1);
        }

        if (c != I18N_TRIGGER) {
            /* Two-byte form: trigger + (original byte | 0x80). */
            *to++ = c & ~0x80;
            continue;
        }

        /* Three-byte form: trigger + trigger + ('0' or '1' | 0x80).
         * A NUL here masks to 0 and falls into the error branch. */
        c = *from++ & ~0x80;
        if (c == '0') {
            *to++ = I18N_TRIGGER & ~0x80;
        } else if (c == '1') {
            *to++ = I18N_TRIGGER;
        } else {
            fprintf(stderr,
                    "Error: Unexpected I18N transformation.\n");
            exit(1);
        }
    }

    *to = 0;
    return source;
}
static int DefaultOutputString(ClientData clientData,
Tcl_Interp *interp,
int argc,
@ -375,30 +290,6 @@ static int DefaultOutputString(ClientData clientData,
* any characters that will throw Tcl for a loop */
*pString++ = '"';
while (*pArgv) {
if (*pArgv & 0x80)
{
/* 8-bit data - need to encode since modern Tcl expects
* any "binary" (8-bit) data in strings to be proper UTF-8
* encoded. We aren't doing that (yet), so convert any
* detected 8b characters into a \xNN format.
*
* This code should be unnecessary when we switch to UTF8.
*/
char fmt[16];
snprintf(fmt, 16, "%02x", (int)*pArgv & 0xff);
#if 0
fprintf(stderr, "JET: converted 0x%02x to '%s'\n",
*pArgv, fmt);
#endif
/* copy the 4 bytes into the string */
*pString++ = '\\';
*pString++ = 'x';
*pString++ = fmt[0];
*pString++ = fmt[1];
pArgv++;
}
else
{
switch (*pArgv) {
case '{':
case '}':
@ -412,7 +303,6 @@ static int DefaultOutputString(ClientData clientData,
}
*pString++ = *pArgv++;
}
}
*pString++ = '"';
*pString++ = 0;
@ -753,187 +643,6 @@ FindEntity(
return 0;
}
/* Check multibyte characters for inner bytes that don't have their
 * 8th bit set - e.g., this may happen in SJIS.  Rather than risk
 * having downstream code mistake that inner byte for an ASCII
 * character, we'll mung it here; UnEscapeI18NChars() undoes the mung.
 *
 * When MB_CUR_MAX is 1 the parameter itself is returned untouched.
 * Otherwise a character buffer is allocated with malloc() and
 * returned.  That buffer must be free()'d by the caller if the return
 * value of this routine is different from its parameter.  If the
 * allocation fails, an error is printed and the process exits with
 * status 1.
 *
 * The algorithm here is:
 *
 *   get a character
 *
 *   if the character is not a valid multibyte character, report it
 *   on stderr, drop its first byte and continue;
 *
 *   if the length of the current character is 1:
 *
 *     if the current character is the chosen 8-bit character,
 *     emit the chosen character twice and follow it with the
 *     ASCII character "1" or'd with the 8th bit and continue;
 *
 *     emit the character and continue;
 *
 *   if the length of the current character is greater than
 *   one, for each of the bytes in the character:
 *
 *     if the current byte is the chosen 8-bit character,
 *     emit the chosen character twice and follow it with the
 *     ASCII character "1" or'd with the 8th bit and continue;
 *
 *     if the current byte is the chosen character except the
 *     8th bit is off, emit the chosen character twice
 *     followed by the ASCII character "0" or'd with the 8th
 *     bit and continue;
 *
 *     if the current byte has the 8th bit set, emit it and
 *     continue;
 *
 *     emit the chosen character followed by the current byte
 *     or'd with the 8th bit.
 */
static char *
EscapeI18NChars(
    char *source
)
{
    char *retval;
    unsigned char *from, *to;
    int len;

    if (MB_CUR_MAX == 1) {
        return source;
    }

    /* worst case, every input byte expands to three output bytes */
    retval = malloc(3 * strlen(source) + 1);
    if (retval == NULL) {
        fprintf(stderr, "Error: Out of memory in EscapeI18NChars.\n");
        exit(1);
    }

    from = (unsigned char *)source;
    to = (unsigned char *)retval;

    while (*from) {
        /* Measure the character once; the result drives all three cases. */
        len = mblen((const char *)from, MB_CUR_MAX);

        if (len < 0) {
            fprintf(stderr,
                    "Bad multibyte character '%c' (0x%x) in source file\n",
                    *from,
                    (unsigned int)*from);
            from++;
        } else if (len == 1) {
            if ((*from & 0x80) && *from == I18N_TRIGGER) {
                *to++ = I18N_TRIGGER;
                *to++ = I18N_TRIGGER;
                *to++ = '1' | 0x80;
                from++;
            } else {
                *to++ = *from++;
            }
        } else {
            /* Walk every byte of the multibyte character. */
            while (--len >= 0) {
                if (*from == I18N_TRIGGER) {
                    *to++ = I18N_TRIGGER;
                    *to++ = I18N_TRIGGER;
                    *to++ = '1' | 0x80;
                    from++;
                } else if (*from == (I18N_TRIGGER & ~0x80)) {
                    *to++ = I18N_TRIGGER;
                    *to++ = I18N_TRIGGER;
                    *to++ = '0' | 0x80;
                    from++;
                } else if (*from & 0x80) {
                    *to++ = *from++;
                } else {
                    /* 7-bit inner byte: hide it behind the trigger. */
                    *to++ = I18N_TRIGGER;
                    *to++ = *from++ | 0x80;
                }
            }
        }
    }
    *to = 0;

    return retval;
}
/* Read the whole of file_name into a NUL-terminated buffer obtained
 * from Tcl_Alloc() and run it through EscapeI18NChars().
 *
 * On success *ret_code is set to 0 and the buffer is returned; the
 * caller owns it.  On failure NULL is returned and *ret_code is set
 * to 1 if the file could not be opened, or 2 if it could not be
 * stat'ed or read in full.  The file descriptor is closed and the
 * buffer released on every path.
 */
static char *
ReadLocaleStrings(const char *file_name, int *ret_code) {
    int fd;
    char *pBuf;
    char *i18nBuf;
    off_t size;
    ssize_t nread;
    size_t i18nLen;
    struct stat stat_buf;

    fd = open(file_name, O_RDONLY);
    if (fd == -1) {
        *ret_code = 1;
        return NULL;
    }

    if (fstat(fd, &stat_buf) == -1) {
        close(fd);
        *ret_code = 2;
        return NULL;
    }
    size = stat_buf.st_size;

    /* One extra zeroed byte guarantees NUL termination. */
    pBuf = Tcl_Alloc(size+1);
    memset(pBuf, 0, size+1);

    nread = read(fd, pBuf, size);
    close(fd);  /* the descriptor is not needed past this point */
    if (nread < 0 || (off_t)nread != size) {
        Tcl_Free(pBuf);
        *ret_code = 2;
        return NULL;
    }

    /* EscapeI18NChars() returns its argument when nothing had to be
     * munged, or a malloc'ed copy that we move into the Tcl buffer. */
    i18nBuf = EscapeI18NChars(pBuf);
    if (i18nBuf != pBuf) {
        i18nLen = strlen(i18nBuf);
        pBuf = Tcl_Realloc(pBuf, 1 + i18nLen);
        memcpy(pBuf, i18nBuf, 1 + i18nLen);
        free(i18nBuf);
    }

    *ret_code = 0;
    return pBuf;
}
/* Tcl command procedure: takes exactly one argument, a file name,
 * and sets the interpreter result to that file's contents as
 * returned by ReadLocaleStrings().
 *
 * Returns TCL_OK on success.  Returns TCL_ERROR, with an explanatory
 * message as the result, when the argument count is wrong or the
 * file cannot be opened or read.
 *
 * NOTE(review): the result is set on the file-scope `interpreter`
 * rather than on the `interp` parameter; this is kept as found.
 */
static int TclReadLocaleStrings(ClientData clientData,
                                Tcl_Interp *interp,
                                int argc,
                                const char *argv[]) {
    char *pBuf;
    int ret_code;
    char errorBuf[512];

    if (argc > 2) {
        Tcl_SetResult(interpreter, "Too many arguments", TCL_VOLATILE);
        return TCL_ERROR;
    }
    if (argc < 2) {
        Tcl_SetResult(interpreter, "Missing file name", TCL_VOLATILE);
        return TCL_ERROR;
    }

    pBuf = ReadLocaleStrings(argv[1], &ret_code);
    if (ret_code != 0) {
        /* The file name is caller-supplied and unbounded, so format
         * with snprintf to stay inside errorBuf.  Any code other than
         * 1 is reported as a read error so that errorBuf is always
         * initialized before it is used. */
        if (ret_code == 1) {
            snprintf(errorBuf, sizeof errorBuf,
                     "Could not open locale strings file \"%s\" for reading",
                     argv[1]);
        } else {
            snprintf(errorBuf, sizeof errorBuf,
                     "Error reading locale strings file \"%s\"",
                     argv[1]);
        }
        Tcl_SetResult(interpreter, errorBuf, TCL_VOLATILE);
        return TCL_ERROR;
    }

    /* pBuf came from Tcl_Alloc(); TCL_DYNAMIC hands ownership to Tcl. */
    Tcl_SetResult(interpreter, pBuf, TCL_DYNAMIC);
    return TCL_OK;
}
/* Accumulate lines up to the open tag. Attributes, line number,
* entity info, notation info, etc., all come before the open tag.
*/
@ -943,7 +652,6 @@ AccumElemInfo(
)
{
char buf[LINESIZE+1];
char *i18nBuf;
int c;
int i, na;
char *cp, *atval;
@ -972,8 +680,7 @@ AccumElemInfo(
while (1) {
if ((c = getc(fp)) == EOF) break;
fgets(buf, LINESIZE, fp);
i18nBuf = EscapeI18NChars(buf);
stripNL(i18nBuf);
stripNL(buf);
switch (c) {
case EOF: /* End of input */
fprintf(stderr, "Error: Unexpectedly reached end of ESIS.\n");
@ -981,7 +688,7 @@ AccumElemInfo(
break;
case CMD_OPEN: /* (gi */
e->gi = AddElemName(i18nBuf);
e->gi = AddElemName(buf);
if (na > 0) {
Malloc(na, e->atts, Mapping_t);
memcpy(e->atts, a, na*sizeof(Mapping_t));
@ -1002,7 +709,7 @@ AccumElemInfo(
case CMD_ATT: /* Aname val */
i = 3;
tok = Split(i18nBuf, &i, 0);
tok = Split(buf, &i, 0);
if (!strcmp(tok[1], "IMPLIED")) break; /* skip IMPLIED atts. */
if (!strcmp(tok[1], "CDATA") || !strcmp(tok[1], "TOKEN") ||
!strcmp(tok[1], "ENTITY") ||!strcmp(tok[1], "NOTATION"))
@ -1021,17 +728,17 @@ AccumElemInfo(
/* These lines come in 2 forms: "L123" and "L123 file.sgml".
* Filename is given only at 1st occurrence. Remember it.
*/
if ((cp = strchr(i18nBuf, ' '))) {
if ((cp = strchr(buf, ' '))) {
cp++;
last_file = strdup(cp);
}
last_lineno = e->lineno = atoi(i18nBuf);
last_lineno = e->lineno = atoi(buf);
e->infile = last_file;
break;
case CMD_DATA: /* -data */
fprintf(stderr, "Error: Data in AccumElemInfo, line %d:\n%c%s\n",
e->lineno, c,i18nBuf);
e->lineno, c,buf);
/*return e;*/
exit(1);
break;
@ -1043,23 +750,23 @@ AccumElemInfo(
case CMD_EXT_ENT: /* Eename typ nname */
i = 3;
tok = Split(i18nBuf, &i, 0);
tok = Split(buf, &i, 0);
ent.ename = strdup(tok[0]);
ent.type = strdup(tok[1]);
ent.nname = strdup(tok[2]);
AddEntity(&ent);
break;
case CMD_INT_ENT: /* Iename typ text */
fprintf(stderr, "Error: Got CMD_INT_ENT in AccumElemInfo: %s\n", i18nBuf);
fprintf(stderr, "Error: Got CMD_INT_ENT in AccumElemInfo: %s\n", buf);
break;
case CMD_SYSID: /* ssysid */
ent.sysid = strdup(i18nBuf);
ent.sysid = strdup(buf);
break;
case CMD_PUBID: /* ppubid */
ent.pubid = strdup(i18nBuf);
ent.pubid = strdup(buf);
break;
case CMD_FILENAME: /* ffilename */
ent.fname = strdup(i18nBuf);
ent.fname = strdup(buf);
break;
case CMD_CLOSE: /* )gi */
@ -1072,13 +779,10 @@ AccumElemInfo(
case CMD_CONFORM: /* C */
default:
fprintf(stderr, "Error: Unexpected input in AccumElemInfo, %d:\n%c%s\n",
e->lineno, c,i18nBuf);
e->lineno, c,buf);
exit(1);
break;
}
if (i18nBuf != buf) {
free(i18nBuf);
}
}
fprintf(stderr, "Error: End of AccumElemInfo - should not be here: %s\n",
e->gi);
@ -1097,7 +801,7 @@ ReadESIS(
int depth
)
{
char *buf, *i18nBuf;
char *buf;
int i, c, ncont;
Element_t *e;
Content_t cont[5000] = {0};
@ -1117,14 +821,10 @@ ReadESIS(
case CMD_DATA: /* -data */
fgets(buf, LINESIZE, fp);
i18nBuf = EscapeI18NChars(buf);
stripNL(i18nBuf);
cont[ncont].ch.data = strdup(i18nBuf);
stripNL(buf);
cont[ncont].ch.data = strdup(buf);
cont[ncont].type = CMD_DATA;
ncont++;
if (i18nBuf != buf) {
free(i18nBuf);
}
break;
case CMD_PI: /* ?pi */

View file

@ -295,6 +295,9 @@ CallInterpreter(
{
int result;
int recursive;
char *tcl_str;
Tcl_DString tcl_dstr;
Tcl_Encoding tcl_enc;
#if 0
if (ib)
@ -316,7 +319,11 @@ CallInterpreter(
ProcesOutputSpec(ib, e, 0, 1);
if (!recursive) {
result = Tcl_Eval(interpreter, GetOutputBuffer());
tcl_enc = Tcl_GetEncoding(NULL, NULL);
tcl_str = Tcl_ExternalToUtfDString(
tcl_enc, GetOutputBuffer(), -1, &tcl_dstr);
result = Tcl_Eval(interpreter, tcl_str);
Tcl_DStringFree(&tcl_dstr);
ClearOutputBuffer();
if (result != TCL_OK) {

View file

@ -698,10 +698,32 @@ int Putc(
)
{
int result;
int j;
char *pc;
char *tcl_str;
Tcl_DString tcl_dstr;
Tcl_Encoding tcl_enc;
static int i = 0;
static char argBuf[8];
static char commandBuf[] = "OutputString \" ";
if (stream) {
argBuf[i++] = c;
mblen(NULL, 0);
if (mblen(argBuf, i) == -1) {
if (i < MB_CUR_MAX) {
return c;
}
else {
i = 0;
fprintf(stderr,
"An invalid multi-byte character was found in the input.");
return EOF;
}
}
pc = &(commandBuf[14]);
switch (c) { /* escape those things that throw off tcl */
case '{':
@ -714,10 +736,13 @@ int Putc(
case '\\':
*pc++ = '\\';
}
*pc++ = c;
for (j = 0; j < i; ++j) *pc++ = argBuf[j]; i = 0;
*pc++ = '"';
*pc++ = 0;
result = Tcl_Eval(interpreter, commandBuf);
tcl_enc = Tcl_GetEncoding(NULL, NULL);
tcl_str = Tcl_ExternalToUtfDString(tcl_enc, commandBuf, -1, &tcl_dstr);
result = Tcl_Eval(interpreter, tcl_str);
Tcl_DStringFree(&tcl_dstr);
if (result != TCL_OK) {
fprintf(stderr,
@ -754,6 +779,9 @@ int FPuts(
const char *ps;
int sLength;
int result;
char *tcl_str;
Tcl_DString tcl_dstr;
Tcl_Encoding tcl_enc;
if ((sLength = strlen(s)) == 0)
return 0; /* no need to call CheckOutputBuffer() */
@ -782,7 +810,10 @@ int FPuts(
} while (*ps);
*pb++ = '"';
*pb = 0;
result = Tcl_Eval(interpreter, pBuff);
tcl_enc = Tcl_GetEncoding(NULL, NULL);
tcl_str = Tcl_ExternalToUtfDString(tcl_enc, pBuff, -1, &tcl_dstr);
result = Tcl_Eval(interpreter, tcl_str);
Tcl_DStringFree(&tcl_dstr);
if (result != TCL_OK) {
fprintf(stderr,