mirror of
git://git.code.sf.net/p/cdesktopenv/code
synced 2025-03-09 15:50:02 +00:00
This takes another step towards cleaning up the build system. We now do not even pretend to be theoretically compatible with pre-1989 K&R C compilers or with C++ compilers. In practice, this had already been broken for many years due to bit rot. Commit46593a89already removed the license handling enormity that depended on proto, so now we can cleanly remove it altogether. But we do need to leave some backwards compatibility stubs to keep the build system compatible with older AST code; it should remain possible to build older ksh versions with the current build system (the bin/ and src/cmd/INIT/ directories) for testing purposes. So as of now there is no more __MANGLE__d rubbish in your generated header files. This is only about a quarter of a century overdue... This commit also includes a huge amount of code cleanup to remove thousands of unused K&R C fallbacks and other cruft, particularly in libast. This code base should now be a little easier to understand for people who are familiar with a modern(ish) C standard. ratz is now also removed; this was a standalone and simplified 2005 version of gunzip. As of6137b99a, none of our code uses it, even theoretically. And the real g(un)zip is now everywhere. src/cmd/INIT/proto.c, src/cmd/INIT/ratz.c: - Removed. COPYRIGHT: - Remove zlib license; this only applied to ratz. bin/package, src/cmd/INIT/package.sh: - Related cleanups. - Unset LC_ALL before invoking a new shell, respecting the user's locale again and avoiding multibyte character corruption on the command line. src/cmd/INIT/proto.sh: - Add stub for backwards compatibility with Mamfiles that depend on proto. It does nothing but pass input without modification and is now installed as the new arch/*/bin/proto by src/cmd/INIT/Mamfile. src/cmd/INIT/iffe.sh: - Ignore the proto-related -e (--package) and -p (--prototyped) options; keep parsing them for backwards compatibility. 
- Trim the macros passed to every test to their standard C versions, removing K&R C and C++ versions. These are now considered to be for backwards compatibility only. src/cmd/INIT/iffe.tst: - Remove proto(1) mangling code. By the way, iffe can be regression-tested as follows: $ bin/package use # set up environment in a child shell $ regress src/cmd/INIT/iffe.tst $ exit # leave package environment src/cmd/INIT/make.probe, src/cmd/INIT/probe.win32: - Remove code to handle C++. src/lib/libast/features/common: - As in iffe.sh above, trim macros designed for compatibility with C++ and ancient C compilers to their standard C versions and comment that they are for backwards compatibility with AST code. This is needed to keep all the old ast and ksh code compiling. src/cmd/ksh93/sh/init.c, src/cmd/ksh93/sh/name.c: - Clarify libshell ABI compatibility function versions of macros. A "proto workaround" comment in the original code mislead me into thinking this had something to do with the removed proto(1), but it's unrelated. Call the workaround macro BYPASS_MACRO instead. src/cmd/ksh93/include/defs.h: - sh_sigcheck() macro: allow &sh as an argument: parenthesise shp. src/cmd/ksh93/sh/nvtype.c: - Remove unused nv_mkstruct() function. (re:d0a5cab1) **/features/*: - Remove obsolete iffe 'set prototyped' option. **/Mamfile: - Remove all references to the ast/prototyped.h header. - Remove all use of the proto command. Simply copy instead. *** 850-ish source files: *** - Remove all '#pragma prototyped' directives. - Remove all C++ compat code conditional upon defined(__cplusplus). - Remove all use of the _ARG_ macro, which on standard C expands to its argument: #define _ARG_(x) x (on K&R C, it expanded to nothing) - Remove all use of _BEGIN_EXTERNS_ and _END_EXTERNS_ macros (empty on standard C; this was for C++ compatibility) - Reduce all #if __STD_C (standard code) #else (K&R code) #endif blocks to the standard code only, without use of the macro. 
- Same for _STD_ macro which seems to have had the same function. - Change all instances of 'Void_t' to standard 'void'.
1590 lines
29 KiB
C
1590 lines
29 KiB
C
/***********************************************************************
|
|
* *
|
|
* This software is part of the ast package *
|
|
* Copyright (c) 1985-2012 AT&T Intellectual Property *
|
|
* Copyright (c) 2020-2021 Contributors to ksh 93u+m *
|
|
* and is licensed under the *
|
|
* Eclipse Public License, Version 1.0 *
|
|
* by AT&T Intellectual Property *
|
|
* *
|
|
* A copy of the License is available at *
|
|
* http://www.eclipse.org/org/documents/epl-v10.html *
|
|
* (with md5 checksum b35adb5213ca9657e911e9befb180842) *
|
|
* *
|
|
* Information and Software Systems Research *
|
|
* AT&T Research *
|
|
* Florham Park NJ *
|
|
* *
|
|
* Glenn Fowler <gsf@research.att.com> *
|
|
* David Korn <dgk@research.att.com> *
|
|
* Phong Vo <kpv@research.att.com> *
|
|
* *
|
|
***********************************************************************/
|
|
|
|
/*
|
|
* Glenn Fowler
|
|
* AT&T Research
|
|
*
|
|
* iconv intercept
|
|
* minimally provides { UTF*<=>bin ASCII<=>EBCDIC* }
|
|
*/
|
|
|
|
#include <ast.h>
|
|
#include <dirent.h>
|
|
#include <error.h>
|
|
|
|
#define DEBUG_TRACE 0
|
|
#define _ICONV_LIST_PRIVATE_
|
|
|
|
#include <ccode.h>
|
|
#include <ctype.h>
|
|
#include <iconv.h>
|
|
|
|
#include "lclib.h"
|
|
|
|
#if !_lib_iconv_open
|
|
|
|
#define _ast_iconv_t iconv_t
|
|
#define _ast_iconv_f iconv_f
|
|
#define _ast_iconv_list_t iconv_list_t
|
|
#define _ast_iconv_open iconv_open
|
|
#define _ast_iconv iconv
|
|
#define _ast_iconv_close iconv_close
|
|
#define _ast_iconv_list iconv_list
|
|
#define _ast_iconv_move iconv_move
|
|
#define _ast_iconv_name iconv_name
|
|
#define _ast_iconv_write iconv_write
|
|
|
|
#endif
|
|
|
|
/*
 * common exit for the static converters below
 *
 *	e	pending errno value, 0 for none (must be an lvalue)
 *	n	count to return on success
 *	fn	pointer to the number of source bytes still unconsumed
 *
 * input left over with no other error pending is reported as E2BIG
 * on error errno is set and (size_t)(-1) is returned
 * the do/while wrapper makes `RETURN(...);' a single statement so it
 * is safe in an unbraced if/else
 */

#define RETURN(e,n,fn) \
	do \
	{ \
		if (*(fn) && !(e)) \
			(e) = E2BIG; \
		if (e) \
		{ \
			errno = (e); \
			return (size_t)(-1); \
		} \
		return (n); \
	} while (0)
|
|
|
|
typedef struct Map_s
|
|
{
|
|
char* name;
|
|
const unsigned char* map;
|
|
_ast_iconv_f fun;
|
|
int index;
|
|
} Map_t;
|
|
|
|
typedef struct Conv_s
|
|
{
|
|
iconv_t cvt;
|
|
char* buf;
|
|
size_t size;
|
|
Map_t from;
|
|
Map_t to;
|
|
} Conv_t;
|
|
|
|
/*
 * closed descriptors are parked here by _ast_iconv_close() so that
 * _ast_iconv_open() can reuse them; freeindex is the slot filled last
 */

static Conv_t		*freelist[4];
static int		freeindex;

/*
 * name_native is also compared by address: _ast_iconv_open() replaces
 * the various spellings of "the local codeset" with this pointer
 */

static const char	name_local[] = "local";
static const char	name_native[] = "native";
|
|
|
|
static const _ast_iconv_list_t codes[] =
|
|
{
|
|
{
|
|
"utf",
|
|
"un|unicode|utf",
|
|
"multibyte 8-bit unicode",
|
|
"UTF-%s",
|
|
"8",
|
|
CC_UTF,
|
|
},
|
|
|
|
{
|
|
"ume",
|
|
"um|ume|utf?(-)7",
|
|
"multibyte 7-bit unicode",
|
|
"UTF-7",
|
|
0,
|
|
CC_UME,
|
|
},
|
|
|
|
{
|
|
"euc",
|
|
"(big|euc)*",
|
|
"euc family",
|
|
0,
|
|
0,
|
|
CC_ICONV,
|
|
},
|
|
|
|
{
|
|
"dos",
|
|
"dos?(-)?(855)",
|
|
"dos code page",
|
|
"DOS855",
|
|
0,
|
|
CC_ICONV,
|
|
},
|
|
|
|
{
|
|
"ucs",
|
|
"ucs?(-)?(2)?(be)|utf-16?(be)",
|
|
"unicode runes",
|
|
"UCS-%s",
|
|
"2",
|
|
CC_UCS,
|
|
},
|
|
|
|
{
|
|
"ucs-le",
|
|
"ucs?(-)?(2)le|utf-16le",
|
|
"little endian unicode runes",
|
|
"UCS-%sLE",
|
|
"2",
|
|
CC_SCU,
|
|
},
|
|
|
|
{ 0 },
|
|
};
|
|
|
|
#if _UWIN
|
|
|
|
#include <ast_windows.h>
|
|
|
|
#ifndef CP_UCS2
|
|
#define CP_UCS2 0x0000
|
|
#endif
|
|
|
|
static char _win_maps[] = "/reg/local_machine/SOFTWARE/Classes/MIME/Database/Charset";
|
|
|
|
/*
|
|
* return the codeset index given its name or alias
|
|
* the map is in the what? oh, the registry
|
|
*/
|
|
|
|
static int
|
|
_win_codeset(const char* name)
|
|
{
|
|
register char* s;
|
|
char* e;
|
|
int n;
|
|
Sfio_t* sp;
|
|
char aka[128];
|
|
char tmp[128];
|
|
|
|
#if DEBUG_TRACE
|
|
error(DEBUG_TRACE, "AHA#%d _win_codeset name=%s", __LINE__, name);
|
|
#endif
|
|
if (name == name_native)
|
|
return CP_ACP;
|
|
if (!strcasecmp(name, "utf") || !strcasecmp(name, "utf8") || !strcasecmp(name, "utf-8"))
|
|
return CP_UTF8;
|
|
if (!strcasecmp(name, "ucs") || !strcasecmp(name, "ucs2") || !strcasecmp(name, "ucs-2"))
|
|
return CP_UCS2;
|
|
if (name[0] == '0' && name[1] == 'x' && (n = strtol(name, &e, 0)) > 0 && !*e)
|
|
return n;
|
|
for (;;)
|
|
{
|
|
sfsprintf(tmp, sizeof(tmp), "%s/%s", _win_maps, name);
|
|
if (!(sp = sfopen(0, tmp, "r")))
|
|
{
|
|
s = (char*)name;
|
|
if ((s[0] == 'c' || s[0] == 'C') && (s[1] == 'p' || s[1] == 'P'))
|
|
s += 2;
|
|
if (!isdigit(s[0]))
|
|
break;
|
|
sfsprintf(tmp, sizeof(tmp), "%s/windows-%s", _win_maps, s);
|
|
if (!(sp = sfopen(0, tmp, "r")))
|
|
break;
|
|
}
|
|
for (;;)
|
|
{
|
|
if (!(s = sfgetr(sp, '\n', 0)))
|
|
{
|
|
sfclose(sp);
|
|
return -1;
|
|
}
|
|
if (!strncasecmp(s, "AliasForCharSet=", 16))
|
|
{
|
|
n = sfvalue(sp) - 17;
|
|
s += 16;
|
|
if (n >= sizeof(aka))
|
|
n = sizeof(aka) - 1;
|
|
memcpy(aka, s, n);
|
|
aka[n] = 0;
|
|
sfclose(sp);
|
|
name = (const char*)aka;
|
|
break;
|
|
}
|
|
if (!strncasecmp(s, "CodePage=", 9))
|
|
{
|
|
s += 9;
|
|
n = strtol(s, 0, 0);
|
|
sfclose(sp);
|
|
return n;
|
|
}
|
|
}
|
|
}
|
|
return -1;
|
|
}
|
|
|
|
/*
|
|
* get and check the codeset indices
|
|
*/
|
|
|
|
static _ast_iconv_t
|
|
_win_iconv_open(register Conv_t* cc, const char* t, const char* f)
|
|
{
|
|
#if DEBUG_TRACE
|
|
error(DEBUG_TRACE, "AHA#%d _win_iconv_open f=%s t=%s\n", __LINE__, f, t);
|
|
#endif
|
|
if ((cc->from.index = _win_codeset(f)) < 0)
|
|
return (_ast_iconv_t)(-1);
|
|
if ((cc->to.index = _win_codeset(t)) < 0)
|
|
return (_ast_iconv_t)(-1);
|
|
#if DEBUG_TRACE
|
|
error(DEBUG_TRACE, "AHA#%d _win_iconv_open f=0x%04x t=0x%04x\n", __LINE__, cc->from.index, cc->to.index);
|
|
#endif
|
|
return (_ast_iconv_t)cc;
|
|
}
|
|
|
|
/*
|
|
* even though the indices already check out
|
|
* they could still be rejected
|
|
*/
|
|
|
|
static size_t
|
|
_win_iconv(_ast_iconv_t cd, char** fb, size_t* fn, char** tb, size_t* tn)
|
|
{
|
|
Conv_t* cc = (Conv_t*)cd;
|
|
size_t un;
|
|
size_t tz;
|
|
size_t fz;
|
|
size_t bz;
|
|
size_t pz;
|
|
size_t oz;
|
|
LPWSTR ub;
|
|
|
|
#if DEBUG_TRACE
|
|
error(DEBUG_TRACE, "AHA#%d _win_iconv from=0x%04x to=0x%04x\n", __LINE__, cc->from.index, cc->to.index);
|
|
#endif
|
|
if (cc->from.index == cc->to.index || cc->from.index != CP_UCS2 && cc->to.index == 0)
|
|
{
|
|
/*
|
|
* easy
|
|
*/
|
|
|
|
fz = tz = (*fn < *tn) ? *fn : *tn;
|
|
memcpy(*tb, *fb, fz);
|
|
}
|
|
else
|
|
{
|
|
ub = 0;
|
|
un = *fn;
|
|
|
|
/*
|
|
* from => UCS-2
|
|
*/
|
|
|
|
if (cc->to.index == CP_UCS2)
|
|
{
|
|
if ((tz = MultiByteToWideChar(cc->from.index, 0, (LPCSTR)*fb, (int)*fn, (LPWSTR)*tb, *tn)) && tz <= *tn)
|
|
{
|
|
fz = *fn;
|
|
tz *= sizeof(WCHAR);
|
|
}
|
|
else
|
|
{
|
|
/*
|
|
* target too small
|
|
* binary search on input size to make it fit
|
|
*/
|
|
|
|
oz = 0;
|
|
pz = *fn / 2;
|
|
fz = *fn - pz;
|
|
for (;;)
|
|
{
|
|
while (!(tz = MultiByteToWideChar(cc->from.index, 0, (LPCSTR)*fb, (int)fz, (LPWSTR)*tb, 0)))
|
|
if (++fz >= *fn)
|
|
goto nope;
|
|
tz *= sizeof(WCHAR);
|
|
if (tz == *tn)
|
|
break;
|
|
if (!(pz /= 2))
|
|
{
|
|
if (!(fz = oz))
|
|
goto nope;
|
|
break;
|
|
}
|
|
if (tz > *tn)
|
|
fz -= pz;
|
|
else
|
|
{
|
|
oz = fz;
|
|
fz += pz;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
else
|
|
{
|
|
if (cc->from.index == CP_UCS2)
|
|
{
|
|
un = *fn / sizeof(WCHAR);
|
|
ub = (LPWSTR)*fb;
|
|
}
|
|
else if (!(un = MultiByteToWideChar(cc->from.index, 0, (LPCSTR)*fb, (int)*fn, (LPWSTR)*tb, 0)))
|
|
goto nope;
|
|
else if (!(ub = (LPWSTR)malloc(un * sizeof(WCHAR))))
|
|
goto nope;
|
|
else if (!(un = MultiByteToWideChar(cc->from.index, 0, (LPCSTR)*fb, (int)*fn, (LPWSTR)ub, un)))
|
|
goto nope;
|
|
|
|
/*
|
|
* UCS-2 => to
|
|
*/
|
|
|
|
if (tz = WideCharToMultiByte(cc->to.index, 0, (LPCWSTR)ub, un, *tb, *tn, 0, 0))
|
|
fz = *fn;
|
|
else
|
|
{
|
|
/*
|
|
* target too small
|
|
* binary search on input size to make it fit
|
|
*/
|
|
|
|
oz = 0;
|
|
pz = *fn / 2;
|
|
bz = *fn - pz;
|
|
for (;;)
|
|
{
|
|
while (!(fz = MultiByteToWideChar(cc->from.index, 0, (LPCSTR)*fb, (int)bz, (LPWSTR)ub, un)))
|
|
if (++bz > *fn)
|
|
goto nope;
|
|
if (!(tz = WideCharToMultiByte(cc->to.index, 0, (LPCWSTR)ub, fz, *tb, 0, 0, 0)))
|
|
goto nope;
|
|
if (tz == *tn)
|
|
break;
|
|
if (!(pz /= 2))
|
|
{
|
|
if (!(fz = oz))
|
|
goto nope;
|
|
break;
|
|
}
|
|
if (tz > *tn)
|
|
bz -= pz;
|
|
else
|
|
{
|
|
oz = bz;
|
|
bz += pz;
|
|
}
|
|
}
|
|
if (!(tz = WideCharToMultiByte(cc->to.index, 0, (LPCWSTR)ub, fz, *tb, tz, 0, 0)))
|
|
goto nope;
|
|
#if DEBUG_TRACE
|
|
error(DEBUG_TRACE, "AHA#%d _win_iconv *fn=%u fz=%u[%u] *tn=%u tz=%u\n", __LINE__, *fn, fz, fz * sizeof(WCHAR), *tn, tz);
|
|
#endif
|
|
}
|
|
if (ub != (LPWSTR)*fb)
|
|
free(ub);
|
|
}
|
|
}
|
|
*fb += fz;
|
|
*fn -= fz;
|
|
*tb += tz;
|
|
*tn -= tz;
|
|
return fz;
|
|
nope:
|
|
if (ub && ub != (LPWSTR)*fb)
|
|
free(ub);
|
|
errno = EINVAL;
|
|
return (size_t)(-1);
|
|
}
|
|
|
|
#endif
|
|
|
|
/*
|
|
* return canonical character code set name for m
|
|
* if b!=0 then canonical name placed in b of size n
|
|
* <ccode.h> index returned
|
|
*/
|
|
|
|
int
|
|
_ast_iconv_name(register const char* m, register char* b, size_t n)
|
|
{
|
|
register const _ast_iconv_list_t* cp;
|
|
const _ast_iconv_list_t* bp;
|
|
register int c;
|
|
register char* e;
|
|
ssize_t sub[2];
|
|
char buf[16];
|
|
#if DEBUG_TRACE
|
|
char* o;
|
|
#endif
|
|
|
|
if (!b)
|
|
{
|
|
b = buf;
|
|
n = sizeof(buf);
|
|
}
|
|
#if DEBUG_TRACE
|
|
o = b;
|
|
#endif
|
|
e = b + n - 1;
|
|
bp = 0;
|
|
n = 0;
|
|
cp = ccmaplist(NiL);
|
|
#if DEBUG_TRACE
|
|
if (error_info.trace < DEBUG_TRACE) sfprintf(sfstderr, "%s: debug-%d: AHA%d _ast_iconv_name m=\"%s\"\n", error_info.id, error_info.trace, __LINE__, m);
|
|
#endif
|
|
for (;;)
|
|
{
|
|
#if DEBUG_TRACE
|
|
if (error_info.trace < DEBUG_TRACE) sfprintf(sfstderr, "%s: debug-%d: AHA%d _ast_iconv_name n=%d bp=%p cp=%p ccode=%d name=\"%s\"\n", error_info.id, error_info.trace, __LINE__, n, bp, cp, cp->ccode, cp->name);
|
|
#endif
|
|
if (strgrpmatch(m, cp->match, sub, elementsof(sub) / 2, STR_MAXIMAL|STR_LEFT|STR_ICASE))
|
|
{
|
|
if (!(c = m[sub[1]]))
|
|
{
|
|
bp = cp;
|
|
break;
|
|
}
|
|
if (sub[1] > n && !isalpha(c))
|
|
{
|
|
bp = cp;
|
|
n = sub[1];
|
|
}
|
|
}
|
|
if (cp->ccode < 0)
|
|
{
|
|
if (!(++cp)->name)
|
|
break;
|
|
}
|
|
else if (!(cp = (const _ast_iconv_list_t*)ccmaplist((_ast_iconv_list_t*)cp)))
|
|
cp = codes;
|
|
}
|
|
if (cp = bp)
|
|
{
|
|
if (cp->canon)
|
|
{
|
|
if (cp->index)
|
|
{
|
|
for (m += sub[1]; *m && !isalnum(*m); m++);
|
|
if (!isdigit(*m))
|
|
m = cp->index;
|
|
}
|
|
else
|
|
m = "1";
|
|
b += sfsprintf(b, e - b, cp->canon, m);
|
|
}
|
|
else if (cp->ccode == CC_NATIVE)
|
|
{
|
|
if ((locales[AST_LC_CTYPE]->flags & LC_default) || !locales[AST_LC_CTYPE]->charset || !(m = locales[AST_LC_CTYPE]->charset->code) || streq(m, "iso8859-1"))
|
|
switch (CC_NATIVE)
|
|
{
|
|
case CC_EBCDIC:
|
|
m = (const char*)"EBCDIC";
|
|
break;
|
|
case CC_EBCDIC_I:
|
|
m = (const char*)"EBCDIC-I";
|
|
break;
|
|
case CC_EBCDIC_O:
|
|
m = (const char*)"EBCDIC-O";
|
|
break;
|
|
default:
|
|
m = (const char*)"ISO-8859-1";
|
|
break;
|
|
}
|
|
b += sfsprintf(b, e - b, "%s", m);
|
|
}
|
|
*b = 0;
|
|
#if DEBUG_TRACE
|
|
if (error_info.trace < DEBUG_TRACE) sfprintf(sfstderr, "%s: debug-%d: AHA%d _ast_iconv_name ccode=%d canon=\"%s\"\n", error_info.id, error_info.trace, __LINE__, cp->ccode, o);
|
|
#endif
|
|
return cp->ccode;
|
|
}
|
|
while (b < e && (c = *m++))
|
|
{
|
|
if (islower(c))
|
|
c = toupper(c);
|
|
*b++ = c;
|
|
}
|
|
*b = 0;
|
|
#if DEBUG_TRACE
|
|
if (error_info.trace < DEBUG_TRACE) sfprintf(sfstderr, "%s: debug-%d: AHA%d _ast_iconv_name ccode=%d canon=\"%s\"\n", error_info.id, error_info.trace, __LINE__, CC_ICONV, o);
|
|
#endif
|
|
return CC_ICONV;
|
|
}
|
|
|
|
/*
|
|
* convert UTF-8 to bin
|
|
*/
|
|
|
|
static size_t
|
|
utf2bin(_ast_iconv_t cd, char** fb, size_t* fn, char** tb, size_t* tn)
|
|
{
|
|
register unsigned char* f;
|
|
register unsigned char* fe;
|
|
register unsigned char* t;
|
|
register unsigned char* te;
|
|
register unsigned char* p;
|
|
register int c;
|
|
register int w;
|
|
size_t n;
|
|
int e;
|
|
|
|
e = 0;
|
|
f = (unsigned char*)(*fb);
|
|
fe = f + (*fn);
|
|
t = (unsigned char*)(*tb);
|
|
te = t + (*tn);
|
|
while (t < te && f < fe)
|
|
{
|
|
p = f;
|
|
c = *f++;
|
|
if (c & 0x80)
|
|
{
|
|
if (!(c & 0x40))
|
|
{
|
|
f = p;
|
|
e = EILSEQ;
|
|
break;
|
|
}
|
|
if (c & 0x20)
|
|
{
|
|
w = (c & 0x0F) << 12;
|
|
if (f >= fe)
|
|
{
|
|
f = p;
|
|
e = EINVAL;
|
|
break;
|
|
}
|
|
c = *f++;
|
|
if (c & 0x40)
|
|
{
|
|
f = p;
|
|
e = EILSEQ;
|
|
break;
|
|
}
|
|
w |= (c & 0x3F) << 6;
|
|
}
|
|
else
|
|
w = (c & 0x1F) << 6;
|
|
if (f >= fe)
|
|
{
|
|
f = p;
|
|
e = EINVAL;
|
|
break;
|
|
}
|
|
c = *f++;
|
|
w |= (c & 0x3F);
|
|
}
|
|
else
|
|
w = c;
|
|
*t++ = w;
|
|
}
|
|
*fn -= (char*)f - (*fb);
|
|
*fb = (char*)f;
|
|
*tn -= (n = (char*)t - (*tb));
|
|
*tb = (char*)t;
|
|
RETURN(e, n, fn);
|
|
}
|
|
|
|
/*
|
|
* convert bin to UTF-8
|
|
*/
|
|
|
|
static size_t
|
|
bin2utf(_ast_iconv_t cd, char** fb, size_t* fn, char** tb, size_t* tn)
|
|
{
|
|
register unsigned char* f;
|
|
register unsigned char* fe;
|
|
register unsigned char* t;
|
|
register unsigned char* te;
|
|
register int c;
|
|
wchar_t w;
|
|
size_t n;
|
|
int e;
|
|
|
|
e = 0;
|
|
f = (unsigned char*)(*fb);
|
|
fe = f + (*fn);
|
|
t = (unsigned char*)(*tb);
|
|
te = t + (*tn);
|
|
while (f < fe && t < te)
|
|
{
|
|
if (!mbwide())
|
|
{
|
|
c = 1;
|
|
w = *f;
|
|
}
|
|
else if ((c = (*_ast_info.mb_towc)(&w, (char*)f, fe - f)) < 0)
|
|
{
|
|
e = EINVAL;
|
|
break;
|
|
}
|
|
else if (!c)
|
|
c = 1;
|
|
if (!(w & ~0x7F))
|
|
*t++ = w;
|
|
else
|
|
{
|
|
if (!(w & ~0x7FF))
|
|
{
|
|
if (t >= (te - 2))
|
|
{
|
|
e = E2BIG;
|
|
break;
|
|
}
|
|
*t++ = 0xC0 + (w >> 6);
|
|
}
|
|
else if (!(w & ~0xffff))
|
|
{
|
|
if (t >= (te - 3))
|
|
{
|
|
e = E2BIG;
|
|
break;
|
|
}
|
|
*t++ = 0xE0 + (w >> 12);
|
|
*t++ = 0x80 + ((w >> 6 ) & 0x3F);
|
|
}
|
|
else
|
|
{
|
|
e = EILSEQ;
|
|
break;
|
|
}
|
|
*t++ = 0x80 + (w & 0x3F);
|
|
}
|
|
f += c;
|
|
}
|
|
*fn -= (n = (char*)f - (*fb));
|
|
*fb = (char*)f;
|
|
*tn -= (char*)t - (*tb);
|
|
*tb = (char*)t;
|
|
RETURN(e, n, fn);
|
|
}
|
|
|
|
/*
 * characters that bin2ume() writes as themselves
 */

static const unsigned char	ume_D[] =
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789'(),-./:?!\"#$%&*;<=>@[]^_`{|} \t\n";

/*
 * the 64 digits used inside a shifted (+...-) sequence
 */

static const unsigned char	ume_M[] =
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";

/*
 * lookup tables filled in by umeinit()
 *	ume_d[c]	1 if c is in ume_D
 *	ume_m[c]	position of c in ume_M, NOE if it is not there
 */

static unsigned char		ume_d[UCHAR_MAX+1];
static unsigned char		ume_m[UCHAR_MAX+1];

#define NOE			0xFF
#define UMEINIT()		(ume_d[ume_D[0]]?0:umeinit())

/*
 * initialize the ume tables
 * does nothing if they are already set up; always returns 0
 */

static int
umeinit(void)
{
	size_t	k;

	if (ume_d[ume_D[0]])
		return 0;
	for (k = 0; ume_D[k]; k++)
		ume_d[ume_D[k]] = 1;
	memset(ume_m, NOE, sizeof(ume_m));
	for (k = 0; ume_M[k]; k++)
		ume_m[ume_M[k]] = (unsigned char)k;
	return 0;
}
|
|
|
|
/*
|
|
* convert UTF-7 to bin
|
|
*/
|
|
|
|
static size_t
|
|
ume2bin(_ast_iconv_t cd, char** fb, size_t* fn, char** tb, size_t* tn)
|
|
{
|
|
register unsigned char* f;
|
|
register unsigned char* fe;
|
|
register unsigned char* t;
|
|
register unsigned char* te;
|
|
register unsigned char* p;
|
|
register int s;
|
|
register int c;
|
|
register int w;
|
|
size_t n;
|
|
int e;
|
|
|
|
e = 0;
|
|
UMEINIT();
|
|
f = (unsigned char*)(*fb);
|
|
fe = f + (*fn);
|
|
t = (unsigned char*)(*tb);
|
|
te = t + (*tn);
|
|
s = 0;
|
|
while (f < fe && t < te)
|
|
{
|
|
p = f;
|
|
c = *f++;
|
|
if (s)
|
|
{
|
|
if (c == '-' && s > 1)
|
|
s = 0;
|
|
else if ((w = ume_m[c]) == NOE)
|
|
{
|
|
s = 0;
|
|
*t++ = c;
|
|
}
|
|
else if (f >= (fe - 2))
|
|
{
|
|
f = p;
|
|
e = EINVAL;
|
|
break;
|
|
}
|
|
else
|
|
{
|
|
s = 2;
|
|
w = (w << 6) | ume_m[*f++];
|
|
w = (w << 6) | ume_m[*f++];
|
|
if (!(w & ~0xFF))
|
|
*t++ = w;
|
|
else if (t >= (te - 1))
|
|
{
|
|
f = p;
|
|
e = E2BIG;
|
|
break;
|
|
}
|
|
else
|
|
{
|
|
*t++ = (w >> 8) & 0xFF;
|
|
*t++ = w & 0xFF;
|
|
}
|
|
}
|
|
}
|
|
else if (c == '+')
|
|
s = 1;
|
|
else
|
|
*t++ = c;
|
|
}
|
|
*fn -= (char*)f - (*fb);
|
|
*fb = (char*)f;
|
|
*tn -= (n = (char*)t - (*tb));
|
|
*tb = (char*)t;
|
|
RETURN(e, n, fn);
|
|
}
|
|
|
|
/*
|
|
* convert bin to UTF-7
|
|
*/
|
|
|
|
static size_t
|
|
bin2ume(_ast_iconv_t cd, char** fb, size_t* fn, char** tb, size_t* tn)
|
|
{
|
|
register unsigned char* f;
|
|
register unsigned char* fe;
|
|
register unsigned char* t;
|
|
register unsigned char* te;
|
|
register int c;
|
|
register int s;
|
|
wchar_t w;
|
|
size_t n;
|
|
int e;
|
|
|
|
e = 0;
|
|
UMEINIT();
|
|
f = (unsigned char*)(*fb);
|
|
fe = f + (*fn);
|
|
t = (unsigned char*)(*tb);
|
|
te = t + (*tn);
|
|
s = 0;
|
|
while (f < fe && t < (te - s))
|
|
{
|
|
if (!mbwide())
|
|
{
|
|
c = 1;
|
|
w = *f;
|
|
}
|
|
else if ((c = (*_ast_info.mb_towc)(&w, (char*)f, fe - f)) < 0)
|
|
{
|
|
e = EINVAL;
|
|
break;
|
|
}
|
|
else if (!c)
|
|
c = 1;
|
|
if (!(w & ~0x7F) && ume_d[w])
|
|
{
|
|
if (s)
|
|
{
|
|
s = 0;
|
|
*t++ = '-';
|
|
}
|
|
*t++ = w;
|
|
}
|
|
else if (t >= (te - (4 + s)))
|
|
{
|
|
e = E2BIG;
|
|
break;
|
|
}
|
|
else
|
|
{
|
|
if (!s)
|
|
{
|
|
s = 1;
|
|
*t++ = '+';
|
|
}
|
|
*t++ = ume_M[(w >> 12) & 0x3F];
|
|
*t++ = ume_M[(w >> 6) & 0x3F];
|
|
*t++ = ume_M[w & 0x3F];
|
|
}
|
|
f += c;
|
|
}
|
|
if (s)
|
|
*t++ = '-';
|
|
*fn -= (n = (char*)f - (*fb));
|
|
*fb = (char*)f;
|
|
*tn -= (char*)t - (*tb);
|
|
*tb = (char*)t;
|
|
RETURN(e, n, fn);
|
|
}
|
|
|
|
/*
|
|
* convert UCS-2 to bin with no byte swap
|
|
*/
|
|
|
|
static size_t
|
|
ucs2bin(_ast_iconv_t cd, char** fb, size_t* fn, char** tb, size_t* tn)
|
|
{
|
|
register unsigned char* f;
|
|
register unsigned char* fe;
|
|
register unsigned char* t;
|
|
register unsigned char* te;
|
|
register int w;
|
|
size_t n;
|
|
int e;
|
|
|
|
e = 0;
|
|
f = (unsigned char*)(*fb);
|
|
fe = f + (*fn);
|
|
t = (unsigned char*)(*tb);
|
|
te = t + (*tn);
|
|
while (f < (fe - 1) && t < te)
|
|
{
|
|
w = *f++;
|
|
w = (w << 8) | *f++;
|
|
if (!(w & ~0xFF))
|
|
*t++ = w;
|
|
else if (t >= (te - 1))
|
|
{
|
|
f -= 2;
|
|
e = E2BIG;
|
|
break;
|
|
}
|
|
else
|
|
{
|
|
*t++ = (w >> 8) & 0xFF;
|
|
*t++ = w & 0xFF;
|
|
}
|
|
}
|
|
*fn -= (char*)f - (*fb);
|
|
*fb = (char*)f;
|
|
*tn -= (n = (char*)t - (*tb));
|
|
*tb = (char*)t;
|
|
RETURN(e, n, fn);
|
|
}
|
|
|
|
/*
|
|
* convert bin to UCS-2 with no byte swap
|
|
*/
|
|
|
|
static size_t
|
|
bin2ucs(_ast_iconv_t cd, char** fb, size_t* fn, char** tb, size_t* tn)
|
|
{
|
|
register unsigned char* f;
|
|
register unsigned char* fe;
|
|
register unsigned char* t;
|
|
register unsigned char* te;
|
|
register int c;
|
|
wchar_t w;
|
|
size_t n;
|
|
int e;
|
|
|
|
e = 0;
|
|
f = (unsigned char*)(*fb);
|
|
fe = f + (*fn);
|
|
t = (unsigned char*)(*tb);
|
|
te = t + (*tn);
|
|
while (f < fe && t < (te - 1))
|
|
{
|
|
if (!mbwide())
|
|
{
|
|
c = 1;
|
|
w = *f;
|
|
}
|
|
if ((c = (*_ast_info.mb_towc)(&w, (char*)f, fe - f)) < 0)
|
|
{
|
|
e = EINVAL;
|
|
break;
|
|
}
|
|
else if (!c)
|
|
c = 1;
|
|
*t++ = (w >> 8) & 0xFF;
|
|
*t++ = w & 0xFF;
|
|
f += c;
|
|
}
|
|
*fn -= (n = (char*)f - (*fb));
|
|
*fb = (char*)f;
|
|
*tn -= (char*)t - (*tb);
|
|
*tb = (char*)t;
|
|
RETURN(e, n, fn);
|
|
}
|
|
|
|
/*
|
|
* convert UCS-2 to bin with byte swap
|
|
*/
|
|
|
|
static size_t
|
|
scu2bin(_ast_iconv_t cd, char** fb, size_t* fn, char** tb, size_t* tn)
|
|
{
|
|
register unsigned char* f;
|
|
register unsigned char* fe;
|
|
register unsigned char* t;
|
|
register unsigned char* te;
|
|
register int w;
|
|
size_t n;
|
|
int e;
|
|
|
|
e = 0;
|
|
f = (unsigned char*)(*fb);
|
|
fe = f + (*fn);
|
|
t = (unsigned char*)(*tb);
|
|
te = t + (*tn);
|
|
while (f < (fe - 1) && t < te)
|
|
{
|
|
w = *f++;
|
|
w = w | (*f++ << 8);
|
|
if (!(w & ~0xFF))
|
|
*t++ = w;
|
|
else if (t >= (te - 1))
|
|
{
|
|
f -= 2;
|
|
e = E2BIG;
|
|
break;
|
|
}
|
|
else
|
|
{
|
|
*t++ = (w >> 8) & 0xFF;
|
|
*t++ = w & 0xFF;
|
|
}
|
|
}
|
|
*fn -= (char*)f - (*fb);
|
|
*fb = (char*)f;
|
|
*tn -= (n = (char*)t - (*tb));
|
|
*tb = (char*)t;
|
|
RETURN(e, n, fn);
|
|
}
|
|
|
|
/*
|
|
* convert bin to UCS-2 with byte swap
|
|
*/
|
|
|
|
static size_t
|
|
bin2scu(_ast_iconv_t cd, char** fb, size_t* fn, char** tb, size_t* tn)
|
|
{
|
|
register unsigned char* f;
|
|
register unsigned char* fe;
|
|
register unsigned char* t;
|
|
register unsigned char* te;
|
|
register int c;
|
|
wchar_t w;
|
|
size_t n;
|
|
int e;
|
|
|
|
e = 0;
|
|
f = (unsigned char*)(*fb);
|
|
fe = f + (*fn);
|
|
t = (unsigned char*)(*tb);
|
|
te = t + (*tn);
|
|
while (f < fe && t < (te - 1))
|
|
{
|
|
if (!mbwide())
|
|
{
|
|
c = 1;
|
|
w = *f;
|
|
}
|
|
else if ((c = (*_ast_info.mb_towc)(&w, (char*)f, fe - f)) < 0)
|
|
{
|
|
e = EINVAL;
|
|
break;
|
|
}
|
|
else if (!c)
|
|
c = 1;
|
|
*t++ = w & 0xFF;
|
|
*t++ = (w >> 8) & 0xFF;
|
|
f += c;
|
|
}
|
|
*fn -= (n = (char*)f - (*fb));
|
|
*fb = (char*)f;
|
|
*tn -= (char*)t - (*tb);
|
|
*tb = (char*)t;
|
|
RETURN(e, n, fn);
|
|
}
|
|
|
|
/*
|
|
* open a character code conversion map from f to t
|
|
*/
|
|
|
|
_ast_iconv_t
|
|
_ast_iconv_open(const char* t, const char* f)
|
|
{
|
|
register Conv_t* cc;
|
|
int fc;
|
|
int tc;
|
|
int i;
|
|
|
|
char fr[64];
|
|
char to[64];
|
|
|
|
#if DEBUG_TRACE
|
|
error(DEBUG_TRACE, "AHA#%d _ast_iconv_open f=%s t=%s\n", __LINE__, f, t);
|
|
#endif
|
|
if (!t || !*t || *t == '-' && !*(t + 1) || !strcasecmp(t, name_local) || !strcasecmp(t, name_native))
|
|
t = name_native;
|
|
if (!f || !*f || *f == '-' && !*(f + 1) || !strcasecmp(t, name_local) || !strcasecmp(f, name_native))
|
|
f = name_native;
|
|
|
|
/*
|
|
* the AST identify is always (iconv_t)(0)
|
|
*/
|
|
|
|
if (t == f)
|
|
return (iconv_t)(0);
|
|
fc = _ast_iconv_name(f, fr, sizeof(fr));
|
|
tc = _ast_iconv_name(t, to, sizeof(to));
|
|
#if DEBUG_TRACE
|
|
error(DEBUG_TRACE, "AHA#%d _ast_iconv_open f=%s:%s:%d t=%s:%s:%d\n", __LINE__, f, fr, fc, t, to, tc);
|
|
#endif
|
|
if (fc != CC_ICONV && fc == tc || streq(fr, to))
|
|
return (iconv_t)(0);
|
|
|
|
/*
|
|
* first check the free list
|
|
*/
|
|
|
|
for (i = 0; i < elementsof(freelist); i++)
|
|
if ((cc = freelist[i]) && streq(to, cc->to.name) && streq(fr, cc->from.name))
|
|
{
|
|
freelist[i] = 0;
|
|
#if _lib_iconv_open
|
|
/*
|
|
* reset the shift state if any
|
|
*/
|
|
|
|
if (cc->cvt != (iconv_t)(-1))
|
|
iconv(cc->cvt, NiL, NiL, NiL, NiL);
|
|
#endif
|
|
return cc;
|
|
}
|
|
|
|
/*
|
|
* allocate a new one
|
|
*/
|
|
|
|
if (!(cc = newof(0, Conv_t, 1, strlen(to) + strlen(fr) + 2)))
|
|
return (iconv_t)(-1);
|
|
cc->to.name = (char*)(cc + 1);
|
|
cc->from.name = strcopy(cc->to.name, to) + 1;
|
|
strcpy(cc->from.name, fr);
|
|
cc->cvt = (iconv_t)(-1);
|
|
|
|
/*
|
|
* 8-bit maps are the easiest
|
|
*/
|
|
|
|
if (fc >= 0 && tc >= 0)
|
|
cc->from.map = ccmap(fc, tc);
|
|
#if _lib_iconv_open
|
|
else if ((cc->cvt = iconv_open(t, f)) != (iconv_t)(-1) || (cc->cvt = iconv_open(to, fr)) != (iconv_t)(-1))
|
|
cc->from.fun = (_ast_iconv_f)iconv;
|
|
#endif
|
|
#if _UWIN
|
|
else if ((cc->cvt = _win_iconv_open(cc, t, f)) != (_ast_iconv_t)(-1) || (cc->cvt = _win_iconv_open(cc, to, fr)) != (_ast_iconv_t)(-1))
|
|
cc->from.fun = (_ast_iconv_f)_win_iconv;
|
|
#endif
|
|
else
|
|
{
|
|
switch (fc)
|
|
{
|
|
case CC_UTF:
|
|
cc->from.fun = utf2bin;
|
|
break;
|
|
case CC_UME:
|
|
cc->from.fun = ume2bin;
|
|
break;
|
|
case CC_UCS:
|
|
cc->from.fun = ucs2bin;
|
|
break;
|
|
case CC_SCU:
|
|
cc->from.fun = scu2bin;
|
|
break;
|
|
case CC_ASCII:
|
|
break;
|
|
default:
|
|
if (fc < 0)
|
|
goto nope;
|
|
cc->from.map = ccmap(fc, CC_ASCII);
|
|
break;
|
|
}
|
|
switch (tc)
|
|
{
|
|
case CC_UTF:
|
|
cc->to.fun = bin2utf;
|
|
break;
|
|
case CC_UME:
|
|
cc->to.fun = bin2ume;
|
|
break;
|
|
case CC_UCS:
|
|
cc->to.fun = bin2ucs;
|
|
break;
|
|
case CC_SCU:
|
|
cc->to.fun = bin2scu;
|
|
break;
|
|
case CC_ASCII:
|
|
break;
|
|
default:
|
|
if (tc < 0)
|
|
goto nope;
|
|
cc->to.map = ccmap(CC_ASCII, tc);
|
|
break;
|
|
}
|
|
}
|
|
return (iconv_t)cc;
|
|
nope:
|
|
return (iconv_t)(-1);
|
|
}
|
|
|
|
/*
|
|
* close a character code conversion map
|
|
*/
|
|
|
|
int
|
|
_ast_iconv_close(_ast_iconv_t cd)
|
|
{
|
|
Conv_t* cc;
|
|
Conv_t* oc;
|
|
int i;
|
|
int r = 0;
|
|
|
|
if (cd == (_ast_iconv_t)(-1))
|
|
return -1;
|
|
if (!(cc = (Conv_t*)cd))
|
|
return 0;
|
|
|
|
/*
|
|
* add to the free list
|
|
*/
|
|
|
|
i = freeindex;
|
|
for (;;)
|
|
{
|
|
if (++ i >= elementsof(freelist))
|
|
i = 0;
|
|
if (!freelist[i])
|
|
break;
|
|
if (i == freeindex)
|
|
{
|
|
if (++ i >= elementsof(freelist))
|
|
i = 0;
|
|
|
|
/*
|
|
* close the oldest
|
|
*/
|
|
|
|
if (oc = freelist[i])
|
|
{
|
|
#if _lib_iconv_open
|
|
if (oc->cvt != (iconv_t)(-1))
|
|
r = iconv_close(oc->cvt);
|
|
#endif
|
|
if (oc->buf)
|
|
free(oc->buf);
|
|
free(oc);
|
|
}
|
|
break;
|
|
}
|
|
}
|
|
freelist[freeindex = i] = cc;
|
|
return r;
|
|
}
|
|
|
|
/*
|
|
* copy *fb size *fn to *tb size *tn
|
|
* fb,fn tb,tn updated on return
|
|
*/
|
|
|
|
size_t
|
|
_ast_iconv(_ast_iconv_t cd, char** fb, size_t* fn, char** tb, size_t* tn)
|
|
{
|
|
Conv_t* cc = (Conv_t*)cd;
|
|
register unsigned char* f;
|
|
register unsigned char* t;
|
|
register unsigned char* e;
|
|
register const unsigned char* m;
|
|
register size_t n;
|
|
char* b;
|
|
char* tfb;
|
|
size_t tfn;
|
|
size_t i;
|
|
|
|
if (!fb || !*fb)
|
|
{
|
|
/* TODO: reset to the initial state */
|
|
if (!tb || !*tb)
|
|
return 0;
|
|
/* TODO: write the initial state shift sequence */
|
|
return 0;
|
|
}
|
|
n = *tn;
|
|
if (cc)
|
|
{
|
|
if (cc->from.fun)
|
|
{
|
|
if (cc->to.fun)
|
|
{
|
|
if (!cc->buf && !(cc->buf = oldof(0, char, cc->size = SF_BUFSIZE, 0)))
|
|
{
|
|
errno = ENOMEM;
|
|
return -1;
|
|
}
|
|
b = cc->buf;
|
|
i = cc->size;
|
|
tfb = *fb;
|
|
tfn = *fn;
|
|
if ((*cc->from.fun)(cc->cvt, &tfb, &tfn, &b, &i) == (size_t)(-1))
|
|
return -1;
|
|
tfn = b - cc->buf;
|
|
tfb = cc->buf;
|
|
n = (*cc->to.fun)(cc->cvt, &tfb, &tfn, tb, tn);
|
|
i = tfb - cc->buf;
|
|
*fb += i;
|
|
*fn -= i;
|
|
return n;
|
|
}
|
|
if ((*cc->from.fun)(cc->cvt, fb, fn, tb, tn) == (size_t)(-1))
|
|
return -1;
|
|
n -= *tn;
|
|
if (m = cc->to.map)
|
|
{
|
|
e = (unsigned char*)(*tb);
|
|
for (t = e - n; t < e; t++)
|
|
*t = m[*t];
|
|
}
|
|
return n;
|
|
}
|
|
else if (cc->to.fun)
|
|
{
|
|
if (!(m = cc->from.map))
|
|
return (*cc->to.fun)(cc->cvt, fb, fn, tb, tn);
|
|
if (!cc->buf && !(cc->buf = oldof(0, char, cc->size = SF_BUFSIZE, 0)))
|
|
{
|
|
errno = ENOMEM;
|
|
return -1;
|
|
}
|
|
if ((n = *fn) > cc->size)
|
|
n = cc->size;
|
|
f = (unsigned char*)(*fb);
|
|
e = f + n;
|
|
t = (unsigned char*)(b = cc->buf);
|
|
while (f < e)
|
|
*t++ = m[*f++];
|
|
n = (*cc->to.fun)(cc->cvt, &b, fn, tb, tn);
|
|
*fb += b - cc->buf;
|
|
return n;
|
|
}
|
|
}
|
|
if (n > *fn)
|
|
n = *fn;
|
|
if (cc && (m = cc->from.map))
|
|
{
|
|
f = (unsigned char*)(*fb);
|
|
e = f + n;
|
|
t = (unsigned char*)(*tb);
|
|
while (f < e)
|
|
*t++ = m[*f++];
|
|
}
|
|
else
|
|
memcpy(*tb, *fb, n);
|
|
*fb += n;
|
|
*fn -= n;
|
|
*tb += n;
|
|
*tn -= n;
|
|
return n;
|
|
}
|
|
|
|
#define OK ((size_t)-1)
|
|
|
|
/*
|
|
* write *fb size *fn to op
|
|
* fb,fn updated on return
|
|
* total bytes written to op returned
|
|
*/
|
|
|
|
ssize_t
|
|
_ast_iconv_write(_ast_iconv_t cd, Sfio_t* op, char** fb, size_t* fn, Iconv_disc_t* disc)
|
|
{
|
|
char* fo = *fb;
|
|
char* tb;
|
|
char* ts;
|
|
size_t* e;
|
|
size_t tn;
|
|
size_t r;
|
|
int ok;
|
|
Iconv_disc_t compat;
|
|
|
|
/*
|
|
* the old API had optional size_t* instead of Iconv_disc_t*
|
|
*/
|
|
|
|
if (!disc || disc->version < 20110101L || disc->version >= 30000101L)
|
|
{
|
|
e = (size_t*)disc;
|
|
disc = &compat;
|
|
iconv_init(disc, 0);
|
|
}
|
|
else
|
|
e = 0;
|
|
r = 0;
|
|
tn = 0;
|
|
ok = 1;
|
|
while (ok && *fn > 0)
|
|
{
|
|
if (!(tb = (char*)sfreserve(op, -(tn + 1), SF_WRITE|SF_LOCKR)) || !(tn = sfvalue(op)))
|
|
{
|
|
if (!r)
|
|
r = -1;
|
|
break;
|
|
}
|
|
ts = tb;
|
|
#if DEBUG_TRACE
|
|
error(DEBUG_TRACE, "AHA#%d iconv_write ts=%p tn=%d", __LINE__, ts, tn);
|
|
for (;;)
|
|
#else
|
|
while (*fn > 0 && _ast_iconv(cd, fb, fn, &ts, &tn) == (size_t)(-1))
|
|
#endif
|
|
{
|
|
#if DEBUG_TRACE
|
|
ssize_t _r;
|
|
error(DEBUG_TRACE, "AHA#%d iconv_write %d => %d `%-.*s'", __LINE__, *fn, tn, *fn, *fb);
|
|
_r = _ast_iconv(cd, fb, fn, &ts, &tn);
|
|
error(DEBUG_TRACE, "AHA#%d iconv_write %d => %d [%d]", __LINE__, *fn, tn, _r);
|
|
if (_r != (size_t)(-1) || !fn)
|
|
break;
|
|
#endif
|
|
switch (errno)
|
|
{
|
|
case E2BIG:
|
|
break;
|
|
case EINVAL:
|
|
if (disc->errorf)
|
|
(*disc->errorf)(NiL, disc, ERROR_SYSTEM|2, "incomplete multibyte sequence at offset %I*u", sizeof(fo), *fb - fo);
|
|
goto bad;
|
|
default:
|
|
if (disc->errorf)
|
|
(*disc->errorf)(NiL, disc, ERROR_SYSTEM|2, "invalid multibyte sequence at offset %I*u", sizeof(fo), *fb - fo);
|
|
bad:
|
|
disc->errors++;
|
|
if (!(disc->flags & ICONV_FATAL))
|
|
{
|
|
if (!(disc->flags & ICONV_OMIT) && tn > 0)
|
|
{
|
|
*ts++ = (disc->fill >= 0) ? disc->fill : **fb;
|
|
tn--;
|
|
}
|
|
(*fb)++;
|
|
(*fn)--;
|
|
continue;
|
|
}
|
|
ok = 0;
|
|
break;
|
|
}
|
|
break;
|
|
}
|
|
#if DEBUG_TRACE
|
|
error(DEBUG_TRACE, "AHA#%d iconv_write %d", __LINE__, ts - tb);
|
|
#endif
|
|
sfwrite(op, tb, ts - tb);
|
|
r += ts - tb;
|
|
}
|
|
if (e)
|
|
*e = disc->errors;
|
|
return r;
|
|
}
|
|
|
|
/*
|
|
* move n bytes from ip to op
|
|
*/
|
|
|
|
ssize_t
|
|
_ast_iconv_move(_ast_iconv_t cd, Sfio_t* ip, Sfio_t* op, size_t n, Iconv_disc_t* disc)
|
|
{
|
|
char* fb;
|
|
char* fs;
|
|
char* tb;
|
|
char* ts;
|
|
size_t* e;
|
|
size_t fe;
|
|
size_t fn;
|
|
size_t fo;
|
|
size_t ft;
|
|
size_t tn;
|
|
size_t i;
|
|
ssize_t r = 0;
|
|
int ok = 1;
|
|
int locked;
|
|
Iconv_disc_t compat;
|
|
|
|
/*
|
|
* the old API had optional size_t* instead of Iconv_disc_t*
|
|
*/
|
|
|
|
if (!disc || disc->version < 20110101L || disc->version >= 30000101L)
|
|
{
|
|
e = (size_t*)disc;
|
|
disc = &compat;
|
|
iconv_init(disc, 0);
|
|
}
|
|
else
|
|
e = 0;
|
|
tb = 0;
|
|
fe = OK;
|
|
ft = 0;
|
|
fn = n;
|
|
do
|
|
{
|
|
if (n != SF_UNBOUND)
|
|
n = -((ssize_t)(n & (((size_t)(~0))>>1)));
|
|
if ((!(fb = (char*)sfreserve(ip, n, locked = SF_LOCKR)) || !(fo = sfvalue(ip))) &&
|
|
(!(fb = (char*)sfreserve(ip, n, locked = 0)) || !(fo = sfvalue(ip))))
|
|
break;
|
|
fs = fb;
|
|
fn = fo;
|
|
if (!(tb = (char*)sfreserve(op, SF_UNBOUND, SF_WRITE|SF_LOCKR)))
|
|
{
|
|
if (!r)
|
|
r = -1;
|
|
break;
|
|
}
|
|
ts = tb;
|
|
tn = sfvalue(op);
|
|
while (fn > 0 && _ast_iconv(cd, &fs, &fn, &ts, &tn) == (size_t)(-1))
|
|
{
|
|
switch (errno)
|
|
{
|
|
case E2BIG:
|
|
break;
|
|
case EINVAL:
|
|
if (fe == ft + (fo - fn))
|
|
{
|
|
fe = OK;
|
|
if (disc->errorf)
|
|
(*disc->errorf)(NiL, disc, ERROR_SYSTEM|2, "incomplete multibyte sequence at offset %I*u", sizeof(ft), ft + (fo - fn));
|
|
goto bad;
|
|
}
|
|
fe = ft;
|
|
break;
|
|
default:
|
|
if (disc->errorf)
|
|
(*disc->errorf)(NiL, disc, ERROR_SYSTEM|2, "invalid multibyte sequence at offset %I*u", sizeof(ft), ft + (fo - fn));
|
|
bad:
|
|
disc->errors++;
|
|
if (!(disc->flags & ICONV_FATAL))
|
|
{
|
|
if (!(disc->flags & ICONV_OMIT) && tn > 0)
|
|
{
|
|
*ts++ = (disc->fill >= 0) ? disc->fill : *fs;
|
|
tn--;
|
|
}
|
|
fs++;
|
|
fn--;
|
|
continue;
|
|
}
|
|
ok = 0;
|
|
break;
|
|
}
|
|
break;
|
|
}
|
|
sfwrite(op, tb, ts - tb);
|
|
r += ts - tb;
|
|
ts = tb;
|
|
if (locked)
|
|
sfread(ip, fb, fs - fb);
|
|
else
|
|
for (i = fn; --i >= (fs - fb);)
|
|
sfungetc(ip, fb[i]);
|
|
if (n != SF_UNBOUND)
|
|
{
|
|
if (n <= (fs - fb))
|
|
break;
|
|
n -= fs - fb;
|
|
}
|
|
ft += (fs - fb);
|
|
if (fn == fo)
|
|
fn++;
|
|
} while (ok);
|
|
if (fb && locked)
|
|
sfread(ip, fb, 0);
|
|
if (tb)
|
|
{
|
|
sfwrite(op, tb, 0);
|
|
if (ts > tb)
|
|
{
|
|
sfwrite(op, tb, ts - tb);
|
|
r += ts - tb;
|
|
}
|
|
}
|
|
if (e)
|
|
*e = disc->errors;
|
|
return r;
|
|
}
|
|
|
|
/*
|
|
* iconv_list_t iterator
|
|
* call with arg 0 to start
|
|
* prev return value is current arg
|
|
*/
|
|
|
|
_ast_iconv_list_t*
|
|
_ast_iconv_list(_ast_iconv_list_t* cp)
|
|
{
|
|
#if _UWIN
|
|
struct dirent* ent;
|
|
|
|
if (!cp)
|
|
{
|
|
if (!(cp = newof(0, _ast_iconv_list_t, 1, 0)))
|
|
return ccmaplist(NiL);
|
|
if (!(cp->data = opendir(_win_maps)))
|
|
{
|
|
free(cp);
|
|
return ccmaplist(NiL);
|
|
}
|
|
}
|
|
if (cp->data)
|
|
{
|
|
if (ent = readdir((DIR*)cp->data))
|
|
{
|
|
cp->name = cp->match = cp->desc = (const char*)ent->d_name;
|
|
return cp;
|
|
}
|
|
closedir((DIR*)cp->data);
|
|
free(cp);
|
|
return ccmaplist(NiL);
|
|
}
|
|
#else
|
|
if (!cp)
|
|
return ccmaplist(NiL);
|
|
#endif
|
|
if (cp->ccode >= 0)
|
|
return (cp = ccmaplist(cp)) ? cp : (_ast_iconv_list_t*)codes;
|
|
return (++cp)->name ? cp : (_ast_iconv_list_t*)0;
|
|
}
|