mirror of
git://git.code.sf.net/p/cdesktopenv/code
synced 2025-03-09 15:50:02 +00:00
Multibyte character handling overhaul; allow global disable
The SHOPT_MULTIBYTE compile-time option did not make much sense as disabling it only disabled multibyte support for ksh/libshell, not libast or libcmd built-in commands. This commit allows disabling multibyte support for the entire codebase by defining the macro AST_NOMULTIBYTE (e.g. via CCFLAGS). This slightly speeds up the code and makes an optimised binary about 5% smaller. src/lib/libast/include/ast.h: - Add non-multibyte fallback versions of the multibyte macros that are used if AST_NOMULTIBYTE is defined. This should cause most multibyte handling to be automatically optimised out everywhere. - Reformat the multibyte macros for legibility. - Simplify mbchar() and mbsize() macros by defining them in terms of mbnchar() and mbnsize(), eliminating code duplication. - Correct non-multibyte fallback of mbwidth(). For consistent behaviour, control characters and out-of-range values should return -1 as they do for UTF-8. The fallback is now the same as default_wcwidth() in src/lib/libast/comp/setlocale.c. src/lib/libast/comp/setlocale.c: - If AST_NOMULTIBYTE is defined, do not compile in the debug and UTF-8 locale conversion functions, including several large conversion tables. Define their fallback macros as 0 as these are used as function pointers. src/cmd/ksh93/SHOPT.sh, src/cmd/ksh93/Mamfile: - Change the SHOPT_MULTIBYTE default to empty, indicating "probe". - Synchronise SHOPT_MULTIBYTE with !AST_NOMULTIBYTE by default. src/cmd/ksh93/include/defs.h: - When SHOPT_MULTIBYTE is zero but AST_NOMULTIBYTE is not non-zero, then enable AST_NOMULTIBYTE here to use the ast.h non-multibyte fallbacks for ksh. When this is done, the effect is that multibyte is optimized out for ksh only, as before. - Remove previous fallback for disabling multibyte (re:c2cb0eae). src/cmd/ksh93/include/lexstates.h, src/cmd/ksh93/sh/lex.c: - Define SETLEN() macro to assign to LEN (i.e. _Fcin.fclen) for multibyte only and do not assign to it directly. 
With no SHOPT_MULTIBYTE, define that macro as empty. This allows removing multiple '#if SHOPT_MULTIBYTE' directives from lex.c, as that code will all be optimised out automatically if it's disabled. src/cmd/ksh93/include/national.h, src/cmd/ksh93/sh/string.c: - Fix flagrantly incorrect non-multibyte fallback for sh_strchr(). The latter returns an integer offset (-1 if not found), whereas strchr(3) returns a char pointer (NULL if not found). Incorporate the fallback into the function for correct handling instead of falling back to strchr(3) directly. src/cmd/ksh93/sh/macro.c: - lastchar() optimisation: avoid function call if SHOPT_MULTIBYTE is enabled but we're not actually in a multibyte locale. src/cmd/ksh93/sh/name.c: - Use ja_size() even with SHOPT_MULTIBYTE disabled (re:2182ecfa). Though no regression tests failed, the non-multibyte fallback for typeset -L/-R/-Z length calculation was probably not quite correct as ja_size() does more. The ast.h change to mbwidth() ensures correct behaviour for non-multibyte locales. src/cmd/ksh93/tests/shtests: - Since its value in SHOPT.sh is now empty by default, add a quick feature test (for the length of the UTF-8 character 'é') to check if SHOPT_MULTIBYTE needs to be enabled for the regression tests.
This commit is contained in:
parent
59e79dc026
commit
7c4418ccdc
16 changed files with 147 additions and 101 deletions
|
|
@ -232,6 +232,8 @@ native_setlocale(int category, const char* locale)
|
|||
#define DZ (DB-DX*DC+1) /* wchar_t embedded size bits */
|
||||
#define DD 3 /* # mb delimiter chars <n...> */
|
||||
|
||||
#if !AST_NOMULTIBYTE
|
||||
|
||||
static unsigned char debug_order[] =
|
||||
{
|
||||
0, 1, 2, 3, 4, 5, 6, 7,
|
||||
|
|
@ -490,6 +492,18 @@ debug_strcoll(const char* a, const char* b)
|
|||
return strcmp(ab, bb);
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
#define debug_mbtowc 0
|
||||
#define debug_wctomb 0
|
||||
#define debug_mblen 0
|
||||
#define debug_wcwidth 0
|
||||
#define debug_alpha 0
|
||||
#define debug_strxfrm 0
|
||||
#define debug_strcoll 0
|
||||
|
||||
#endif /* !AST_NOMULTIBYTE */
|
||||
|
||||
/*
|
||||
* default locale
|
||||
*/
|
||||
|
|
@ -529,7 +543,7 @@ set_collate(Lc_category_t* cp)
|
|||
* workaround the interesting SJIS that translates unshifted 7 bit ASCII!
|
||||
*/
|
||||
|
||||
#if _hdr_wchar && _typ_mbstate_t && _lib_mbrtowc
|
||||
#if _hdr_wchar && _typ_mbstate_t && _lib_mbrtowc && !AST_NOMULTIBYTE
|
||||
|
||||
#define mb_state_zero ((mbstate_t*)&ast.pad[sizeof(ast.pad)-2*sizeof(mbstate_t)])
|
||||
#define mb_state ((mbstate_t*)&ast.pad[sizeof(ast.pad)-sizeof(mbstate_t)])
|
||||
|
|
@ -547,6 +561,8 @@ sjis_mbtowc(register wchar_t* p, register const char* s, size_t n)
|
|||
|
||||
#endif
|
||||
|
||||
#if !AST_NOMULTIBYTE
|
||||
|
||||
static int
|
||||
utf8_wctomb(char* u, wchar_t w)
|
||||
{
|
||||
|
|
@ -593,8 +609,6 @@ utf8_mbtowc(wchar_t* wp, const char* str, size_t n)
|
|||
register int c;
|
||||
register wchar_t w = 0;
|
||||
|
||||
if (!wp && !sp)
|
||||
ast.mb_sync = 0; /* assume call from mbinit() macro: reset global multibyte sync state */
|
||||
if (!sp || !n)
|
||||
return 0;
|
||||
if ((m = utf8tab[*sp]) > 0)
|
||||
|
|
@ -2196,6 +2210,16 @@ utf8_alpha(wchar_t c)
|
|||
return !!(utf8_wam[(c >> 3) & 0x1fff] & (1 << (c & 0x7)));
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
#define utf8_wctomb 0
|
||||
#define utf8_mbtowc 0
|
||||
#define utf8_mblen 0
|
||||
#define utf8_wcwidth 0
|
||||
#define utf8_alpha 0
|
||||
|
||||
#endif /* !AST_NOMULTIBYTE */
|
||||
|
||||
#if !_hdr_wchar || !_lib_wctype || !_lib_iswctype
|
||||
#undef iswalpha
|
||||
#define iswalpha default_iswalpha
|
||||
|
|
|
|||
|
|
@ -208,22 +208,46 @@ typedef struct
|
|||
* multibyte macros
|
||||
*/
|
||||
|
||||
#define mbmax() (ast.mb_cur_max)
|
||||
#define mberr() (ast.tmp_int<0)
|
||||
#if !AST_NOMULTIBYTE
|
||||
|
||||
#define mbcoll() (ast.mb_xfrm!=0)
|
||||
#define mbwide() (mbmax()>1)
|
||||
#define mbmax() ( ast.mb_cur_max )
|
||||
#define mberr() ( ast.tmp_int < 0 )
|
||||
|
||||
#define mb2wc(w,p,n) (*ast.mb_towc)(&w,(char*)p,n)
|
||||
#define mbchar(p) (mbwide()?((ast.tmp_int=(*ast.mb_towc)(&ast.tmp_wchar,(char*)(p),mbmax()))>0?((p+=ast.tmp_int),ast.tmp_wchar):(p+=ast.mb_sync+1,ast.tmp_int)):(*(unsigned char*)(p++)))
|
||||
#define mbnchar(p,n) (mbwide()?((ast.tmp_int=(*ast.mb_towc)(&ast.tmp_wchar,(char*)(p),n))>0?((p+=ast.tmp_int),ast.tmp_wchar):(p+=ast.mb_sync+1,ast.tmp_int)):(*(unsigned char*)(p++)))
|
||||
#define mbinit() (mbwide()?(*ast.mb_towc)((wchar_t*)0,(char*)0,mbmax()):0)
|
||||
#define mbsize(p) (mbwide()?(*ast.mb_len)((char*)(p),mbmax()):((p),1))
|
||||
#define mbnsize(p,n) (mbwide()?(*ast.mb_len)((char*)(p),n):((p),1))
|
||||
#define mbconv(s,w) (ast.mb_conv?(*ast.mb_conv)(s,w):((*(s)=(w)),1))
|
||||
#define mbwidth(w) (ast.mb_width?(*ast.mb_width)(w):1)
|
||||
#define mbxfrm(t,f,n) (mbcoll()?(*ast.mb_xfrm)((char*)(t),(char*)(f),n):0)
|
||||
#define mbalpha(w) (ast.mb_alpha?(*ast.mb_alpha)(w):isalpha((w)&0xff))
|
||||
#define mbcoll() ( ast.mb_xfrm != 0 )
|
||||
#define mbwide() ( mbmax() > 1 )
|
||||
|
||||
#define mb2wc(w,p,n) ( *ast.mb_towc)(&w, (char*)p, n )
|
||||
#define mbchar(p) mbnchar(p, mbmax())
|
||||
#define mbnchar(p,n) ( mbwide() ? ( (ast.tmp_int = (*ast.mb_towc)(&ast.tmp_wchar, (char*)(p), n)) > 0 ? \
|
||||
( (p+=ast.tmp_int),ast.tmp_wchar) : (p+=ast.mb_sync+1,ast.tmp_int) ) : (*(unsigned char*)(p++)) )
|
||||
#define mbinit() ( ast.mb_sync = 0 )
|
||||
#define mbsize(p) mbnsize(p, mbmax())
|
||||
#define mbnsize(p,n) ( mbwide() ? (*ast.mb_len)((char*)(p), n) : ((p), 1) )
|
||||
#define mbconv(s,w) ( ast.mb_conv ? (*ast.mb_conv)(s,w) : ((*(s)=(w)), 1) )
|
||||
#define mbwidth(w) ( ast.mb_width ? (*ast.mb_width)(w) : (w >= 0 && w <= 255 && !iscntrl(w) ? 1 : -1) )
|
||||
#define mbxfrm(t,f,n) ( mbcoll() ? (*ast.mb_xfrm)((char*)(t), (char*)(f), n) : 0 )
|
||||
#define mbalpha(w) ( ast.mb_alpha ? (*ast.mb_alpha)(w) : isalpha((w) & 0xff) )
|
||||
|
||||
#else
|
||||
|
||||
#define mbmax() 1
|
||||
#define mberr() 0
|
||||
|
||||
#define mbcoll() 0
|
||||
#define mbwide() 0
|
||||
|
||||
#define mb2wc(w,p,n) ( (w) = *(unsigned char*)(p), 1 )
|
||||
#define mbchar(p) ( *(unsigned char*)(p++) )
|
||||
#define mbnchar(p,n) mbchar(p)
|
||||
#define mbinit() 0
|
||||
#define mbsize(p) 1
|
||||
#define mbnsize(p,n) 1
|
||||
#define mbconv(s,w) ( (*(s)=(w)), 1 )
|
||||
#define mbwidth(w) ( w >= 0 && w <= 255 && !iscntrl(w) ? 1 : -1 )
|
||||
#define mbxfrm(t,f,n) 0
|
||||
#define mbalpha(w) ( isalpha((w) & 0xff) )
|
||||
|
||||
#endif /* !AST_NOMULTIBYTE */
|
||||
|
||||
/*
|
||||
* common macros
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue