mirror of
git://git.code.sf.net/p/cdesktopenv/code
synced 2025-03-09 15:50:02 +00:00
Multibyte character handling overhaul; allow global disable
The SHOPT_MULTIBYTE compile-time option did not make much sense as disabling it only disabled multibyte support for ksh/libshell, not libast or libcmd built-in commands. This commit allows disabling multibyte support for the entire codebase by defining the macro AST_NOMULTIBYTE (e.g. via CCFLAGS). This slightly speeds up the code and makes an optimised binary about 5% smaller. src/lib/libast/include/ast.h: - Add non-multibyte fallback versions of the multibyte macros that are used if AST_NOMULTIBYTE is defined. This should cause most multibyte handling to be automatically optimised out everywhere. - Reformat the multibyte macros for legibility. - Simplify mbchar() and mbsize() macros by defining them in terms of mbnchar() and mbnsize(), eliminating code duplication. - Correct non-multibyte fallback of mbwidth(). For consistent behaviour, control characters and out-of-range values should return -1 as they do for UTF-8. The fallback is now the same as default_wcwidth() in src/lib/libast/comp/setlocale.c. src/lib/libast/comp/setlocale.c: - If AST_NOMULTIBYTE is defined, do not compile in the debug and UTF-8 locale conversion functions, including several large conversion tables. Define their fallback macros as 0 as these are used as function pointers. src/cmd/ksh93/SHOPT.sh, src/cmd/ksh93/Mamfile: - Change the SHOPT_MULTIBYTE default to empty, indicating "probe". - Synchronise SHOPT_MULTIBYTE with !AST_NOMULTIBYTE by default. src/cmd/ksh93/include/defs.h: - When SHOPT_MULTIBYTE is zero but AST_NOMULTIBYTE is not non-zero, then enable AST_NOMULTIBYTE here to use the ast.h non-multibyte fallbacks for ksh. When this is done, the effect is that multibyte is optimized out for ksh only, as before. - Remove previous fallback for disabling multibyte (re:c2cb0eae).
src/cmd/ksh93/include/lexstates.h, src/cmd/ksh93/sh/lex.c: - Define SETLEN() macro to assign to LEN (i.e. _Fcin.fclen) for multibyte only and do not assign to it directly. With no SHOPT_MULTIBYTE, define that macro as empty. This allows removing multiple '#if SHOPT_MULTIBYTE' directives from lex.c, as that code will all be optimised out automatically if it's disabled. src/cmd/ksh93/include/national.h, src/cmd/ksh93/sh/string.c: - Fix flagrantly incorrect non-multibyte fallback for sh_strchr(). The latter returns an integer offset (-1 if not found), whereas strchr(3) returns a char pointer (NULL if not found). Incorporate the fallback into the function for correct handling instead of falling back to strchr(3) directly. src/cmd/ksh93/sh/macro.c: - lastchar() optimisation: avoid function call if SHOPT_MULTIBYTE is enabled but we're not actually in a multibyte locale. src/cmd/ksh93/sh/name.c: - Use ja_size() even with SHOPT_MULTIBYTE disabled (re:2182ecfa).
Though no regression tests failed, the non-multibyte fallback for typeset -L/-R/-Z length calculation was probably not quite correct as ja_size() does more. The ast.h change to mbwidth() ensures correct behaviour for non-multibyte locales. src/cmd/ksh93/tests/shtests: - Since its value in SHOPT.sh is now empty by default, add a quick feature test (for the length of the UTF-8 character 'é') to check if SHOPT_MULTIBYTE needs to be enabled for the regression tests.
This commit is contained in:
parent
59e79dc026
commit
7c4418ccdc
16 changed files with 147 additions and 101 deletions
|
@ -27,6 +27,12 @@ make install
|
||||||
exec - SHOPT()
|
exec - SHOPT()
|
||||||
exec - {
|
exec - {
|
||||||
exec - case $1 in
|
exec - case $1 in
|
||||||
|
exec - 'MULTIBYTE=')
|
||||||
|
exec - echo
|
||||||
|
exec - echo '#if !defined(SHOPT_MULTIBYTE) && !AST_NOMULTIBYTE'
|
||||||
|
exec - echo '#define SHOPT_MULTIBYTE 1'
|
||||||
|
exec - echo '#endif'
|
||||||
|
exec - ;;
|
||||||
exec - *=?*) echo
|
exec - *=?*) echo
|
||||||
exec - echo "#ifndef SHOPT_${1%%=*}"
|
exec - echo "#ifndef SHOPT_${1%%=*}"
|
||||||
exec - echo "#define SHOPT_${1%%=*} ${1#*=}"
|
exec - echo "#define SHOPT_${1%%=*} ${1#*=}"
|
||||||
|
|
|
@ -100,8 +100,9 @@ The options have the following defaults and meanings:
|
||||||
As of 2021-05-10, no tool that can parse this database is
|
As of 2021-05-10, no tool that can parse this database is
|
||||||
known. If you know of any, please contact us.
|
known. If you know of any, please contact us.
|
||||||
|
|
||||||
MULTIBYTE on Multibyte character handling. Requires mblen() and
|
MULTIBYTE Multibyte character handling. This is on by default unless
|
||||||
mbctowc().
|
the flag -DAST_NOMULTIBYTE is passed to the compiler via
|
||||||
|
CCFLAGS. The UTF-8 character set is fully supported.
|
||||||
|
|
||||||
NAMESPACE on Adds a 'namespace' reserved word that allows defining name
|
NAMESPACE on Adds a 'namespace' reserved word that allows defining name
|
||||||
spaces. Variables and functions defined within a block like
|
spaces. Variables and functions defined within a block like
|
||||||
|
@ -191,6 +192,11 @@ Note: Do not add compiler flags that cause the compiler to emit terminal
|
||||||
escape codes, such as -fdiagnostics-color=always; this will cause the build
|
escape codes, such as -fdiagnostics-color=always; this will cause the build
|
||||||
to fail as the probing code greps compiler diagnostics.
|
to fail as the probing code greps compiler diagnostics.
|
||||||
|
|
||||||
|
If you are certain that you don't need support for UTF-8 and other multibyte
|
||||||
|
character locales and really want to save some memory and CPU cycles, add
|
||||||
|
'-DAST_NOMULTIBYTE' to CCFLAGS to compile out all multibyte character
|
||||||
|
handling in ksh and supporting libraries. Not recommended for most users.
|
||||||
|
|
||||||
For more information, run:
|
For more information, run:
|
||||||
|
|
||||||
bin/package help
|
bin/package help
|
||||||
|
|
|
@ -25,7 +25,7 @@ SHOPT GLOBCASEDET= # -o globcasedetect: adapt globbing/completion to case-inse
|
||||||
SHOPT HISTEXPAND=1 # csh-style history file expansions
|
SHOPT HISTEXPAND=1 # csh-style history file expansions
|
||||||
SHOPT KIA= # ksh -R <outfile> <script> generates cross-ref database from script
|
SHOPT KIA= # ksh -R <outfile> <script> generates cross-ref database from script
|
||||||
SHOPT MKSERVICE=0 # enable the mkservice and eloop builtins
|
SHOPT MKSERVICE=0 # enable the mkservice and eloop builtins
|
||||||
SHOPT MULTIBYTE=1 # multibyte character handling
|
SHOPT MULTIBYTE= # multibyte character handling
|
||||||
SHOPT NAMESPACE=1 # allow namespaces
|
SHOPT NAMESPACE=1 # allow namespaces
|
||||||
SHOPT NOECHOE=0 # turn off 'echo -e' when SHOPT_ECHOPRINT is disabled
|
SHOPT NOECHOE=0 # turn off 'echo -e' when SHOPT_ECHOPRINT is disabled
|
||||||
SHOPT OLDTERMIO= # support both TCGETA and TCGETS
|
SHOPT OLDTERMIO= # support both TCGETA and TCGETS
|
||||||
|
|
|
@ -28,6 +28,12 @@
|
||||||
#ifndef defs_h_defined
|
#ifndef defs_h_defined
|
||||||
#define defs_h_defined
|
#define defs_h_defined
|
||||||
|
|
||||||
|
/* In case multibyte support was disabled for ksh only (SHOPT_MULTIBYTE==0) and not for libast */
|
||||||
|
#if !SHOPT_MULTIBYTE && !AST_NOMULTIBYTE
|
||||||
|
# undef AST_NOMULTIBYTE
|
||||||
|
# define AST_NOMULTIBYTE 1
|
||||||
|
#endif
|
||||||
|
|
||||||
#include <ast.h>
|
#include <ast.h>
|
||||||
#if !defined(AST_VERSION) || AST_VERSION < 20220208
|
#if !defined(AST_VERSION) || AST_VERSION < 20220208
|
||||||
#error libast version 20220208 or later is required
|
#error libast version 20220208 or later is required
|
||||||
|
@ -35,20 +41,9 @@
|
||||||
#if !_lib_fork
|
#if !_lib_fork
|
||||||
#error In 2021, ksh joined the 21st century and started requiring fork(2).
|
#error In 2021, ksh joined the 21st century and started requiring fork(2).
|
||||||
#endif
|
#endif
|
||||||
#if !SHOPT_MULTIBYTE
|
|
||||||
/*
|
|
||||||
* Disable multibyte without need for excessive '#if SHOPT_MULTIBYTE' preprocessor conditionals.
|
|
||||||
* If we redefine the maximum character size mbmax() as 1 byte, the mbwide() macro will always
|
|
||||||
* evaluate to 0. All the other multibyte macros have multibtye code conditional upon mbwide(),
|
|
||||||
* so the compiler should optimize all of that code away. See src/lib/libast/include/ast.h
|
|
||||||
*/
|
|
||||||
# undef mbmax
|
|
||||||
# define mbmax() 1
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#include <sfio.h>
|
#include <sfio.h>
|
||||||
#include <error.h>
|
#include <error.h>
|
||||||
#include "shopt.h"
|
|
||||||
#include "FEATURE/externs"
|
#include "FEATURE/externs"
|
||||||
#include "FEATURE/options"
|
#include "FEATURE/options"
|
||||||
#include <cdt.h>
|
#include <cdt.h>
|
||||||
|
|
|
@ -2,7 +2,7 @@
|
||||||
* *
|
* *
|
||||||
* This software is part of the ast package *
|
* This software is part of the ast package *
|
||||||
* Copyright (c) 1982-2011 AT&T Intellectual Property *
|
* Copyright (c) 1982-2011 AT&T Intellectual Property *
|
||||||
* Copyright (c) 2020-2021 Contributors to ksh 93u+m *
|
* Copyright (c) 2020-2022 Contributors to ksh 93u+m *
|
||||||
* and is licensed under the *
|
* and is licensed under the *
|
||||||
* Eclipse Public License, Version 1.0 *
|
* Eclipse Public License, Version 1.0 *
|
||||||
* by AT&T Intellectual Property *
|
* by AT&T Intellectual Property *
|
||||||
|
@ -45,6 +45,7 @@ typedef struct _fcin
|
||||||
|
|
||||||
#if SHOPT_MULTIBYTE
|
#if SHOPT_MULTIBYTE
|
||||||
# define fcmbget(x) (mbwide()?_fcmbget(x):fcget())
|
# define fcmbget(x) (mbwide()?_fcmbget(x):fcget())
|
||||||
|
extern int _fcmbget(short*);
|
||||||
#else
|
#else
|
||||||
# define fcmbget(x) (fcget())
|
# define fcmbget(x) (fcget())
|
||||||
#endif
|
#endif
|
||||||
|
@ -64,7 +65,6 @@ extern int fcfill(void);
|
||||||
extern int fcfopen(Sfio_t*);
|
extern int fcfopen(Sfio_t*);
|
||||||
extern int fcclose(void);
|
extern int fcclose(void);
|
||||||
void fcnotify(void(*)(Sfio_t*,const char*,int,void*),void*);
|
void fcnotify(void(*)(Sfio_t*,const char*,int,void*),void*);
|
||||||
extern int _fcmbget(short*);
|
|
||||||
|
|
||||||
extern Fcin_t _Fcin; /* used by macros */
|
extern Fcin_t _Fcin; /* used by macros */
|
||||||
|
|
||||||
|
|
|
@ -2,7 +2,7 @@
|
||||||
* *
|
* *
|
||||||
* This software is part of the ast package *
|
* This software is part of the ast package *
|
||||||
* Copyright (c) 1982-2011 AT&T Intellectual Property *
|
* Copyright (c) 1982-2011 AT&T Intellectual Property *
|
||||||
* Copyright (c) 2020-2021 Contributors to ksh 93u+m *
|
* Copyright (c) 2020-2022 Contributors to ksh 93u+m *
|
||||||
* and is licensed under the *
|
* and is licensed under the *
|
||||||
* Eclipse Public License, Version 1.0 *
|
* Eclipse Public License, Version 1.0 *
|
||||||
* by AT&T Intellectual Property *
|
* by AT&T Intellectual Property *
|
||||||
|
@ -107,14 +107,17 @@
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#undef LEN
|
#undef LEN
|
||||||
|
#undef SETLEN
|
||||||
#if SHOPT_MULTIBYTE
|
#if SHOPT_MULTIBYTE
|
||||||
# define LEN _Fcin.fclen
|
# define LEN _Fcin.fclen
|
||||||
|
# define SETLEN(x) (_Fcin.fclen = x)
|
||||||
# define isaname(c) ((c)>0x7f?isalpha(c): sh_lexstates[ST_NAME][(c)]==0)
|
# define isaname(c) ((c)>0x7f?isalpha(c): sh_lexstates[ST_NAME][(c)]==0)
|
||||||
# define isaletter(c) ((c)>0x7f?isalpha(c): sh_lexstates[ST_DOL][(c)]==S_ALP && (c)!='.')
|
# define isaletter(c) ((c)>0x7f?isalpha(c): sh_lexstates[ST_DOL][(c)]==S_ALP && (c)!='.')
|
||||||
#else
|
#else
|
||||||
# undef mbwide
|
# undef mbwide
|
||||||
# define mbwide() (0)
|
# define mbwide() (0)
|
||||||
# define LEN 1
|
# define LEN 1
|
||||||
|
# define SETLEN(x) (x)
|
||||||
# define isaname(c) (sh_lexstates[ST_NAME][c]==0)
|
# define isaname(c) (sh_lexstates[ST_NAME][c]==0)
|
||||||
# define isaletter(c) (sh_lexstates[ST_DOL][c]==S_ALP && (c)!='.')
|
# define isaletter(c) (sh_lexstates[ST_DOL][c]==S_ALP && (c)!='.')
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -2,7 +2,7 @@
|
||||||
* *
|
* *
|
||||||
* This software is part of the ast package *
|
* This software is part of the ast package *
|
||||||
* Copyright (c) 1982-2011 AT&T Intellectual Property *
|
* Copyright (c) 1982-2011 AT&T Intellectual Property *
|
||||||
* Copyright (c) 2020-2021 Contributors to ksh 93u+m *
|
* Copyright (c) 2020-2022 Contributors to ksh 93u+m *
|
||||||
* and is licensed under the *
|
* and is licensed under the *
|
||||||
* Eclipse Public License, Version 1.0 *
|
* Eclipse Public License, Version 1.0 *
|
||||||
* by AT&T Intellectual Property *
|
* by AT&T Intellectual Property *
|
||||||
|
@ -27,13 +27,9 @@
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#if SHOPT_MULTIBYTE
|
#if SHOPT_MULTIBYTE
|
||||||
|
|
||||||
# ifndef MARKER
|
# ifndef MARKER
|
||||||
# define MARKER 0xdfff /* Must be invalid character */
|
# define MARKER 0xdfff /* Must be invalid character */
|
||||||
# endif
|
# endif
|
||||||
|
#endif /* SHOPT_MULTIBYTE */
|
||||||
|
|
||||||
extern int sh_strchr(const char*,const char*);
|
extern int sh_strchr(const char*,const char*);
|
||||||
|
|
||||||
#else
|
|
||||||
# define sh_strchr(s,c) strchr(s,*(c))
|
|
||||||
#endif /* SHOPT_MULTIBYTE */
|
|
||||||
|
|
|
@ -151,6 +151,7 @@ extern void fcrestore(Fcin_t *fp)
|
||||||
_Fcin = *fp;
|
_Fcin = *fp;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#if SHOPT_MULTIBYTE
|
||||||
int _fcmbget(short *len)
|
int _fcmbget(short *len)
|
||||||
{
|
{
|
||||||
register int c;
|
register int c;
|
||||||
|
@ -168,3 +169,4 @@ int _fcmbget(short *len)
|
||||||
}
|
}
|
||||||
return(c);
|
return(c);
|
||||||
}
|
}
|
||||||
|
#endif /* SHOPT_MULTIBYTE */
|
||||||
|
|
|
@ -260,9 +260,7 @@ int sh_lex(Lex_t* lp)
|
||||||
register int n, c, mode=ST_BEGIN, wordflags=0;
|
register int n, c, mode=ST_BEGIN, wordflags=0;
|
||||||
int inlevel=lp->lexd.level, assignment=0, ingrave=0;
|
int inlevel=lp->lexd.level, assignment=0, ingrave=0;
|
||||||
int epatchar=0;
|
int epatchar=0;
|
||||||
#if SHOPT_MULTIBYTE
|
SETLEN(1);
|
||||||
LEN=1;
|
|
||||||
#endif /* SHOPT_MULTIBYTE */
|
|
||||||
if(lp->lexd.paren)
|
if(lp->lexd.paren)
|
||||||
{
|
{
|
||||||
lp->lexd.paren = 0;
|
lp->lexd.paren = 0;
|
||||||
|
@ -1819,18 +1817,15 @@ static int here_copy(Lex_t *lp,register struct ionod *iop)
|
||||||
if(n!=S_NL)
|
if(n!=S_NL)
|
||||||
{
|
{
|
||||||
/* skip over regular characters */
|
/* skip over regular characters */
|
||||||
#if SHOPT_MULTIBYTE
|
|
||||||
do
|
do
|
||||||
{
|
{
|
||||||
if(fcleft()< MB_LEN_MAX && mbsize(fcseek(0))<0)
|
if(mbsize(fcseek(0)) < 0 && fcleft() < MB_LEN_MAX)
|
||||||
{
|
{
|
||||||
n = S_EOF;
|
n = S_EOF;
|
||||||
LEN = -fcleft();
|
SETLEN(-fcleft());
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#endif /* SHOPT_MULTIBYTE */
|
|
||||||
|
|
||||||
while((n=STATE(state,c))==0);
|
while((n=STATE(state,c))==0);
|
||||||
}
|
}
|
||||||
if(n==S_EOF || !(c=fcget()))
|
if(n==S_EOF || !(c=fcget()))
|
||||||
|
@ -1846,17 +1841,15 @@ static int here_copy(Lex_t *lp,register struct ionod *iop)
|
||||||
if(!lp->lexd.dolparen && (c=sfwrite(sp,bufp,c))>0)
|
if(!lp->lexd.dolparen && (c=sfwrite(sp,bufp,c))>0)
|
||||||
iop->iosize += c;
|
iop->iosize += c;
|
||||||
}
|
}
|
||||||
#if SHOPT_MULTIBYTE
|
|
||||||
if(LEN==0)
|
if(LEN==0)
|
||||||
LEN=1;
|
SETLEN(1);
|
||||||
if(LEN < 0)
|
if(LEN < 0)
|
||||||
{
|
{
|
||||||
n = LEN;
|
n = LEN;
|
||||||
c = fcmbget(&LEN);
|
c = fcmbget(&LEN);
|
||||||
LEN += n;
|
SETLEN(LEN + n);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
#endif /* SHOPT_MULTIBYTE */
|
|
||||||
c = lexfill(lp);
|
c = lexfill(lp);
|
||||||
if(c<0)
|
if(c<0)
|
||||||
break;
|
break;
|
||||||
|
@ -1874,10 +1867,8 @@ static int here_copy(Lex_t *lp,register struct ionod *iop)
|
||||||
sfputc(sp,'\\');
|
sfputc(sp,'\\');
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#if SHOPT_MULTIBYTE
|
|
||||||
if(LEN < 1)
|
if(LEN < 1)
|
||||||
LEN = 1;
|
SETLEN(1);
|
||||||
#endif
|
|
||||||
bufp = fcseek(-LEN);
|
bufp = fcseek(-LEN);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
|
|
|
@ -105,7 +105,8 @@ static void endfield(Mac_t*,int);
|
||||||
static char *mac_getstring(char*);
|
static char *mac_getstring(char*);
|
||||||
static int charlen(const char*,int);
|
static int charlen(const char*,int);
|
||||||
#if SHOPT_MULTIBYTE
|
#if SHOPT_MULTIBYTE
|
||||||
static char *lastchar(const char*,const char*);
|
# define lastchar(string,endstring) (mbwide() ? _lastchar(string,endstring) : (endstring))
|
||||||
|
static char *_lastchar(const char*,const char*);
|
||||||
#else
|
#else
|
||||||
# define lastchar(string,endstring) (endstring)
|
# define lastchar(string,endstring) (endstring)
|
||||||
#endif /* SHOPT_MULTIBYTE */
|
#endif /* SHOPT_MULTIBYTE */
|
||||||
|
@ -2570,7 +2571,7 @@ static int substring(register const char *string,size_t len,const char *pat,int
|
||||||
}
|
}
|
||||||
|
|
||||||
#if SHOPT_MULTIBYTE
|
#if SHOPT_MULTIBYTE
|
||||||
static char *lastchar(const char *string, const char *endstring)
|
static char *_lastchar(const char *string, const char *endstring)
|
||||||
{
|
{
|
||||||
register char *str = (char*)string;
|
register char *str = (char*)string;
|
||||||
register int c;
|
register int c;
|
||||||
|
|
|
@ -1549,12 +1549,10 @@ skip:
|
||||||
return(np);
|
return(np);
|
||||||
}
|
}
|
||||||
|
|
||||||
#if SHOPT_MULTIBYTE
|
|
||||||
static int ja_size(char*, int, int);
|
static int ja_size(char*, int, int);
|
||||||
static void ja_restore(void);
|
static void ja_restore(void);
|
||||||
static char *savep;
|
static char *savep;
|
||||||
static char savechars[8+1];
|
static char savechars[8+1];
|
||||||
#endif /* SHOPT_MULTIBYTE */
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* put value <string> into name-value node <np>.
|
* put value <string> into name-value node <np>.
|
||||||
|
@ -1863,22 +1861,8 @@ void nv_putval(register Namval_t *np, const char *string, int flags)
|
||||||
for( ; *sp == '0'; sp++)
|
for( ; *sp == '0'; sp++)
|
||||||
;
|
;
|
||||||
size = nv_size(np);
|
size = nv_size(np);
|
||||||
#if SHOPT_MULTIBYTE
|
|
||||||
if(size)
|
if(size)
|
||||||
size = ja_size((char*)sp,size,nv_isattr(np,NV_RJUST|NV_ZFILL));
|
size = ja_size((char*)sp,size,nv_isattr(np,NV_RJUST|NV_ZFILL));
|
||||||
#else
|
|
||||||
/* fallback: consider control characters to have zero width */
|
|
||||||
if(size)
|
|
||||||
{
|
|
||||||
char *c = (char*)sp;
|
|
||||||
int s = size;
|
|
||||||
for( ; *c && s; c++)
|
|
||||||
if(iscntrl(*c))
|
|
||||||
size++;
|
|
||||||
else
|
|
||||||
s--;
|
|
||||||
}
|
|
||||||
#endif /* SHOPT_MULTIBYTE */
|
|
||||||
}
|
}
|
||||||
if(!up->cp || *up->cp==0)
|
if(!up->cp || *up->cp==0)
|
||||||
flags &= ~NV_APPEND;
|
flags &= ~NV_APPEND;
|
||||||
|
@ -2005,11 +1989,9 @@ void nv_putval(register Namval_t *np, const char *string, int flags)
|
||||||
cp = cp+size;
|
cp = cp+size;
|
||||||
for (; dp < cp; *dp++ = ' ');
|
for (; dp < cp; *dp++ = ' ');
|
||||||
}
|
}
|
||||||
#if SHOPT_MULTIBYTE
|
|
||||||
/* restore original string */
|
/* restore original string */
|
||||||
if(savep)
|
if(savep)
|
||||||
ja_restore();
|
ja_restore();
|
||||||
#endif /* SHOPT_MULTIBYTE */
|
|
||||||
}
|
}
|
||||||
if(flags&NV_APPEND)
|
if(flags&NV_APPEND)
|
||||||
stakseek(offset);
|
stakseek(offset);
|
||||||
|
@ -2066,13 +2048,11 @@ static void rightjust(char *str, int size, int fill)
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
#if SHOPT_MULTIBYTE
|
|
||||||
/*
|
/*
|
||||||
* handle left and right justified fields for multi-byte chars
|
* Handle left and right justified fields for multibyte characters.
|
||||||
* given physical size, return a logical size which reflects the
|
* Given physical size, return a logical size that reflects the screen width.
|
||||||
* screen width of multi-byte characters
|
* Multi-width characters are replaced by spaces if they cross the boundary.
|
||||||
* Multi-width characters replaced by spaces if they cross the boundary
|
* <type> is non-zero for right-justified fields.
|
||||||
* <type> is non-zero for right justified fields
|
|
||||||
*/
|
*/
|
||||||
|
|
||||||
static int ja_size(char *str,int size,int type)
|
static int ja_size(char *str,int size,int type)
|
||||||
|
@ -2127,7 +2107,6 @@ static void rightjust(char *str, int size, int fill)
|
||||||
*savep++ = *cp++;
|
*savep++ = *cp++;
|
||||||
savep = 0;
|
savep = 0;
|
||||||
}
|
}
|
||||||
#endif /* SHOPT_MULTIBYTE */
|
|
||||||
|
|
||||||
static char *staknam(register Namval_t *np, char *value)
|
static char *staknam(register Namval_t *np, char *value)
|
||||||
{
|
{
|
||||||
|
|
|
@ -659,11 +659,17 @@ char *sh_fmtqf(const char *string, int single, int fold)
|
||||||
return(stakptr(offset));
|
return(stakptr(offset));
|
||||||
}
|
}
|
||||||
|
|
||||||
#if SHOPT_MULTIBYTE
|
/*
|
||||||
|
* Find a multi-byte character in a string.
|
||||||
|
* NOTE: Unlike strchr(3), the return value is an integer offset or -1 if not found.
|
||||||
|
*/
|
||||||
int sh_strchr(const char *string, register const char *dp)
|
int sh_strchr(const char *string, register const char *dp)
|
||||||
|
{
|
||||||
|
const char *cp;
|
||||||
|
if(mbwide())
|
||||||
{
|
{
|
||||||
wchar_t c, d;
|
wchar_t c, d;
|
||||||
register const char *cp=string;
|
cp = string;
|
||||||
mbinit();
|
mbinit();
|
||||||
d = mbchar(dp);
|
d = mbchar(dp);
|
||||||
mbinit();
|
mbinit();
|
||||||
|
@ -676,7 +682,9 @@ char *sh_fmtqf(const char *string, int single, int fold)
|
||||||
return(cp-string);
|
return(cp-string);
|
||||||
return(-1);
|
return(-1);
|
||||||
}
|
}
|
||||||
#endif /* SHOPT_MULTIBYTE */
|
cp = strchr(string,*dp);
|
||||||
|
return(cp ? cp-string : -1);
|
||||||
|
}
|
||||||
|
|
||||||
const char *_sh_translate(const char *message)
|
const char *_sh_translate(const char *message)
|
||||||
{
|
{
|
||||||
|
|
|
@ -476,7 +476,17 @@ typeset -l x=
|
||||||
|
|
||||||
unset x
|
unset x
|
||||||
typeset -L4 x=$'\001abcdef'
|
typeset -L4 x=$'\001abcdef'
|
||||||
[[ ${#x} == 5 ]] || err_exit "width of character '\001' is not zero"
|
exp=$'\001abcd'
|
||||||
|
[[ e=${#x} -eq 5 && $x == "$exp" ]] || err_exit "typeset -L: width of control character '\001' is not zero" \
|
||||||
|
"(expected length 5 and $(printf %q "$exp"), got length $e and $(printf %q "$x"))"
|
||||||
|
typeset -R10 x=$'a\tb'
|
||||||
|
exp=$' a\tb'
|
||||||
|
[[ e=${#x} -eq 11 && $x == "$exp" ]] || err_exit "typeset -R: width of control character '\t' is not zero" \
|
||||||
|
"(expected length 11 and $(printf %q "$exp"), got length $e and $(printf %q "$x"))"
|
||||||
|
typeset -Z10 x=$'1\t2'
|
||||||
|
exp=$'000000001\t2'
|
||||||
|
[[ e=${#x} -eq 11 && $x == "$exp" ]] || err_exit "typeset -Z: width of control character '\t' is not zero" \
|
||||||
|
"(expected length 11 and $(printf %q "$exp"), got length $e and $(printf %q "$x"))"
|
||||||
|
|
||||||
unset x
|
unset x
|
||||||
typeset -L x=-1
|
typeset -L x=-1
|
||||||
|
|
|
@ -312,8 +312,9 @@ SHOPT()
|
||||||
}
|
}
|
||||||
. "${SHOPTFILE:-../SHOPT.sh}"
|
. "${SHOPTFILE:-../SHOPT.sh}"
|
||||||
unset -f SHOPT
|
unset -f SHOPT
|
||||||
|
[[ -n $SHOPT_MULTIBYTE ]] || SHOPT_MULTIBYTE=$( LC_ALL=C.UTF-8; x=$'\xc3\xa9'; print $(( ${#x}==1 )) )
|
||||||
if (( !SHOPT_MULTIBYTE && utf8 && !posix && !compile ))
|
if (( !SHOPT_MULTIBYTE && utf8 && !posix && !compile ))
|
||||||
then echo "The -u/--utf8 option is unavailable as SHOPT_MULTIBYTE is turned off in ${SHOPTFILE:-SHOPT.sh}." >&2
|
then echo "-u/--utf8 is unavailable because multibyte support was not compiled in." >&2
|
||||||
exit 1
|
exit 1
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
|
|
@ -232,6 +232,8 @@ native_setlocale(int category, const char* locale)
|
||||||
#define DZ (DB-DX*DC+1) /* wchar_t embedded size bits */
|
#define DZ (DB-DX*DC+1) /* wchar_t embedded size bits */
|
||||||
#define DD 3 /* # mb delimiter chars <n...> */
|
#define DD 3 /* # mb delimiter chars <n...> */
|
||||||
|
|
||||||
|
#if !AST_NOMULTIBYTE
|
||||||
|
|
||||||
static unsigned char debug_order[] =
|
static unsigned char debug_order[] =
|
||||||
{
|
{
|
||||||
0, 1, 2, 3, 4, 5, 6, 7,
|
0, 1, 2, 3, 4, 5, 6, 7,
|
||||||
|
@ -490,6 +492,18 @@ debug_strcoll(const char* a, const char* b)
|
||||||
return strcmp(ab, bb);
|
return strcmp(ab, bb);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#else
|
||||||
|
|
||||||
|
#define debug_mbtowc 0
|
||||||
|
#define debug_wctomb 0
|
||||||
|
#define debug_mblen 0
|
||||||
|
#define debug_wcwidth 0
|
||||||
|
#define debug_alpha 0
|
||||||
|
#define debug_strxfrm 0
|
||||||
|
#define debug_strcoll 0
|
||||||
|
|
||||||
|
#endif /* !AST_NOMULTIBYTE */
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* default locale
|
* default locale
|
||||||
*/
|
*/
|
||||||
|
@ -529,7 +543,7 @@ set_collate(Lc_category_t* cp)
|
||||||
* workaround the interesting SJIS that translates unshifted 7 bit ASCII!
|
* workaround the interesting SJIS that translates unshifted 7 bit ASCII!
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#if _hdr_wchar && _typ_mbstate_t && _lib_mbrtowc
|
#if _hdr_wchar && _typ_mbstate_t && _lib_mbrtowc && !AST_NOMULTIBYTE
|
||||||
|
|
||||||
#define mb_state_zero ((mbstate_t*)&ast.pad[sizeof(ast.pad)-2*sizeof(mbstate_t)])
|
#define mb_state_zero ((mbstate_t*)&ast.pad[sizeof(ast.pad)-2*sizeof(mbstate_t)])
|
||||||
#define mb_state ((mbstate_t*)&ast.pad[sizeof(ast.pad)-sizeof(mbstate_t)])
|
#define mb_state ((mbstate_t*)&ast.pad[sizeof(ast.pad)-sizeof(mbstate_t)])
|
||||||
|
@ -547,6 +561,8 @@ sjis_mbtowc(register wchar_t* p, register const char* s, size_t n)
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#if !AST_NOMULTIBYTE
|
||||||
|
|
||||||
static int
|
static int
|
||||||
utf8_wctomb(char* u, wchar_t w)
|
utf8_wctomb(char* u, wchar_t w)
|
||||||
{
|
{
|
||||||
|
@ -593,8 +609,6 @@ utf8_mbtowc(wchar_t* wp, const char* str, size_t n)
|
||||||
register int c;
|
register int c;
|
||||||
register wchar_t w = 0;
|
register wchar_t w = 0;
|
||||||
|
|
||||||
if (!wp && !sp)
|
|
||||||
ast.mb_sync = 0; /* assume call from mbinit() macro: reset global multibyte sync state */
|
|
||||||
if (!sp || !n)
|
if (!sp || !n)
|
||||||
return 0;
|
return 0;
|
||||||
if ((m = utf8tab[*sp]) > 0)
|
if ((m = utf8tab[*sp]) > 0)
|
||||||
|
@ -2196,6 +2210,16 @@ utf8_alpha(wchar_t c)
|
||||||
return !!(utf8_wam[(c >> 3) & 0x1fff] & (1 << (c & 0x7)));
|
return !!(utf8_wam[(c >> 3) & 0x1fff] & (1 << (c & 0x7)));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#else
|
||||||
|
|
||||||
|
#define utf8_wctomb 0
|
||||||
|
#define utf8_mbtowc 0
|
||||||
|
#define utf8_mblen 0
|
||||||
|
#define utf8_wcwidth 0
|
||||||
|
#define utf8_alpha 0
|
||||||
|
|
||||||
|
#endif /* !AST_NOMULTIBYTE */
|
||||||
|
|
||||||
#if !_hdr_wchar || !_lib_wctype || !_lib_iswctype
|
#if !_hdr_wchar || !_lib_wctype || !_lib_iswctype
|
||||||
#undef iswalpha
|
#undef iswalpha
|
||||||
#define iswalpha default_iswalpha
|
#define iswalpha default_iswalpha
|
||||||
|
|
|
@ -208,6 +208,8 @@ typedef struct
|
||||||
* multibyte macros
|
* multibyte macros
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
#if !AST_NOMULTIBYTE
|
||||||
|
|
||||||
#define mbmax() ( ast.mb_cur_max )
|
#define mbmax() ( ast.mb_cur_max )
|
||||||
#define mberr() ( ast.tmp_int < 0 )
|
#define mberr() ( ast.tmp_int < 0 )
|
||||||
|
|
||||||
|
@ -215,16 +217,38 @@ typedef struct
|
||||||
#define mbwide() ( mbmax() > 1 )
|
#define mbwide() ( mbmax() > 1 )
|
||||||
|
|
||||||
#define mb2wc(w,p,n) ( *ast.mb_towc)(&w, (char*)p, n )
|
#define mb2wc(w,p,n) ( *ast.mb_towc)(&w, (char*)p, n )
|
||||||
#define mbchar(p) (mbwide()?((ast.tmp_int=(*ast.mb_towc)(&ast.tmp_wchar,(char*)(p),mbmax()))>0?((p+=ast.tmp_int),ast.tmp_wchar):(p+=ast.mb_sync+1,ast.tmp_int)):(*(unsigned char*)(p++)))
|
#define mbchar(p) mbnchar(p, mbmax())
|
||||||
#define mbnchar(p,n) (mbwide()?((ast.tmp_int=(*ast.mb_towc)(&ast.tmp_wchar,(char*)(p),n))>0?((p+=ast.tmp_int),ast.tmp_wchar):(p+=ast.mb_sync+1,ast.tmp_int)):(*(unsigned char*)(p++)))
|
#define mbnchar(p,n) ( mbwide() ? ( (ast.tmp_int = (*ast.mb_towc)(&ast.tmp_wchar, (char*)(p), n)) > 0 ? \
|
||||||
#define mbinit() (mbwide()?(*ast.mb_towc)((wchar_t*)0,(char*)0,mbmax()):0)
|
( (p+=ast.tmp_int),ast.tmp_wchar) : (p+=ast.mb_sync+1,ast.tmp_int) ) : (*(unsigned char*)(p++)) )
|
||||||
#define mbsize(p) (mbwide()?(*ast.mb_len)((char*)(p),mbmax()):((p),1))
|
#define mbinit() ( ast.mb_sync = 0 )
|
||||||
|
#define mbsize(p) mbnsize(p, mbmax())
|
||||||
#define mbnsize(p,n) ( mbwide() ? (*ast.mb_len)((char*)(p), n) : ((p), 1) )
|
#define mbnsize(p,n) ( mbwide() ? (*ast.mb_len)((char*)(p), n) : ((p), 1) )
|
||||||
#define mbconv(s,w) ( ast.mb_conv ? (*ast.mb_conv)(s,w) : ((*(s)=(w)), 1) )
|
#define mbconv(s,w) ( ast.mb_conv ? (*ast.mb_conv)(s,w) : ((*(s)=(w)), 1) )
|
||||||
#define mbwidth(w) (ast.mb_width?(*ast.mb_width)(w):1)
|
#define mbwidth(w) ( ast.mb_width ? (*ast.mb_width)(w) : (w >= 0 && w <= 255 && !iscntrl(w) ? 1 : -1) )
|
||||||
#define mbxfrm(t,f,n) ( mbcoll() ? (*ast.mb_xfrm)((char*)(t), (char*)(f), n) : 0 )
|
#define mbxfrm(t,f,n) ( mbcoll() ? (*ast.mb_xfrm)((char*)(t), (char*)(f), n) : 0 )
|
||||||
#define mbalpha(w) ( ast.mb_alpha ? (*ast.mb_alpha)(w) : isalpha((w) & 0xff) )
|
#define mbalpha(w) ( ast.mb_alpha ? (*ast.mb_alpha)(w) : isalpha((w) & 0xff) )
|
||||||
|
|
||||||
|
#else
|
||||||
|
|
||||||
|
#define mbmax() 1
|
||||||
|
#define mberr() 0
|
||||||
|
|
||||||
|
#define mbcoll() 0
|
||||||
|
#define mbwide() 0
|
||||||
|
|
||||||
|
#define mb2wc(w,p,n) ( (w) = *(unsigned char*)(p), 1 )
|
||||||
|
#define mbchar(p) ( *(unsigned char*)(p++) )
|
||||||
|
#define mbnchar(p,n) mbchar(p)
|
||||||
|
#define mbinit() 0
|
||||||
|
#define mbsize(p) 1
|
||||||
|
#define mbnsize(p,n) 1
|
||||||
|
#define mbconv(s,w) ( (*(s)=(w)), 1 )
|
||||||
|
#define mbwidth(w) ( w >= 0 && w <= 255 && !iscntrl(w) ? 1 : -1 )
|
||||||
|
#define mbxfrm(t,f,n) 0
|
||||||
|
#define mbalpha(w) ( isalpha((w) & 0xff) )
|
||||||
|
|
||||||
|
#endif /* !AST_NOMULTIBYTE */
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* common macros
|
* common macros
|
||||||
*/
|
*/
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue