mirror of
git://git.code.sf.net/p/cdesktopenv/code
synced 2025-02-13 19:52:20 +00:00
Fix corrupt UTF-8 char processing & shellquoting after aborted read
If the processing of a multibyte character was interrupted in UTF-8 locales, e.g. by reading just one byte of a two-byte character 'ü' (\303\274) with a command like: print -nr $'\303\274' | read -n1 g then the shellquoting algorithm was corrupted in such a way that the final quote in simple single-quoted string was missing. This bug may have had other, as yet undiscovered, effects as well. The problem was with corrupted multibyte character processing and not with the shell-quoting routine sh_fmtq() itself. Full trace and discussion at: https://github.com/ksh93/ksh/issues/5 (which is also an attempt to begin to understand the esoteric workings of the libast mb* macros that process UTF-8 characters). src/lib/libast/comp/setlocale.c: utf8_mbtowc(): - If called from the mbinit() macro (i.e. if both pointer parameters are null), reset the global multibyte character synchronisation state variable. This fixes the problem with interrupted processing leaving an inconsistent state, provided that mbinit() is called before processing multibyte characters (which it is, in most (?) places that do this). Before this fix, calling mbinit() in UTF-8 locales was a no-op. src/cmd/ksh93/sh/string.c: sh_fmtq(): - Call mbinit() before potentially processing multibyte characters. Testing suggests that this could be superfluous, but at worst, it's harmless; better be sure. src/cmd/ksh93/tests/builtins.sh: - Add regression test for shellquoting with 'printf %q' after interrupting the processing of a multibyte characeter with 'read -n1'. This test only fails in a UTF-8 locale, e.g. when running: bin/shtests -u builtins SHELL=/buggy/ksh-2012-08-01 Fixes #5.
This commit is contained in:
parent
2624b297fc
commit
300cd19987
5 changed files with 28 additions and 1 deletions
6
NEWS
6
NEWS
|
@ -3,6 +3,12 @@ For full details, see the git log at: https://github.com/ksh93/ksh
|
||||||
|
|
||||||
Any uppercase BUG_* names are modernish shell bug IDs.
|
Any uppercase BUG_* names are modernish shell bug IDs.
|
||||||
|
|
||||||
|
2020-07-05:
|
||||||
|
|
||||||
|
- In UTF-8 locales, fix corruption of the shell's internal string quoting
|
||||||
|
algorithm (as used by xtrace, 'printf %q', and more) that occurred when
|
||||||
|
the processing of a multibyte character was interrupted.
|
||||||
|
|
||||||
2020-07-03:
|
2020-07-03:
|
||||||
|
|
||||||
- Backslashes are no longer escaped in the raw Bourne Shell-like editing
|
- Backslashes are no longer escaped in the raw Bourne Shell-like editing
|
||||||
|
|
|
@ -17,4 +17,4 @@
|
||||||
* David Korn <dgk@research.att.com> *
|
* David Korn <dgk@research.att.com> *
|
||||||
* *
|
* *
|
||||||
***********************************************************************/
|
***********************************************************************/
|
||||||
#define SH_RELEASE "93u+m 2020-07-02"
|
#define SH_RELEASE "93u+m 2020-07-05"
|
||||||
|
|
|
@ -336,6 +336,9 @@ char *sh_fmtq(const char *string)
|
||||||
int offset;
|
int offset;
|
||||||
if(!cp)
|
if(!cp)
|
||||||
return((char*)0);
|
return((char*)0);
|
||||||
|
#if SHOPT_MULTIBYTE
|
||||||
|
mbinit();
|
||||||
|
#endif /* SHOPT_MULTIBYTE */
|
||||||
offset = staktell();
|
offset = staktell();
|
||||||
state = ((c= mbchar(cp))==0);
|
state = ((c= mbchar(cp))==0);
|
||||||
if(isaletter(c))
|
if(isaletter(c))
|
||||||
|
|
|
@ -302,7 +302,23 @@ fi
|
||||||
if [[ $(printf '%..*s\n' : abc def) != abc:def ]]
|
if [[ $(printf '%..*s\n' : abc def) != abc:def ]]
|
||||||
then err_exit "printf '%..*s' not working"
|
then err_exit "printf '%..*s' not working"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
# ======
|
||||||
|
# shell-quoting using printf %q (same algorithm used for xtrace and output of 'set', 'trap', ...)
|
||||||
|
|
||||||
[[ $(printf '%q\n') == '' ]] || err_exit 'printf "%q" with missing arguments'
|
[[ $(printf '%q\n') == '' ]] || err_exit 'printf "%q" with missing arguments'
|
||||||
|
|
||||||
|
# the following fails on 2012-08-01 in UTF-8 locales
|
||||||
|
expect="'shell-quoted string'"
|
||||||
|
actual=$(
|
||||||
|
print -nr $'\303\274' | read -n1 foo # interrupt processing of 2-byte UTF-8 char after reading 1 byte
|
||||||
|
printf '%q\n' "shell-quoted string"
|
||||||
|
)
|
||||||
|
LC_CTYPE=POSIX true # on buggy ksh, a locale re-init via temp assignment restores correct shellquoting
|
||||||
|
[[ $actual == "$expect" ]] || err_exit 'shell-quoting corrupted after interrupted processing of UTF-8 char' \
|
||||||
|
"(expected $expect; got $actual)"
|
||||||
|
|
||||||
|
# ======
|
||||||
# we won't get hit by the one second boundary twice, right?
|
# we won't get hit by the one second boundary twice, right?
|
||||||
expect= actual=
|
expect= actual=
|
||||||
{ expect=$(LC_ALL=C date | sed 's/ GMT / UTC /') && actual=$(LC_ALL=C printf '%T\n' now) && [[ $actual == "$expect" ]]; } ||
|
{ expect=$(LC_ALL=C date | sed 's/ GMT / UTC /') && actual=$(LC_ALL=C printf '%T\n' now) && [[ $actual == "$expect" ]]; } ||
|
||||||
|
|
|
@ -600,6 +600,8 @@ utf8_mbtowc(wchar_t* wp, const char* str, size_t n)
|
||||||
register int c;
|
register int c;
|
||||||
register wchar_t w = 0;
|
register wchar_t w = 0;
|
||||||
|
|
||||||
|
if (!wp && !sp)
|
||||||
|
ast.mb_sync = 0; /* assume call from mbinit() macro: reset global multibyte sync state */
|
||||||
if (!sp || !n)
|
if (!sp || !n)
|
||||||
return 0;
|
return 0;
|
||||||
if ((m = utf8tab[*sp]) > 0)
|
if ((m = utf8tab[*sp]) > 0)
|
||||||
|
|
Loading…
Reference in a new issue