mirror of
git://git.code.sf.net/p/cdesktopenv/code
synced 2025-02-13 11:42:21 +00:00
This commit fixes two bugs in the generation of $'...' shellquoted
strings:
1. A bug introduced in f9d28935
. In UTF-8 locales, a byte that is
invalid in UTF-8, e.g. hex byte 86, would be shellquoted as
\u[86], which is not the same as the correct quoting, \x86.
2. A bug inherited from 93u+. Single bytes (e.g. hex 11) were
always quoted as \x11 and not \x[11], even if a subsequent
character was a hexadecimal digit. However, the parser reads
past two hexadecimal digits, so we got:
$ printf '%q\n' $'\x[11]1'
$'\x111'
$ printf $'\x111' | od -t x1
0000000 c4 91
0000002
After the bug fix, this works correctly:
$ printf '%q\n' $'\x[11]1'
$'\x[11]1'
$ printf $'\x[11]1' | od -t x1
0000000 11 31
0000002
src/cmd/ksh93/sh/string.c: sh_fmtq():
- Make the multibyte code for $'...' more readable, eliminating the
'isbyte' flag.
- When in a multibyte locale, make sure to shellquote both invalid
multibyte characters and unprintable ASCII characters as
hexadecimal bytes (\xNN). This reinstates 93u+ behaviour.
- When quoting bytes, use isxdigit(3) to determine if the next
character is a hex digit, and if so, protect the quoted byte with
square brackets.
src/cmd/ksh93/tests/quoting2.sh:
- Move the 'printf %q' shellquoting regression tests here from
builtins.sh; they test the shellquoting algorithm, not so much
the printf builtin itself.
- Add regression tests for these bugs.
This commit is contained in:
parent
e53177abca
commit
ac8991e525
4 changed files with 73 additions and 48 deletions
6
NEWS
6
NEWS
|
@ -11,6 +11,12 @@ Any uppercase BUG_* names are modernish shell bug IDs.
|
|||
- Fixed a bug that caused scripts to continue running after over-shifting
|
||||
in a function when the function call had a redirection.
|
||||
|
||||
- When generating shellquoted strings (such as with 'printf %q'), the
|
||||
hexadecimal value of a quoted unprintable character was not protected with
|
||||
square braces, e.g. 0x12 followed by '3' would be quoted as '\x123', which
|
||||
is a different value. Such strings are now quoted like '\x[12]3' if the
|
||||
next character is a hexadecimal digit.
|
||||
|
||||
2020-07-31:
|
||||
|
||||
- Fixed a bug that caused multidimensional associative arrays to be created
|
||||
|
|
|
@ -40,6 +40,10 @@
|
|||
# define iswprint(c) (((c)&~0377) || isprint(c))
|
||||
#endif
|
||||
|
||||
#ifndef isxdigit
|
||||
# define isxdigit(c) ((c)>='0'&&(c)<='9'||(c)>='a'&&(c)<='f'||(c)>='A'&&(c)<='F')
|
||||
#endif
|
||||
|
||||
|
||||
/*
|
||||
* Table lookup routine
|
||||
|
@ -410,7 +414,6 @@ char *sh_fmtq(const char *string)
|
|||
}
|
||||
else
|
||||
{
|
||||
int isbyte=0;
|
||||
stakwrite("$'",2);
|
||||
cp = string;
|
||||
#if SHOPT_MULTIBYTE
|
||||
|
@ -447,24 +450,29 @@ char *sh_fmtq(const char *string)
|
|||
break;
|
||||
default:
|
||||
#if SHOPT_MULTIBYTE
|
||||
isbyte = 0;
|
||||
if(c<0)
|
||||
if(mbwide())
|
||||
{
|
||||
c = *((unsigned char *)op);
|
||||
cp = op+1;
|
||||
isbyte = 1;
|
||||
/* We're in a multibyte locale */
|
||||
if(c<0 || c<128 && !isprint(c))
|
||||
{
|
||||
/* Invalid multibyte char, or unprintable ASCII char: quote as hex byte */
|
||||
c = *((unsigned char *)op);
|
||||
cp = op+1;
|
||||
goto quote_one_byte;
|
||||
}
|
||||
if(is_invisible(c))
|
||||
{
|
||||
/* Unicode hex code */
|
||||
sfprintf(staksp,"\\u[%x]",c);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
if(mbwide() && is_invisible(c))
|
||||
{
|
||||
sfprintf(staksp,"\\u[%x]",c);
|
||||
continue;
|
||||
}
|
||||
else if(isbyte)
|
||||
#else
|
||||
else
|
||||
#endif /* SHOPT_MULTIBYTE */
|
||||
if(!isprint(c))
|
||||
#endif
|
||||
{
|
||||
sfprintf(staksp,"\\x%.2x",c);
|
||||
quote_one_byte:
|
||||
sfprintf(staksp, isxdigit(*cp) ? "\\x[%.2x]" : "\\x%.2x", c);
|
||||
continue;
|
||||
}
|
||||
state=0;
|
||||
|
|
|
@ -303,39 +303,6 @@ if [[ $(printf '%..*s\n' : abc def) != abc:def ]]
|
|||
then err_exit "printf '%..*s' not working"
|
||||
fi
|
||||
|
||||
# ======
|
||||
# shell-quoting using printf %q (same algorithm used for xtrace and output of 'set', 'trap', ...)
|
||||
|
||||
[[ $(printf '%q\n') == '' ]] || err_exit 'printf "%q" with missing arguments'
|
||||
|
||||
# the following fails on 2012-08-01 in UTF-8 locales
|
||||
expect="'shell-quoted string'"
|
||||
actual=$(
|
||||
print -nr $'\303\274' | read -n1 foo # interrupt processing of 2-byte UTF-8 char after reading 1 byte
|
||||
printf '%q\n' "shell-quoted string"
|
||||
)
|
||||
LC_CTYPE=POSIX true # on buggy ksh, a locale re-init via temp assignment restores correct shellquoting
|
||||
[[ $actual == "$expect" ]] || err_exit 'shell-quoting corrupted after interrupted processing of UTF-8 char' \
|
||||
"(expected $expect; got $actual)"
|
||||
|
||||
# shell-quoting UTF-8 characters: check for unnecessary encoding
|
||||
case ${LC_ALL:-${LC_CTYPE:-${LANG:-}}} in
|
||||
( *[Uu][Tt][Ff]8* | *[Uu][Tt][Ff]-8* )
|
||||
expect=$'$\'عندما يريد العالم أن \\u[202a]يتكلّم \\u[202c] ، فهو يتحدّث بلغة يونيكود.\''
|
||||
actual=$(printf %q 'عندما يريد العالم أن يتكلّم ، فهو يتحدّث بلغة يونيكود.')
|
||||
[[ $actual == "$expect" ]] || err_exit 'shell-quoting: Arabic UTF-8 characters' \
|
||||
"(expected $expect; got $actual)"
|
||||
expect="'正常終了 正常終了'"
|
||||
actual=$(printf %q '正常終了 正常終了')
|
||||
[[ $actual == "$expect" ]] || err_exit 'shell-quoting: Japanese UTF-8 characters' \
|
||||
"(expected $expect; got $actual)"
|
||||
expect="'aeu aéu'"
|
||||
actual=$(printf %q 'aeu aéu')
|
||||
[[ $actual == "$expect" ]] || err_exit 'shell-quoting: Latin UTF-8 characters' \
|
||||
"(expected $expect; got $actual)"
|
||||
;;
|
||||
esac
|
||||
|
||||
# ======
|
||||
# we won't get hit by the one second boundary twice, right?
|
||||
expect= actual=
|
||||
|
|
|
@ -215,4 +215,48 @@ exp='ac'
|
|||
got=$'a\0b'c
|
||||
[[ $got == "$exp" ]] || err_exit "\$'a\\0b'c expansion failed -- expected '$exp', got '$got'"
|
||||
|
||||
# ======
|
||||
# generating shell-quoted strings using printf %q (same algorithm used for xtrace and output of 'set', 'trap', ...)
|
||||
|
||||
[[ $(printf '%q\n') == '' ]] || err_exit 'printf "%q" with missing arguments yields non-empty result'
|
||||
|
||||
# the following fails on 2012-08-01 in UTF-8 locales
|
||||
expect="'shell-quoted string'"
|
||||
actual=$(
|
||||
print -nr $'\303\274' | read -n1 foo # interrupt processing of 2-byte UTF-8 char after reading 1 byte
|
||||
printf '%q\n' "shell-quoted string"
|
||||
)
|
||||
LC_CTYPE=POSIX true # on buggy ksh, a locale re-init via temp assignment restores correct shellquoting
|
||||
[[ $actual == "$expect" ]] || err_exit 'shell-quoting corrupted after interrupted processing of UTF-8 char' \
|
||||
"(expected $expect; got $actual)"
|
||||
|
||||
# shell-quoting UTF-8 characters: check for unnecessary encoding
|
||||
case ${LC_ALL:-${LC_CTYPE:-${LANG:-}}} in
|
||||
( *[Uu][Tt][Ff]8* | *[Uu][Tt][Ff]-8* )
|
||||
expect=$'$\'عندما يريد العالم أن \\u[202a]يتكلّم \\u[202c] ، فهو يتحدّث بلغة يونيكود.\''
|
||||
actual=$(printf %q 'عندما يريد العالم أن يتكلّم ، فهو يتحدّث بلغة يونيكود.')
|
||||
[[ $actual == "$expect" ]] || err_exit 'shell-quoting: Arabic UTF-8 characters' \
|
||||
"(expected $expect; got $actual)"
|
||||
expect="'正常終了 正常終了'"
|
||||
actual=$(printf %q '正常終了 正常終了')
|
||||
[[ $actual == "$expect" ]] || err_exit 'shell-quoting: Japanese UTF-8 characters' \
|
||||
"(expected $expect; got $actual)"
|
||||
expect="'aeu aéu'"
|
||||
actual=$(printf %q 'aeu aéu')
|
||||
[[ $actual == "$expect" ]] || err_exit 'shell-quoting: Latin UTF-8 characters' \
|
||||
"(expected $expect; got $actual)"
|
||||
expect=$'$\'\\x86\\u[86]\\xf0\\x96v\\xa7\\xb5\''
|
||||
actual=$(printf %q $'\x86\u86\xF0\x96\x76\xA7\xB5')
|
||||
[[ $actual == "$expect" ]] || err_exit 'shell-quoting: invalid UTF-8 characters not encoded with \xNN' \
|
||||
"(expected $expect; got $actual)"
|
||||
;;
|
||||
esac
|
||||
|
||||
# check that hex bytes are protected with square braces if needed
|
||||
expect=$'$\'1\\x[11]1\''
|
||||
actual=$(printf %q $'1\x[11]1')
|
||||
[[ $actual == "$expect" ]] || err_exit 'shell-quoting: hex bytes not protected from subsequent hex-like chars' \
|
||||
"(expected $expect; got $actual)"
|
||||
|
||||
# ======
|
||||
exit $((Errors<125?Errors:125))
|
||||
|
|
Loading…
Reference in a new issue