mirror of
git://git.code.sf.net/p/cdesktopenv/code
synced 2025-03-09 15:50:02 +00:00
printf %H: fix/reduce encoding into entities (re: 8477d2ce)
The &nbsp; entity is not valid in XML, only in HTML. Since we must be compatible with both, it can't be used. Thanks to Andras Farkas for the bug report. In addition, the generation of numeric entities for unprintable characters was only valid while processing UTF-8 text while in a UTF-8 locale. In all other conditions it produced invalid results. This is not worth trying to fix. Discussion: https://groups.google.com/d/msgid/korn-shell/CAA0nTRta%3DPbOYduyBv%3DXCzumTcUCU8Lki%3DQQf2O8Erk2BFvO1g%40mail.gmail.com src/cmd/ksh93/bltins/print.c: - Remove conversion to &nbsp; entity. - Remove conversion of non-graph characters to numeric entities. Convert only the 5 semantically meaningful characters: < > & " ' src/cmd/ksh93/include/defs.h, src/cmd/ksh93/sh/string.c: - We don't need sh_isprint() in print.c anymore, so turn it back into a static function. src/cmd/ksh93/tests/builtins.sh: - Update and trim regression tests.
This commit is contained in:
parent
61437b2728
commit
e01801572d
4 changed files with 6 additions and 32 deletions
|
@ -276,7 +276,7 @@ if [[ $(getopts $'[+?X\ffoobar\fX]' v --man 2>&1) != *'Xhello world'X* ]]
|
|||
then err_exit '\f...\f not working in getopts usage strings'
|
||||
fi
|
||||
|
||||
expect='&lt;&gt;&quot;&amp;&nbsp;&apos;&#9;abc'
|
||||
expect=$'&lt;&gt;&quot;&amp; &apos;\tabc'
|
||||
actual=$(printf '%H\n' $'<>"& \'\tabc')
|
||||
[[ $expect == "$actual" ]] || err_exit 'printf %H not working' \
|
||||
"(expected $(printf %q "$expect"), got $(printf %q "$actual"))"
|
||||
|
@ -295,24 +295,12 @@ actual=$(printf 'foo://ab_c%(url)q\n' $'<>"& \'\tabc')
|
|||
case ${LC_ALL:-${LC_CTYPE:-${LANG:-}}} in
|
||||
( *[Uu][Tt][Ff]8* | *[Uu][Tt][Ff]-8* )
|
||||
# HTML encoding UTF-8 characters
|
||||
expect='what?'
|
||||
actual=$(printf %H 'what?')
|
||||
[[ $actual == "$expect" ]] || err_exit 'printf %H: ASCII characters' \
|
||||
"(expected $expect; got $actual)"
|
||||
expect='عندما يريد العالم أن ‪يتكلّم ‬ ، فهو يتحدّث بلغة يونيكود.'
|
||||
actual=$(printf %H 'عندما يريد العالم أن يتكلّم ، فهو يتحدّث بلغة يونيكود.')
|
||||
[[ $actual == "$expect" ]] || err_exit 'printf %H: Arabic UTF-8 characters' \
|
||||
"(expected $expect; got $actual)"
|
||||
expect='正常終了 正常終了'
|
||||
actual=$(printf %H '正常終了 正常終了')
|
||||
[[ $actual == "$expect" ]] || err_exit 'printf %H: Japanese UTF-8 characters' \
|
||||
"(expected $expect; got $actual)"
|
||||
expect='« l’abîme de mon métier… »'
|
||||
actual=$(printf %H '« l’abîme de mon métier… »')
|
||||
[[ $actual == "$expect" ]] || err_exit 'printf %H: Latin UTF-8 characters' \
|
||||
"(expected $expect; got $actual)"
|
||||
expect='?†???'
|
||||
actual=$(printf %H $'\x86\u86\xF0\x96\x76\xA7\xB5')
|
||||
expect='w?h?á?t??'
|
||||
actual=$(printf %H $'w\x80h\x81\uE1\x82t\x83?')
|
||||
[[ $actual == "$expect" ]] || err_exit 'printf %H: invalid UTF-8 characters' \
|
||||
"(expected $expect; got $actual)"
|
||||
# URL/URI encoding of UTF-8 characters
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue