1
0
Fork 0
mirror of git://git.code.sf.net/p/cdesktopenv/code synced 2025-03-09 15:50:02 +00:00

Fix shellquoting of invalid multibyte char (re: f9d28935, 8c7c60ec)

This commit fixes two bugs in the generation of $'...' shellquoted
strings:
1. A bug introduced in f9d28935. In UTF-8 locales, a byte that is
   invalid in UTF-8, e.g. hex byte 86, would be shellquoted as
   \u[86], which is not the same as the correct quoting, \x86.
2. A bug inherited from 93u+. Single bytes (e.g. hex 11) were
   always quoted as \x11 and not \x[11], even if a subsequent
   character was a hexadecimal digit. However, the parser reads
   past two hexadecimal digits, so we got:
	$ printf '%q\n' $'\x[11]1'
	$'\x111'
	$ printf $'\x111' | od -t x1
	0000000    c4  91
	0000002
   After the bug fix, this works correctly:
	$ printf '%q\n' $'\x[11]1'
	$'\x[11]1'
	$ printf $'\x[11]1' | od -t x1
	0000000    11  31
	0000002

src/cmd/ksh93/sh/string.c: sh_fmtq():
- Make the multibyte code for $'...' more readable, eliminating the
  'isbyte' flag.
- When in a multibyte locale, make sure to shellquote both invalid
  multibyte characters and unprintable ASCII characters as
  hexadecimal bytes (\xNN). This reinstates 93u+ behaviour.
- When quoting bytes, use isxdigit(3) to determine if the next
  character is a hex digit, and if so, protect the quoted byte with
  square brackets.

src/cmd/ksh93/tests/quoting2.sh:
- Move the 'printf %q' shellquoting regression tests here from
  builtins.sh; they test the shellquoting algorithm, not so much
  the printf builtin itself.
- Add regression tests for these bugs.
This commit is contained in:
Martijn Dekker 2020-08-05 18:22:22 +01:00
parent e53177abca
commit ac8991e525
4 changed files with 73 additions and 48 deletions

View file

@ -40,6 +40,10 @@
# define iswprint(c) (((c)&~0377) || isprint(c))
#endif
#ifndef isxdigit
/*
 * Fallback hexadecimal-digit test, used only when no isxdigit macro
 * is already defined.  Yields nonzero when c lies in one of the
 * ranges '0'-'9', 'a'-'f' or 'A'-'F', and zero otherwise.
 * Note: the argument is evaluated more than once, so it must not
 * have side effects.
 */
# define isxdigit(c) \
	( ((c) >= '0' && (c) <= '9') \
	|| ((c) >= 'a' && (c) <= 'f') \
	|| ((c) >= 'A' && (c) <= 'F') )
#endif
/*
* Table lookup routine
@ -410,7 +414,6 @@ char *sh_fmtq(const char *string)
}
else
{
int isbyte=0;
stakwrite("$'",2);
cp = string;
#if SHOPT_MULTIBYTE
@ -447,24 +450,29 @@ char *sh_fmtq(const char *string)
break;
default:
#if SHOPT_MULTIBYTE
isbyte = 0;
if(c<0)
if(mbwide())
{
c = *((unsigned char *)op);
cp = op+1;
isbyte = 1;
/* We're in a multibyte locale */
if(c<0 || c<128 && !isprint(c))
{
/* Invalid multibyte char, or unprintable ASCII char: quote as hex byte */
c = *((unsigned char *)op);
cp = op+1;
goto quote_one_byte;
}
if(is_invisible(c))
{
/* Unicode hex code */
sfprintf(staksp,"\\u[%x]",c);
continue;
}
}
if(mbwide() && is_invisible(c))
{
sfprintf(staksp,"\\u[%x]",c);
continue;
}
else if(isbyte)
#else
else
#endif /* SHOPT_MULTIBYTE */
if(!isprint(c))
#endif
{
sfprintf(staksp,"\\x%.2x",c);
quote_one_byte:
sfprintf(staksp, isxdigit(*cp) ? "\\x[%.2x]" : "\\x%.2x", c);
continue;
}
state=0;