1
0
Fork 0
mirror of git://git.code.sf.net/p/cdesktopenv/code synced 2025-02-13 11:42:21 +00:00

File completion: fix incomplete multibyte support

Upon encountering two filenames with multibyte characters starting
with the same byte, a partial multibyte character was completed.

Reproducer (to run in UTF-8 locale):
$ touch XXXá XXXë
$ : XX		<== press tab
$ : XXX^?	<== partial multibyte character appears

Note: á is $'\xc3\xa1' and ë is $'\xc3\xab' (same initial byte).

src/cmd/ksh93/edit/completion.c:
- Add multibyte support to the charcmp() and overlaid() functions.
  Thanks to Harald van Dijk for useful code and suggestions.
- Add a few missing mbinit() calls. The state of multibyte
  processing must be reset before starting a new loop in case a
  previous processing run was interrupted mid-character.

src/cmd/ksh93/tests/pty.sh:
- Add test based on Harald's reproducer.

Resolves: https://github.com/ksh93/ksh/issues/223
This commit is contained in:
Martijn Dekker 2021-03-17 22:34:45 +00:00
parent 936a1939a8
commit 33d0f004de
4 changed files with 37 additions and 5 deletions

6
NEWS
View file

@ -3,6 +3,12 @@ For full details, see the git log at: https://github.com/ksh93/ksh
Any uppercase BUG_* names are modernish shell bug IDs.
2021-03-17:
- Fixed a bug with file name completion on the interactive shell in multibyte
locales. Upon encountering two filenames with multibyte characters starting
with the same byte, a partial multibyte character was autocompleted.
2021-03-16:
- Tilde expansion can now be extended or modified by defining a .sh.tilde.get

View file

@ -39,6 +39,7 @@ static char *fmtx(const char *string)
int offset = staktell();
if(*cp=='#' || *cp=='~')
stakputc('\\');
mbinit();
while((c=mbchar(cp)),(c>UCHAR_MAX)||(n=state[c])==0 || n==S_EPAT);
if(n==S_EOF && *string!='#')
return((char*)string);
@ -62,11 +63,19 @@ static int charcmp(int a, int b, int nocase)
{
if(nocase)
{
if(isupper(a))
#if _lib_towlower
if(mbwide())
{
a = (int)towlower((wint_t)a);
b = (int)towlower((wint_t)b);
}
else
#endif
{
a = tolower(a);
if(isupper(b))
b = tolower(b);
}
}
return(a==b);
}
@ -78,8 +87,10 @@ static int charcmp(int a, int b, int nocase)
static char *overlaid(register char *str,register const char *newstr,int nocase)
{
register int c,d;
while((c= *(unsigned char *)str) && ((d= *(unsigned char*)newstr++),charcmp(c,d,nocase)))
str++;
char *strnext;
mbinit();
while((strnext = str, c = mbchar(strnext)) && (d = mbchar(newstr), charcmp(c,d,nocase)))
str = strnext;
if(*str)
*str = 0;
else if(*newstr==0)
@ -98,6 +109,7 @@ static char *find_begin(char outbuff[], char *last, int endchar, int *type)
int mode=*type;
bp = outbuff;
*type = 0;
mbinit();
while(cp < last)
{
xp = cp;
@ -500,6 +512,7 @@ int ed_expand(Edit_t *ep, char outbuff[],int *cur,int *eol,int mode, int count)
/* first re-adjust cur */
c = outbuff[*cur];
outbuff[*cur] = 0;
mbinit();
for(out=outbuff; *out;n++)
mbchar(out);
outbuff[*cur] = c;

View file

@ -20,7 +20,7 @@
#define SH_RELEASE_FORK "93u+m" /* only change if you develop a new ksh93 fork */
#define SH_RELEASE_SVER "1.0.0-alpha" /* semantic version number: https://semver.org */
#define SH_RELEASE_DATE "2021-03-16" /* must be in this format for $((.sh.version)) */
#define SH_RELEASE_DATE "2021-03-17" /* must be in this format for $((.sh.version)) */
#define SH_RELEASE_CPYR "(c) 2020-2021 Contributors to ksh " SH_RELEASE_FORK
/* Scripts sometimes field-split ${.sh.version}, so don't change amount of whitespace. */

View file

@ -772,5 +772,18 @@ w echo $? ~\t
u 42 /tmp
!
# err_exit #
((SHOPT_MULTIBYTE)) &&
[[ ${LC_ALL:-${LC_CTYPE:-${LANG:-}}} =~ [Uu][Tt][Ff]-?8 ]] &&
touch $'XXX\xc3\xa1' $'XXX\xc3\xab' &&
tst $LINENO <<"!"
L autocomplete should not fill partial multibyte characters
# https://github.com/ksh93/ksh/issues/223
d 15
p :test-1:
w : XX\t
r ^:test-1: : XXX\r\n$
!
# ======
exit $((Errors<125?Errors:125))