File completion: fix incomplete multibyte support

Upon encountering two filenames with multibyte characters starting with the same byte, a partial multibyte character was completed.

Reproducer (to run in UTF-8 locale):
    $ touch XXXá XXXë
    $ : XX      <== press tab
    $ : XXX^?   <== partial multibyte character appears

Note: á is $'\xc3\xa1' and ë is $'\xc3\xab' (same initial byte).

src/cmd/ksh93/edit/completion.c:
- Add multibyte support to the charcmp() and overlaid() functions. Thanks to Harald van Dijk for useful code and suggestions.
- Add a few missing mbinit() calls. The state of multibyte processing must be reset before starting a new loop in case a previous processing run was interrupted mid-character.

src/cmd/ksh93/tests/pty.sh:
- Add test based on Harald's reproducer.

Resolves: https://github.com/ksh93/ksh/issues/223
2025-03-09 15:50:02 +00:00 · 2021-03-17 22:34:45 +00:00 · 2021-03-17 22:34:45 +00:00 · 33d0f004de
commit 33d0f004de
parent 936a1939a8
4 changed files with 37 additions and 5 deletions
--- a/6
+++ b/6
@ -3,6 +3,12 @@ For full details, see the git log at: https://github.com/ksh93/ksh
 Any uppercase BUG_* names are modernish shell bug IDs.
 2021-03-17:
 - Fixed a bug with file name completion on the interactive shell in multibyte
  locales. Upon encountering two filenames with multibyte characters starting
  with the same byte, a partial multibyte character was autocompleted.
 2021-03-16:
 - Tilde expansion can now be extended or modified by defining a .sh.tilde.get
--- a/src/cmd/ksh93/edit/completion.c
+++ b/src/cmd/ksh93/edit/completion.c
@ -39,6 +39,7 @@ static char *fmtx(const char *string)
 	int offset = staktell();
 	if(*cp=='#' || *cp=='~')
 		stakputc('\\');
 	mbinit();
 	while((c=mbchar(cp)),(c>UCHAR_MAX)||(n=state[c])==0 || n==S_EPAT);
 	if(n==S_EOF && *string!='#')
 		return((char*)string);
@ -62,11 +63,19 @@ static int charcmp(int a, int b, int nocase)
 {
 	if(nocase)
 	{
-		if(isupper(a))
+#if _lib_towlower
 		if(mbwide())
 		{
 			a = (int)towlower((wint_t)a);
 			b = (int)towlower((wint_t)b);
 		}
 		else
 #endif
 		{
 			a = tolower(a);
 		if(isupper(b))
 			b = tolower(b);
 		}
 	}
 	return(a==b);
 }
@ -78,8 +87,10 @@ static int charcmp(int a, int b, int nocase)
 static char *overlaid(register char *str,register const char *newstr,int nocase)
 {
 	register int c,d;
-	while((c= *(unsigned char *)str) && ((d= *(unsigned char*)newstr++),charcmp(c,d,nocase)))
+	char *strnext;
-		str++;
+	mbinit();
 	while((strnext = str, c = mbchar(strnext)) && (d = mbchar(newstr), charcmp(c,d,nocase)))
 		str = strnext;
 	if(*str)
 		*str = 0;
 	else if(*newstr==0)
@ -98,6 +109,7 @@ static char *find_begin(char outbuff[], char *last, int endchar, int *type)
 	int		mode=*type;
 	bp = outbuff;
 	*type = 0;
 	mbinit();
 	while(cp < last)
 	{
 		xp = cp;
@ -500,6 +512,7 @@ int ed_expand(Edit_t *ep, char outbuff[],int *cur,int *eol,int mode, int count)
 		/* first re-adjust cur */
 		c = outbuff[*cur];
 		outbuff[*cur] = 0;
 		mbinit();
 		for(out=outbuff; *out;n++)
 			mbchar(out);
 		outbuff[*cur] = c;
--- a/src/cmd/ksh93/include/version.h
+++ b/src/cmd/ksh93/include/version.h
@ -20,7 +20,7 @@
 #define SH_RELEASE_FORK	"93u+m"		/* only change if you develop a new ksh93 fork */
 #define SH_RELEASE_SVER	"1.0.0-alpha"	/* semantic version number: https://semver.org */
-#define SH_RELEASE_DATE	"2021-03-16"	/* must be in this format for $((.sh.version)) */
+#define SH_RELEASE_DATE	"2021-03-17"	/* must be in this format for $((.sh.version)) */
 #define SH_RELEASE_CPYR	"(c) 2020-2021 Contributors to ksh " SH_RELEASE_FORK
 /* Scripts sometimes field-split ${.sh.version}, so don't change amount of whitespace. */
--- a/src/cmd/ksh93/tests/pty.sh
+++ b/src/cmd/ksh93/tests/pty.sh
@ -772,5 +772,18 @@ w echo $? ~\t
 u 42 /tmp
 !
 # err_exit #
 ((SHOPT_MULTIBYTE)) &&
 [[ ${LC_ALL:-${LC_CTYPE:-${LANG:-}}} =~ [Uu][Tt][Ff]-?8 ]] &&
 touch $'XXX\xc3\xa1' $'XXX\xc3\xab' &&
 tst $LINENO <<"!"
 L autocomplete should not fill partial multibyte characters
 # https://github.com/ksh93/ksh/issues/223
 d 15
 p :test-1:
 w : XX\t
 r ^:test-1: : XXX\r\n$
 !
 # ======
 exit $((Errors<125?Errors:125))