From 936a1939a8ddb1aea60317ccf687e2cc4d520229 Mon Sep 17 00:00:00 2001 From: Martijn Dekker Date: Wed, 17 Mar 2021 21:07:14 +0000 Subject: [PATCH] Allow proper tilde expansion overrides (#225) Until now, when performing any tilde expansion like ~/foo or ~user/foo, ksh added a placeholder built-in command called '.sh.tilde', ostensibly with the intention to allow users to override it with a shell function or custom builtin. The multishell ksh93 repo shows this was added sometime between 2002-06-28 and 2004-02-29. However, it has never worked and crashed the shell. This commit replaces that with something that works. Specific tilde expansions can now be overridden using .set or .get discipline functions associated with the .sh.tilde variable (see manual, Discipline Functions). For example, you can use either of: .sh.tilde.set() { case ${.sh.value} in '~tmp') .sh.value=${XDG_RUNTIME_DIR:-${TMPDIR:-/tmp}} ;; '~doc') .sh.value=~/Documents ;; '~ksh') .sh.value=/usr/local/src/ksh93/ksh ;; esac } .sh.tilde.get() { case ${.sh.tilde} in '~tmp') .sh.value=${XDG_RUNTIME_DIR:-${TMPDIR:-/tmp}} ;; '~doc') .sh.value=~/Documents ;; '~ksh') .sh.value=/usr/local/src/ksh93/ksh ;; esac } src/cmd/ksh93/include/variables.h, src/cmd/ksh93/data/variables.c: - Add SH_TILDENOD for a new ${.sh.tilde} predefined variable. It is initially unset. src/cmd/ksh93/sh/macro.c: - sh_btilde(): Removed. - tilde_expand2(): Rewritten. I started out with the tiny version of this function from the 2002-06-28 version of ksh. It uses the stack instead of sfio, which is more efficient. A bugfix for $HOME == '/' was retrofitted so that ~/foo does not become //foo instead of /foo. The rest is entirely new code. To implement the override functionality, it now checks if ${.sh.tilde} has any discipline function associated with it. If it does, it assigns the tilde expression to ${.sh.tilde} using nv_putval(), triggering the .set discipline, and then reads it back using nv_getval(), triggering the .get discipline. 
The resulting value is used if it is nonempty and does not still start with a tilde. src/cmd/ksh93/bltins/typeset.c, src/cmd/ksh93/tests/builtins.sh: - Since ksh no longer adds a dummy '.sh.tilde' builtin, remove the ad-hoc hack that suppressed it from the output of 'builtin'. src/cmd/ksh93/tests/tilde.sh: - Add tests verifying everything I can think of, as well as tests for bugs found and fixed during this rewrite. src/cmd/ksh93/tests/pty.sh: - Add test verifying that the .sh.tilde.set() discipline does not modify the exit status value ($?) when performing tilde expansion as part of tab completion. src/cmd/ksh93/sh.1: - Instead of "tilde substitution", call the basic mechanism "tilde expansion", which is the term used everywhere else (including the 1995 Bolsky/Korn ksh book). - Document the new override feature. Resolves: https://github.com/ksh93/ksh/issues/217 --- NEWS | 5 ++ src/cmd/ksh93/TYPES | 2 +- src/cmd/ksh93/bltins/typeset.c | 2 +- src/cmd/ksh93/data/variables.c | 1 + src/cmd/ksh93/include/variables.h | 3 +- src/cmd/ksh93/sh.1 | 70 +++++++++++++++++++++------- src/cmd/ksh93/sh/macro.c | 76 +++++++++++------------------- src/cmd/ksh93/tests/builtins.sh | 3 -- src/cmd/ksh93/tests/pty.sh | 26 +++++++++-- src/cmd/ksh93/tests/tilde.sh | 77 ++++++++++++++++++++++++++++++- src/cmd/ksh93/tests/variables.sh | 1 + 11 files changed, 190 insertions(+), 76 deletions(-) diff --git a/NEWS b/NEWS index ebb10b1ad..3001529d3 100644 --- a/NEWS +++ b/NEWS @@ -5,6 +5,11 @@ Any uppercase BUG_* names are modernish shell bug IDs. 2021-03-16: +- Tilde expansion can now be extended or modified by defining a .sh.tilde.get + or .sh.tilde.set discipline function. This replaces a 2004 undocumented + attempt to add this functionality via a .sh.tilde built-in, which never + worked and crashed the shell. See the manual for details on the new method. 
+ - Fixed a bug in interactive shells: if a variable used by the shell called a discipline function (such as PS1.get() or COLUMNS.set()), the value of $? was set to the exit status of the discipline function instead of the last diff --git a/src/cmd/ksh93/TYPES b/src/cmd/ksh93/TYPES index 5b00a686b..8a02131fa 100644 --- a/src/cmd/ksh93/TYPES +++ b/src/cmd/ksh93/TYPES @@ -17,7 +17,7 @@ To define a type, use where definition contains assignment commands, declaration commands, and function definitions. A declaration command (for example typeset, readonly, and export), is a built-in that differs from other builtins in -that tilde substitution is performed on arguments after an =, assignments +that tilde expansion is performed on arguments after an =, assignments do not have to precede the command name, and field splitting and pathname expansion is not performed on the arguments. For example, diff --git a/src/cmd/ksh93/bltins/typeset.c b/src/cmd/ksh93/bltins/typeset.c index 76851d5e2..e2e729d03 100644 --- a/src/cmd/ksh93/bltins/typeset.c +++ b/src/cmd/ksh93/bltins/typeset.c @@ -1373,7 +1373,7 @@ static int print_namval(Sfio_t *file,register Namval_t *np,register int flag, st return(0); if(nv_isattr(np,NV_NOPRINT|NV_INTEGER)==NV_NOPRINT) { - if(is_abuiltin(np) && strcmp(np->nvname,".sh.tilde")) + if(is_abuiltin(np)) sfputr(file,nv_name(np),'\n'); return(0); } diff --git a/src/cmd/ksh93/data/variables.c b/src/cmd/ksh93/data/variables.c index a029e3bd5..8c8f0032e 100644 --- a/src/cmd/ksh93/data/variables.c +++ b/src/cmd/ksh93/data/variables.c @@ -102,6 +102,7 @@ const struct shtable2 shtab_variables[] = ".sh.math", 0, (char*)0, ".sh.pool", 0, (char*)0, ".sh.pid", NV_INTEGER|NV_NOFREE, (char*)0, + ".sh.tilde", 0, (char*)0, "SHLVL", NV_INTEGER|NV_NOFREE|NV_EXPORT, (char*)0, #if SHOPT_MULTIBYTE "CSWIDTH", 0, (char*)0, diff --git a/src/cmd/ksh93/include/variables.h b/src/cmd/ksh93/include/variables.h index 76c409159..3320fa41b 100644 --- a/src/cmd/ksh93/include/variables.h +++ 
b/src/cmd/ksh93/include/variables.h @@ -91,6 +91,7 @@ #define SH_MATHNOD (shgd->bltin_nodes+61) #define SH_JOBPOOL (shgd->bltin_nodes+62) #define SH_PIDNOD (shgd->bltin_nodes+63) -#define SHLVL (shgd->bltin_nodes+64) +#define SH_TILDENOD (shgd->bltin_nodes+64) +#define SHLVL (shgd->bltin_nodes+65) #endif /* SH_VALNOD */ diff --git a/src/cmd/ksh93/sh.1 b/src/cmd/ksh93/sh.1 index 2a619d8a3..007070e4f 100644 --- a/src/cmd/ksh93/sh.1 +++ b/src/cmd/ksh93/sh.1 @@ -782,11 +782,11 @@ Preset aliases can be unset or redefined. .B "r=\(fmhist \-s\(fm" .PD .RE -.SS Tilde Substitution. +.SS Tilde Expansion. After alias substitution is performed, each word is checked to see if it begins with an unquoted .BR \(ap . -For tilde substitution, +For tilde expansion, .I word\^ also refers to the .I word\^ @@ -794,10 +794,12 @@ portion of parameter expansion (see .I "Parameter Expansion\^" below). -If it does, then the word up to a +If a +.I word\^ +is preceded by a tilde, then it is checked up to a .B / -is checked to see if it matches a user name in the -password database (See +to see if it matches a user name in the +password database (see .IR getpwname (3).) If a match is found, the .B \(ap @@ -810,26 +812,32 @@ by itself, or in front of a .BR / , is replaced by .SM -.BR $HOME . +.BR $HOME , +unless the +.B HOME +variable is unset, in which case +the current user's home directory as configured in the operating system +is used. A .B \(ap followed by a .B + or .B \- -is replaced by the value of +is replaced by .B .SM $PWD -and +or .B .SM $OLDPWD respectively. .PP In addition, when expanding a -.IR "variable assignment" , -.I tilde -substitution is attempted when +variable assignment (see +.I Variable Assignments +above), +tilde expansion is attempted when the value of the assignment begins with a .BR \(ap , @@ -837,11 +845,41 @@ and when a .B \(ap appears after a .BR : . -The +A .B : -also terminates a -.B \(ap -login name. +also terminates a user name following a +.BR \(ap . 
+.PP +The tilde expansion mechanism may be extended or modified +by defining one of the discipline functions +.B .sh.tilde.set +or +.B .sh.tilde.get +(see +.I Functions +and +.I Discipline Functions +below). +If either exists, +then upon encountering a tilde word to expand, +that function is called with the tilde word assigned to either +.B .sh.value +(for the +.B .sh.tilde.set +function) or +.B .sh.tilde +(for the +.B .sh.tilde.get +function). +Performing tilde expansion within a discipline function will not recursively +call that function, but default tilde expansion remains active, +so literal tildes should still be quoted where required. +Either function may assign a replacement string to +.BR .sh.value . +If this value is non-empty and does not start with a +.BR \(ap , +it replaces the default tilde expansion when the function terminates. +Otherwise, the tilde expansion is left unchanged. .SS Command Substitution. The standard output from a command list enclosed in parentheses preceded by a dollar sign ( @@ -5549,7 +5587,7 @@ Commands that are preceded by a \(dd symbol below are Any following words that are in the format of a variable assignment are expanded with the same rules as a variable assignment. 
-This means that tilde substitution is performed after the +This means that tilde expansion is performed after the .B = sign, array assignments of the form \f2varname\^\fP\f3=(\fP\f2assign_list\^\fP\f3)\fP diff --git a/src/cmd/ksh93/sh/macro.c b/src/cmd/ksh93/sh/macro.c index 11c8d1c08..650371197 100644 --- a/src/cmd/ksh93/sh/macro.c +++ b/src/cmd/ksh93/sh/macro.c @@ -2619,64 +2619,44 @@ static int charlen(const char *string,int len) } } -/* - * This is the default tilde discipline function - */ -static int sh_btilde(int argc, char *argv[], Shbltin_t *context) -{ - Shell_t *shp = context->shp; - char *cp = sh_tilde(shp,argv[1]); - NOT_USED(argc); - if(!cp) - cp = argv[1]; - sfputr(sfstdout, cp, '\n'); - return(0); -} - /* * <offset> is byte offset for beginning of tilde string */ static void tilde_expand2(Shell_t *shp, register int offset) { - char shtilde[10], *av[3], *ptr=stkfreeze(shp->stk,1); - Sfio_t *iop, *save=sfstdout; - Namval_t *np; - static int beenhere=0; - strcpy(shtilde,".sh.tilde"); - np = nv_open(shtilde,shp->fun_tree, NV_VARNAME|NV_NOARRAY|NV_NOASSIGN|NV_NOFAIL); - if(np && !beenhere) + char *cp = NIL(char*); /* character pointer for tilde expansion result */ + char *stakp = stakptr(0); /* current stack object (&stakp[offset] is tilde string) */ + int curoff = staktell(); /* current offset of current stack object */ + static char block; /* for disallowing tilde expansion in .get/.set to change ${.sh.tilde} */ + /* + * Allow overriding tilde expansion with a .sh.tilde.set or .get discipline function. 
+ */ + if(!block && SH_TILDENOD->nvfun && SH_TILDENOD->nvfun->disc) { - beenhere = 1; - sh_addbuiltin(shtilde,sh_btilde,0); - nv_onattr(np,NV_EXPORT); + stakfreeze(1); /* terminate current stack object to avoid data corruption */ + block++; + nv_putval(SH_TILDENOD, &stakp[offset], 0); + cp = nv_getval(SH_TILDENOD); + block--; + if(cp[0]=='\0' || cp[0]=='~') + cp = NIL(char*); /* do not use empty or unexpanded result */ + stakset(stakp,curoff); /* restore stack to state on function entry */ } - av[0] = ".sh.tilde"; - av[1] = &ptr[offset]; - av[2] = 0; - iop = sftmp((IOBSIZE>PATH_MAX?IOBSIZE:PATH_MAX)+1); - sfset(iop,SF_READ,0); - sfstdout = iop; - if(np) - sh_fun(np, (Namval_t*)0, av); - else - sh_btilde(2, av, &shp->bltindata); - sfstdout = save; - stkset(shp->stk,ptr, offset); - sfseek(iop,(Sfoff_t)0,SEEK_SET); - sfset(iop,SF_READ,1); - if(ptr = sfreserve(iop, SF_UNBOUND, -1)) + /* + * Perform default tilde expansion unless overridden. + * Write the result to the stack, if any. + */ + stakputc(0); + if(!cp) + cp = sh_tilde(shp,&stakp[offset]); + if(cp) { - Sfoff_t n = sfvalue(iop); - while(ptr[n-1]=='\n') - n--; - if(n==1 && fcpeek(0)=='/' && ptr[n-1]) - n--; - if(n) - sfwrite(shp->stk,ptr,n); + stakseek(offset); + if(!(cp[0]=='/' && !cp[1] && fcpeek(0)=='/')) + stakputs(cp); /* for ~ == /, avoid ~/foo -> //foo */ } else - sfputr(shp->stk,av[1],0); - sfclose(iop); + stakseek(curoff); } /* diff --git a/src/cmd/ksh93/tests/builtins.sh b/src/cmd/ksh93/tests/builtins.sh index 297be3928..1cc4b028c 100755 --- a/src/cmd/ksh93/tests/builtins.sh +++ b/src/cmd/ksh93/tests/builtins.sh @@ -689,9 +689,6 @@ v=$($SHELL 2> /dev/null +o rc -ic $'getopts a:bc: opt --man\nprint $?') read baz <<< 'foo\\\\bar' [[ $baz == 'foo\\bar' ]] || err_exit 'read of foo\\\\bar not getting foo\\bar' -: ~root -[[ $(builtin) == *.sh.tilde* ]] && err_exit 'builtin contains .sh.tilde' - # ====== # Check that I/O errors are detected actual=$( diff --git a/src/cmd/ksh93/tests/pty.sh 
b/src/cmd/ksh93/tests/pty.sh index e34901c68..b5c013482 100755 --- a/src/cmd/ksh93/tests/pty.sh +++ b/src/cmd/ksh93/tests/pty.sh @@ -40,8 +40,6 @@ Darwin | FreeBSD | Linux ) exit 0 ;; esac -integer lineno=1 - # On some systems, the stty command does not appear to work correctly on a pty pseudoterminal. # To avoid false regressions, we have to set 'erase' and 'kill' on the real terminal. if test -t 0 2>/dev/null /dev/null || err_exit 'tilde fails inside a script run by name' +# ====== +# Tilde expansion should not change the value of $HOME. + +HOME=/ +: ~/foo +[[ $HOME == / ]] || err_exit "tilde expansion changes \$HOME (value: $(printf %q "$HOME"))" + # ====== # After unsetting HOME, ~ should expand to the current user's OS-configured home directory. + unset HOME +exp=~${ id -un; } got=~ -[[ $got == /* && -d $got ]] || err_exit "expansion of bare tilde breaks after unsetting HOME (value: $(printf %q "$got"))" -HOME=$tmp +[[ $got == "$exp" ]] || err_exit 'expansion of bare tilde breaks after unsetting HOME' \ + "(expected $(printf %q "$exp"), got $(printf %q "$got"))" +HOME=$saveHOME + +# ====== +# Tilde expansion discipline function tests + +# This nonfunctional mess was removed in ksh 93u+m ... +if builtin .sh.tilde 2>/dev/null +then got=$(.sh.tilde & wait "$!" 2>&1) + ((!(e = $?))) || err_exit ".sh.tilde builtin crashes the shell" \ + "(got status $e$( ((e>128)) && print -n / && kill -l "$e"), $(printf %q "$got"))" +fi + +# ... and replaced by a proper use of discipline functions that allows customising tilde expansion. 
+((.sh.version >= 20210316)) && +for disc in get set +do ( + ulimit -t unlimited 2>/dev/null # fork subshell to cope with a possible crash + + eval ".sh.tilde.$disc() + { + case \${.sh.${ [[ $disc == get ]] && print tilde || print value; }} in + '~tmp') .sh.value=\$tmp ;; + '~INC') .sh.value=\$((++i)) ;; + '~spc') .sh.value=$'one\ttwo three\n\tfour' ;; + '~') .sh.value=~/addition ;; # this should not recurse + esac + }" + + got=~/foo + exp=$HOME/addition/foo + [[ $got == "$exp" ]] || err_exit "$disc discipline: bare tilde expansion:" \ + "expected $(printf %q "$exp"), got $(printf %q "$got")" + + .sh.tilde=oldvalue + got=$(print ~tmp/foo.$$; print "${.sh.tilde}") + exp=$tmp/foo.$$$'\n'$tmp + [[ $got == "$exp" ]] || err_exit "$disc discipline: result left in \${.sh.tilde}:" \ + "expected $(printf %q "$tmp"), got $(printf %q "${.sh.tilde}")" + [[ ${.sh.tilde} == oldvalue ]] || err_exit "$disc discipline: \${.sh.tilde} subshell leak" + + i=0 + set -- ~INC ~INC ~INC ~INC ~INC + got=$#,$1,$2,$3,$4,$5 + exp=5,1,2,3,4,5 + [[ $got == "$exp" ]] || err_exit "$disc discipline: counter:" \ + "expected $(printf %q "$exp"), got $(printf %q "$got")" + ((i==5)) || err_exit "$disc discipline: counter: $i != 5" + + set -- ~spc ~spc ~spc + got=$#,$1,$2,$3 + exp=$'3,one\ttwo three\n\tfour,one\ttwo three\n\tfour,one\ttwo three\n\tfour' + [[ $got == "$exp" ]] || err_exit "$disc discipline: quoting of whitespace:" \ + "expected $(printf %q "$exp"), got $(printf %q "$got")" + + print "$Errors" >$tmp/Errors + ) & + wait "$!" 2>crashmsg + if ((!(e = $?))) + then read Errors <$tmp/Errors + else err_exit ".sh.tilde.$disc discipline function crashes the shell" \ + "(got status $e$( ((e>128)) && print -n / && kill -l "$e"), $(printf %q "$(