From 2b9cbbbc8e4171491004aec661ddbb8733534475 Mon Sep 17 00:00:00 2001 From: Martijn Dekker Date: Mon, 29 Nov 2021 08:06:17 +0100 Subject: [PATCH] Fix defining types conditionally and/or in subshells (re: 8ced1daa) This commit mitigates the effects of the hack explained in the referenced commit so that dummy built-in command nodes added by the parser for declaration/assignment purposes do not leak out into the execution level, except in a relatively harmless corner case. Something like if false; then typeset -T Foo_t=(integer -i bar) fi will no longer leave a broken dummy Foo_t declaration command. The same applies to declaration commands created with enum. The corner case remaining is: $ ksh -c 'false && enum E_t=(a b c); E_t -a x=(b b a c)' ksh: E_t: not found Since the 'enum' command is not executed, this should have thrown a syntax error on the 'E_t -a' declaration: ksh: syntax error at line 1: `(' unexpected This is because the -c script is parsed entirely before being executed, so E_t is recognised as a declaration built-in at parse time. However, the 'not found' error shows that it was successfully eliminated at execution time, so the inconsistent state will no longer persist. This fix now allows another fix to be effective as well: since built-ins do not know about virtual subshells, fork a virtual subshell into a real subshell before adding any built-ins. src/cmd/ksh93/sh/parse.c: - Add a pair of functions, dcl_hactivate() and dcl_dehacktivate(), that (de)activate an internal declaration built-ins tree into which check_typedef() can pre-add dummy type declaration command nodes. A viewpath from the main built-ins tree to this internal tree is added, unifying the two for search purposes and causing new nodes to be added to the internal tree. When parsing is done, we close that viewpath. This hides those pre-added nodes at execution time. Since the parser is sometimes called recursively (e.g. for command substitutions), keep track of this and only activate and deactivate at the first level. - We also need to catch errors. This is done by setting libast's error_info.exit variable to a dcl_exit() function that tidies up and then passes control to the original (usually sh_exit()). - sh_cmd(): This is the most central function in the parser. You'd think it was sh_parse(), but $(modern)-form command substitutions use sh_dolparen() instead. Both call sh_cmd(). So let's simply add a dcl_hacktivate() call at the beginning and a dcl_deactivate() call at the end. - assign(): This function calls path_search(), which among many other things executes an FATH search, which may execute arbitrary code at parse time (!!!). So, regardless of recursion level, forcibly dehacktivate() to avoid those ugly parser side effects returning in that context. src/cmd/ksh93/bltins/enum.c: b_enum(): - Fork a virtual subshell before adding a built-in. src/cmd/ksh93/sh/xec.c: sh_exec(): - Fork a virtual subshell when detecting typeset's -T option. Improves fix to https://github.com/ksh93/ksh/issues/256 --- NEWS | 7 ++++ src/cmd/ksh93/bltins/enum.c | 6 ++- src/cmd/ksh93/data/builtins.c | 2 +- src/cmd/ksh93/include/version.h | 2 +- src/cmd/ksh93/sh.1 | 4 -- src/cmd/ksh93/sh/nvtype.c | 2 +- src/cmd/ksh93/sh/parse.c | 65 +++++++++++++++++++++++++++++---- src/cmd/ksh93/sh/xec.c | 2 + src/cmd/ksh93/tests/enum.sh | 5 +++ src/cmd/ksh93/tests/types.sh | 24 ++++++++++-- 10 files changed, 101 insertions(+), 18 deletions(-) diff --git a/NEWS b/NEWS index 1be10d9c8..e03d5a2a7 100644 --- a/NEWS +++ b/NEWS @@ -3,6 +3,13 @@ For full details, see the git log at: https://github.com/ksh93/ksh Any uppercase BUG_* names are modernish shell bug IDs. +2021-11-29: + +- Ksh no longer behaves badly when parsing a type definition command + ('typeset -T' or 'enum') without executing it or when executing it in + a subshell. Types can now safely be defined in subshells and defined + conditionally as in 'if condition; then enum ...; fi'. + 2021-11-24: - The --posix mode was amended to stop the '.' command (but not 'source') from diff --git a/src/cmd/ksh93/bltins/enum.c b/src/cmd/ksh93/bltins/enum.c index 782ecfd51..c84794385 100644 --- a/src/cmd/ksh93/bltins/enum.c +++ b/src/cmd/ksh93/bltins/enum.c @@ -21,7 +21,7 @@ #pragma prototyped #include "defs.h" -#define ENUM_ID "enum (ksh 93u+m) 2021-11-23" +#define ENUM_ID "enum (ksh 93u+m) 2021-11-29" const char sh_optenum[] = "[-?@(#)$Id: " ENUM_ID " $\n]" @@ -239,6 +239,10 @@ int b_enum(int argc, char** argv, Shbltin_t *context) error(ERROR_USAGE|2, "%s", optusage(NiL)); return 1; } +#ifndef STANDALONE + if(shp->subshell && !shp->subshare) + sh_subfork(); +#endif while(cp = *argv++) { if(!(np = nv_open(cp, (void*)0, NV_VARNAME|NV_NOADD)) || !(ap=nv_arrayptr(np)) || ap->fun || (sz=ap->nelem&(((1L<lastline) #ifndef NIL @@ -183,13 +187,10 @@ static void typeset_order(const char *str,int line) } /* - * Pre-add type declaration built-ins at parse time to avoid - * syntax errors when using -c, shcomp, '.'/source or eval. + * This function handles linting for 'typeset' options via typeset_order(). * - * This hack has a bad side effect: defining a type with 'typeset -T' or 'enum' - * in a subshell or an 'if false' block will cause an inconsistent state. But - * as these built-ins alter the syntax of the shell, it's necessary for making - * them work if we're parsing an entire script before or without executing it. + * Also, upon parsing typeset -T or enum, it pre-adds the type declaration built-ins that these would create to + * an internal tree to avoid syntax errors upon pre-execution parsing of assignment-arguments with parentheses. * * intypeset==1 for typeset & friends; intypeset==2 for enum */ @@ -250,6 +251,43 @@ static void check_typedef(struct comnod *tp, char intypeset) if(cp) nv_onattr(sh_addbuiltin(cp, (Shbltin_f)SYSTRUE->nvalue.bfp, NIL(void*)), NV_BLTIN|BLT_DCL); } +/* + * (De)activate an internal declaration built-ins tree into which check_typedef() can pre-add dummy type + * declaration command nodes, allowing correct parsing of assignment-arguments with parentheses for custom + * type declaration commands before actually executing the commands that create those commands. + * + * A viewpath from the main built-ins tree to this internal tree is added, unifying the two for search + * purposes and causing new nodes to be added to the internal tree. When parsing is done, we close that + * viewpath. This hides those pre-added nodes at execution time, avoiding an inconsistent state if a type + * creation command is parsed but not executed. + */ +static void dcl_hacktivate(void) +{ + if(!dcl_tree) + dcl_tree = dtopen(&_Nvdisc, Dtoset); + if(dcl_recursion++) + return; + dtview(sh.bltin_tree, dcl_tree); + orig_exit = error_info.exit; + error_info.exit = dcl_exit; +} +static void dcl_dehacktivate(void) +{ +#if !_AST_ksh_release + if(!dcl_recursion || !dcl_tree) + abort(); +#endif + if(--dcl_recursion) + return; + error_info.exit = orig_exit; + dtview(sh.bltin_tree, NIL(Dt_t*)); +} +static noreturn void dcl_exit(int e) +{ + dcl_dehacktivate(); + (*error_info.exit)(e); + UNREACHABLE(); +} /* * Make a parent node for fork() or io-redirection @@ -508,6 +546,7 @@ static Shnode_t *sh_cmd(Lex_t *lexp, register int sym, int flag) { register Shnode_t *left, *right; register int type = FINT|FAMP; + dcl_hacktivate(); if(sym==NL) lexp->lasttok = 0; left = list(lexp,flag); @@ -549,6 +588,7 @@ static Shnode_t *sh_cmd(Lex_t *lexp, register int sym, int flag) sh_syntax(lexp); } } + dcl_dehacktivate(); return(left); } @@ -1050,8 +1090,19 @@ static struct argnod *assign(Lex_t *lexp, register struct argnod *ap, int type) if(array && type==NV_TYPE) { struct argnod *arg = lexp->arg; + int save_recursion = dcl_recursion; + int p; + /* + * Forcibly deactivate the dummy declaration built-ins tree as path_search() does an + * FPATH search, which may cause arbitrary ksh code to be executed. Yes, at parse time. + */ n = lexp->token; - if(path_search(lexp->sh,lexp->arg->argval,NIL(Pathcomp_t**),1) && (np=nv_search(lexp->arg->argval,lexp->sh->fun_tree,0)) && nv_isattr(np,BLT_DCL)) + dcl_recursion = 1; + dcl_dehacktivate(); + p = path_search(lexp->sh,lexp->arg->argval,NIL(Pathcomp_t**),1); + dcl_hacktivate(); + dcl_recursion = save_recursion; + if(p && (np=nv_search(lexp->arg->argval,lexp->sh->fun_tree,0)) && nv_isattr(np,BLT_DCL)) { lexp->token = n; lexp->arg = arg; diff --git a/src/cmd/ksh93/sh/xec.c b/src/cmd/ksh93/sh/xec.c index 87525fc9c..8610667fb 100644 --- a/src/cmd/ksh93/sh/xec.c +++ b/src/cmd/ksh93/sh/xec.c @@ -1090,6 +1090,8 @@ int sh_exec(register const Shnode_t *t, int flags) #if SHOPT_TYPEDEF else if(argn>=3 && checkopt(com,'T')) { + if(shp->subshell && !shp->subshare) + sh_subfork(); # if SHOPT_NAMESPACE if(shp->namespace) { diff --git a/src/cmd/ksh93/tests/enum.sh b/src/cmd/ksh93/tests/enum.sh index 437752b86..d8402492d 100755 --- a/src/cmd/ksh93/tests/enum.sh +++ b/src/cmd/ksh93/tests/enum.sh @@ -155,6 +155,11 @@ got=$(eval 2>&1 'command command command enum -i -i -iii --igno -ii PARSER_t=(r exp='PARSER_t -r -A hack=([C]=g)' [[ $got == "$exp" ]] || err_exit "incorrect typeset output for enum with command prefix and options" \ "(expected $(printf %q "$exp"); got $(printf %q "$got"))" +PATH=/dev/null command -v PARSER_t >/dev/null && err_exit "PARSER_t leaked out of subshell" +if false +then enum PARSER2_t=(a b) +fi +PATH=/dev/null command -v PARSER2_t >/dev/null && err_exit "PARSER2_t incompletely defined though definition was never executed" # ====== exit $((Errors<125?Errors:125)) diff --git a/src/cmd/ksh93/tests/types.sh b/src/cmd/ksh93/tests/types.sh index 1b1c1a1d3..142f861ca 100755 --- a/src/cmd/ksh93/tests/types.sh +++ b/src/cmd/ksh93/tests/types.sh @@ -452,20 +452,24 @@ cd "$tmp" FPATH=$PWD PATH=$PWD:$PATH cat > A_t <<- \EOF + if false + then typeset -T Parser_shenanigans=(typeset -i foo) + fi typeset -T A_t=( B_t b ) EOF cat > B_t <<- \EOF + PATH=/dev/null command -v Parser_shenanigans typeset -T B_t=( integer n=5 ) EOF unset n -if n=$(FPATH=$PWD PATH=$PWD:$PATH $SHELL 2> /dev/null -c 'A_t a; print ${a.b.n}') -then (( n==5 )) || err_exit 'dynamic loading of types gives wrong result' -else err_exit 'unable to load types dynamically' +if n=$(FPATH=$PWD PATH=$PWD:$PATH "$SHELL" -c 'A_t a; print ${a.b.n}' 2>&1) +then [[ $n == '5' ]] || err_exit "dynamic loading of types gives wrong result (got $(printf %q "$n"))" +else err_exit "unable to load types dynamically (got $(printf %q "$n"))" fi # check that typeset -T reproduces a type. @@ -639,5 +643,19 @@ got=$($SHELL -c 'enum Foo_t=(foo bar); typeset -T') [[ -z $got ]] || err_exit "Types created by enum are listed with 'typeset -T'" \ "(got $(printf %q "$got"))" +# ====== +# Parser shenanigans. +if false +then typeset -T PARSER_t=(typeset name=foobar) +fi +PATH=/dev/null command -v PARSER_t >/dev/null && err_exit "PARSER_t incompletely defined though definition was never executed" + +unset v +got=$( set +x; redirect 2>&1; typeset -T Subsh_t=(typeset -i x); Subsh_t -a v=( (x=1) (x=2) (x=3) ); typeset -p v ) +exp='Subsh_t -a v=((typeset -i x=1) (typeset -i x=2) (typeset -i x=3))' +[[ $got == "$exp" ]] || err_exit "bad typeset output for Subsh_t" \ + "(expected $(printf %q "$exp"), got $(printf %q "$got"))" +PATH=/dev/null command -v Subsh_t >/dev/null && err_exit "Subsh_t leaked out of subshell" + # ====== exit $((Errors<125?Errors:125))