From 6a0e9a1a751a644aaf3b2dcb5ceadeb8d0f4a3f1 Mon Sep 17 00:00:00 2001 From: Martijn Dekker Date: Mon, 1 Feb 2021 00:28:18 +0000 Subject: [PATCH] Tweak and regress-test 'command -x' (re: 66e1d446) Turns out the assumption I was operating on, that Linux and macOS align arguments on 32 or 64 bit boundaries, is incorrect -- they just need some extra bytes per argument. So we can use a bit more of the arguments buffer on these systems than I thought. src/cmd/ksh93/features/externs: - Change the feature test to simply detect the # of extra bytes per argument needed. On *BSD and commercial Unices, ARG_EXTRA_BYTES shows as zero; on Linux and macOS (64-bit), this yields 8. On Linux (32-bit), this yields 4. src/cmd/ksh93/sh/path.c: path_xargs(): - Do not try to calculate alignment, just add ARG_EXTRA_BYTES to each argument. - Also add this when substracting the length of environment variables and leading and trailing static command arguments. src/cmd/ksh93/tests/path.sh: - Test command -v/-V with -x. - Add a robust regression test for command -x. src/cmd/ksh93/data/builtins.c, src/cmd/ksh93/sh.1: - Tweak docs. Glob patterns also expand to multiple words. --- src/cmd/ksh93/data/builtins.c | 5 ++- src/cmd/ksh93/features/externs | 39 +++++++---------- src/cmd/ksh93/include/version.h | 2 +- src/cmd/ksh93/sh.1 | 12 +++--- src/cmd/ksh93/sh/path.c | 13 +++--- src/cmd/ksh93/tests/path.sh | 76 +++++++++++++++++++++++++++++++-- 6 files changed, 103 insertions(+), 44 deletions(-) diff --git a/src/cmd/ksh93/data/builtins.c b/src/cmd/ksh93/data/builtins.c index 7e57bdce4..9080b5a43 100644 --- a/src/cmd/ksh93/data/builtins.c +++ b/src/cmd/ksh93/data/builtins.c @@ -487,13 +487,14 @@ const char sh_optcommand[] = "[V?Equivalent to \bwhence \b-v\b \acmd\a [\aarg\a ...]].]" "[x?Search \acmd\a as an external command, bypassing built-ins. " "If the \aarg\as include a word " - "such as \b\"$@\"\b or \b\"${array[@]]}\"\b " + "such as \b\"$@\"\b or \b\"*.txt\"\b " "that expands to multiple arguments, " "and the size of the expanded \aarg\a list " "exceeds \bgetconf ARG_MAX\b bytes, " "then \acmd\a will be run multiple times, " "dividing the \aarg\as over the invocations. " - "Any \aarg\as that come before the first \b\"$@\"\b or similar, " + "Any \aarg\as that come before the first " + "word that expands to multiple arguments, " "as well as any that follow the last such word, " "are considered static and will be repeated for each invocation " "so as to allow all invocations to use the same command options. " diff --git a/src/cmd/ksh93/features/externs b/src/cmd/ksh93/features/externs index 4f58bf7e6..8f91456c0 100644 --- a/src/cmd/ksh93/features/externs +++ b/src/cmd/ksh93/features/externs @@ -12,18 +12,13 @@ extern nice int (int) extern setreuid int (uid_t,uid_t) extern setregid int (gid_t,gid_t) -tst note{ determining data alignment factor for arguments list }end output{ +tst note{ determining extra bytes per argument for arguments list }end output{ /* - * Feature test to figure out if this OS does data alignment on - * the arguments list of a process, and if so, at how many bits. - * Outputs an appropriate #define ARG_ALIGN_BITS. + * Figure out if this OS requires extra bytes per argument + * in the arguments list of a process. + * Outputs an appropriate #define ARG_EXTRA_BYTES. * Without this, 'command -x' failed with E2BIG on macOS and Linux even * if all the arguments should fit in ARG_MAX based on their length. - * - * Strategy: first try to fill as many single-character arguments as - * should fit in ARG_MAX without alignment. If that fails with E2BIG, - * then start with a 2-byte alignment factor and keep doubling it - * until we either succeed or exceed an absurdly large value. */ /* AST includes */ @@ -48,13 +43,13 @@ tst note{ determining data alignment factor for arguments list }end output{ int main(int argc,char *argv[]) { - int align_bytes = 0, envlen = 0, argmax, i; + int extra_bytes = 0, envlen = 0, argmax, i; pid_t childpid; - error_info.id="args list aligment test (parent)"; + error_info.id="ARG_EXTRA_BYTES test (parent)"; for(i=0; environ[i]; i++) envlen += strlen(environ[i]) + 1; - argmax = strtoimax(astconf("ARG_MAX",NiL,NiL),NiL,0) - envlen - 1024; + argmax = strtoimax(astconf("ARG_MAX",NiL,NiL),NiL,0) - 2 * envlen; if (argmax < 2048) { error(ERROR_ERROR|2, "argmax too small"); @@ -67,7 +62,7 @@ tst note{ determining data alignment factor for arguments list }end output{ /* child */ int bytec; - error_info.id="args list aligment test (child)"; + error_info.id="ARG_EXTRA_BYTES test (child)"; argv = (char **)stakalloc((argmax / 2 + 1) * sizeof(char*)); argc = bytec = 0; while(bytec < argmax) @@ -78,9 +73,8 @@ tst note{ determining data alignment factor for arguments list }end output{ argv[argc] = "true"; else argv[argc] = "x"; - bytec += strlen(argv[argc]) + 1 + align_bytes; - if(align_bytes) - bytec += bytec % align_bytes; + /* also add 1 for terminating zero byte */ + bytec += strlen(argv[argc]) + 1 + extra_bytes; argc++; } argv[argc] = (char*)0; @@ -114,11 +108,8 @@ tst note{ determining data alignment factor for arguments list }end output{ } if (exitstatus == 0) break; /* yay :) */ - if (!align_bytes) - align_bytes = 2; - else - align_bytes *= 2; - if (align_bytes > 256) + extra_bytes++; + if (extra_bytes > 256) { error(ERROR_ERROR|2, "giving up"); return 1; @@ -126,10 +117,10 @@ tst note{ determining data alignment factor for arguments list }end output{ } } sfprintf(sfstdout, - "#define ARG_ALIGN_BYTES\t%d\t/* data alignment factor for arguments list */\n", - align_bytes); + "#define ARG_EXTRA_BYTES\t%d\t/* extra bytes per argument for arguments list */\n", + extra_bytes); return 0; } }end fail{ - echo "#define ARG_ALIGN_BYTES 16 /* BUG: arg list alignment factor test failed; assuming 16 */" + echo "#define ARG_EXTRA_BYTES 8 /* BUG: test failed; assuming 8 */" }end diff --git a/src/cmd/ksh93/include/version.h b/src/cmd/ksh93/include/version.h index fdfa28de4..073c261cd 100644 --- a/src/cmd/ksh93/include/version.h +++ b/src/cmd/ksh93/include/version.h @@ -20,7 +20,7 @@ #define SH_RELEASE_FORK "93u+m" /* only change if you develop a new ksh93 fork */ #define SH_RELEASE_SVER "1.0.0-alpha" /* semantic version number: https://semver.org */ -#define SH_RELEASE_DATE "2021-01-30" /* must be in this format for $((.sh.version)) */ +#define SH_RELEASE_DATE "2021-01-31" /* must be in this format for $((.sh.version)) */ #define SH_RELEASE_CPYR "(c) 2020-2021 Contributors to ksh " SH_RELEASE_FORK /* Scripts sometimes field-split ${.sh.version}, so don't change amount of whitespace. */ diff --git a/src/cmd/ksh93/sh.1 b/src/cmd/ksh93/sh.1 index dfcff5d7d..ed8236d47 100644 --- a/src/cmd/ksh93/sh.1 +++ b/src/cmd/ksh93/sh.1 @@ -5847,16 +5847,16 @@ The option runs .I name\^ as an external command, bypassing built-ins. -If the arguments contain a word that expands to multiple arguments, such as -\f3"$@"\fP or \f3"${array[@]}"\fP, then the +If the arguments contain at least one word that expands to multiple arguments, +such as \f3"$@"\fP or \f3*.txt\fP, then the .B \-x option also allows executing external commands with argument lists that are longer than the operating system allows. This functionality is similar to .BR xargs (1) but is easier to use. The shell does this by invoking the external command multiple times if needed, dividing the expanded argument list over the -invocations. Any arguments that come before the first \f3"$@"\fP or similar -expansion, as well as any that follow the last \f3"$@"\fP or similar, are +invocations. Any arguments that come before the first word that expands to +multiple arguments, as well as any that follow the last such word, are considered static arguments and are repeated for each invocation. This allows each invocation to use the same command options, as well as the same trailing destination arguments for commands like @@ -5869,8 +5869,8 @@ exits with the status of the invocation that had the highest exit status. (Note that .B "command \-x" may still fail with an "argument list too long" error if a single argument -exceeds the maximum length of the argument list, or if no \f3"$@"\fP or -similar expansion was used.) +exceeds the maximum length of the argument list, or if a long arguments +list contains no word that expands to multiple arguments.) .TP \(dd \f3compound\fP \f2vname\fP\*(OK\f3=\fP\f2value\^\fP\*(CK .\|.\|. Causes each diff --git a/src/cmd/ksh93/sh/path.c b/src/cmd/ksh93/sh/path.c index 72353f7af..b9d30ed0f 100644 --- a/src/cmd/ksh93/sh/path.c +++ b/src/cmd/ksh93/sh/path.c @@ -164,23 +164,20 @@ static pid_t path_xargs(Shell_t *shp,const char *path, char *argv[],char *const return((pid_t)-1); size = shp->gd->lim.arg_max-2048; for(ev=envp; cp= *ev; ev++) - size -= strlen(cp)+1; + size -= strlen(cp) + 1 + ARG_EXTRA_BYTES; for(av=argv; (cp= *av) && av< &argv[shp->xargmin]; av++) - size -= strlen(cp)+1; + size -= strlen(cp) + 1 + ARG_EXTRA_BYTES; for(av=avlast; cp= *av; av++,nlast++) - size -= strlen(cp)+1; + size -= strlen(cp) + 1 + ARG_EXTRA_BYTES; av = &argv[shp->xargmin]; if(!spawn) job_clear(); shp->exitval = 0; while(av0 && av $tmp/ls chmod +x $tmp/ls expect=/dev/null -actual=$(PATH=$tmp; redirect 2>&1; hash ls; command -p ls /dev/null) +actual=$(set +x; PATH=$tmp; redirect 2>&1; hash ls; command -p ls /dev/null) [[ $actual == "$expect" ]] || err_exit "'command -p' fails to search default path if tracked alias exists" \ "(expected $(printf %q "$expect"), got $(printf %q "$actual"))" -actual=$(PATH=$tmp; redirect 2>&1; hash ls; command -p ls /dev/null; exit) # the 'exit' disables subshell optimization +actual=$(set +x; PATH=$tmp; redirect 2>&1; hash ls; command -p ls /dev/null; exit) # the 'exit' disables subshell optimization [[ $actual == "$expect" ]] || err_exit "'command -p' fails to search default path if tracked alias exists" \ "(expected $(printf %q "$expect"), got $(printf %q "$actual"))" expect=$(builtin getconf; PATH=$(getconf PATH); whence -p ls) -actual=$(PATH=$tmp; redirect 2>&1; hash ls; command -p -v ls) +actual=$(set +x; PATH=$tmp; redirect 2>&1; hash ls; command -p -v ls) [[ $actual == "$expect" ]] || err_exit "'command -p -v' fails to search default path if tracked alias exists" \ "(expected $(printf %q "$expect"), got $(printf %q "$actual"))" +# Check that adding '-x' makes '-v'/'-V' look up external commands +if ((.sh.version >= 20210130)) +then + exp=$tmp/echo + touch "$exp" + chmod +x "$exp" + got=$(PATH=$tmp; command -vx echo 2>&1) + [[ $got == "$exp" ]] || err_exit "'command -v -x' failed to look up external command in \$PATH" \ + "(expected $(printf %q "$exp"), got $(printf %q "$got"))" + exp="echo is a tracked alias for $exp" + got=$(PATH=$tmp; command -Vx echo 2>&1) + [[ $got == "$exp" ]] || err_exit "'command -V -x' failed to look up external command in \$PATH" \ + "(expected $(printf %q "$exp"), got $(printf %q "$got"))" + exp=$(PATH=${ getconf PATH 2>/dev/null; }; whence -p echo) + got=$(PATH=$tmp; command -pvx echo 2>&1) + [[ $got == "$exp" ]] || err_exit "'command -p -v -x' failed to look up external command in default path" \ + "(expected $(printf %q "$exp"), got $(printf %q "$got"))" + exp="echo is $exp" + got=$(PATH=$tmp; command -pVx echo 2>&1) + [[ $got == "$exp" ]] || err_exit "'command -p -V -x' failed to look up external command in default path" \ + "(expected $(printf %q "$exp"), got $(printf %q "$got"))" +fi + +# 'command -x' used to hang in an endless E2BIG loop on Linux and macOS +ofile=$tmp/command_x_chunks.sh +trap 'sleep_pid=; while kill -9 $pid; do :; done 2>/dev/null; err_exit "'\''command -x'\'' hung"' TERM +{ sleep 15; kill $$; } & +sleep_pid=$! +( + export LC_ALL=C + unset IFS; set +f + builtin getconf && arg_max=$(getconf ARG_MAX) && let arg_max || { err_exit "getconf ARG_MAX not working"; exit 1; } + set +x # trust me, you don't want to xtrace what follows + # let's try to use a good variety of argument lengths + set -- $(typeset -p) $(functions) /dev/* /tmp/* /* * + args=$* + while let "${#args} < 3 * arg_max" + do set -- "$RANDOM" "$@" "$RANDOM" "$@" "$RANDOM" + args=$* + done + print "chunks=0 expargs=$# args=0 expsize=$((${#args}+1)) size=0" + unset args + command -px awk 'BEGIN { + ARGC -= 2; # final static args + for (i=1; i$ofile & +pid=$! +wait $pid +e=$? +trap - TERM +[[ $sleep_pid ]] && kill $sleep_pid +if let "e > 0" +then err_exit "'command -x' test yielded exit status $e$( let "e>128" && print -n / && kill -l "$e")" +fi +if [[ ! -s $ofile ]] +then err_exit "'command -x' test failed to produce output" +else save_Command=$Command + Command+=": ${ofile##*/}" + . "$ofile" + Command=$save_Command + let "args == expargs && size == expsize" || err_exit "'command -x' did not correctly divide arguments" \ + "(expected $expargs args of total size $expsize, got $args args of total size $size;" \ + "divided in $chunks chunks)" +fi + # ====== # whence -a/-v tests