1
0
Fork 0
mirror of git://git.code.sf.net/p/cdesktopenv/code synced 2025-03-09 15:50:02 +00:00

Tweak and regress-test 'command -x' (re: 66e1d446)

Turns out the assumption I was operating on, that Linux and macOS
align arguments on 32 or 64 bit boundaries, is incorrect -- they
just need some extra bytes per argument. So we can use a bit more
of the arguments buffer on these systems than I thought.

src/cmd/ksh93/features/externs:
- Change the feature test to simply detect the # of extra bytes per
  argument needed. On *BSD and commercial Unices, ARG_EXTRA_BYTES
  shows as zero; on Linux and macOS (64-bit), this yields 8. On
  Linux (32-bit), this yields 4.

src/cmd/ksh93/sh/path.c: path_xargs():
- Do not try to calculate alignment, just add ARG_EXTRA_BYTES to
  each argument.
- Also add this when substracting the length of environment
  variables and leading and trailing static command arguments.

src/cmd/ksh93/tests/path.sh:
- Test command -v/-V with -x.
- Add a robust regression test for command -x.

src/cmd/ksh93/data/builtins.c, src/cmd/ksh93/sh.1:
- Tweak docs. Glob patterns also expand to multiple words.
This commit is contained in:
Martijn Dekker 2021-02-01 00:28:18 +00:00
parent f37098f177
commit 6a0e9a1a75
6 changed files with 103 additions and 44 deletions

View file

@ -487,13 +487,14 @@ const char sh_optcommand[] =
"[V?Equivalent to \bwhence \b-v\b \acmd\a [\aarg\a ...]].]" "[V?Equivalent to \bwhence \b-v\b \acmd\a [\aarg\a ...]].]"
"[x?Search \acmd\a as an external command, bypassing built-ins. " "[x?Search \acmd\a as an external command, bypassing built-ins. "
"If the \aarg\as include a word " "If the \aarg\as include a word "
"such as \b\"$@\"\b or \b\"${array[@]]}\"\b " "such as \b\"$@\"\b or \b\"*.txt\"\b "
"that expands to multiple arguments, " "that expands to multiple arguments, "
"and the size of the expanded \aarg\a list " "and the size of the expanded \aarg\a list "
"exceeds \bgetconf ARG_MAX\b bytes, " "exceeds \bgetconf ARG_MAX\b bytes, "
"then \acmd\a will be run multiple times, " "then \acmd\a will be run multiple times, "
"dividing the \aarg\as over the invocations. " "dividing the \aarg\as over the invocations. "
"Any \aarg\as that come before the first \b\"$@\"\b or similar, " "Any \aarg\as that come before the first "
"word that expands to multiple arguments, "
"as well as any that follow the last such word, " "as well as any that follow the last such word, "
"are considered static and will be repeated for each invocation " "are considered static and will be repeated for each invocation "
"so as to allow all invocations to use the same command options. " "so as to allow all invocations to use the same command options. "

View file

@ -12,18 +12,13 @@ extern nice int (int)
extern setreuid int (uid_t,uid_t) extern setreuid int (uid_t,uid_t)
extern setregid int (gid_t,gid_t) extern setregid int (gid_t,gid_t)
tst note{ determining data alignment factor for arguments list }end output{ tst note{ determining extra bytes per argument for arguments list }end output{
/* /*
* Feature test to figure out if this OS does data alignment on * Figure out if this OS requires extra bytes per argument
* the arguments list of a process, and if so, at how many bits. * in the arguments list of a process.
* Outputs an appropriate #define ARG_ALIGN_BITS. * Outputs an appropriate #define ARG_EXTRA_BYTES.
* Without this, 'command -x' failed with E2BIG on macOS and Linux even * Without this, 'command -x' failed with E2BIG on macOS and Linux even
* if all the arguments should fit in ARG_MAX based on their length. * if all the arguments should fit in ARG_MAX based on their length.
*
* Strategy: first try to fill as many single-character arguments as
* should fit in ARG_MAX without alignment. If that fails with E2BIG,
* then start with a 2-byte alignment factor and keep doubling it
* until we either succeed or exceed an absurdly large value.
*/ */
/* AST includes */ /* AST includes */
@ -48,13 +43,13 @@ tst note{ determining data alignment factor for arguments list }end output{
int main(int argc,char *argv[]) int main(int argc,char *argv[])
{ {
int align_bytes = 0, envlen = 0, argmax, i; int extra_bytes = 0, envlen = 0, argmax, i;
pid_t childpid; pid_t childpid;
error_info.id="args list aligment test (parent)"; error_info.id="ARG_EXTRA_BYTES test (parent)";
for(i=0; environ[i]; i++) for(i=0; environ[i]; i++)
envlen += strlen(environ[i]) + 1; envlen += strlen(environ[i]) + 1;
argmax = strtoimax(astconf("ARG_MAX",NiL,NiL),NiL,0) - envlen - 1024; argmax = strtoimax(astconf("ARG_MAX",NiL,NiL),NiL,0) - 2 * envlen;
if (argmax < 2048) if (argmax < 2048)
{ {
error(ERROR_ERROR|2, "argmax too small"); error(ERROR_ERROR|2, "argmax too small");
@ -67,7 +62,7 @@ tst note{ determining data alignment factor for arguments list }end output{
/* child */ /* child */
int bytec; int bytec;
error_info.id="args list aligment test (child)"; error_info.id="ARG_EXTRA_BYTES test (child)";
argv = (char **)stakalloc((argmax / 2 + 1) * sizeof(char*)); argv = (char **)stakalloc((argmax / 2 + 1) * sizeof(char*));
argc = bytec = 0; argc = bytec = 0;
while(bytec < argmax) while(bytec < argmax)
@ -78,9 +73,8 @@ tst note{ determining data alignment factor for arguments list }end output{
argv[argc] = "true"; argv[argc] = "true";
else else
argv[argc] = "x"; argv[argc] = "x";
bytec += strlen(argv[argc]) + 1 + align_bytes; /* also add 1 for terminating zero byte */
if(align_bytes) bytec += strlen(argv[argc]) + 1 + extra_bytes;
bytec += bytec % align_bytes;
argc++; argc++;
} }
argv[argc] = (char*)0; argv[argc] = (char*)0;
@ -114,11 +108,8 @@ tst note{ determining data alignment factor for arguments list }end output{
} }
if (exitstatus == 0) if (exitstatus == 0)
break; /* yay :) */ break; /* yay :) */
if (!align_bytes) extra_bytes++;
align_bytes = 2; if (extra_bytes > 256)
else
align_bytes *= 2;
if (align_bytes > 256)
{ {
error(ERROR_ERROR|2, "giving up"); error(ERROR_ERROR|2, "giving up");
return 1; return 1;
@ -126,10 +117,10 @@ tst note{ determining data alignment factor for arguments list }end output{
} }
} }
sfprintf(sfstdout, sfprintf(sfstdout,
"#define ARG_ALIGN_BYTES\t%d\t/* data alignment factor for arguments list */\n", "#define ARG_EXTRA_BYTES\t%d\t/* extra bytes per argument for arguments list */\n",
align_bytes); extra_bytes);
return 0; return 0;
} }
}end fail{ }end fail{
echo "#define ARG_ALIGN_BYTES 16 /* BUG: arg list alignment factor test failed; assuming 16 */" echo "#define ARG_EXTRA_BYTES 8 /* BUG: test failed; assuming 8 */"
}end }end

View file

@ -20,7 +20,7 @@
#define SH_RELEASE_FORK "93u+m" /* only change if you develop a new ksh93 fork */ #define SH_RELEASE_FORK "93u+m" /* only change if you develop a new ksh93 fork */
#define SH_RELEASE_SVER "1.0.0-alpha" /* semantic version number: https://semver.org */ #define SH_RELEASE_SVER "1.0.0-alpha" /* semantic version number: https://semver.org */
#define SH_RELEASE_DATE "2021-01-30" /* must be in this format for $((.sh.version)) */ #define SH_RELEASE_DATE "2021-01-31" /* must be in this format for $((.sh.version)) */
#define SH_RELEASE_CPYR "(c) 2020-2021 Contributors to ksh " SH_RELEASE_FORK #define SH_RELEASE_CPYR "(c) 2020-2021 Contributors to ksh " SH_RELEASE_FORK
/* Scripts sometimes field-split ${.sh.version}, so don't change amount of whitespace. */ /* Scripts sometimes field-split ${.sh.version}, so don't change amount of whitespace. */

View file

@ -5847,16 +5847,16 @@ The
option runs option runs
.I name\^ .I name\^
as an external command, bypassing built-ins. as an external command, bypassing built-ins.
If the arguments contain a word that expands to multiple arguments, such as If the arguments contain at least one word that expands to multiple arguments,
\f3"$@"\fP or \f3"${array[@]}"\fP, then the such as \f3"$@"\fP or \f3*.txt\fP, then the
.B \-x .B \-x
option also allows executing external commands with argument lists that are option also allows executing external commands with argument lists that are
longer than the operating system allows. This functionality is similar to longer than the operating system allows. This functionality is similar to
.BR xargs (1) .BR xargs (1)
but is easier to use. The shell does this by invoking the external command but is easier to use. The shell does this by invoking the external command
multiple times if needed, dividing the expanded argument list over the multiple times if needed, dividing the expanded argument list over the
invocations. Any arguments that come before the first \f3"$@"\fP or similar invocations. Any arguments that come before the first word that expands to
expansion, as well as any that follow the last \f3"$@"\fP or similar, are multiple arguments, as well as any that follow the last such word, are
considered static arguments and are repeated for each invocation. This allows considered static arguments and are repeated for each invocation. This allows
each invocation to use the same command options, as well as the same trailing each invocation to use the same command options, as well as the same trailing
destination arguments for commands like destination arguments for commands like
@ -5869,8 +5869,8 @@ exits with the status of the invocation that had the highest exit status.
(Note that (Note that
.B "command \-x" .B "command \-x"
may still fail with an "argument list too long" error if a single argument may still fail with an "argument list too long" error if a single argument
exceeds the maximum length of the argument list, or if no \f3"$@"\fP or exceeds the maximum length of the argument list, or if a long arguments
similar expansion was used.) list contains no word that expands to multiple arguments.)
.TP .TP
\(dd \f3compound\fP \f2vname\fP\*(OK\f3=\fP\f2value\^\fP\*(CK .\|.\|. \(dd \f3compound\fP \f2vname\fP\*(OK\f3=\fP\f2value\^\fP\*(CK .\|.\|.
Causes each Causes each

View file

@ -164,23 +164,20 @@ static pid_t path_xargs(Shell_t *shp,const char *path, char *argv[],char *const
return((pid_t)-1); return((pid_t)-1);
size = shp->gd->lim.arg_max-2048; size = shp->gd->lim.arg_max-2048;
for(ev=envp; cp= *ev; ev++) for(ev=envp; cp= *ev; ev++)
size -= strlen(cp)+1; size -= strlen(cp) + 1 + ARG_EXTRA_BYTES;
for(av=argv; (cp= *av) && av< &argv[shp->xargmin]; av++) for(av=argv; (cp= *av) && av< &argv[shp->xargmin]; av++)
size -= strlen(cp)+1; size -= strlen(cp) + 1 + ARG_EXTRA_BYTES;
for(av=avlast; cp= *av; av++,nlast++) for(av=avlast; cp= *av; av++,nlast++)
size -= strlen(cp)+1; size -= strlen(cp) + 1 + ARG_EXTRA_BYTES;
av = &argv[shp->xargmin]; av = &argv[shp->xargmin];
if(!spawn) if(!spawn)
job_clear(); job_clear();
shp->exitval = 0; shp->exitval = 0;
while(av<avlast) while(av<avlast)
{ {
/* for each argument, account for terminating zero and possible alignment */ /* for each argument, account for terminating zero and possible extra bytes */
for(xv=av,left=size; left>0 && av<avlast;) for(xv=av,left=size; left>0 && av<avlast;)
{ left -= strlen(*av++) + 1 + ARG_EXTRA_BYTES;
n = strlen(*av++) + 1 + ARG_ALIGN_BYTES;
left -= n + (ARG_ALIGN_BYTES ? n % ARG_ALIGN_BYTES : 0);
}
/* leave at least two for last */ /* leave at least two for last */
if(left<0 && (avlast-av)<2) if(left<0 && (avlast-av)<2)
av--; av--;

View file

@ -489,17 +489,87 @@ EOF
print 'echo "wrong path used"' > $tmp/ls print 'echo "wrong path used"' > $tmp/ls
chmod +x $tmp/ls chmod +x $tmp/ls
expect=/dev/null expect=/dev/null
actual=$(PATH=$tmp; redirect 2>&1; hash ls; command -p ls /dev/null) actual=$(set +x; PATH=$tmp; redirect 2>&1; hash ls; command -p ls /dev/null)
[[ $actual == "$expect" ]] || err_exit "'command -p' fails to search default path if tracked alias exists" \ [[ $actual == "$expect" ]] || err_exit "'command -p' fails to search default path if tracked alias exists" \
"(expected $(printf %q "$expect"), got $(printf %q "$actual"))" "(expected $(printf %q "$expect"), got $(printf %q "$actual"))"
actual=$(PATH=$tmp; redirect 2>&1; hash ls; command -p ls /dev/null; exit) # the 'exit' disables subshell optimization actual=$(set +x; PATH=$tmp; redirect 2>&1; hash ls; command -p ls /dev/null; exit) # the 'exit' disables subshell optimization
[[ $actual == "$expect" ]] || err_exit "'command -p' fails to search default path if tracked alias exists" \ [[ $actual == "$expect" ]] || err_exit "'command -p' fails to search default path if tracked alias exists" \
"(expected $(printf %q "$expect"), got $(printf %q "$actual"))" "(expected $(printf %q "$expect"), got $(printf %q "$actual"))"
expect=$(builtin getconf; PATH=$(getconf PATH); whence -p ls) expect=$(builtin getconf; PATH=$(getconf PATH); whence -p ls)
actual=$(PATH=$tmp; redirect 2>&1; hash ls; command -p -v ls) actual=$(set +x; PATH=$tmp; redirect 2>&1; hash ls; command -p -v ls)
[[ $actual == "$expect" ]] || err_exit "'command -p -v' fails to search default path if tracked alias exists" \ [[ $actual == "$expect" ]] || err_exit "'command -p -v' fails to search default path if tracked alias exists" \
"(expected $(printf %q "$expect"), got $(printf %q "$actual"))" "(expected $(printf %q "$expect"), got $(printf %q "$actual"))"
# Check that adding '-x' makes '-v'/'-V' look up external commands
if ((.sh.version >= 20210130))
then
exp=$tmp/echo
touch "$exp"
chmod +x "$exp"
got=$(PATH=$tmp; command -vx echo 2>&1)
[[ $got == "$exp" ]] || err_exit "'command -v -x' failed to look up external command in \$PATH" \
"(expected $(printf %q "$exp"), got $(printf %q "$got"))"
exp="echo is a tracked alias for $exp"
got=$(PATH=$tmp; command -Vx echo 2>&1)
[[ $got == "$exp" ]] || err_exit "'command -V -x' failed to look up external command in \$PATH" \
"(expected $(printf %q "$exp"), got $(printf %q "$got"))"
exp=$(PATH=${ getconf PATH 2>/dev/null; }; whence -p echo)
got=$(PATH=$tmp; command -pvx echo 2>&1)
[[ $got == "$exp" ]] || err_exit "'command -p -v -x' failed to look up external command in default path" \
"(expected $(printf %q "$exp"), got $(printf %q "$got"))"
exp="echo is $exp"
got=$(PATH=$tmp; command -pVx echo 2>&1)
[[ $got == "$exp" ]] || err_exit "'command -p -V -x' failed to look up external command in default path" \
"(expected $(printf %q "$exp"), got $(printf %q "$got"))"
fi
# 'command -x' used to hang in an endless E2BIG loop on Linux and macOS
ofile=$tmp/command_x_chunks.sh
trap 'sleep_pid=; while kill -9 $pid; do :; done 2>/dev/null; err_exit "'\''command -x'\'' hung"' TERM
{ sleep 15; kill $$; } &
sleep_pid=$!
(
export LC_ALL=C
unset IFS; set +f
builtin getconf && arg_max=$(getconf ARG_MAX) && let arg_max || { err_exit "getconf ARG_MAX not working"; exit 1; }
set +x # trust me, you don't want to xtrace what follows
# let's try to use a good variety of argument lengths
set -- $(typeset -p) $(functions) /dev/* /tmp/* /* *
args=$*
while let "${#args} < 3 * arg_max"
do set -- "$RANDOM" "$@" "$RANDOM" "$@" "$RANDOM"
args=$*
done
print "chunks=0 expargs=$# args=0 expsize=$((${#args}+1)) size=0"
unset args
command -px awk 'BEGIN {
ARGC -= 2; # final static args
for (i=1; i<ARGC; i++)
size += length(ARGV[i]) + 1;
print ("let chunks++ args+=")(ARGC - 1)(" size+=")(size);
if(ARGV[ARGC] != "static_arg_1" || ARGV[ARGC+1] != "final_static_arg_2")
print "err_exit \"'\''command -x'\'': static final arguments for chunk $chunks incorrect\"";
}' "$@" static_arg_1 final_static_arg_2
) >$ofile &
pid=$!
wait $pid
e=$?
trap - TERM
[[ $sleep_pid ]] && kill $sleep_pid
if let "e > 0"
then err_exit "'command -x' test yielded exit status $e$( let "e>128" && print -n / && kill -l "$e")"
fi
if [[ ! -s $ofile ]]
then err_exit "'command -x' test failed to produce output"
else save_Command=$Command
Command+=": ${ofile##*/}"
. "$ofile"
Command=$save_Command
let "args == expargs && size == expsize" || err_exit "'command -x' did not correctly divide arguments" \
"(expected $expargs args of total size $expsize, got $args args of total size $size;" \
"divided in $chunks chunks)"
fi
# ====== # ======
# whence -a/-v tests # whence -a/-v tests