1
0
Fork 0
mirror of git://git.code.sf.net/p/cdesktopenv/code synced 2025-02-15 04:32:24 +00:00

Tweak and regress-test 'command -x' (re: 66e1d446)

Turns out the assumption I was operating on, that Linux and macOS
align arguments on 32 or 64 bit boundaries, is incorrect -- they
just need some extra bytes per argument. So we can use a bit more
of the arguments buffer on these systems than I thought.

src/cmd/ksh93/features/externs:
- Change the feature test to simply detect the # of extra bytes per
  argument needed. On *BSD and commercial Unices, ARG_EXTRA_BYTES
  shows as zero; on Linux and macOS (64-bit), this yields 8. On
  Linux (32-bit), this yields 4.

src/cmd/ksh93/sh/path.c: path_xargs():
- Do not try to calculate alignment, just add ARG_EXTRA_BYTES to
  each argument.
- Also add this when substracting the length of environment
  variables and leading and trailing static command arguments.

src/cmd/ksh93/tests/path.sh:
- Test command -v/-V with -x.
- Add a robust regression test for command -x.

src/cmd/ksh93/data/builtins.c, src/cmd/ksh93/sh.1:
- Tweak docs. Glob patterns also expand to multiple words.
This commit is contained in:
Martijn Dekker 2021-02-01 00:28:18 +00:00
parent f37098f177
commit 6a0e9a1a75
6 changed files with 103 additions and 44 deletions

View file

@ -487,13 +487,14 @@ const char sh_optcommand[] =
"[V?Equivalent to \bwhence \b-v\b \acmd\a [\aarg\a ...]].]"
"[x?Search \acmd\a as an external command, bypassing built-ins. "
"If the \aarg\as include a word "
"such as \b\"$@\"\b or \b\"${array[@]]}\"\b "
"such as \b\"$@\"\b or \b\"*.txt\"\b "
"that expands to multiple arguments, "
"and the size of the expanded \aarg\a list "
"exceeds \bgetconf ARG_MAX\b bytes, "
"then \acmd\a will be run multiple times, "
"dividing the \aarg\as over the invocations. "
"Any \aarg\as that come before the first \b\"$@\"\b or similar, "
"Any \aarg\as that come before the first "
"word that expands to multiple arguments, "
"as well as any that follow the last such word, "
"are considered static and will be repeated for each invocation "
"so as to allow all invocations to use the same command options. "

View file

@ -12,18 +12,13 @@ extern nice int (int)
extern setreuid int (uid_t,uid_t)
extern setregid int (gid_t,gid_t)
tst note{ determining data alignment factor for arguments list }end output{
tst note{ determining extra bytes per argument for arguments list }end output{
/*
* Feature test to figure out if this OS does data alignment on
* the arguments list of a process, and if so, at how many bits.
* Outputs an appropriate #define ARG_ALIGN_BITS.
* Figure out if this OS requires extra bytes per argument
* in the arguments list of a process.
* Outputs an appropriate #define ARG_EXTRA_BYTES.
* Without this, 'command -x' failed with E2BIG on macOS and Linux even
* if all the arguments should fit in ARG_MAX based on their length.
*
* Strategy: first try to fill as many single-character arguments as
* should fit in ARG_MAX without alignment. If that fails with E2BIG,
* then start with a 2-byte alignment factor and keep doubling it
* until we either succeed or exceed an absurdly large value.
*/
/* AST includes */
@ -48,13 +43,13 @@ tst note{ determining data alignment factor for arguments list }end output{
int main(int argc,char *argv[])
{
int align_bytes = 0, envlen = 0, argmax, i;
int extra_bytes = 0, envlen = 0, argmax, i;
pid_t childpid;
error_info.id="args list aligment test (parent)";
error_info.id="ARG_EXTRA_BYTES test (parent)";
for(i=0; environ[i]; i++)
envlen += strlen(environ[i]) + 1;
argmax = strtoimax(astconf("ARG_MAX",NiL,NiL),NiL,0) - envlen - 1024;
argmax = strtoimax(astconf("ARG_MAX",NiL,NiL),NiL,0) - 2 * envlen;
if (argmax < 2048)
{
error(ERROR_ERROR|2, "argmax too small");
@ -67,7 +62,7 @@ tst note{ determining data alignment factor for arguments list }end output{
/* child */
int bytec;
error_info.id="args list aligment test (child)";
error_info.id="ARG_EXTRA_BYTES test (child)";
argv = (char **)stakalloc((argmax / 2 + 1) * sizeof(char*));
argc = bytec = 0;
while(bytec < argmax)
@ -78,9 +73,8 @@ tst note{ determining data alignment factor for arguments list }end output{
argv[argc] = "true";
else
argv[argc] = "x";
bytec += strlen(argv[argc]) + 1 + align_bytes;
if(align_bytes)
bytec += bytec % align_bytes;
/* also add 1 for terminating zero byte */
bytec += strlen(argv[argc]) + 1 + extra_bytes;
argc++;
}
argv[argc] = (char*)0;
@ -114,11 +108,8 @@ tst note{ determining data alignment factor for arguments list }end output{
}
if (exitstatus == 0)
break; /* yay :) */
if (!align_bytes)
align_bytes = 2;
else
align_bytes *= 2;
if (align_bytes > 256)
extra_bytes++;
if (extra_bytes > 256)
{
error(ERROR_ERROR|2, "giving up");
return 1;
@ -126,10 +117,10 @@ tst note{ determining data alignment factor for arguments list }end output{
}
}
sfprintf(sfstdout,
"#define ARG_ALIGN_BYTES\t%d\t/* data alignment factor for arguments list */\n",
align_bytes);
"#define ARG_EXTRA_BYTES\t%d\t/* extra bytes per argument for arguments list */\n",
extra_bytes);
return 0;
}
}end fail{
echo "#define ARG_ALIGN_BYTES 16 /* BUG: arg list alignment factor test failed; assuming 16 */"
echo "#define ARG_EXTRA_BYTES 8 /* BUG: test failed; assuming 8 */"
}end

View file

@ -20,7 +20,7 @@
#define SH_RELEASE_FORK "93u+m" /* only change if you develop a new ksh93 fork */
#define SH_RELEASE_SVER "1.0.0-alpha" /* semantic version number: https://semver.org */
#define SH_RELEASE_DATE "2021-01-30" /* must be in this format for $((.sh.version)) */
#define SH_RELEASE_DATE "2021-01-31" /* must be in this format for $((.sh.version)) */
#define SH_RELEASE_CPYR "(c) 2020-2021 Contributors to ksh " SH_RELEASE_FORK
/* Scripts sometimes field-split ${.sh.version}, so don't change amount of whitespace. */

View file

@ -5847,16 +5847,16 @@ The
option runs
.I name\^
as an external command, bypassing built-ins.
If the arguments contain a word that expands to multiple arguments, such as
\f3"$@"\fP or \f3"${array[@]}"\fP, then the
If the arguments contain at least one word that expands to multiple arguments,
such as \f3"$@"\fP or \f3*.txt\fP, then the
.B \-x
option also allows executing external commands with argument lists that are
longer than the operating system allows. This functionality is similar to
.BR xargs (1)
but is easier to use. The shell does this by invoking the external command
multiple times if needed, dividing the expanded argument list over the
invocations. Any arguments that come before the first \f3"$@"\fP or similar
expansion, as well as any that follow the last \f3"$@"\fP or similar, are
invocations. Any arguments that come before the first word that expands to
multiple arguments, as well as any that follow the last such word, are
considered static arguments and are repeated for each invocation. This allows
each invocation to use the same command options, as well as the same trailing
destination arguments for commands like
@ -5869,8 +5869,8 @@ exits with the status of the invocation that had the highest exit status.
(Note that
.B "command \-x"
may still fail with an "argument list too long" error if a single argument
exceeds the maximum length of the argument list, or if no \f3"$@"\fP or
similar expansion was used.)
exceeds the maximum length of the argument list, or if a long arguments
list contains no word that expands to multiple arguments.)
.TP
\(dd \f3compound\fP \f2vname\fP\*(OK\f3=\fP\f2value\^\fP\*(CK .\|.\|.
Causes each

View file

@ -164,23 +164,20 @@ static pid_t path_xargs(Shell_t *shp,const char *path, char *argv[],char *const
return((pid_t)-1);
size = shp->gd->lim.arg_max-2048;
for(ev=envp; cp= *ev; ev++)
size -= strlen(cp)+1;
size -= strlen(cp) + 1 + ARG_EXTRA_BYTES;
for(av=argv; (cp= *av) && av< &argv[shp->xargmin]; av++)
size -= strlen(cp)+1;
size -= strlen(cp) + 1 + ARG_EXTRA_BYTES;
for(av=avlast; cp= *av; av++,nlast++)
size -= strlen(cp)+1;
size -= strlen(cp) + 1 + ARG_EXTRA_BYTES;
av = &argv[shp->xargmin];
if(!spawn)
job_clear();
shp->exitval = 0;
while(av<avlast)
{
/* for each argument, account for terminating zero and possible alignment */
/* for each argument, account for terminating zero and possible extra bytes */
for(xv=av,left=size; left>0 && av<avlast;)
{
n = strlen(*av++) + 1 + ARG_ALIGN_BYTES;
left -= n + (ARG_ALIGN_BYTES ? n % ARG_ALIGN_BYTES : 0);
}
left -= strlen(*av++) + 1 + ARG_EXTRA_BYTES;
/* leave at least two for last */
if(left<0 && (avlast-av)<2)
av--;

View file

@ -489,17 +489,87 @@ EOF
print 'echo "wrong path used"' > $tmp/ls
chmod +x $tmp/ls
expect=/dev/null
actual=$(PATH=$tmp; redirect 2>&1; hash ls; command -p ls /dev/null)
actual=$(set +x; PATH=$tmp; redirect 2>&1; hash ls; command -p ls /dev/null)
[[ $actual == "$expect" ]] || err_exit "'command -p' fails to search default path if tracked alias exists" \
"(expected $(printf %q "$expect"), got $(printf %q "$actual"))"
actual=$(PATH=$tmp; redirect 2>&1; hash ls; command -p ls /dev/null; exit) # the 'exit' disables subshell optimization
actual=$(set +x; PATH=$tmp; redirect 2>&1; hash ls; command -p ls /dev/null; exit) # the 'exit' disables subshell optimization
[[ $actual == "$expect" ]] || err_exit "'command -p' fails to search default path if tracked alias exists" \
"(expected $(printf %q "$expect"), got $(printf %q "$actual"))"
expect=$(builtin getconf; PATH=$(getconf PATH); whence -p ls)
actual=$(PATH=$tmp; redirect 2>&1; hash ls; command -p -v ls)
actual=$(set +x; PATH=$tmp; redirect 2>&1; hash ls; command -p -v ls)
[[ $actual == "$expect" ]] || err_exit "'command -p -v' fails to search default path if tracked alias exists" \
"(expected $(printf %q "$expect"), got $(printf %q "$actual"))"
# Check that adding '-x' makes '-v'/'-V' look up external commands
if ((.sh.version >= 20210130))
then
exp=$tmp/echo
touch "$exp"
chmod +x "$exp"
got=$(PATH=$tmp; command -vx echo 2>&1)
[[ $got == "$exp" ]] || err_exit "'command -v -x' failed to look up external command in \$PATH" \
"(expected $(printf %q "$exp"), got $(printf %q "$got"))"
exp="echo is a tracked alias for $exp"
got=$(PATH=$tmp; command -Vx echo 2>&1)
[[ $got == "$exp" ]] || err_exit "'command -V -x' failed to look up external command in \$PATH" \
"(expected $(printf %q "$exp"), got $(printf %q "$got"))"
exp=$(PATH=${ getconf PATH 2>/dev/null; }; whence -p echo)
got=$(PATH=$tmp; command -pvx echo 2>&1)
[[ $got == "$exp" ]] || err_exit "'command -p -v -x' failed to look up external command in default path" \
"(expected $(printf %q "$exp"), got $(printf %q "$got"))"
exp="echo is $exp"
got=$(PATH=$tmp; command -pVx echo 2>&1)
[[ $got == "$exp" ]] || err_exit "'command -p -V -x' failed to look up external command in default path" \
"(expected $(printf %q "$exp"), got $(printf %q "$got"))"
fi
# 'command -x' used to hang in an endless E2BIG loop on Linux and macOS
ofile=$tmp/command_x_chunks.sh
trap 'sleep_pid=; while kill -9 $pid; do :; done 2>/dev/null; err_exit "'\''command -x'\'' hung"' TERM
{ sleep 15; kill $$; } &
sleep_pid=$!
(
export LC_ALL=C
unset IFS; set +f
builtin getconf && arg_max=$(getconf ARG_MAX) && let arg_max || { err_exit "getconf ARG_MAX not working"; exit 1; }
set +x # trust me, you don't want to xtrace what follows
# let's try to use a good variety of argument lengths
set -- $(typeset -p) $(functions) /dev/* /tmp/* /* *
args=$*
while let "${#args} < 3 * arg_max"
do set -- "$RANDOM" "$@" "$RANDOM" "$@" "$RANDOM"
args=$*
done
print "chunks=0 expargs=$# args=0 expsize=$((${#args}+1)) size=0"
unset args
command -px awk 'BEGIN {
ARGC -= 2; # final static args
for (i=1; i<ARGC; i++)
size += length(ARGV[i]) + 1;
print ("let chunks++ args+=")(ARGC - 1)(" size+=")(size);
if(ARGV[ARGC] != "static_arg_1" || ARGV[ARGC+1] != "final_static_arg_2")
print "err_exit \"'\''command -x'\'': static final arguments for chunk $chunks incorrect\"";
}' "$@" static_arg_1 final_static_arg_2
) >$ofile &
pid=$!
wait $pid
e=$?
trap - TERM
[[ $sleep_pid ]] && kill $sleep_pid
if let "e > 0"
then err_exit "'command -x' test yielded exit status $e$( let "e>128" && print -n / && kill -l "$e")"
fi
if [[ ! -s $ofile ]]
then err_exit "'command -x' test failed to produce output"
else save_Command=$Command
Command+=": ${ofile##*/}"
. "$ofile"
Command=$save_Command
let "args == expargs && size == expsize" || err_exit "'command -x' did not correctly divide arguments" \
"(expected $expargs args of total size $expsize, got $args args of total size $size;" \
"divided in $chunks chunks)"
fi
# ======
# whence -a/-v tests