1
0
Fork 0
mirror of git://git.code.sf.net/p/cdesktopenv/code synced 2025-02-13 03:32:24 +00:00

command -x: fix efficiency; always run external cmd (re: acf84e96)

This commit fixes 'command -x' to adapt to OS limitations with
regards to data alignment in the arguments list. A feature test is
added that detects if the OS aligns the argument on 32-bit or
64-bit boundaries or not at all, allowing 'command -x' to avoid
E2BIG errors while maximising efficiency.

Also, as of now, 'command -x' is a way to bypass built-ins and
run/query an external command. Built-ins do not limit the length of
their argument list, so '-x' never made sense to use for them. And
because '-x' hangs on Linux and macOS on every ksh93 release
version to date (see acf84e96), few use it, so there is little
reason not to make this change.

Finally, this fixes a longstanding bug that caused the minimum exit
status of 'command -x' to be 1 if a command with many arguments was
divided into several command invocations. This is done by replacing
broken flaggery with a new SH_XARG state flag bit.

src/cmd/ksh93/features/externs:
- Add new C feature test detecting byte alignment in args list.
  The test writes a #define ARG_ALIGN_BYTES with the amount of
  bytes the OS aligns arguments to, or zero for no alignment.

src/cmd/ksh93/include/defs.h:
- Add new SH_XARG state bit indicating 'command -x' is active.

src/cmd/ksh93/sh/path.c: path_xargs():
- Leave extra 2k in the args buffer instead of 1k, just to be sure;
  some commands add large environment variables these days.
- Fix a bug in subtracting the length of existing arguments and
  environment variables. 'size -= strlen(cp)-1;' subtracts one less
  than the size of cp, which makes no sense; what is necessary is
  to substract the length plus one to account for the terminating
  zero byte, i.e.: 'size -= strlen(cp)+1'.
- Use the ARG_ALIGN_BYTES feature test result to match the OS's
  data alignment requirements.
- path_spawn(): E2BIG: Change to checking SH_XARG state bit.

src/cmd/ksh93/bltins/whence.c: b_command():
- Allow combining -x with -p, -v and -V with the expected results
  by setting P_FLAG to act like 'whence -p'. E.g., as of now,
	command -xv printf
  is equivalent to
	whence -p printf
  but note that 'whence' has no equivalent of 'command -pvx printf'
  which searches $(getconf PATH) for a command.
- When -x will run a command, now set the new SH_XARG state flag.

src/cmd/ksh93/sh/xec.c: sh_exec():
- Change to using the new SH_XARG state bit.
- Skip the check for built-ins if SH_XARG is active, so that
  'command -x' now always runs an external command.

src/lib/libcmd/date.c, src/lib/libcmd/uname.c:
- These path-bound builtins sometimes need to run the external
  system command by the same name, but they did that by hardcoding
  an unportable direct path. Now that 'command -x' runs an external
  command, change this to using 'command -px' to guarantee using
  the known-good external system utility in the default PATH.
- In date.c, fix the format string passed to 'command -px date'
  when setting the date; it was only compatible with BSD systems.
  Use the POSIX variant on non-BSD systems.
This commit is contained in:
Martijn Dekker 2021-01-30 05:51:22 +00:00
parent 005d38f410
commit 66e1d44642
12 changed files with 239 additions and 75 deletions

12
NEWS
View file

@ -3,6 +3,18 @@ For full details, see the git log at: https://github.com/ksh93/ksh
Any uppercase BUG_* names are modernish shell bug IDs.
2021-01-30:
- The -x option to the 'command' built-in now causes it to bypass built-ins
so that it always runs/queries an external command. See 'command --man'.
- Fixed a bug in 'command -x' that caused the minimum exit status to be 1 if
a command with many arguments was divided into several command invocations.
- The 2020-08-16 fix is improved with a compile-time feature test that
detects if and how the OS uses data alignment in the arguments list,
maximising the efficiency of 'command -x' for the system it runs on.
2021-01-24:
- Fixed a bug in 'typeset': combining the -u option with -F or -E caused the

View file

@ -84,6 +84,8 @@ For more details, see the NEWS file and for complete details, see the git log.
reset like other traps upon entering a subshell or ksh-style function,
as documented, and it is no longer prone to crash or get corrupted.
14. 'command -x' now always runs an external command, bypassing built-ins.
____________________________________________________________________________
KSH-93 VS. KSH-88

View file

@ -68,7 +68,7 @@ int b_command(register int argc,char *argv[],Shbltin_t *context)
flags |= V_FLAG;
break;
case 'x':
shp->xargexit = 1;
flags |= P_FLAG;
break;
case ':':
if(argc==0)
@ -82,7 +82,13 @@ int b_command(register int argc,char *argv[],Shbltin_t *context)
break;
}
if(argc==0)
return(flags?0:opt_info.index);
{
if(flags & (X_FLAG|V_FLAG))
return(0); /* return no offset now; sh_exec() will treat command -v/-V as normal builtin */
if(flags & P_FLAG)
sh_onstate(SH_XARG);
return(opt_info.index); /* offset for sh_exec() to remove 'command' prefix + options */
}
argv += opt_info.index;
if(error_info.errors || !*argv)
errormsg(SH_DICT,ERROR_usage(2),"%s", optusage((char*)0));

View file

@ -467,40 +467,47 @@ USAGE_LICENSE
;
const char sh_optcommand[] =
"[-1c?\n@(#)$Id: command (AT&T Research/ksh93) 2020-09-09 $\n]"
"[-1c?\n@(#)$Id: command (AT&T Research/ksh93) 2021-01-30 $\n]"
USAGE_LICENSE
"[+NAME?command - execute a simple command disabling special properties]"
"[+DESCRIPTION?Without \b-v\b or \b-V\b, \bcommand\b executes \acommand\a "
"[+DESCRIPTION?Without \b-v\b or \b-V\b, \bcommand\b executes \acmd\a "
"with arguments given by \aarg\a, suppressing the shell function lookup "
"that normally occurs. If \acommand\a is a special built-in command, "
"that normally occurs. If \acmd\a is a special built-in command, "
"then the special properties are removed so that failures will not "
"cause the script that executes it to terminate and preceding "
"assignments will not persist beyond the command invocation. "
"If \acommand\a is a declaration built-in command and the "
"If \acmd\a is a declaration built-in command and the "
"\b-o posix\b shell option is on, then the declaration properties are "
"removed so that arguments containing \b=\b are not treated specially.]"
"[+?With the \b-v\b or \b-V\b options, \bcommand\b is equivalent to the "
"\bwhence\b(1) command.]"
"[p?Instead of \b$PATH\b, search the OS's default utility path as output by "
"\bgetconf PATH\b.]"
"[v?Equivalent to \bwhence\b \acommand\a [\aarg\a ...]].]"
"[x?If \acommand\a fails because there are too many \aarg\as, it will be "
"invoked multiple times with a subset of the arguments on each "
"invocation. Arguments that occur prior to the first word that expand "
"to multiple arguments and arguments that occur after the last word "
"that expands to multiple arguments will be passed on each invocation. "
"The exit status will be the maximum invocation exit status.]"
"[V?Equivalent to \bwhence \b-v\b \acommand\a [\aarg\a ...]].]"
"[v?Equivalent to \bwhence\b \acmd\a [\aarg\a ...]].]"
"[V?Equivalent to \bwhence \b-v\b \acmd\a [\aarg\a ...]].]"
"[x?Search \acmd\a as an external command, bypassing built-ins. "
"If the \aarg\as include a word "
"such as \b\"$@\"\b or \b\"${array[@]]}\"\b "
"that expands to multiple arguments, "
"and the size of the expanded \aarg\a list "
"exceeds \bgetconf ARG_MAX\b bytes, "
"then \acmd\a will be run multiple times, "
"dividing the \aarg\as over the invocations. "
"Any \aarg\as that come before the first \b\"$@\"\b or similar, "
"as well as any that follow the last such word, "
"are considered static and will be repeated for each invocation "
"so as to allow all invocations to use the same command options. "
"The exit status will be the highest returned by the invocations.]"
"\n"
"\n[command [arg ...]]\n"
"\n[cmd [arg ...]]\n"
"\n"
"[+EXIT STATUS?If \acommand\a is invoked, the exit status of \bcommand\b "
"will be that of \acommand\a. Otherwise, it will be one of "
"[+EXIT STATUS?If \acmd\a is invoked, the exit status of \bcommand\b "
"will be that of \acmd\a. Otherwise, it will be one of "
"the following:]{"
"[+0?\bcommand\b completed successfully.]"
"[+>0?\b-v\b or \b-V\b has been specified and an error occurred.]"
"[+126?\acommand\a was found but could not be invoked.]"
"[+127?\acommand\a could not be found.]"
"[+126?\acmd\a was found but could not be invoked.]"
"[+127?\acmd\a could not be found.]"
"}"
"[+SEE ALSO?\bwhence\b(1), \bgetconf\b(1)]"

View file

@ -11,3 +11,125 @@ reference unistd.h
extern nice int (int)
extern setreuid int (uid_t,uid_t)
extern setregid int (gid_t,gid_t)
tst note{ determining data alignment factor for arguments list }end output{
/*
* Feature test to figure out if this OS does data alignment on
* the arguments list of a process, and if so, at how many bits.
* Outputs an appropriate #define ARG_ALIGN_BITS.
* Without this, 'command -x' failed with E2BIG on macOS and Linux even
* if all the arguments should fit in ARG_MAX based on their length.
*
* Strategy: first try to fill as many single-character arguments as
* should fit in ARG_MAX without alignment. If that fails with E2BIG,
* then start with a 2-byte alignment factor and keep doubling it
* until we either succeed or exceed an absurdly large value.
*/
/* AST includes */
#include <ast.h>
#include <error.h>
#include <sfio.h>
#include <stak.h>
#include <wait.h>
/* Standard includes */
#include <errno.h>
#ifndef _lib_fork
#error requires fork(2)
#endif
#ifndef _lib_execve
#error requires execve(2)
#endif
#ifndef _lib_waitpid
#error requires waitpid(2)
#endif
int main(int argc,char *argv[])
{
int align_bytes = 0, envlen = 0, argmax, i;
pid_t childpid;
error_info.id="args list aligment test (parent)";
for(i=0; environ[i]; i++)
envlen += strlen(environ[i]) + 1;
argmax = strtoimax(astconf("ARG_MAX",NiL,NiL),NiL,0) - envlen - 1024;
if (argmax < 2048)
{
error(ERROR_ERROR|2, "argmax too small");
return 1;
}
while(1)
{
if(!(childpid = fork()))
{
/* child */
int bytec;
error_info.id="args list aligment test (child)";
argv = (char **)stakalloc((argmax / 2 + 1) * sizeof(char*));
argc = bytec = 0;
while(bytec < argmax)
{
if(argc==0)
argv[argc] = "/usr/bin/env";
else if(argc==1)
argv[argc] = "true";
else
argv[argc] = "x";
bytec += strlen(argv[argc]) + 1 + align_bytes;
if(align_bytes)
bytec += bytec % align_bytes;
argc++;
}
argv[argc] = (char*)0;
if(execve(argv[0], argv, environ) < 0)
{
if(errno == E2BIG)
return 1;
else
{
error(ERROR_SYSTEM|2, "execve failed");
return 2;
}
}
error(ERROR_SYSTEM|2, "[BUG] we should never get here!");
return 2;
}
else
{
/* parent */
int exitstatus;
if (waitpid(childpid,&i,0) < 0)
{
error(ERROR_SYSTEM|2, "waitpid failed");
return 1;
}
if (!WIFEXITED(i) || (exitstatus = WEXITSTATUS(i)) > 1)
{
error(ERROR_ERROR|2, "child process exited abnormally");
return 1;
}
if (exitstatus == 0)
break; /* yay :) */
if (!align_bytes)
align_bytes = 2;
else
align_bytes *= 2;
if (align_bytes > 256)
{
error(ERROR_ERROR|2, "giving up");
return 1;
}
}
}
sfprintf(sfstdout,
"#define ARG_ALIGN_BYTES\t%d\t/* data alignment factor for arguments list */\n",
align_bytes);
return 0;
}
}end fail{
echo "#define ARG_ALIGN_BYTES 16 /* BUG: arg list alignment factor test failed; assuming 16 */"
}end

View file

@ -314,6 +314,7 @@ struct shared
#define SH_PREINIT 18 /* set with SH_INIT before parsing options */
#define SH_COMPLETE 19 /* set for command completion */
#define SH_INTESTCMD 20 /* set while test/[ command is being run */
#define SH_XARG 21 /* set while in xarg (command -x) mode */
#define SH_BRACEEXPAND 42
#define SH_POSIX 46

View file

@ -20,7 +20,7 @@
#define SH_RELEASE_FORK "93u+m" /* only change if you develop a new ksh93 fork */
#define SH_RELEASE_SVER "1.0.0-alpha" /* semantic version number: https://semver.org */
#define SH_RELEASE_DATE "2021-01-24" /* must be in this format for $((.sh.version)) */
#define SH_RELEASE_DATE "2021-01-30" /* must be in this format for $((.sh.version)) */
#define SH_RELEASE_CPYR "(c) 2020-2021 Contributors to ksh " SH_RELEASE_FORK
/* Scripts sometimes field-split ${.sh.version}, so don't change amount of whitespace. */

View file

@ -5844,19 +5844,33 @@ rather than the one defined by the value of
.IP
The
.B \-x
option allows executing non-built-in commands with argument lists exceeding
limitations imposed by the operating system. This functionality is similar to
option runs
.I name\^
as an external command, bypassing built-ins.
If the arguments contain a word that expands to multiple arguments, such as
\f3"$@"\fP or \f3"${array[@]}"\fP, then the
.B \-x
option also allows executing external commands with argument lists that are
longer than the operating system allows. This functionality is similar to
.BR xargs (1)
but is easier to use.
If a command cannot ordinarily be executed because there are too many
arguments, the shell will invoke the indicated command multiple times
with a subset of the arguments on each invocation.
Any arguments (such as command options) that come before the first word
that expands to multiple arguments, as well as any that follow the last
word that expands to multiple arguments, are considered static arguments
and are repeated for each invocation. When all invocations are completed,
but is easier to use. The shell does this by invoking the external command
multiple times if needed, dividing the expanded argument list over the
invocations. Any arguments that come before the first \f3"$@"\fP or similar
expansion, as well as any that follow the last \f3"$@"\fP or similar, are
considered static arguments and are repeated for each invocation. This allows
each invocation to use the same command options, as well as the same trailing
destination arguments for commands like
.BR cp (1)
or
.BR mv (1).
When all invocations are completed,
.B "command \-x"
exits with the status of the invocation that had the highest exit status.
(Note that
.B "command \-x"
may still fail with an "argument list too long" error if a single argument
exceeds the maximum length of the argument list, or if no \f3"$@"\fP or
similar expansion was used.)
.TP
\(dd \f3compound\fP \f2vname\fP\*(OK\f3=\fP\f2value\^\fP\*(CK .\|.\|.
Causes each
@ -6838,7 +6852,7 @@ Any file descriptor numbers greater than
.B 2
that are opened with this mechanism are closed when invoking another program,
unless they are explicitly redirected to themselves as part of that invocation
(e.g. \fb4>&4\fR) or the \fBposix\fR shell option is active.
(e.g. \fB4>&4\fR) or the \fBposix\fR shell option is active.
.TP
\(dg \f3return\fP \*(OK \f2n\^\fP \*(CK
Causes a shell

View file

@ -162,24 +162,24 @@ static pid_t path_xargs(Shell_t *shp,const char *path, char *argv[],char *const
pid_t pid;
if(shp->xargmin < 0)
return((pid_t)-1);
size = shp->gd->lim.arg_max-1024;
size = shp->gd->lim.arg_max-2048;
for(ev=envp; cp= *ev; ev++)
size -= strlen(cp)-1;
size -= strlen(cp)+1;
for(av=argv; (cp= *av) && av< &argv[shp->xargmin]; av++)
size -= strlen(cp)-1;
size -= strlen(cp)+1;
for(av=avlast; cp= *av; av++,nlast++)
size -= strlen(cp)-1;
size -= strlen(cp)+1;
av = &argv[shp->xargmin];
if(!spawn)
job_clear();
shp->exitval = 0;
while(av<avlast)
{
/* for each argument, account for terminating zero and 16-byte alignment */
/* for each argument, account for terminating zero and possible alignment */
for(xv=av,left=size; left>0 && av<avlast;)
{
n = strlen(*av++) + 1 + 16;
left -= n + n % 16;
n = strlen(*av++) + 1 + ARG_ALIGN_BYTES;
left -= n + (ARG_ALIGN_BYTES ? n % ARG_ALIGN_BYTES : 0);
}
/* leave at least two for last */
if(left<0 && (avlast-av)<2)
@ -1244,11 +1244,11 @@ pid_t path_spawn(Shell_t *shp,const char *opath,register char **argv, char **env
#endif /* EMLINK */
return(-1);
case E2BIG:
if(shp->xargmin)
if(sh_isstate(SH_XARG))
{
pid = path_xargs(shp,opath, &argv[0] ,envp,spawn);
if(pid<0)
errormsg(SH_DICT,ERROR_system(ERROR_NOEXEC),"%s: 'command -x' failed",path);
errormsg(SH_DICT,ERROR_system(ERROR_NOEXEC),"command -x: could not execute %s",path);
return(pid);
}
default:

View file

@ -1024,7 +1024,7 @@ int sh_exec(register const Shnode_t *t, int flags)
}
#endif /* SHOPT_NAMESPACE */
com0 = com[0];
shp->xargexit = 0;
sh_offstate(SH_XARG);
while(np==SYSCOMMAND || !np && com0 && nv_search(com0,shp->fun_tree,0)==SYSCOMMAND)
{
register int n = b_command(0,com,&shp->bltindata);
@ -1036,13 +1036,12 @@ int sh_exec(register const Shnode_t *t, int flags)
break;
np = nv_bfsearch(com0, shp->bltin_tree, &nq, &cp);
}
if(shp->xargexit)
if(sh_isstate(SH_XARG))
{
shp->xargmin -= command;
shp->xargmax -= command;
shp->xargexit = 0;
}
else
shp->xargmin = 0;
argn -= command;
if(np && is_abuiltin(np))
{
@ -1233,7 +1232,7 @@ int sh_exec(register const Shnode_t *t, int flags)
pipejob = 1;
}
/* check for builtins */
if(np && is_abuiltin(np))
if(np && is_abuiltin(np) && !sh_isstate(SH_XARG))
{
volatile int scope=0, share=0;
volatile void *save_ptr;

View file

@ -218,18 +218,16 @@ typedef struct Fmt
static int
settime(Shbltin_t* context, const char* cmd, Time_t now, int adjust, int network)
{
char* s;
char** argv;
char* args[5];
char* args[7];
char buf[1024];
if (!adjust && !network)
return tmxsettime(now);
argv = args;
s = "/usr/bin/date";
if (!streq(cmd, s) && (!eaccess(s, X_OK) || !eaccess(s+=4, X_OK)))
{
*argv++ = s;
*argv++ = "command";
*argv++ = "-px";
*argv++ = "date";
if (streq(astconf("UNIVERSE", NiL, NiL), "att"))
{
tmxfmt(buf, sizeof(buf), "%m%d%H" "%M%Y.%S", now);
@ -238,7 +236,11 @@ settime(Shbltin_t* context, const char* cmd, Time_t now, int adjust, int network
}
else
{
#if __FreeBSD__ || __NetBSD__ || __OpenBSD__ || __bsdi__ || __DragonFly__
tmxfmt(buf, sizeof(buf), "%Y%m%d%H" "%M.%S", now);
#else
tmxfmt(buf, sizeof(buf), "%m%d%H" "%M%Y.%S", now);
#endif
if (network)
*argv++ = "-n";
if (tm_info.flags & TM_UTC)
@ -248,7 +250,6 @@ settime(Shbltin_t* context, const char* cmd, Time_t now, int adjust, int network
*argv = 0;
if (!sh_run(context, argv - args, args))
return 0;
}
return -1;
}

View file

@ -37,7 +37,7 @@ USAGE_LICENSE
" separated, on a single line. When more than one option is specified"
" the output is in the order specified by the \b-A\b option below."
" Unsupported option values are listed as \a[option]]\a. If any unknown"
" options are specified then the local \b/usr/bin/uname\b is called.]"
" options are specified, the OS default \buname\b(1) is called.]"
"[+?If any \aname\a operands are specified then the \bsysinfo\b(2) values"
" for each \aname\a are listed, separated by space, on one line."
" \bgetconf\b(1), a pre-existing \astandard\a interface, provides"
@ -325,14 +325,14 @@ b_uname(int argc, char** argv, Shbltin_t* context)
sethost = opt_info.arg;
continue;
case ':':
s = "/usr/bin/uname";
if (!streq(argv[0], s) && (!eaccess(s, X_OK) || !eaccess(s+=4, X_OK)))
{
argv[0] = s;
return sh_run(context, argc, argv);
char **new_argv = (char **)stakalloc((argc + 3) * sizeof(char*));
new_argv[0] = "command";
new_argv[1] = "-px";
for (n = 0; n <= argc; n++)
new_argv[n + 2] = argv[n];
return sh_run(context, argc + 2, new_argv);
}
error(2, "%s", opt_info.arg);
break;
case '?':
error(ERROR_usage(2), "%s", opt_info.arg);
break;