diff --git a/NEWS b/NEWS index a640a3f50..bdea546ec 100644 --- a/NEWS +++ b/NEWS @@ -3,6 +3,18 @@ For full details, see the git log at: https://github.com/ksh93/ksh Any uppercase BUG_* names are modernish shell bug IDs. +2021-01-30: + +- The -x option to the 'command' built-in now causes it to bypass built-ins + so that it always runs/queries an external command. See 'command --man'. + +- Fixed a bug in 'command -x' that caused the minimum exit status to be 1 if + a command with many arguments was divided into several command invocations. + +- The 2020-08-16 fix is improved with a compile-time feature test that + detects if and how the OS uses data alignment in the arguments list, + maximising the efficiency of 'command -x' for the system it runs on. + 2021-01-24: - Fixed a bug in 'typeset': combining the -u option with -F or -E caused the diff --git a/src/cmd/ksh93/COMPATIBILITY b/src/cmd/ksh93/COMPATIBILITY index c3d5e0c1f..d62fdc545 100644 --- a/src/cmd/ksh93/COMPATIBILITY +++ b/src/cmd/ksh93/COMPATIBILITY @@ -84,6 +84,8 @@ For more details, see the NEWS file and for complete details, see the git log. reset like other traps upon entering a subshell or ksh-style function, as documented, and it is no longer prone to crash or get corrupted. +14. 'command -x' now always runs an external command, bypassing built-ins. + ____________________________________________________________________________ KSH-93 VS. 
KSH-88 diff --git a/src/cmd/ksh93/bltins/whence.c b/src/cmd/ksh93/bltins/whence.c index 0c1e5a26a..600633a97 100644 --- a/src/cmd/ksh93/bltins/whence.c +++ b/src/cmd/ksh93/bltins/whence.c @@ -68,7 +68,7 @@ int b_command(register int argc,char *argv[],Shbltin_t *context) flags |= V_FLAG; break; case 'x': - shp->xargexit = 1; + flags |= P_FLAG; break; case ':': if(argc==0) @@ -82,7 +82,13 @@ int b_command(register int argc,char *argv[],Shbltin_t *context) break; } if(argc==0) - return(flags?0:opt_info.index); + { + if(flags & (X_FLAG|V_FLAG)) + return(0); /* return no offset now; sh_exec() will treat command -v/-V as normal builtin */ + if(flags & P_FLAG) + sh_onstate(SH_XARG); + return(opt_info.index); /* offset for sh_exec() to remove 'command' prefix + options */ + } argv += opt_info.index; if(error_info.errors || !*argv) errormsg(SH_DICT,ERROR_usage(2),"%s", optusage((char*)0)); diff --git a/src/cmd/ksh93/data/builtins.c b/src/cmd/ksh93/data/builtins.c index 9926927a8..b9acebfc0 100644 --- a/src/cmd/ksh93/data/builtins.c +++ b/src/cmd/ksh93/data/builtins.c @@ -467,40 +467,47 @@ USAGE_LICENSE ; const char sh_optcommand[] = -"[-1c?\n@(#)$Id: command (AT&T Research/ksh93) 2020-09-09 $\n]" +"[-1c?\n@(#)$Id: command (AT&T Research/ksh93) 2021-01-30 $\n]" USAGE_LICENSE "[+NAME?command - execute a simple command disabling special properties]" -"[+DESCRIPTION?Without \b-v\b or \b-V\b, \bcommand\b executes \acommand\a " +"[+DESCRIPTION?Without \b-v\b or \b-V\b, \bcommand\b executes \acmd\a " "with arguments given by \aarg\a, suppressing the shell function lookup " - "that normally occurs. If \acommand\a is a special built-in command, " + "that normally occurs. If \acmd\a is a special built-in command, " "then the special properties are removed so that failures will not " "cause the script that executes it to terminate and preceding " "assignments will not persist beyond the command invocation. 
" - "If \acommand\a is a declaration built-in command and the " + "If \acmd\a is a declaration built-in command and the " "\b-o posix\b shell option is on, then the declaration properties are " "removed so that arguments containing \b=\b are not treated specially.]" "[+?With the \b-v\b or \b-V\b options, \bcommand\b is equivalent to the " "\bwhence\b(1) command.]" "[p?Instead of \b$PATH\b, search the OS's default utility path as output by " "\bgetconf PATH\b.]" -"[v?Equivalent to \bwhence\b \acommand\a [\aarg\a ...]].]" -"[x?If \acommand\a fails because there are too many \aarg\as, it will be " - "invoked multiple times with a subset of the arguments on each " - "invocation. Arguments that occur prior to the first word that expand " - "to multiple arguments and arguments that occur after the last word " - "that expands to multiple arguments will be passed on each invocation. " - "The exit status will be the maximum invocation exit status.]" -"[V?Equivalent to \bwhence \b-v\b \acommand\a [\aarg\a ...]].]" +"[v?Equivalent to \bwhence\b \acmd\a [\aarg\a ...]].]" +"[V?Equivalent to \bwhence \b-v\b \acmd\a [\aarg\a ...]].]" +"[x?Search \acmd\a as an external command, bypassing built-ins. " + "If the \aarg\as include a word " + "such as \b\"$@\"\b or \b\"${array[@]]}\"\b " + "that expands to multiple arguments, " + "and the size of the expanded \aarg\a list " + "exceeds \bgetconf ARG_MAX\b bytes, " + "then \acmd\a will be run multiple times, " + "dividing the \aarg\as over the invocations. " + "Any \aarg\as that come before the first \b\"$@\"\b or similar, " + "as well as any that follow the last such word, " + "are considered static and will be repeated for each invocation " + "so as to allow all invocations to use the same command options. 
" + "The exit status will be the highest returned by the invocations.]" "\n" -"\n[command [arg ...]]\n" +"\n[cmd [arg ...]]\n" "\n" -"[+EXIT STATUS?If \acommand\a is invoked, the exit status of \bcommand\b " - "will be that of \acommand\a. Otherwise, it will be one of " +"[+EXIT STATUS?If \acmd\a is invoked, the exit status of \bcommand\b " + "will be that of \acmd\a. Otherwise, it will be one of " "the following:]{" "[+0?\bcommand\b completed successfully.]" "[+>0?\b-v\b or \b-V\b has been specified and an error occurred.]" - "[+126?\acommand\a was found but could not be invoked.]" - "[+127?\acommand\a could not be found.]" + "[+126?\acmd\a was found but could not be invoked.]" + "[+127?\acmd\a could not be found.]" "}" "[+SEE ALSO?\bwhence\b(1), \bgetconf\b(1)]" diff --git a/src/cmd/ksh93/features/externs b/src/cmd/ksh93/features/externs index bc267591d..4f58bf7e6 100644 --- a/src/cmd/ksh93/features/externs +++ b/src/cmd/ksh93/features/externs @@ -11,3 +11,125 @@ reference unistd.h extern nice int (int) extern setreuid int (uid_t,uid_t) extern setregid int (gid_t,gid_t) + +tst note{ determining data alignment factor for arguments list }end output{ + /* + * Feature test to figure out if this OS does data alignment on + * the arguments list of a process, and if so, at how many bits. + * Outputs an appropriate #define ARG_ALIGN_BITS. + * Without this, 'command -x' failed with E2BIG on macOS and Linux even + * if all the arguments should fit in ARG_MAX based on their length. + * + * Strategy: first try to fill as many single-character arguments as + * should fit in ARG_MAX without alignment. If that fails with E2BIG, + * then start with a 2-byte alignment factor and keep doubling it + * until we either succeed or exceed an absurdly large value. 
+ */ + + /* AST includes */ + #include <ast.h> + #include <error.h> + #include <sfio.h> + #include <stak.h> + #include <wait.h> + + /* Standard includes */ + #include <errno.h> + + #ifndef _lib_fork + #error requires fork(2) + #endif + #ifndef _lib_execve + #error requires execve(2) + #endif + #ifndef _lib_waitpid + #error requires waitpid(2) + #endif + + int main(int argc,char *argv[]) + { + int align_bytes = 0, envlen = 0, argmax, i; + pid_t childpid; + + error_info.id="args list alignment test (parent)"; + for(i=0; environ[i]; i++) + envlen += strlen(environ[i]) + 1; + argmax = strtoimax(astconf("ARG_MAX",NiL,NiL),NiL,0) - envlen - 1024; /* ARG_MAX minus environment size, minus 1024 bytes of headroom */ + if (argmax < 2048) + { + error(ERROR_ERROR|2, "argmax too small"); + return 1; + } + while(1) + { + if(!(childpid = fork())) + { + /* child */ + int bytec; + + error_info.id="args list alignment test (child)"; + argv = (char **)stakalloc((argmax / 2 + 1) * sizeof(char*)); + argc = bytec = 0; + while(bytec < argmax) + { + if(argc==0) + argv[argc] = "/usr/bin/env"; + else if(argc==1) + argv[argc] = "true"; + else + argv[argc] = "x"; + bytec += strlen(argv[argc]) + 1 + align_bytes; /* string + terminating NUL + assumed per-argument alignment overhead */ + if(align_bytes) + bytec += bytec % align_bytes; + argc++; + } + argv[argc] = (char*)0; + if(execve(argv[0], argv, environ) < 0) + { + if(errno == E2BIG) + return 1; /* exit status 1 makes the parent retry with a larger alignment factor */ + else + { + error(ERROR_SYSTEM|2, "execve failed"); + return 2; + } + } + error(ERROR_SYSTEM|2, "[BUG] we should never get here!"); + return 2; + } + else + { + /* parent */ + int exitstatus; + + if (waitpid(childpid,&i,0) < 0) + { + error(ERROR_SYSTEM|2, "waitpid failed"); + return 1; + } + if (!WIFEXITED(i) || (exitstatus = WEXITSTATUS(i)) > 1) + { + error(ERROR_ERROR|2, "child process exited abnormally"); + return 1; + } + if (exitstatus == 0) + break; /* yay :) */ + if (!align_bytes) + align_bytes = 2; + else + align_bytes *= 2; + if (align_bytes > 256) + { + error(ERROR_ERROR|2, "giving up"); + return 1; + } + } + } + sfprintf(sfstdout, + "#define ARG_ALIGN_BYTES\t%d\t/* data alignment factor for arguments list */\n", + align_bytes); + 
return 0; + } +}end fail{ + echo "#define ARG_ALIGN_BYTES 16 /* BUG: arg list alignment factor test failed; assuming 16 */" +}end diff --git a/src/cmd/ksh93/include/defs.h b/src/cmd/ksh93/include/defs.h index 5332ec3bd..b3f854986 100644 --- a/src/cmd/ksh93/include/defs.h +++ b/src/cmd/ksh93/include/defs.h @@ -314,6 +314,7 @@ struct shared #define SH_PREINIT 18 /* set with SH_INIT before parsing options */ #define SH_COMPLETE 19 /* set for command completion */ #define SH_INTESTCMD 20 /* set while test/[ command is being run */ +#define SH_XARG 21 /* set while in xarg (command -x) mode */ #define SH_BRACEEXPAND 42 #define SH_POSIX 46 diff --git a/src/cmd/ksh93/include/version.h b/src/cmd/ksh93/include/version.h index 98088afc7..fdfa28de4 100644 --- a/src/cmd/ksh93/include/version.h +++ b/src/cmd/ksh93/include/version.h @@ -20,7 +20,7 @@ #define SH_RELEASE_FORK "93u+m" /* only change if you develop a new ksh93 fork */ #define SH_RELEASE_SVER "1.0.0-alpha" /* semantic version number: https://semver.org */ -#define SH_RELEASE_DATE "2021-01-24" /* must be in this format for $((.sh.version)) */ +#define SH_RELEASE_DATE "2021-01-30" /* must be in this format for $((.sh.version)) */ #define SH_RELEASE_CPYR "(c) 2020-2021 Contributors to ksh " SH_RELEASE_FORK /* Scripts sometimes field-split ${.sh.version}, so don't change amount of whitespace. */ diff --git a/src/cmd/ksh93/sh.1 b/src/cmd/ksh93/sh.1 index 039fa15cd..dfcff5d7d 100644 --- a/src/cmd/ksh93/sh.1 +++ b/src/cmd/ksh93/sh.1 @@ -5844,19 +5844,33 @@ rather than the one defined by the value of .IP The .B \-x -option allows executing non-built-in commands with argument lists exceeding -limitations imposed by the operating system. This functionality is similar to +option runs +.I name\^ +as an external command, bypassing built-ins. 
+If the arguments contain a word that expands to multiple arguments, such as +\f3"$@"\fP or \f3"${array[@]}"\fP, then the +.B \-x +option also allows executing external commands with argument lists that are +longer than the operating system allows. This functionality is similar to .BR xargs (1) -but is easier to use. -If a command cannot ordinarily be executed because there are too many -arguments, the shell will invoke the indicated command multiple times -with a subset of the arguments on each invocation. -Any arguments (such as command options) that come before the first word -that expands to multiple arguments, as well as any that follow the last -word that expands to multiple arguments, are considered static arguments -and are repeated for each invocation. When all invocations are completed, +but is easier to use. The shell does this by invoking the external command +multiple times if needed, dividing the expanded argument list over the +invocations. Any arguments that come before the first \f3"$@"\fP or similar +expansion, as well as any that follow the last \f3"$@"\fP or similar, are +considered static arguments and are repeated for each invocation. This allows +each invocation to use the same command options, as well as the same trailing +destination arguments for commands like +.BR cp (1) +or +.BR mv (1). +When all invocations are completed, .B "command \-x" exits with the status of the invocation that had the highest exit status. +(Note that +.B "command \-x" +may still fail with an "argument list too long" error if a single argument +exceeds the maximum length of the argument list, or if no \f3"$@"\fP or +similar expansion was used.) .TP \(dd \f3compound\fP \f2vname\fP\*(OK\f3=\fP\f2value\^\fP\*(CK .\|.\|. Causes each @@ -6838,7 +6852,7 @@ Any file descriptor numbers greater than .B 2 that are opened with this mechanism are closed when invoking another program, unless they are explicitly redirected to themselves as part of that invocation -(e.g. 
\fb4>&4\fR) or the \fBposix\fR shell option is active. +(e.g. \fB4>&4\fR) or the \fBposix\fR shell option is active. .TP \(dg \f3return\fP \*(OK \f2n\^\fP \*(CK Causes a shell diff --git a/src/cmd/ksh93/sh/path.c b/src/cmd/ksh93/sh/path.c index 54ba9759a..72353f7af 100644 --- a/src/cmd/ksh93/sh/path.c +++ b/src/cmd/ksh93/sh/path.c @@ -162,24 +162,24 @@ static pid_t path_xargs(Shell_t *shp,const char *path, char *argv[],char *const pid_t pid; if(shp->xargmin < 0) return((pid_t)-1); - size = shp->gd->lim.arg_max-1024; + size = shp->gd->lim.arg_max-2048; for(ev=envp; cp= *ev; ev++) - size -= strlen(cp)-1; + size -= strlen(cp)+1; for(av=argv; (cp= *av) && av< &argv[shp->xargmin]; av++) - size -= strlen(cp)-1; + size -= strlen(cp)+1; for(av=avlast; cp= *av; av++,nlast++) - size -= strlen(cp)-1; + size -= strlen(cp)+1; av = &argv[shp->xargmin]; if(!spawn) job_clear(); shp->exitval = 0; while(av0 && avxargmin) + if(sh_isstate(SH_XARG)) { pid = path_xargs(shp,opath, &argv[0] ,envp,spawn); if(pid<0) - errormsg(SH_DICT,ERROR_system(ERROR_NOEXEC),"%s: 'command -x' failed",path); + errormsg(SH_DICT,ERROR_system(ERROR_NOEXEC),"command -x: could not execute %s",path); return(pid); } default: diff --git a/src/cmd/ksh93/sh/xec.c b/src/cmd/ksh93/sh/xec.c index ca3876394..d594fd59c 100644 --- a/src/cmd/ksh93/sh/xec.c +++ b/src/cmd/ksh93/sh/xec.c @@ -1024,7 +1024,7 @@ int sh_exec(register const Shnode_t *t, int flags) } #endif /* SHOPT_NAMESPACE */ com0 = com[0]; - shp->xargexit = 0; + sh_offstate(SH_XARG); while(np==SYSCOMMAND || !np && com0 && nv_search(com0,shp->fun_tree,0)==SYSCOMMAND) { register int n = b_command(0,com,&shp->bltindata); @@ -1036,13 +1036,12 @@ int sh_exec(register const Shnode_t *t, int flags) break; np = nv_bfsearch(com0, shp->bltin_tree, &nq, &cp); } - if(shp->xargexit) + if(sh_isstate(SH_XARG)) { shp->xargmin -= command; shp->xargmax -= command; + shp->xargexit = 0; } - else - shp->xargmin = 0; argn -= command; if(np && is_abuiltin(np)) { @@ -1233,7 +1232,7 
@@ int sh_exec(register const Shnode_t *t, int flags) pipejob = 1; } /* check for builtins */ - if(np && is_abuiltin(np)) + if(np && is_abuiltin(np) && !sh_isstate(SH_XARG)) { volatile int scope=0, share=0; volatile void *save_ptr; diff --git a/src/lib/libcmd/date.c b/src/lib/libcmd/date.c index 641c67536..952b7b49c 100644 --- a/src/lib/libcmd/date.c +++ b/src/lib/libcmd/date.c @@ -218,37 +218,38 @@ typedef struct Fmt static int settime(Shbltin_t* context, const char* cmd, Time_t now, int adjust, int network) { - char* s; char** argv; - char* args[5]; + char* args[7]; char buf[1024]; if (!adjust && !network) return tmxsettime(now); argv = args; - s = "/usr/bin/date"; - if (!streq(cmd, s) && (!eaccess(s, X_OK) || !eaccess(s+=4, X_OK))) + *argv++ = "command"; + *argv++ = "-px"; + *argv++ = "date"; + if (streq(astconf("UNIVERSE", NiL, NiL), "att")) { - *argv++ = s; - if (streq(astconf("UNIVERSE", NiL, NiL), "att")) - { - tmxfmt(buf, sizeof(buf), "%m%d%H" "%M%Y.%S", now); - if (adjust) - *argv++ = "-a"; - } - else - { - tmxfmt(buf, sizeof(buf), "%Y%m%d%H" "%M.%S", now); - if (network) - *argv++ = "-n"; - if (tm_info.flags & TM_UTC) - *argv++ = "-u"; - } - *argv++ = buf; - *argv = 0; - if (!sh_run(context, argv - args, args)) - return 0; + tmxfmt(buf, sizeof(buf), "%m%d%H" "%M%Y.%S", now); + if (adjust) + *argv++ = "-a"; } + else + { +#if __FreeBSD__ || __NetBSD__ || __OpenBSD__ || __bsdi__ || __DragonFly__ + tmxfmt(buf, sizeof(buf), "%Y%m%d%H" "%M.%S", now); +#else + tmxfmt(buf, sizeof(buf), "%m%d%H" "%M%Y.%S", now); +#endif + if (network) + *argv++ = "-n"; + if (tm_info.flags & TM_UTC) + *argv++ = "-u"; + } + *argv++ = buf; + *argv = 0; + if (!sh_run(context, argv - args, args)) + return 0; return -1; } diff --git a/src/lib/libcmd/uname.c b/src/lib/libcmd/uname.c index a295a757b..b90bae535 100644 --- a/src/lib/libcmd/uname.c +++ b/src/lib/libcmd/uname.c @@ -37,7 +37,7 @@ USAGE_LICENSE " separated, on a single line. 
When more than one option is specified" " the output is in the order specified by the \b-A\b option below." " Unsupported option values are listed as \a[option]]\a. If any unknown" -" options are specified then the local \b/usr/bin/uname\b is called.]" +" options are specified, the OS default \buname\b(1) is called.]" "[+?If any \aname\a operands are specified then the \bsysinfo\b(2) values" " for each \aname\a are listed, separated by space, on one line." " \bgetconf\b(1), a pre-existing \astandard\a interface, provides" @@ -325,14 +325,14 @@ b_uname(int argc, char** argv, Shbltin_t* context) sethost = opt_info.arg; continue; case ':': - s = "/usr/bin/uname"; - if (!streq(argv[0], s) && (!eaccess(s, X_OK) || !eaccess(s+=4, X_OK))) { - argv[0] = s; - return sh_run(context, argc, argv); + char **new_argv = (char **)stakalloc((argc + 3) * sizeof(char*)); + new_argv[0] = "command"; + new_argv[1] = "-px"; + for (n = 0; n <= argc; n++) + new_argv[n + 2] = argv[n]; + return sh_run(context, argc + 2, new_argv); } - error(2, "%s", opt_info.arg); - break; case '?': error(ERROR_usage(2), "%s", opt_info.arg); break;