1
0
Fork 0
mirror of git://git.code.sf.net/p/cdesktopenv/code synced 2025-02-13 19:52:20 +00:00

More 'command -x' robustification (re: 6f6b2201, 6a0e9a1a, 66e1d446)

The xargs-like functionality of 'command -x' was still failing with
E2BIG in cases or on systems where the environment variables list
is very large. For instance, on a default NixOS installation it's
about 50k by default (absurd; *each* process carries this weight).

This commit tweaks the feature test and introduces a runtime
fallback if it still fails.

POSIX: "The number of bytes available for the new process' combined
argument and environment lists is {ARG_MAX}. It is implementation-
defined whether null terminators, pointers, and/or any alignment
bytes are included in this total."
https://pubs.opengroup.org/onlinepubs/9699919799/functions/exec.html

More recommended reading:
https://mina86.com/2021/the-real-arg-max-part-1/
https://mina86.com/2021/the-real-arg-max-part-2/

So, operating systems are free to consume ARG_MAX space in whatever
bizarre way they want, and may even come up with more innovative
ways to waste buffer space in future. <sigh>

command_xargs() allows for the possibility of adding a certain
number of extra bytes per argument to account for pointers and
whatnot. As of this commit, we still start off from the value that
was determined by the _arg_extrabytes test in features/externs, but
path_spawn() will now increase that number at runtime and retry if
E2BIG still occurs. Hopefully this makes it future-proof.

src/cmd/ksh93/features/externs:
- Rename generated ARG_EXTRA_BYTES macro to _arg_extrabytes for
  better naming consistency with other iffe feature tests.
- Tweaks to avoid detecting 9 extra bytes instead of 8 on some
  versions of 64-bit Linux (it needs the size of a 64 bit pointer).
- Show the result in the iffe output.

src/cmd/ksh93/include/shell.h,
src/cmd/ksh93/sh/init.c:
- Do not store getconf(CONF_ARG_MAX) at init time; on Linux, this
  value may be changed dynamically (via ulimit -s), so it must be
  re-obtained on every use.

src/cmd/ksh93/sh/path.c:
- command_xargs():
  - Use a static global variable for the number of extra bytes per
    argument. Initialise it with the results of the feature test.
    This allows increasing it at runtime if an OS does something
    weird causing an E2BIG failure.
  - Abort instead of return if command_xargs() is called with
    sh.xargmin < 0; this should never happen.
  - To allow retrying without crashing, restore saved args before
    returning -1.
  - Leave more generous space for the environment -- half the size
    of the existing environment. This was experimentally determined
    to be needed to keep Linux and macOS happy.
  - Instead of crashing, return with E2BIG if there is too little
    space to run.
  - Get rid of unnecessary (void*) typecasts; we no longer pretend
    to be compatible with C++ (re: a34e8319).
  - Remove a couple of dead 'if(saveargs) free(saveargs);'
    statements; at those points, saveargs is known to be NULL.
  - Return -2 instead of -1 when retrying would be pointless.
- path_spawn():
  - When command_xargs() returns -1 and the error is E2BIG,
    increase the number of extra bytes by the size of a char*
    pointer and try again. Give up if adding bytes the size of 8
    char* pointers fails.

src/cmd/ksh93/sh/xec.c: check_exec_optimization():
- Do not use this optimization if we are running 'command -x';
  I noticed some instances of the PATH search yielding incorrect
  results if we do. TODO: work this out at some point.
This commit is contained in:
Martijn Dekker 2022-06-20 19:50:35 +01:00
parent 225323f138
commit 05fc199c95
5 changed files with 85 additions and 41 deletions

View file

@ -11,13 +11,25 @@ extern nice int (int)
extern setreuid int (uid_t,uid_t)
extern setregid int (gid_t,gid_t)
tst note{ determining extra bytes per argument for arguments list }end output{
tst note{ determining default number of extra bytes per argument for arguments list }end output{
/*
* Figure out if this OS requires extra bytes per argument
* in the arguments list of a process.
* Outputs an appropriate #define ARG_EXTRA_BYTES.
* Outputs an appropriate #define _arg_extrabytes.
* Without this, 'command -x' failed with E2BIG on macOS, Linux and Solaris
* even if all the arguments should fit in ARG_MAX based on their length.
*
* POSIX: "The number of bytes available for the new process' combined argument
* and environment lists is {ARG_MAX}. It is implementation-defined whether null
* terminators, pointers, and/or any alignment bytes are included in this total."
* https://pubs.opengroup.org/onlinepubs/9699919799/functions/exec.html
* So, operating systems are free to consume ARG_MAX space in whatever bizarre way they want, and
* may even come up with more innovative ways to waste buffer space in future. This test assumes
* that null terminators are included in the total, because why wouldn't they be? It builds a
* dummy arguments list and tries fork(2)/execve(2) to figure how how many extra bytes to add to
* each argument to account for pointers and whatnot, and outputs that (_arg_extrabytes) as a
* default value for command_xargs() in path.c. We still cannot entirely rely on this value on all
* systems, so path_spawn() in path.c is able to increase it at runtime if E2BIG still occurs.
*/
#include <ast.h>
@ -30,8 +42,10 @@ tst note{ determining extra bytes per argument for arguments list }end output{
{
int extra_bytes = 0, envlen, argmax, i;
pid_t childpid;
Sfio_t *notebuf;
char *note;
error_info.id="ARG_EXTRA_BYTES test (parent)";
error_info.id="_arg_extrabytes test (parent)";
while(1)
{
envlen = 0;
@ -47,12 +61,13 @@ tst note{ determining extra bytes per argument for arguments list }end output{
error(ERROR_ERROR|2, "argmax too small");
return 1;
}
argmax -= 512;
if(!(childpid = fork()))
{
/* child */
int bytec;
error_info.id="ARG_EXTRA_BYTES test (child)";
error_info.id="_arg_extrabytes test (child)";
argv = (char **)stakalloc((argmax / 2 + 1) * sizeof(char*));
argc = bytec = 0;
while(bytec < argmax)
@ -62,7 +77,7 @@ tst note{ determining extra bytes per argument for arguments list }end output{
else if(argc==1)
argv[argc] = "true";
else
argv[argc] = "x";
argv[argc] = "xxxxxxxxxxxxxxxxxxxxxxx";
/* also add 1 for terminating zero byte */
bytec += strlen(argv[argc]) + 1 + extra_bytes;
argc++;
@ -91,7 +106,7 @@ tst note{ determining extra bytes per argument for arguments list }end output{
error(ERROR_SYSTEM|2, "waitpid failed");
return 1;
}
if (!WIFEXITED(i) || (exitstatus = WEXITSTATUS(i)) > 1)
if (!WIFEXITED(i) || (exitstatus = WEXITSTATUS(i)) > 1 && exitstatus != 126)
{
error(ERROR_ERROR|2, "child process exited abnormally");
return 1;
@ -106,11 +121,18 @@ tst note{ determining extra bytes per argument for arguments list }end output{
}
}
}
/* show note in iffe output via inherited FD 9 */
notebuf = sfstropen();
sfprintf(notebuf," %d ...",extra_bytes);
note = sfstruse(notebuf);
write(9,note,strlen(note));
sfclose(notebuf);
/* add #define to header via stdout */
sfprintf(sfstdout,
"#define ARG_EXTRA_BYTES\t%d\t/* extra bytes per argument for arguments list */\n",
"#define _arg_extrabytes\t%d\t/* extra bytes per argument for arguments list */\n",
extra_bytes);
return 0;
}
}end fail{
echo "#define ARG_EXTRA_BYTES 16 /* BUG: test failed; assuming 16 */"
echo "#define _arg_extrabytes sizeof(char*) /* BUG: test failed; assuming pointer size */"
}end

View file

@ -217,7 +217,6 @@ struct sh_scoped
struct limits
{
long arg_max; /* max arg+env exec() size */
int open_max; /* maximum number of file descriptors */
int clk_tck; /* number of ticks per second */
int child_max; /* maximum number of children */

View file

@ -147,9 +147,6 @@ char e_version[] = "\n@(#)$Id: Version "
#define RANDMASK 0x7fff
#ifndef ARG_MAX
# define ARG_MAX (1*1024*1024)
#endif
#ifndef CHILD_MAX
# define CHILD_MAX (1*1024)
#endif
@ -1284,11 +1281,8 @@ Shell_t *sh_init(register int argc,register char *argv[], Shinit_f userinit)
sh.euserid=geteuid();
sh.groupid=getgid();
sh.egroupid=getegid();
sh.lim.arg_max = astconf_long(CONF_ARG_MAX);
sh.lim.child_max = (int)astconf_long(CONF_CHILD_MAX);
sh.lim.clk_tck = (int)astconf_long(CONF_CLK_TCK);
if(sh.lim.arg_max <=0)
sh.lim.arg_max = ARG_MAX;
if(sh.lim.child_max <=0)
sh.lim.child_max = CHILD_MAX;
if(sh.lim.clk_tck <=0)

View file

@ -91,6 +91,17 @@ static pid_t _spawnveg(const char *path, char* const argv[], char* const envp[],
return(pid);
}
/*
* POSIX: "The number of bytes available for the new process' combined argument and environment lists is {ARG_MAX}. It
* is implementation-defined whether null terminators, pointers, and/or any alignment bytes are included in this total."
* https://pubs.opengroup.org/onlinepubs/9699919799/functions/exec.html
* So, operating systems are free to consume ARG_MAX space in whatever bizarre way they want, and may even come up with
* more innovative ways to waste buffer space in future. In command_xargs() below, we assume that null terminators are
* included in the total, because why wouldn't they be? Then we allow for the possibility of adding a certain number of
* extra bytes per argument to account for pointers and whatnot. We start off from the value that was determined by the
* _arg_extrabytes test in features/externs, but path_spawn() will increase arg_extra and retry if E2BIG still occurs.
*/
static unsigned arg_extra = _arg_extrabytes;
/*
* used with command -x to run the command in multiple passes
* spawn is non-zero when invoked via spawn
@ -101,19 +112,31 @@ static pid_t command_xargs(const char *path, char *argv[],char *const envp[], in
register char *cp, **av, **xv;
char **avlast= &argv[sh.xargmax], **saveargs=0;
char *const *ev;
long size, left;
ssize_t size, left;
int nlast=1,n,exitval=0;
pid_t pid;
if(sh.xargmin < 0)
return((pid_t)-1);
size = sh.lim.arg_max - (ARG_EXTRA_BYTES > 2 ? 1024*ARG_EXTRA_BYTES : 2048);
abort();
/* get env/args buffer size (may change dynamically on Linux) */
if((size = astconf_long(CONF_ARG_MAX)) < 0)
size = 131072;
/* leave fairly generous space for the environment */
for(ev=envp; cp= *ev; ev++)
size -= strlen(cp) + 1 + ARG_EXTRA_BYTES;
{
n = strlen(cp);
size -= n + n / 2 + arg_extra;
}
/* subtract lengths of leading and trailing static arguments */
for(av=argv; (cp= *av) && av< &argv[sh.xargmin]; av++)
size -= strlen(cp) + 1 + ARG_EXTRA_BYTES;
size -= strlen(cp) + 1 + arg_extra;
for(av=avlast; cp= *av; av++,nlast++)
size -= strlen(cp) + 1 + ARG_EXTRA_BYTES;
size -= 2 + 2 * ARG_EXTRA_BYTES; /* final null env and arg elements */
size -= strlen(cp) + 1 + arg_extra;
size -= 2 + 2 * arg_extra; /* final null env and arg elements */
if(size < 2048)
{
errno = E2BIG;
return(-2);
}
av = &argv[sh.xargmin];
if(!spawn)
job_clear();
@ -122,7 +145,7 @@ static pid_t command_xargs(const char *path, char *argv[],char *const envp[], in
{
/* for each argument, account for terminating zero and possible extra bytes */
for(xv=av,left=size; left>0 && av<avlast;)
left -= strlen(*av++) + 1 + ARG_EXTRA_BYTES;
left -= strlen(*av++) + 1 + arg_extra;
/* leave at least two for last */
if(left<0 && (avlast-av)<2)
av--;
@ -130,8 +153,8 @@ static pid_t command_xargs(const char *path, char *argv[],char *const envp[], in
{
n = nlast*sizeof(char*);
saveargs = (char**)sh_malloc(n);
memcpy((void*)saveargs, (void*)av, n);
memcpy((void*)av,(void*)avlast,n);
memcpy(saveargs,av,n);
memcpy(av,avlast,n);
}
else
{
@ -144,35 +167,36 @@ static pid_t command_xargs(const char *path, char *argv[],char *const envp[], in
if(saveargs || av<avlast || (exitval && !spawn))
{
if((pid=_spawnveg(path,argv,envp,0)) < 0)
{
if(saveargs)
{
memcpy(av,saveargs,n);
free(saveargs);
}
return(-1);
}
job_post(pid,0);
job_wait(pid);
if(sh.exitval>exitval)
exitval = sh.exitval;
if(saveargs)
{
memcpy((void*)av,saveargs,n);
free((void*)saveargs);
memcpy(av,saveargs,n);
free(saveargs);
saveargs = 0;
}
}
else if(spawn)
{
sh.xargexit = exitval;
if(saveargs)
free((void*)saveargs);
return(_spawnveg(path,argv,envp,spawn>>1));
}
else
{
if(saveargs)
free((void*)saveargs);
return(execve(path,argv,envp));
}
}
if(!spawn)
exit(exitval);
return((pid_t)-1);
return(-1);
}
/*
@ -1247,13 +1271,17 @@ pid_t path_spawn(const char *opath,register char **argv, char **envp, Pathcomp_t
case E2BIG:
if(sh_isstate(SH_XARG))
{
pid = command_xargs(opath, &argv[0] ,envp,spawn);
if(pid<0)
{
errormsg(SH_DICT,ERROR_system(ERROR_NOEXEC),"command -x: could not execute %s",path);
UNREACHABLE();
}
/*
* command -x: built-in xargs. If the argument list doesn't fit and there is more
* than one argument, then retry to allow for extra space consumed per argument.
*/
while((pid = command_xargs(opath,&argv[0],envp,spawn)) == -1
&& arg_extra < 8*sizeof(char*) && errno==E2BIG && argv[1])
arg_extra += sizeof(char*);
if(pid > 0)
return(pid);
/* error: reset */
arg_extra = _arg_extrabytes;
}
/* FALLTHROUGH */
default:

View file

@ -903,6 +903,7 @@ static int check_exec_optimization(int type, int execflg, int execflg2, struct i
|| sh.st.trapdontexec
|| sh.subshell
|| ((struct checkpt*)sh.jmplist)->mode==SH_JMPEVAL
|| sh_isstate(SH_XARG)
|| (pipejob && (sh_isstate(SH_MONITOR) || sh_isoption(SH_PIPEFAIL) || sh_isstate(SH_TIMING))))
{
return(0);