1
0
Fork 0
mirror of git://git.code.sf.net/p/cdesktopenv/code synced 2025-02-13 03:32:24 +00:00

Implement hash tables for virtual subshells (re: 102868f8, 9d428f8f)

The forking fix implemented in 102868f8 and 9d428f8f, which stops
the main shell's hash table from being cleared if PATH is changed
in a subshell, can cause a significant performance penalty for
certain scripts that do something like

    ( PATH=... command foo )

in a subshell, especially if done repeatedly. This is because the
hash table is cleared (and hence a subshell forks) even for
temporary PATH assignments preceding commands.

It also just plain doesn't work. For instance:

    $ hash -r; (ls) >/dev/null; hash
    ls=/bin/ls

Simply running an external command in a subshell caches the path in
the hash table that is shared with a main shell. To remedy this, we
would have to fork the subshell before forking any external
command. And that would be an unacceptable performance regression.

Virtual subshells do not need to fork when changing PATH if they
get their own hash tables. This commit adds these. The code for
alias subshell trees (which was removed in ec888867 because they
were broken and unneeded) provided the beginning of a template for
their implementation.

src/cmd/ksh93/sh/subshell.c:
- struct subshell: Add strack pointer to subshell hash table.
- Add sh_subtracktree(): return pointer to subshell hash table.
- sh_subfuntree(): Refactor a bit for legibility.
- sh_subshell(): Add code for cleaning up subshell hash table.

src/cmd/ksh93/sh/name.c:
- nv_putval(): Remove code to fork a subshell upon resetting PATH.
- nv_rehash(): When in a subshell, invalidate a hash table entry
  for a subshell by creating the subshell scope if needed, then
  giving that entry the NV_NOALIAS attribute to invalidate it.

src/cmd/ksh93/sh/path.c: path_search():
- To set a tracked alias/hash table entry, use sh_subtracktree()
  and pass the HASH_NOSCOPE flag to nv_search() so that any new
  entries are added to the current subshell table (if any) and do
  not influence any parent scopes.

src/cmd/ksh93/bltins/typeset.c: b_alias():
- b_alias(): For hash table entries, use sh_subtracktree() instead
  of forking a subshell. Keep forking for normal aliases.
- setall(): To set a tracked alias/hash table entry, pass the
  HASH_NOSCOPE flag to nv_search() so that any new entries are
  added to the current subshell table (if any) and do not influence
  any parent scopes.

src/cmd/ksh93/sh/init.c: put_restricted():
- Update code for clearing the hash table (when changing $PATH) to
  use sh_subtracktree().

src/cmd/ksh93/bltins/cd_pwd.c:
- When invalidating path name bindings to relative paths, use the
  subshell hash tree if applicable by calling sh_subtracktree().
- rehash(): Call nv_rehash() instead of _nv_unset()ting the hash
  table entry; this is needed to work correctly in subshells.

src/cmd/ksh93/tests/leaks.sh:
- Add leak tests for various PATH-related operations in the main
  shell and in a virtual subshell.
- Several pre-existing memory leaks are exposed by the new tests
  (I've confirmed these in 93u+). The tests are disabled and marked
  TODO for now, as these bugs have not yet been fixed.

src/cmd/ksh93/tests/subshell.sh:
- Update.

Resolves: https://github.com/ksh93/ksh/issues/66
This commit is contained in:
Martijn Dekker 2021-01-07 21:18:33 +00:00
parent a95d107ee5
commit 222515bf08
11 changed files with 177 additions and 67 deletions

7
NEWS
View file

@ -7,6 +7,9 @@ Any uppercase BUG_* names are modernish shell bug IDs.
- Fixed a crash that could occur while ksh updated ${.sh.match}.
- Any changes to the hash table (a.k.a. "tracked aliases", i.e. cached $PATH
searches) in a subshell now no longer affect the parent shell's hash table.
2021-01-05:
- Fixed a bug in 'cd' that caused 'cd ./foo' to search for 'foo' in $CDPATH.
@ -633,10 +636,6 @@ Any uppercase BUG_* names are modernish shell bug IDs.
'alias -t --'. The functionality of the old command has been removed
from the alias builtin.
- Changing the hash table in a subshell will no longer affect the parent
shell's hash table. This fix applies to the hash utility and when the
PATH is reset manually.
- 'set +r' is no longer able to unset the restricted option. This change
makes the behavior of 'set +r' identical to 'set +o restricted'.

View file

@ -44,9 +44,8 @@
static void rehash(register Namval_t *np,void *data)
{
Pathcomp_t *pp = (Pathcomp_t*)np->nvalue.cp;
NOT_USED(data);
if(pp && *pp->name!='/')
_nv_unset(np,0);
nv_rehash(np,data);
}
int b_cd(int argc, char *argv[],Shbltin_t *context)
@ -210,7 +209,7 @@ success:
nv_putval(pwdnod,dir,NV_RDONLY);
nv_onattr(pwdnod,NV_NOFREE|NV_EXPORT);
shp->pwd = pwdnod->nvalue.cp;
nv_scan(shp->track_tree,rehash,(void*)0,NV_TAGGED,NV_TAGGED);
nv_scan(sh_subtracktree(1),rehash,(void*)0,NV_TAGGED,NV_TAGGED);
path_newdir(shp,shp->pathlist);
path_newdir(shp,shp->cdpathlist);
if(oldpwd)

View file

@ -137,7 +137,7 @@ int b_alias(int argc,register char *argv[],Shbltin_t *context)
tdata.sh = context->shp;
troot = tdata.sh->alias_tree;
if(*argv[0]=='h')
flag = NV_TAGGED;
flag |= NV_TAGGED;
if(argv[1])
{
opt_info.offset = 0;
@ -170,17 +170,16 @@ int b_alias(int argc,register char *argv[],Shbltin_t *context)
errormsg(SH_DICT,ERROR_usage(2),"%s",optusage(NIL(char*)));
argv += (opt_info.index-1);
}
/* fork a virtual subshell to avoid affecting parent shell's hash/alias table */
if((argv[1] || rflag) && tdata.sh->subshell && !tdata.sh->subshare)
sh_subfork();
/* 'alias -t', 'hash' */
if(flag&NV_TAGGED)
{
troot = tdata.sh->track_tree; /* use hash table */
troot = sh_subtracktree(1); /* use hash table */
tdata.aflag = '-'; /* make setall() treat 'hash' like 'alias -t' */
if(rflag) /* hash -r: clear hash table */
nv_scan(troot,nv_rehash,(void*)0,NV_TAGGED,NV_TAGGED);
}
else if(argv[1] && tdata.sh->subshell && !tdata.sh->subshare)
sh_subfork(); /* avoid affecting main shell's alias table */
return(setall(argv,flag,troot,&tdata));
}
@ -642,7 +641,7 @@ static int setall(char **argv,register int flag,Dt_t *troot,struct tdata *tp
/* tracked alias */
if(troot==shp->track_tree && tp->aflag=='-')
{
np = nv_search(name,troot,NV_ADD);
np = nv_search(name,troot,NV_ADD|HASH_NOSCOPE);
path_alias(np,path_absolute(shp,nv_name(np),NIL(Pathcomp_t*),0));
continue;
}
@ -1206,31 +1205,6 @@ static int unall(int argc, char **argv, register Dt_t *troot, Shell_t* shp)
dtclear(troot);
return(r);
}
/*
* Unsetting the PATH in a non-forked subshell will cause the parent shell's hash table to be reset,
* so fork to work around the problem. To avoid crashing, this must be done before calling sh_pushcontext(),
* so we need to loop through the args separately and check if any node is equal to PATHNOD.
*/
if(shp->subshell && !shp->subshare && troot==shp->var_tree)
{
char **argv_tmp = argv;
while(name = *argv_tmp++)
{
np=nv_open(name,troot,NV_NOADD|nflag);
if(!np)
continue;
/* Are we changing the PATH? */
if(np==PATHNOD)
{
nv_close(np);
sh_subfork();
break;
}
nv_close(np);
}
}
sh_pushcontext(shp,&buff,1);
while(name = *argv++)
{

View file

@ -903,7 +903,7 @@ _JOB_
;
const char sh_opthash[] =
"[-1c?\n@(#)$Id: hash (ksh93) 2020-06-10 $\n]"
"[-1c?\n@(#)$Id: hash (ksh93) 2021-01-07 $\n]"
"[+NAME?hash - display the locations of recently used programs]"
"[+DESCRIPTION?\bhash\b displays or modifies the hash table with the "
"locations of recently used programs. If given no arguments, it lists "

View file

@ -390,6 +390,7 @@ extern Sfio_t *sh_sfeval(char*[]);
extern void sh_setmatch(Shell_t*,const char*,int,int,int[],int);
extern void sh_scope(Shell_t*, struct argnod*, int);
extern Namval_t *sh_scoped(Shell_t*, Namval_t*);
extern Dt_t *sh_subtracktree(int);
extern Dt_t *sh_subfuntree(int);
extern void sh_subjobcheck(pid_t);
extern int sh_subsavefd(int);

View file

@ -317,7 +317,7 @@ static void put_restricted(register Namval_t* np,const char *val,int flags,Namfu
if(np==PATHNOD || (path_scoped=(strcmp(name,PATHNOD->nvname)==0)))
{
/* Clear the hash table */
nv_scan(shp->track_tree,nv_rehash,(void*)0,NV_TAGGED,NV_TAGGED);
nv_scan(sh_subtracktree(1),nv_rehash,(void*)0,NV_TAGGED,NV_TAGGED);
if(path_scoped && !val)
val = PATHNOD->nvalue.cp;
}

View file

@ -1570,17 +1570,6 @@ void nv_putval(register Namval_t *np, const char *string, int flags)
#endif /* SHOPT_FIXEDARRAY */
if(!(flags&NV_RDONLY) && nv_isattr (np, NV_RDONLY))
errormsg(SH_DICT,ERROR_exit(1),e_readonly, nv_name(np));
/*
* Resetting the PATH in a non-forking subshell will reset the parent shell's
* hash table, so work around the problem by forking before sh_assignok
* -- however, don't do this if we were told to ignore the variable's readonly state (i.e.
* if the NV_RDONLY flag is set), because that means we're restoring the parent shell state
* after exiting a virtual subshell, and we would end up forking the parent shell instead.
*/
if(np == PATHNOD && !(flags & NV_RDONLY) && shp->subshell && !shp->subshare)
sh_subfork();
/*
* The following could cause the shell to fork if assignment
* would cause a side effect
@ -2244,6 +2233,11 @@ static int scanfilter(Dt_t *dict, void *arg, void *data)
void nv_rehash(register Namval_t *np,void *data)
{
NOT_USED(data);
if(sh.subshell)
{
/* invalidate subshell node; if none exists, add a dummy */
np = nv_search(nv_name(np),sh.track_tree,NV_ADD|HASH_NOSCOPE);
}
nv_onattr(np,NV_NOALIAS);
}

View file

@ -738,7 +738,7 @@ int path_search(Shell_t *shp,register const char *name,Pathcomp_t **oldpp, int f
}
else if(pp && !sh_isstate(SH_DEFPATH) && *name!='/' && flag<3)
{
if(np=nv_search(name,shp->track_tree,NV_ADD))
if(np=nv_search(name,sh_subtracktree(1),NV_ADD|HASH_NOSCOPE))
path_alias(np,pp);
}
return(0);

View file

@ -75,6 +75,7 @@ static struct subshell
Dt_t *var; /* variable table at time of subshell */
struct Link *svar; /* save shell variable table */
Dt_t *sfun; /* function scope for subshell */
Dt_t *strack;/* tracked alias scope for subshell */
Pathcomp_t *pathlist; /* for PATH variable */
struct Error_context_s *errcontext;
Shopt_t options;/* save shell options */
@ -393,6 +394,22 @@ static void nv_restore(struct subshell *sp)
sp->shpwd=save;
}
/*
* Return pointer to tracked alias tree (a.k.a. hash table, i.e. cached $PATH search results).
* Create new one if in a subshell and one doesn't exist and 'create' is non-zero.
*/
Dt_t *sh_subtracktree(int create)
{
register struct subshell *sp = subshell_data;
if(create && sh.subshell && !sh.subshare && sp && !sp->strack)
{
sp->strack = dtopen(&_Nvdisc,Dtset);
dtview(sp->strack,sh.track_tree);
sh.track_tree = sp->strack;
}
return(sh.track_tree);
}
/*
* return pointer to function tree
* create new one if in a subshell and one doesn't exist and create is non-zero
@ -400,15 +417,13 @@ static void nv_restore(struct subshell *sp)
Dt_t *sh_subfuntree(int create)
{
register struct subshell *sp = subshell_data;
if(!sp || sp->shp->curenv==0)
return(sh.fun_tree);
if(!sp->sfun && create && !sp->shp->subshare)
if(create && sh.subshell && !sh.subshare && sp && !sp->sfun)
{
sp->sfun = dtopen(&_Nvdisc,Dtoset);
dtview(sp->sfun,sp->shp->fun_tree);
sp->shp->fun_tree = sp->sfun;
dtview(sp->sfun,sh.fun_tree);
sh.fun_tree = sp->sfun;
}
return(sp->shp->fun_tree);
return(sh.fun_tree);
}
/*
@ -743,6 +758,29 @@ Sfio_t *sh_subshell(Shell_t *shp,Shnode_t *t, volatile int flags, int comsub)
{
int n;
shp->options = sp->options;
/* Clean up subshell hash table. */
if(sp->strack)
{
Namval_t *np, *prev_np;
/* Detach this scope from the unified view. */
shp->track_tree = dtview(sp->strack,0);
/* Delete (free) all elements of the subshell hash table. To allow dtnext() to
set the pointer to the next item, we have to delete one item beind the loop. */
prev_np = 0;
np = (Namval_t*)dtfirst(sp->strack);
while(np)
{
if(prev_np)
nv_delete(prev_np,sp->strack,0);
prev_np = np;
np = (Namval_t*)dtnext(sp->strack,np);
}
if(prev_np)
nv_delete(prev_np,sp->strack,0);
/* Close and free the table itself. */
dtclose(sp->strack);
}
/* Clean up subshell function table. */
if(sp->sfun)
{
shp->fun_tree = dtview(sp->sfun,0);

View file

@ -311,5 +311,103 @@ unset -f func1
unset -v FPATH
err_exit_if_leak 'function call via autoload in command substitution'
# ======
# add some random utilities to the hash table to detect memory leak on hash table reset when changing PATH
random_utils=(chmod cp mv awk sed diff comm cut sort uniq date env find mkdir rmdir pr sleep)
save_PATH=$PATH
hash "${random_utils[@]}"
before=$(getmem)
for ((i=0; i < N; i++))
do hash -r
hash "${random_utils[@]}"
done
after=$(getmem)
err_exit_if_leak 'clear hash table (hash -r) in main shell'
before=$(getmem)
for ((i=0; i < N; i++))
do PATH=/dev/null
PATH=$save_PATH
hash "${random_utils[@]}"
done
after=$(getmem)
err_exit_if_leak 'set PATH value in main shell'
before=$(getmem)
for ((i=0; i < N; i++))
do PATH=/dev/null command true
done
after=$(getmem)
err_exit_if_leak 'run command with preceding PATH assignment in main shell'
: <<'disabled' # TODO: known leak (approx 73552 bytes after 512 iterations)
before=$(getmem)
for ((i=0; i < N; i++))
do typeset -A PATH
unset PATH
PATH=$save_PATH
hash "${random_utils[@]}"
done
after=$(getmem)
err_exit_if_leak 'set PATH attribute in main shell'
disabled
: <<'disabled' # TODO: known leak (approx 99568 bytes after 512 iterations)
before=$(getmem)
for ((i=0; i < N; i++))
do unset PATH
PATH=$save_PATH
hash "${random_utils[@]}"
done
after=$(getmem)
err_exit_if_leak 'unset PATH in main shell'
disabled
hash "${random_utils[@]}"
before=$(getmem)
for ((i=0; i < N; i++))
do (hash -r)
done
after=$(getmem)
err_exit_if_leak 'clear hash table (hash -r) in subshell'
: <<'disabled' # TODO: known leak (approx 123520 bytes after 512 iterations)
before=$(getmem)
for ((i=0; i < N; i++))
do (PATH=/dev/null)
done
after=$(getmem)
err_exit_if_leak 'set PATH value in subshell'
disabled
: <<'disabled' # TODO: known leak (approx 24544 bytes after 512 iterations)
before=$(getmem)
for ((i=0; i < N; i++))
do (PATH=/dev/null command true)
done
after=$(getmem)
err_exit_if_leak 'run command with preceding PATH assignment in subshell'
disabled
: <<'disabled' # TODO: known leak (approx 131200 bytes after 512 iterations)
before=$(getmem)
for ((i=0; i < N; i++))
do (readonly PATH)
done
after=$(getmem)
err_exit_if_leak 'set PATH attribute in subshell'
disabled
: <<'disabled' # TODO: known leak (approx 229440 bytes after 512 iterations)
before=$(getmem)
for ((i=0; i < N; i++))
do (unset PATH)
done
after=$(getmem)
err_exit_if_leak 'unset PATH in subshell'
disabled
# ======
exit $((Errors<125?Errors:125))

View file

@ -719,19 +719,24 @@ v=${ eval 'al'; alias al='echo subshare'; } && [[ $v == 'mainalias' && $(eval 'a
# Resetting a subshell's hash table should not affect the parent shell
check_hash_table()
{
[[ $(hash) ]] || err_\exit $1 $'resetting the hash table in a subshell affects the parent shell\'s hash table'
[[ -n ${ hash; } ]] || err_\exit $1 "resetting the hash table in a subshell affects the parent shell's hash table"
# Ensure the hash table isn't empty before the next test is run
hash -r chmod
}
(hash -r); check_hash_table $LINENO # err_exit (count me)
(PATH="$PATH"); check_hash_table $LINENO # err_exit (count me)
(unset PATH); check_hash_table $LINENO # err_exit (count me)
(nameref PATH_TWO=PATH; unset PATH_TWO); check_hash_table $LINENO # err_exit (count me)
# Adding a utility to a subshell's hash table should not affect the parent shell
(hash cat)
[[ $(hash) == "chmod=$(whence -p chmod)" ]] || err_exit $'changes to a subshell\'s hash table affect the parent shell'
hash -r
(hash ls)
[[ -z ${ hash; } ]] || err_exit "changes to a subshell's hash table affect the parent shell"
# Running a command in a subshell should not affect the parent shell's hash table
hash -r
(ls /dev/null >/dev/null)
[[ -z ${ hash; } ]] || err_exit "command run in subshell added to parent shell's hash table"
# ======
# Variables set in functions inside of a virtual subshell should not affect the
@ -747,17 +752,19 @@ EOF
v=$($SHELL $testvars) && [[ "$v" == "a= b= c=0" ]] || err_exit 'variables set in subshells are not confined to the subshell'
# ======
# Setting PATH in virtual subshell should trigger a fork; restoring PATH after leaving virtual subshell should not.
SHELL=$SHELL "$SHELL" -c '
got=$("$SHELL" -c '
PATH=/dev/null/1
(
echo "DEBUG ${.sh.pid}"
PATH=/dev/null/2
readonly PATH
echo "DEBUG ${.sh.pid}"
)
echo "DEBUG ${.sh.pid}"
[[ $PATH == /dev/null/1 ]] && echo "DEBUG ${.sh.pid}" || echo "DEBUG -1 == wrong"
: extra command to disable "-c" exec optimization
' | awk '/^DEBUG/ { pid[NR] = $2; } END { exit !(pid[1] == pid[2] && pid[2] == pid[3]); }' \
|| err_exit "setting PATH to readonly in subshell triggers an erroneous fork"
' 2>&1) \
&& awk '/^DEBUG/ { pid[NR] = $2; } END { exit !(pid[1] == pid[2] && pid[2] == pid[3]); }' <<<"$got" \
|| err_exit "setting PATH to a readonly value in a subshell either fails or forks (got $(printf %q "$got"))"
# ======
# Test command substitution with external command in here-document