Implement hash tables for virtual subshells (re: 102868f8, 9d428f8f)

The forking fix implemented in 102868f8 and 9d428f8f, which stops the main shell's hash table from being cleared if PATH is changed in a subshell, can cause a significant performance penalty for certain scripts that do something like ( PATH=... command foo ) in a subshell, especially if done repeatedly. This is because the hash table is cleared (and hence a subshell forks) even for temporary PATH assignments preceding commands. It also just plain doesn't work. For instance: $ hash -r; (ls) >/dev/null; hash ls=/bin/ls Simply running an external command in a subshell caches the path in the hash table that is shared with a main shell. To remedy this, we would have to fork the subshell before forking any external command. And that would be an unacceptable performance regression. Virtual subshells do not need to fork when changing PATH if they get their own hash tables. This commit adds these. The code for alias subshell trees (which was removed in ec888867 because they were broken and unneeded) provided the beginning of a template for their implementation. src/cmd/ksh93/sh/subshell.c: - struct subshell: Add strack pointer to subshell hash table. - Add sh_subtracktree(): return pointer to subshell hash table. - sh_subfuntree(): Refactor a bit for legibility. - sh_subshell(): Add code for cleaning up subshell hash table. src/cmd/ksh93/sh/name.c: - nv_putval(): Remove code to fork a subshell upon resetting PATH. - nv_rehash(): When in a subshell, invalidate a hash table entry for a subshell by creating the subshell scope if needed, then giving that entry the NV_NOALIAS attribute to invalidate it. src/cmd/ksh93/sh/path.c: path_search(): - To set a tracked alias/hash table entry, use sh_subtracktree() and pass the HASH_NOSCOPE flag to nv_search() so that any new entries are added to the current subshell table (if any) and do not influence any parent scopes. src/cmd/ksh93/bltins/typeset.c: b_alias(): - b_alias(): For hash table entries, use sh_subtracktree() instead of forking a subshell. Keep forking for normal aliases. - setall(): To set a tracked alias/hash table entry, pass the HASH_NOSCOPE flag to nv_search() so that any new entries are added to the current subshell table (if any) and do not influence any parent scopes. src/cmd/ksh93/sh/init.c: put_restricted(): - Update code for clearing the hash table (when changing $PATH) to use sh_subtracktree(). src/cmd/ksh93/bltins/cd_pwd.c: - When invalidating path name bindings to relative paths, use the subshell hash tree if applicable by calling sh_subtracktree(). - rehash(): Call nv_rehash() instead of _nv_unset()ting the hash table entry; this is needed to work correctly in subshells. src/cmd/ksh93/tests/leaks.sh: - Add leak tests for various PATH-related operations in the main shell and in a virtual subshell. - Several pre-existing memory leaks are exposed by the new tests (I've confirmed these in 93u+). The tests are disabled and marked TODO for now, as these bugs have not yet been fixed. src/cmd/ksh93/tests/subshell.sh: - Update. Resolves: https://github.com/ksh93/ksh/issues/66
2025-03-09 15:50:02 +00:00 · 2021-01-07 21:18:33 +00:00 · 2021-01-07 21:18:33 +00:00 · 222515bf08
commit 222515bf08
parent a95d107ee5
11 changed files with 177 additions and 67 deletions
--- a/7
+++ b/7
@ -7,6 +7,9 @@ Any uppercase BUG_* names are modernish shell bug IDs.

 - Fixed a crash that could occur while ksh updated ${.sh.match}.

+- Any changes to the hash table (a.k.a. "tracked aliases", i.e. cached $PATH
+  searches) in a subshell now no longer affect the parent shell's hash table.
+
 2021-01-05:

 - Fixed a bug in 'cd' that caused 'cd ./foo' to search for 'foo' in $CDPATH.
@ -633,10 +636,6 @@ Any uppercase BUG_* names are modernish shell bug IDs.
  'alias -t --'. The functionality of the old command has been removed
  from the alias builtin.

- Changing the hash table in a subshell will no longer affect the parent
-  shell's hash table. This fix applies to the hash utility and when the
-  PATH is reset manually.
-
 - 'set +r' is no longer able to unset the restricted option. This change
  makes the behavior of 'set +r' identical to 'set +o restricted'.

--- a/src/cmd/ksh93/bltins/cd_pwd.c
+++ b/src/cmd/ksh93/bltins/cd_pwd.c
@ -44,9 +44,8 @@
 static void rehash(register Namval_t *np,void *data)
 {
 	Pathcomp_t *pp = (Pathcomp_t*)np->nvalue.cp;
-	NOT_USED(data);
 	if(pp && *pp->name!='/')
-		_nv_unset(np,0);
+		nv_rehash(np,data);
 }

 int	b_cd(int argc, char *argv[],Shbltin_t *context)
@ -210,7 +209,7 @@ success:
 	nv_putval(pwdnod,dir,NV_RDONLY);
 	nv_onattr(pwdnod,NV_NOFREE|NV_EXPORT);
 	shp->pwd = pwdnod->nvalue.cp;
-	nv_scan(shp->track_tree,rehash,(void*)0,NV_TAGGED,NV_TAGGED);
+	nv_scan(sh_subtracktree(1),rehash,(void*)0,NV_TAGGED,NV_TAGGED);
 	path_newdir(shp,shp->pathlist);
 	path_newdir(shp,shp->cdpathlist);
 	if(oldpwd)
--- a/src/cmd/ksh93/bltins/typeset.c
+++ b/src/cmd/ksh93/bltins/typeset.c
@ -137,7 +137,7 @@ int    b_alias(int argc,register char *argv[],Shbltin_t *context)
 	tdata.sh = context->shp;
 	troot = tdata.sh->alias_tree;
 	if(*argv[0]=='h')
-		flag = NV_TAGGED;
+		flag |= NV_TAGGED;
 	if(argv[1])
 	{
 		opt_info.offset = 0;
@ -170,17 +170,16 @@ int    b_alias(int argc,register char *argv[],Shbltin_t *context)
 			errormsg(SH_DICT,ERROR_usage(2),"%s",optusage(NIL(char*)));
 		argv += (opt_info.index-1);
 	}
-	/* fork a virtual subshell to avoid affecting parent shell's hash/alias table */
-	if((argv[1] || rflag) && tdata.sh->subshell && !tdata.sh->subshare)
-		sh_subfork();
 	/* 'alias -t', 'hash' */
 	if(flag&NV_TAGGED)
 	{
-		troot = tdata.sh->track_tree;	/* use hash table */
+		troot = sh_subtracktree(1);	/* use hash table */
 		tdata.aflag = '-';		/* make setall() treat 'hash' like 'alias -t' */
 		if(rflag)			/* hash -r: clear hash table */
 			nv_scan(troot,nv_rehash,(void*)0,NV_TAGGED,NV_TAGGED);
 	}
+	else if(argv[1] && tdata.sh->subshell && !tdata.sh->subshare)
+		sh_subfork();			/* avoid affecting main shell's alias table */
 	return(setall(argv,flag,troot,&tdata));
 }

@ -642,7 +641,7 @@ static int     setall(char **argv,register int flag,Dt_t *troot,struct tdata *tp
 			/* tracked alias */
 			if(troot==shp->track_tree && tp->aflag=='-')
 			{
-				np = nv_search(name,troot,NV_ADD);
+				np = nv_search(name,troot,NV_ADD|HASH_NOSCOPE);
 				path_alias(np,path_absolute(shp,nv_name(np),NIL(Pathcomp_t*),0));
 				continue;
 			}
@ -1206,31 +1205,6 @@ static int unall(int argc, char **argv, register Dt_t *troot, Shell_t* shp)
 		dtclear(troot);
 		return(r);
 	}
-
-	/*
-	 * Unsetting the PATH in a non-forked subshell will cause the parent shell's hash table to be reset,
-	 * so fork to work around the problem. To avoid crashing, this must be done before calling sh_pushcontext(),
-	 * so we need to loop through the args separately and check if any node is equal to PATHNOD.
-	 */
-	if(shp->subshell && !shp->subshare && troot==shp->var_tree)
-	{
-		char **argv_tmp = argv;
-		while(name = *argv_tmp++)
-		{
-			np=nv_open(name,troot,NV_NOADD|nflag);
-			if(!np)
-				continue;
-			/* Are we changing the PATH? */
-			if(np==PATHNOD)
-			{
-				nv_close(np);
-				sh_subfork();
-				break;
-			}
-			nv_close(np);
-		}
-	}
-
 	sh_pushcontext(shp,&buff,1);
 	while(name = *argv++)
 	{
--- a/src/cmd/ksh93/data/builtins.c
+++ b/src/cmd/ksh93/data/builtins.c
@ -903,7 +903,7 @@ _JOB_
 ;

 const char sh_opthash[] =
-"[-1c?\n@(#)$Id: hash (ksh93) 2020-06-10 $\n]"
+"[-1c?\n@(#)$Id: hash (ksh93) 2021-01-07 $\n]"
 "[+NAME?hash - display the locations of recently used programs]"
 "[+DESCRIPTION?\bhash\b displays or modifies the hash table with the "
 	"locations of recently used programs. If given no arguments, it lists "
--- a/src/cmd/ksh93/include/defs.h
+++ b/src/cmd/ksh93/include/defs.h
@ -390,6 +390,7 @@ extern Sfio_t		*sh_sfeval(char*[]);
 extern void		sh_setmatch(Shell_t*,const char*,int,int,int[],int);
 extern void             sh_scope(Shell_t*, struct argnod*, int);
 extern Namval_t		*sh_scoped(Shell_t*, Namval_t*);
+extern Dt_t		*sh_subtracktree(int);
 extern Dt_t		*sh_subfuntree(int);
 extern void		sh_subjobcheck(pid_t);
 extern int		sh_subsavefd(int);
--- a/src/cmd/ksh93/sh/init.c
+++ b/src/cmd/ksh93/sh/init.c
@ -317,7 +317,7 @@ static void put_restricted(register Namval_t* np,const char *val,int flags,Namfu
 	if(np==PATHNOD	|| (path_scoped=(strcmp(name,PATHNOD->nvname)==0)))		
 	{
 		/* Clear the hash table */
-		nv_scan(shp->track_tree,nv_rehash,(void*)0,NV_TAGGED,NV_TAGGED);
+		nv_scan(sh_subtracktree(1),nv_rehash,(void*)0,NV_TAGGED,NV_TAGGED);
 		if(path_scoped && !val)
 			val = PATHNOD->nvalue.cp;
 	}
--- a/src/cmd/ksh93/sh/name.c
+++ b/src/cmd/ksh93/sh/name.c
@ -1570,17 +1570,6 @@ void nv_putval(register Namval_t *np, const char *string, int flags)
 #endif /* SHOPT_FIXEDARRAY */
 	if(!(flags&NV_RDONLY) && nv_isattr (np, NV_RDONLY))
 		errormsg(SH_DICT,ERROR_exit(1),e_readonly, nv_name(np));
-
-	/*
-	 * Resetting the PATH in a non-forking subshell will reset the parent shell's
-	 * hash table, so work around the problem by forking before sh_assignok
-	 * -- however, don't do this if we were told to ignore the variable's readonly state (i.e.
-	 * if the NV_RDONLY flag is set), because that means we're restoring the parent shell state
-	 * after exiting a virtual subshell, and we would end up forking the parent shell instead.
-	 */
-	if(np == PATHNOD && !(flags & NV_RDONLY) && shp->subshell && !shp->subshare)
-		sh_subfork();
-
 	/*
 	 * The following could cause the shell to fork if assignment
 	 * would cause a side effect
@ -2244,6 +2233,11 @@ static int scanfilter(Dt_t *dict, void *arg, void *data)
 void nv_rehash(register Namval_t *np,void *data)
 {
 	NOT_USED(data);
+	if(sh.subshell)
+	{
+		/* invalidate subshell node; if none exists, add a dummy */
+		np = nv_search(nv_name(np),sh.track_tree,NV_ADD|HASH_NOSCOPE);
+	}
 	nv_onattr(np,NV_NOALIAS);
 }

--- a/src/cmd/ksh93/sh/path.c
+++ b/src/cmd/ksh93/sh/path.c
@ -738,7 +738,7 @@ int	path_search(Shell_t *shp,register const char *name,Pathcomp_t **oldpp, int f
 	}
 	else if(pp && !sh_isstate(SH_DEFPATH) && *name!='/' && flag<3)
 	{
-		if(np=nv_search(name,shp->track_tree,NV_ADD))
+		if(np=nv_search(name,sh_subtracktree(1),NV_ADD|HASH_NOSCOPE))
 			path_alias(np,pp);
 	}
 	return(0);
--- a/src/cmd/ksh93/sh/subshell.c
+++ b/src/cmd/ksh93/sh/subshell.c
@ -75,6 +75,7 @@ static struct subshell
 	Dt_t		*var;	/* variable table at time of subshell */
 	struct Link	*svar;	/* save shell variable table */
 	Dt_t		*sfun;	/* function scope for subshell */
+	Dt_t		*strack;/* tracked alias scope for subshell */
 	Pathcomp_t	*pathlist; /* for PATH variable */
 	struct Error_context_s *errcontext;
 	Shopt_t		options;/* save shell options */
@ -393,6 +394,22 @@ static void nv_restore(struct subshell *sp)
 	sp->shpwd=save;
 }

+/*
+ * Return pointer to tracked alias tree (a.k.a. hash table, i.e. cached $PATH search results).
+ * Create new one if in a subshell and one doesn't exist and 'create' is non-zero.
+ */
+Dt_t *sh_subtracktree(int create)
+{
+	register struct subshell *sp = subshell_data;
+	if(create && sh.subshell && !sh.subshare && sp && !sp->strack)
+	{
+		sp->strack = dtopen(&_Nvdisc,Dtset);
+		dtview(sp->strack,sh.track_tree);
+		sh.track_tree = sp->strack;
+	}
+	return(sh.track_tree);
+}
+
 /*
 * return pointer to function tree
 * create new one if in a subshell and one doesn't exist and create is non-zero
@ -400,15 +417,13 @@ static void nv_restore(struct subshell *sp)
 Dt_t *sh_subfuntree(int create)
 {
 	register struct subshell *sp = subshell_data;
-	if(!sp || sp->shp->curenv==0)
-		return(sh.fun_tree);
-	if(!sp->sfun && create && !sp->shp->subshare)
+	if(create && sh.subshell && !sh.subshare && sp && !sp->sfun)
 	{
 		sp->sfun = dtopen(&_Nvdisc,Dtoset);
-		dtview(sp->sfun,sp->shp->fun_tree);
-		sp->shp->fun_tree = sp->sfun;
+		dtview(sp->sfun,sh.fun_tree);
+		sh.fun_tree = sp->sfun;
 	}
-	return(sp->shp->fun_tree);
+	return(sh.fun_tree);
 }

 /*
@ -743,6 +758,29 @@ Sfio_t *sh_subshell(Shell_t *shp,Shnode_t *t, volatile int flags, int comsub)
 	{
 		int n;
 		shp->options = sp->options;
+		/* Clean up subshell hash table. */
+		if(sp->strack)
+		{
+			Namval_t *np, *prev_np;
+			/* Detach this scope from the unified view. */
+			shp->track_tree = dtview(sp->strack,0);
+			/* Delete (free) all elements of the subshell hash table. To allow dtnext() to
+			   set the pointer to the next item, we have to delete one item beind the loop. */
+			prev_np = 0;
+			np = (Namval_t*)dtfirst(sp->strack);
+			while(np)
+			{
+				if(prev_np)
+					nv_delete(prev_np,sp->strack,0);
+				prev_np = np;
+				np = (Namval_t*)dtnext(sp->strack,np);
+			}
+			if(prev_np)
+				nv_delete(prev_np,sp->strack,0);
+			/* Close and free the table itself. */
+			dtclose(sp->strack);
+		}
+		/* Clean up subshell function table. */
 		if(sp->sfun)
 		{
 			shp->fun_tree = dtview(sp->sfun,0);
--- a/src/cmd/ksh93/tests/leaks.sh
+++ b/src/cmd/ksh93/tests/leaks.sh
@ -311,5 +311,103 @@ unset -f func1
 unset -v FPATH
 err_exit_if_leak 'function call via autoload in command substitution'

+# ======
+
+# add some random utilities to the hash table to detect memory leak on hash table reset when changing PATH
+random_utils=(chmod cp mv awk sed diff comm cut sort uniq date env find mkdir rmdir pr sleep)
+
+save_PATH=$PATH
+hash "${random_utils[@]}"
+before=$(getmem)
+for ((i=0; i < N; i++))
+do	hash -r
+	hash "${random_utils[@]}"
+done
+after=$(getmem)
+err_exit_if_leak 'clear hash table (hash -r) in main shell'
+
+before=$(getmem)
+for ((i=0; i < N; i++))
+do	PATH=/dev/null
+	PATH=$save_PATH
+	hash "${random_utils[@]}"
+done
+after=$(getmem)
+err_exit_if_leak 'set PATH value in main shell'
+
+before=$(getmem)
+for ((i=0; i < N; i++))
+do	PATH=/dev/null command true
+done
+after=$(getmem)
+err_exit_if_leak 'run command with preceding PATH assignment in main shell'
+
+: <<'disabled'	# TODO: known leak (approx 73552 bytes after 512 iterations)
+before=$(getmem)
+for ((i=0; i < N; i++))
+do	typeset -A PATH
+	unset PATH
+	PATH=$save_PATH
+	hash "${random_utils[@]}"
+done
+after=$(getmem)
+err_exit_if_leak 'set PATH attribute in main shell'
+disabled
+
+: <<'disabled'	# TODO: known leak (approx 99568 bytes after 512 iterations)
+before=$(getmem)
+for ((i=0; i < N; i++))
+do	unset PATH
+	PATH=$save_PATH
+	hash "${random_utils[@]}"
+done
+after=$(getmem)
+err_exit_if_leak 'unset PATH in main shell'
+disabled
+
+hash "${random_utils[@]}"
+before=$(getmem)
+for ((i=0; i < N; i++))
+do	(hash -r)
+done
+after=$(getmem)
+err_exit_if_leak 'clear hash table (hash -r) in subshell'
+
+: <<'disabled'	# TODO: known leak (approx 123520 bytes after 512 iterations)
+before=$(getmem)
+for ((i=0; i < N; i++))
+do	(PATH=/dev/null)
+done
+after=$(getmem)
+err_exit_if_leak 'set PATH value in subshell'
+disabled
+
+: <<'disabled'	# TODO: known leak (approx 24544 bytes after 512 iterations)
+before=$(getmem)
+for ((i=0; i < N; i++))
+do	(PATH=/dev/null command true)
+done
+after=$(getmem)
+err_exit_if_leak 'run command with preceding PATH assignment in subshell'
+disabled
+
+: <<'disabled'	# TODO: known leak (approx 131200 bytes after 512 iterations)
+before=$(getmem)
+for ((i=0; i < N; i++))
+do	(readonly PATH)
+done
+after=$(getmem)
+err_exit_if_leak 'set PATH attribute in subshell'
+disabled
+
+: <<'disabled'	# TODO: known leak (approx 229440 bytes after 512 iterations)
+before=$(getmem)
+for ((i=0; i < N; i++))
+do	(unset PATH)
+done
+after=$(getmem)
+err_exit_if_leak 'unset PATH in subshell'
+disabled
+
 # ======
 exit $((Errors<125?Errors:125))
--- a/src/cmd/ksh93/tests/subshell.sh
+++ b/src/cmd/ksh93/tests/subshell.sh
@ -719,19 +719,24 @@ v=${ eval 'al'; alias al='echo subshare'; } && [[ $v == 'mainalias' && $(eval 'a
 # Resetting a subshell's hash table should not affect the parent shell
 check_hash_table()
 {
-	[[ $(hash) ]] || err_\exit $1 $'resetting the hash table in a subshell affects the parent shell\'s hash table'
+	[[ -n ${ hash; } ]] || err_\exit $1 "resetting the hash table in a subshell affects the parent shell's hash table"
 	# Ensure the hash table isn't empty before the next test is run
 	hash -r chmod
 }
-
 (hash -r); check_hash_table $LINENO					# err_exit (count me)
 (PATH="$PATH"); check_hash_table $LINENO				# err_exit (count me)
 (unset PATH); check_hash_table $LINENO					# err_exit (count me)
 (nameref PATH_TWO=PATH; unset PATH_TWO); check_hash_table $LINENO	# err_exit (count me)

 # Adding a utility to a subshell's hash table should not affect the parent shell
-(hash cat)
-[[ $(hash) == "chmod=$(whence -p chmod)" ]] || err_exit $'changes to a subshell\'s hash table affect the parent shell'
+hash -r
+(hash ls)
+[[ -z ${ hash; } ]] || err_exit "changes to a subshell's hash table affect the parent shell"
+
+# Running a command in a subshell should not affect the parent shell's hash table
+hash -r
+(ls /dev/null >/dev/null)
+[[ -z ${ hash; } ]] || err_exit "command run in subshell added to parent shell's hash table"

 # ======
 # Variables set in functions inside of a virtual subshell should not affect the
@ -747,17 +752,19 @@ EOF
 v=$($SHELL $testvars) && [[ "$v" == "a= b= c=0" ]] || err_exit 'variables set in subshells are not confined to the subshell'

 # ======
-# Setting PATH in virtual subshell should trigger a fork; restoring PATH after leaving virtual subshell should not.
-SHELL=$SHELL "$SHELL" -c '
+got=$("$SHELL" -c '
+	PATH=/dev/null/1
 	(
 		echo "DEBUG ${.sh.pid}"
+		PATH=/dev/null/2
 		readonly PATH
 		echo "DEBUG ${.sh.pid}"
 	)
-	echo "DEBUG ${.sh.pid}"
+	[[ $PATH == /dev/null/1 ]] && echo "DEBUG ${.sh.pid}" || echo "DEBUG -1 == wrong"
 	: extra command to disable "-c" exec optimization
-' | awk '/^DEBUG/ { pid[NR] = $2; }  END { exit !(pid[1] == pid[2] && pid[2] == pid[3]); }' \
-|| err_exit "setting PATH to readonly in subshell triggers an erroneous fork"
+' 2>&1) \
+&& awk '/^DEBUG/ { pid[NR] = $2; }  END { exit !(pid[1] == pid[2] && pid[2] == pid[3]); }' <<<"$got" \
+|| err_exit "setting PATH to a readonly value in a subshell either fails or forks (got $(printf %q "$got"))"

 # ======
 # Test command substitution with external command in here-document