1
0
Fork 0
mirror of git://git.code.sf.net/p/cdesktopenv/code synced 2025-02-24 23:14:14 +00:00
cde/src/cmd/ksh93/tests/leaks.sh
Martijn Dekker 222515bf08 Implement hash tables for virtual subshells (re: 102868f8, 9d428f8f)
The forking fix implemented in 102868f8 and 9d428f8f, which stops
the main shell's hash table from being cleared if PATH is changed
in a subshell, can cause a significant performance penalty for
certain scripts that do something like

    ( PATH=... command foo )

in a subshell, especially if done repeatedly. This is because the
hash table is cleared (and hence a subshell forks) even for
temporary PATH assignments preceding commands.

It also just plain doesn't work. For instance:

    $ hash -r; (ls) >/dev/null; hash
    ls=/bin/ls

Simply running an external command in a subshell caches the path in
the hash table that is shared with a main shell. To remedy this, we
would have to fork the subshell before forking any external
command. And that would be an unacceptable performance regression.

Virtual subshells do not need to fork when changing PATH if they
get their own hash tables. This commit adds these. The code for
alias subshell trees (which was removed in ec888867 because they
were broken and unneeded) provided the beginning of a template for
their implementation.

src/cmd/ksh93/sh/subshell.c:
- struct subshell: Add strack pointer to subshell hash table.
- Add sh_subtracktree(): return pointer to subshell hash table.
- sh_subfuntree(): Refactor a bit for legibility.
- sh_subshell(): Add code for cleaning up subshell hash table.

src/cmd/ksh93/sh/name.c:
- nv_putval(): Remove code to fork a subshell upon resetting PATH.
- nv_rehash(): When in a subshell, invalidate a hash table entry
  for a subshell by creating the subshell scope if needed, then
  giving that entry the NV_NOALIAS attribute to invalidate it.

src/cmd/ksh93/sh/path.c: path_search():
- To set a tracked alias/hash table entry, use sh_subtracktree()
  and pass the HASH_NOSCOPE flag to nv_search() so that any new
  entries are added to the current subshell table (if any) and do
  not influence any parent scopes.

src/cmd/ksh93/bltins/typeset.c: b_alias():
- b_alias(): For hash table entries, use sh_subtracktree() instead
  of forking a subshell. Keep forking for normal aliases.
- setall(): To set a tracked alias/hash table entry, pass the
  HASH_NOSCOPE flag to nv_search() so that any new entries are
  added to the current subshell table (if any) and do not influence
  any parent scopes.

src/cmd/ksh93/sh/init.c: put_restricted():
- Update code for clearing the hash table (when changing $PATH) to
  use sh_subtracktree().

src/cmd/ksh93/bltins/cd_pwd.c:
- When invalidating path name bindings to relative paths, use the
  subshell hash tree if applicable by calling sh_subtracktree().
- rehash(): Call nv_rehash() instead of _nv_unset()ting the hash
  table entry; this is needed to work correctly in subshells.

src/cmd/ksh93/tests/leaks.sh:
- Add leak tests for various PATH-related operations in the main
  shell and in a virtual subshell.
- Several pre-existing memory leaks are exposed by the new tests
  (I've confirmed these in 93u+). The tests are disabled and marked
  TODO for now, as these bugs have not yet been fixed.

src/cmd/ksh93/tests/subshell.sh:
- Update.

Resolves: https://github.com/ksh93/ksh/issues/66
2021-01-07 22:18:25 +00:00

413 lines
11 KiB
Bash
Executable file

########################################################################
# #
# This software is part of the ast package #
# Copyright (c) 1982-2012 AT&T Intellectual Property #
# and is licensed under the #
# Eclipse Public License, Version 1.0 #
# by AT&T Intellectual Property #
# #
# A copy of the License is available at #
# http://www.eclipse.org/org/documents/epl-v10.html #
# (with md5 checksum b35adb5213ca9657e911e9befb180842) #
# #
# Information and Software Systems Research #
# AT&T Research #
# Florham Park NJ #
# #
# David Korn <dgk@research.att.com> #
# #
########################################################################
function err_exit
{
print -u2 -n "\t"
print -u2 -r ${Command}[$1]: "${@:2}"
let Errors+=1
}
alias err_exit='err_exit $LINENO'
Command=${0##*/}
integer Errors=0
[[ -d $tmp && -w $tmp && $tmp == "$PWD" ]] || { err\_exit "$LINENO" '$tmp not set; run this from shtests. Aborting.'; exit 1; }
# Determine method for running tests.
# The 'vmstate' builtin can be used if ksh was compiled with vmalloc.
if builtin vmstate 2>/dev/null &&
n=$(vmstate --format='%(busy_size)u') &&
let "($n) == ($n) && n > 0" # non-zero number?
then N=512 # number of iterations for each test
unit=bytes
tolerance=$((4*N)) # tolerate 4 bytes per iteration to account for vmalloc artefacts
function getmem
{
vmstate --format='%(busy_size)u'
}
# Otherwise, make do with the nonstandard 'rss' (real resident size) keyword
# of the 'ps' command (the standard 'vsz', virtual size, is not usable).
elif n=$(ps -o rss= -p "$$" 2>/dev/null) &&
let "($n) == ($n) && n > 0"
then N=16384
unit=KiB
tolerance=$((8*N/1024)) # tolerate 8 bytes per iteration to account for malloc/ps artefacts
function getmem
{
ps -o rss= -p "$$"
}
else err\_exit "$LINENO" 'WARNING: cannot find method to measure memory usage; skipping tests'
exit 0
fi
# test for variable reset leak #
function test_reset
{
integer i N=$1
for ((i = 0; i < N; i++))
do u=$i
done
}
# Initialise variables used below to avoid false leaks
before=0 after=0 i=0 u=0
# Check results.
# The function has 'err_exit' in the name so that shtests counts each call as at test.
function err_exit_if_leak
{
if ((after > before + tolerance))
then err\_exit "$1" "$2 (leaked approx $((after - before)) $unit after $N iterations)"
fi
}
alias err_exit_if_leak='err_exit_if_leak "$LINENO"'
# one round to get to steady state -- sensitive to -x
test_reset $N
test_reset $N
before=$(getmem)
test_reset $N
after=$(getmem)
err_exit_if_leak "variable value reset memory leak"
# buffer boundary tests
for exp in 65535 65536
do got=$($SHELL -c 'x=$(printf "%.*c" '$exp' x); print ${#x}' 2>&1)
[[ $got == $exp ]] || err_exit "large command substitution failed -- expected $exp, got $got"
done
data="(v=;sid=;di=;hi=;ti='1328244300';lv='o';id='172.3.161.178';var=(k='conn_num._total';u=;fr=;l='Number of Connections';n='22';t='number';))"
read -C stat <<< "$data"
for ((i=0; i < 8; i++)) # steady state first
do print -r -- "$data" | while read -u$n -C stat; do :; done {n}<&0-
done
before=$(getmem)
for ((i=0; i < N; i++))
do print -r -- "$data"
done | while read -u$n -C stat
do :
done {n}<&0-
after=$(getmem)
err_exit_if_leak "memory leak with read -C when deleting compound variable"
# extra 'read's to get to steady state
for ((i=0; i < 10; i++))
do read -C stat <<< "$data"
done
before=$(getmem)
for ((i=0; i < N; i++))
do read -C stat <<< "$data"
done
after=$(getmem)
err_exit_if_leak "memory leak with read -C when using <<<"
# ======
# Unsetting an associative array shouldn't cause a memory leak
# See https://www.mail-archive.com/ast-users@lists.research.att.com/msg01016.html
typeset -A stuff
before=$(getmem)
for (( i=0; i < N; i++ ))
do
unset stuff[xyz]
typeset -A stuff[xyz]
stuff[xyz][elem0]="data0"
stuff[xyz][elem1]="data1"
stuff[xyz][elem2]="data2"
stuff[xyz][elem3]="data3"
stuff[xyz][elem4]="data4"
done
unset stuff
after=$(getmem)
err_exit_if_leak 'unset of associative array causes memory leak'
# ======
# Memory leak when resetting PATH and clearing hash table
# ...steady memory state:
command -v ls >/dev/null # add something to hash table
PATH=/dev/null true # set/restore PATH & clear hash table
# ...test for leak:
before=$(getmem)
for ((i=0; i < N; i++))
do PATH=/dev/null true # set/restore PATH & clear hash table
command -v ls # do PATH search, add to hash table
done >/dev/null
after=$(getmem)
err_exit_if_leak 'memory leak on PATH reset before PATH search'
# ...test for another leak that only shows up when building with nmake:
before=$(getmem)
for ((i=0; i < N; i++))
do PATH=/dev/null true # set/restore PATH & clear hash table
done >/dev/null
after=$(getmem)
err_exit_if_leak 'memory leak on PATH reset'
# ======
# Defining a function in a virtual subshell
# https://github.com/ksh93/ksh/issues/114
unset -f foo
before=$(getmem)
for ((i=0; i < N; i++))
do (function foo { :; }; foo)
done
after=$(getmem)
err_exit_if_leak 'ksh function defined in virtual subshell'
typeset -f foo >/dev/null && err_exit 'ksh function leaks out of subshell'
unset -f foo
before=$(getmem)
for ((i=0; i < N; i++))
do (foo() { :; }; foo)
done
after=$(getmem)
err_exit_if_leak 'POSIX function defined in virtual subshell'
typeset -f foo >/dev/null && err_exit 'POSIX function leaks out of subshell'
# ======
# Sourcing a dot script in a virtual subshell
echo 'echo "$@"' > $tmp/dot.sh
before=$(getmem)
for ((i=0; i < N; i++))
do (. "$tmp/dot.sh" dot one two three >/dev/null)
done
after=$(getmem)
err_exit_if_leak 'script dotted in virtual subshell'
echo 'echo "$@"' > $tmp/dot.sh
before=$(getmem)
for ((i=0; i < N; i++))
do (source "$tmp/dot.sh" source four five six >/dev/null)
done
after=$(getmem)
err_exit_if_leak 'script sourced in virtual subshell'
# ======
# Multiple leaks when using arrays in functions (Red Hat #921455)
# Fix based on: https://src.fedoraproject.org/rpms/ksh/blob/642af4d6/f/ksh-20120801-memlik.patch
# TODO: both of these tests still leak (although much less after the patch) when run in a non-C locale.
saveLANG=$LANG; LANG=C # comment out to test remaining leak (1/2)
function _hash
{
typeset w=([abc]=1 [def]=31534 [xyz]=42)
print -u2 $w 2>&-
# accessing the var will leak
}
before=$(getmem)
for ((i=0; i < N; i++))
do _hash
done
after=$(getmem)
err_exit_if_leak 'associative array in function'
function _array
{
typeset w=(1 31534 42)
print -u2 $w 2>&-
# unset w will prevent leak
}
before=$(getmem)
for ((i=0; i < N; i++))
do _array
done
after=$(getmem)
err_exit_if_leak 'indexed array in function'
LANG=$saveLANG # comment out to test remaining leak (2/2)
# ======
# Memory leak in typeset (Red Hat #1036470)
# Fix based on: https://src.fedoraproject.org/rpms/ksh/blob/642af4d6/f/ksh-20120801-memlik3.patch
# The fix was backported from ksh 93v- beta.
function myFunction
{
typeset toPrint="something"
echo "${toPrint}"
}
state=$(myFunction)
before=$(getmem)
for ((i=0; i < N; i++))
do state=$(myFunction)
done
after=$(getmem)
err_exit_if_leak 'typeset in function called by command substitution'
# ======
# Check that unsetting an alias frees both the node and its value
before=$(getmem)
for ((i=0; i < N; i++))
do alias "test$i=command$i"
unalias "test$i"
done
after=$(getmem)
err_exit_if_leak 'unalias'
# ======
# Red Hat bug rhbz#982142: command substitution leaks
# case1: Nested command substitutions
# (reportedly already fixed in 93u+, but let's keep the test)
before=$(getmem)
for ((i=0; i < N; i++))
do a=`true 1 + \`true 1 + 1\`` # was: a=`expr 1 + \`expr 1 + 1\``
done
after=$(getmem)
err_exit_if_leak 'nested command substitutions'
# case2: Command alias
alias ls='true -ltr' # was: alias ls='ls -ltr'
before=$(getmem)
for ((i=0; i < N; i++))
do eval 'a=`ls`'
done
after=$(getmem)
unalias ls
err_exit_if_leak 'alias in command substitution'
# case3: Function call via autoload
cat >$tmp/func1 <<\EOF
function func1
{
echo "func1 call";
}
EOF
FPATH=$tmp
autoload func1
a=`func1` # steady memory state
before=$(getmem)
for ((i=0; i < N; i++))
do a=`func1`
done
after=$(getmem)
unset -f func1
unset -v FPATH
err_exit_if_leak 'function call via autoload in command substitution'
# ======
# add some random utilities to the hash table to detect memory leak on hash table reset when changing PATH
random_utils=(chmod cp mv awk sed diff comm cut sort uniq date env find mkdir rmdir pr sleep)
save_PATH=$PATH
hash "${random_utils[@]}"
before=$(getmem)
for ((i=0; i < N; i++))
do hash -r
hash "${random_utils[@]}"
done
after=$(getmem)
err_exit_if_leak 'clear hash table (hash -r) in main shell'
before=$(getmem)
for ((i=0; i < N; i++))
do PATH=/dev/null
PATH=$save_PATH
hash "${random_utils[@]}"
done
after=$(getmem)
err_exit_if_leak 'set PATH value in main shell'
before=$(getmem)
for ((i=0; i < N; i++))
do PATH=/dev/null command true
done
after=$(getmem)
err_exit_if_leak 'run command with preceding PATH assignment in main shell'
: <<'disabled' # TODO: known leak (approx 73552 bytes after 512 iterations)
before=$(getmem)
for ((i=0; i < N; i++))
do typeset -A PATH
unset PATH
PATH=$save_PATH
hash "${random_utils[@]}"
done
after=$(getmem)
err_exit_if_leak 'set PATH attribute in main shell'
disabled
: <<'disabled' # TODO: known leak (approx 99568 bytes after 512 iterations)
before=$(getmem)
for ((i=0; i < N; i++))
do unset PATH
PATH=$save_PATH
hash "${random_utils[@]}"
done
after=$(getmem)
err_exit_if_leak 'unset PATH in main shell'
disabled
hash "${random_utils[@]}"
before=$(getmem)
for ((i=0; i < N; i++))
do (hash -r)
done
after=$(getmem)
err_exit_if_leak 'clear hash table (hash -r) in subshell'
: <<'disabled' # TODO: known leak (approx 123520 bytes after 512 iterations)
before=$(getmem)
for ((i=0; i < N; i++))
do (PATH=/dev/null)
done
after=$(getmem)
err_exit_if_leak 'set PATH value in subshell'
disabled
: <<'disabled' # TODO: known leak (approx 24544 bytes after 512 iterations)
before=$(getmem)
for ((i=0; i < N; i++))
do (PATH=/dev/null command true)
done
after=$(getmem)
err_exit_if_leak 'run command with preceding PATH assignment in subshell'
disabled
: <<'disabled' # TODO: known leak (approx 131200 bytes after 512 iterations)
before=$(getmem)
for ((i=0; i < N; i++))
do (readonly PATH)
done
after=$(getmem)
err_exit_if_leak 'set PATH attribute in subshell'
disabled
: <<'disabled' # TODO: known leak (approx 229440 bytes after 512 iterations)
before=$(getmem)
for ((i=0; i < N; i++))
do (unset PATH)
done
after=$(getmem)
err_exit_if_leak 'unset PATH in subshell'
disabled
# ======
exit $((Errors<125?Errors:125))