1
0
Fork 0
mirror of git://git.code.sf.net/p/cdesktopenv/code synced 2025-02-15 04:32:24 +00:00

Fixes for -G/--globstar (re: 5312a59d)

The fix for '.' and '..' in regular globbing broke '.' and '..' in
globstar. No globstar pattern that contains '.' or '..' as any
pathname component still matched. This commit fixes that.

This commit also makes symlink/** mostly work, which it never has
done in any ksh93 version. It is correct and expected that symlinks
found by patterns are not resolved, but symlinks were not resolved
even when specified as explicit non-pattern pathname components.
For example, /tmp/** breaks if /tmp is a symlink (e.g. on macOS),
which looks like a bug.

src/lib/libast/include/glob.h,
src/lib/libast/misc/glob.c: glob_dir():
- Make symlink/** work. we can check if the string pointed to by
  pat is exactly equal to *. If so, we are doing regular globbing
  for that particular pathname element, and it's okay to resolve
  symlinks. If not (if it's **), we're doing globstar and we should
  not be matching symlinks.
- Let's also introduce proper identification of symlinks (GLOB_SYM)
  and not lump them in with other special files (GLOB_DEV).
- Fix the bug with literal '.' and '..' components in globstar
  patterns. In preceding code, the matchdir pointer gets set to the
  complete glob pattern if we're doing globstar for the current
  pathname element, null if not. The pat pointer gets set to the
  elements of the pattern that are still left to be processed;
  already-done elements are trimmed from it by increasing the
  pointer. So, to do the right thing, we need to make sure that '.'
  or '..' is skipped if, and only if, it is the final element in
  the pattern (i.e., if pat does not contain a slash) and is not
  specified literally as '.' or '..', i.e., only if '.' or '..' was
  actually resolved from a glob pattern. After this change,
  '**/.*', '**/../.*', etc. do the right thing, showing all your
  hidden files and directories without undesirable '.' and '..'
  results; '.' and '..' are skipped as final elements, unless you
  literally specify '**/.', '**/..', '**/foo/bar/..', etc.

src/cmd/ksh93/COMPATIBILITY:
- Note the symlink/** globstar change.

src/cmd/ksh93/sh.1:
- Try to document the current globstar behaviour more exhausively.

src/cmd/ksh93/tests/glob.sh:
- Add tests. Try to cover all the corner cases.

src/cmd/ksh93/tests/shtests:
- Since tests in glob.sh do not use err_exit, they were not
  counted. Special-case glob.sh for counting the tests: count the
  lines starting with a test_* function call.

Resolves: https://github.com/ksh93/ksh/issues/146
This commit is contained in:
Martijn Dekker 2021-03-06 18:45:42 +00:00
parent 89c69b076d
commit aad74597f7
8 changed files with 108 additions and 15 deletions

10
NEWS
View file

@ -3,6 +3,16 @@ For full details, see the git log at: https://github.com/ksh93/ksh
Any uppercase BUG_* names are modernish shell bug IDs.
2021-03-07:
- Fixed a bug with -G/--globstar introduced on 2020-08-09: patterns did not
match anything if any pathname component was '.' or '..', e.g. '**/./glob.c'
never matched. The 2020-08-09 fix does still apply to patterns like '.*'.
- Enhancement to -G/--globstar: symbolic links to directories are now followed
if they match a normal (non-**) glob pattern. For example, if '/lnk' is a
symlink to a directory, '/lnk/**' and '/l?k/**' now work as you would expect.
2021-03-06:
- Fixed an old expansion bug: expansions of type ${var=value} and ${var:=value}

View file

@ -92,6 +92,10 @@ For more details, see the NEWS file and for complete details, see the git log.
16. Unbalanced quotes and backticks now correctly produce a syntax error
in -c scripts, 'eval', and backtick-style command substitutions.
17. -G/--globstar: Symbolic links to directories are now followed if they
match a normal (non-**) glob pattern. For example, if '/lnk' is a
symlink to a directory, '/lnk/**' and '/l?k/**' now work as expected.
____________________________________________________________________________
KSH-93 VS. KSH-88

View file

@ -20,7 +20,7 @@
#define SH_RELEASE_FORK "93u+m" /* only change if you develop a new ksh93 fork */
#define SH_RELEASE_SVER "1.0.0-alpha" /* semantic version number: https://semver.org */
#define SH_RELEASE_DATE "2021-03-06" /* must be in this format for $((.sh.version)) */
#define SH_RELEASE_DATE "2021-03-07" /* must be in this format for $((.sh.version)) */
#define SH_RELEASE_CPYR "(c) 2020-2021 Contributors to ksh " SH_RELEASE_FORK
/* Scripts sometimes field-split ${.sh.version}, so don't change amount of whitespace. */

View file

@ -6942,13 +6942,37 @@ Requires
to truncate a file when turned on.
.TP 8
.B \-G
Causes the pattern
Enables recursive file name generation.
This adds the double-star pattern
.B **
by itself to match files and zero or more directories and sub-directories
when used for file name generation.
If followed by a
.B /
only directories and sub-directories are matched.
to the file generation mechanism (see
.I "File Name Generation\^"
above).
By itself, it matches the recursive contents of the current directory,
which is to say, all files and directories in the current directory
and in all its subdirectories, sub-subdirectories, and so on.
If the pathname pattern ends in
.BR **/ ,
only directories and subdirectories are matched,
including symbolic links that point to directories.
A prefixed directory name is not included in the results unless that
directory was itself found by a pattern. For example,
.B dir/**
matches the recursive contents of
.B dir
but not
.B dir
itself, whereas
.B di[r]/**
matches both
.B dir
itself and the recursive contents of
.BR dir .
Symbolic links to non-directories are not followed.
Symbolic links to directories are followed if they are specified literally
or match a pattern as described under
.IR "File Name Generation\^" ,
but not if they result from a double-star pattern.
.TP 8
.B \-H
Enable \f3!\fP-style history expansion similar to

View file

@ -66,7 +66,7 @@ function test_glob
fi
fi
if [[ $got != "$expected" ]]
then 'err_exit' $lineno "glob -- expected '$expected', got '$got'"
then 'err_exit' $lineno "glob${ [[ -o globstar ]] && print star; } -- expected '$expected', got '$got'"
fi
}
alias test_glob='test_glob $LINENO'
@ -377,4 +377,50 @@ test_sub '//@(!(a))/[\1]' '[aha]'
test_sub '/@(!(aha))/[\1]' '[ah]a'
test_sub '//@(!(aha))/[\1]' '[ah][a]'
# ======
# Recursive double-star globbing (globstar) tests
set --glob --globstar
mkdir -p d_un/d_duo/d_tres/d_quatro d_un/d_duo/d_3/d_4
touch d_un/d_duo/.tres
ln -s d_duo d_un/d_sym
# As of commit 5312a59d, globstar failed to expand **/. or **/.. or **/./file or **/../file
# https://github.com/ksh93/ksh/issues/146#issuecomment-790845391
test_glob \
'<d_un/.> <d_un/d_duo/.> <d_un/d_duo/d_3/.> <d_un/d_duo/d_3/d_4/.> <d_un/d_duo/d_tres/.> <d_un/d_duo/d_tres/d_quatro/.>' \
d_un/**/.
test_glob \
'<d_un/..> <d_un/d_duo/..> <d_un/d_duo/d_3/..> <d_un/d_duo/d_3/d_4/..> <d_un/d_duo/d_tres/..> <d_un/d_duo/d_tres/d_quatro/..>' \
d_un/**/..
test_glob \
'<d_un/./d_duo> <d_un/./d_sym> <d_un/d_duo/./d_3> <d_un/d_duo/./d_tres> <d_un/d_duo/d_3/./d_4> <d_un/d_duo/d_tres/./d_quatro>' \
d_un/**/./d_*
test_glob \
'<d_un/../d_un> <d_un/d_duo/../d_duo> <d_un/d_duo/../d_sym> <d_un/d_duo/d_3/../d_3> <d_un/d_duo/d_3/../d_tres>'\
' <d_un/d_duo/d_3/d_4/../d_4> <d_un/d_duo/d_tres/../d_3> <d_un/d_duo/d_tres/../d_tres> <d_un/d_duo/d_tres/d_quatro/../d_quatro>' \
d_un/**/../d_*
test_glob '<d_un/d_duo/.tres>' d_un/**/.*
test_glob '<d_un/d_duo/d_3/../.tres> <d_un/d_duo/d_tres/../.tres>' d_un/*/**/../.*
test_glob \
'<d_un/d_duo/d_3/../.tres> <d_un/d_duo/d_tres/../.tres> <d_un/d_sym/d_3/../.tres> <d_un/d_sym/d_tres/../.tres>' \
d_un/**/*/../.*
test_glob '<d_un/./d_duo/./d_3/./.././.tres> <d_un/./d_duo/./d_tres/./.././.tres>' d_un/./**/./*/./.././.*
# New in 93u+m 2021-03-06: follow symlink to directory if specified literally or matched by a regular glob pattern component
# https://github.com/ksh93/ksh/issues/146#issuecomment-792142794
test_glob '<d_un/d_sym/d_3> <d_un/d_sym/d_3/d_4> <d_un/d_sym/d_tres> <d_un/d_sym/d_tres/d_quatro>' d_un/d_sym/**
test_glob '<d_un/d_sym> <d_un/d_sym/d_3> <d_un/d_sym/d_3/d_4> <d_un/d_sym/d_tres> <d_un/d_sym/d_tres/d_quatro>' d_un/d_sy[m]/**
test_glob '<d_un/d_sym/d_3/d_4>' d_un/d_sym/d_3/**
test_glob '<d_un/d_sym/d_3/d_4>' d_un/d_sy[m]/d_3/**
test_glob '<d_un/d_duo> <d_un/d_duo/d_3> <d_un/d_duo/d_3/d_4> <d_un/d_duo/d_tres> <d_un/d_duo/d_tres/d_quatro>' **/d_duo/**
test_glob '<d_un/d_sym> <d_un/d_sym/d_3> <d_un/d_sym/d_3/d_4> <d_un/d_sym/d_tres> <d_un/d_sym/d_tres/d_quatro>' **/d_sym/**
test_glob '<d_un/d_sym> <d_un/d_sym/d_3> <d_un/d_sym/d_3/d_4> <d_un/d_sym/d_tres> <d_un/d_sym/d_tres/d_quatro>' **/d_s[y]m/**
test_glob '<d_un/d_sym> <d_un/d_sym/d_3> <d_un/d_sym/d_3/d_4> <d_un/d_sym/d_tres> <d_un/d_sym/d_tres/d_quatro>' **/d_*ym/**
test_glob '<d_un/d_sym//d_3> <d_un/d_sym//d_3/d_4> <d_un/d_sym//d_tres> <d_un/d_sym//d_tres/d_quatro>' **/d_sym//**
test_glob '<d_un/d_sym//d_3> <d_un/d_sym//d_3/d_4> <d_un/d_sym//d_tres> <d_un/d_sym//d_tres/d_quatro>' **/d_[s]ym//**
test_glob '<d_un/d_sym//d_3> <d_un/d_sym//d_3/d_4> <d_un/d_sym//d_tres> <d_un/d_sym//d_tres/d_quatro>' **/d_*ym//**
set --noglobstar
# ======
exit $((Errors<125?Errors:125))

View file

@ -8,7 +8,7 @@ valgrindflags='--xml=yes --log-file=/dev/null --track-origins=yes --read-var-inf
USAGE=$'
[-s8?
@(#)$Id: shtests (ksh 93u+m) 2021-02-04 $
@(#)$Id: shtests (ksh 93u+m) 2021-03-06 $
]
[-author?David Korn <dgk@research.att.com>]
[-author?Glenn Fowler <gsf@research.att.com>]
@ -341,7 +341,10 @@ do [[ $i == *.sh ]] || i+='.sh'
(( ++total_e ))
continue
fi
t=$(grep -c err_exit $i)
t=$( case $i in
glob.sh) grep -c '^[[:blank:]]*test_[a-z]\{3,\}' $i ;;
*) grep -c err_exit $i ;;
esac )
if (( t > 2 ))
then (( t = t - 2 ))
fi

View file

@ -121,10 +121,11 @@ struct _glob_
/* gl_type return */
#define GLOB_NOTFOUND 0 /* does not exist */
#define GLOB_DEV 1 /* exists but not DIR EXE REG */
#define GLOB_DIR 2 /* directory */
#define GLOB_EXE 3 /* executable regular file */
#define GLOB_REG 4 /* regular file */
#define GLOB_SYM 5 /* symbolic link */
#define GLOB_DEV 1 /* exists but none of the above */
/* error return values */
#define GLOB_ABORTED 1

View file

@ -125,6 +125,8 @@ gl_type(glob_t* gp, const char* path, int flags)
type = 0;
else if (S_ISDIR(st.st_mode))
type = GLOB_DIR;
else if (S_ISLNK(st.st_mode))
type = GLOB_SYM;
else if (!S_ISREG(st.st_mode))
type = GLOB_DEV;
else if (st.st_mode & (S_IXUSR|S_IXGRP|S_IXOTH))
@ -479,7 +481,9 @@ skip:
break;
prefix = streq(dirname, ".") ? (char*)0 : dirname;
}
if ((!starstar && !gp->gl_starstar || (*gp->gl_type)(gp, dirname, GLOB_STARSTAR) == GLOB_DIR) && (dirf = (*gp->gl_diropen)(gp, dirname)))
if ((!starstar && !gp->gl_starstar || (t1 = (*gp->gl_type)(gp, dirname, GLOB_STARSTAR)) == GLOB_DIR
|| t1 == GLOB_SYM && pat[0]=='*' && pat[1]=='\0') /* follow symlinks to dirs for non-globstar components */
&& (dirf = (*gp->gl_diropen)(gp, dirname)))
{
if (!(gp->re_flags & REG_ICASE) && ((*gp->gl_attr)(gp, dirname, 0) & GLOB_ICASE))
{
@ -528,13 +532,14 @@ skip:
*restore2 = gp->gl_delim;
while ((name = (*gp->gl_dirnext)(gp, dirf)) && !*gp->gl_intr)
{
if (name[0] == '.' && (!name[1] || name[1] == '.' && !name[2]))
continue; /* do not ever match '.' or '..' */
if (!(matchdir && (pat[0] == '.' && (!pat[1] || pat[1] == '.' && !pat[2]) || strchr(pat,'/')))
&& name[0] == '.' && (!name[1] || name[1] == '.' && !name[2]))
continue; /* never match '.' or '..' as final element unless specified literally */
if (notdir = (gp->gl_status & GLOB_NOTDIR))
gp->gl_status &= ~GLOB_NOTDIR;
if (ire && !regexec(ire, name, 0, NiL, 0))
continue;
if (matchdir && !notdir)
if (matchdir && (name[0] != '.' || name[1] && (name[1] != '.' || name[2])) && !notdir)
addmatch(gp, prefix, name, matchdir, NiL, anymeta);
if (!regexec(pre, name, 0, NiL, 0))
{