From aad74597f7e7bf5ee3a58d59f964e5880731fb20 Mon Sep 17 00:00:00 2001 From: Martijn Dekker Date: Sat, 6 Mar 2021 18:45:42 +0000 Subject: [PATCH] Fixes for -G/--globstar (re: 5312a59d) The fix for '.' and '..' in regular globbing broke '.' and '..' in globstar. No globstar pattern that contains '.' or '..' as any pathname component still matched. This commit fixes that. This commit also makes symlink/** mostly work, which it never has done in any ksh93 version. It is correct and expected that symlinks found by patterns are not resolved, but symlinks were not resolved even when specified as explicit non-pattern pathname components. For example, /tmp/** breaks if /tmp is a symlink (e.g. on macOS), which looks like a bug. src/lib/libast/include/glob.h, src/lib/libast/misc/glob.c: glob_dir(): - Make symlink/** work. we can check if the string pointed to by pat is exactly equal to *. If so, we are doing regular globbing for that particular pathname element, and it's okay to resolve symlinks. If not (if it's **), we're doing globstar and we should not be matching symlinks. - Let's also introduce proper identification of symlinks (GLOB_SYM) and not lump them in with other special files (GLOB_DEV). - Fix the bug with literal '.' and '..' components in globstar patterns. In preceding code, the matchdir pointer gets set to the complete glob pattern if we're doing globstar for the current pathname element, null if not. The pat pointer gets set to the elements of the pattern that are still left to be processed; already-done elements are trimmed from it by increasing the pointer. So, to do the right thing, we need to make sure that '.' or '..' is skipped if, and only if, it is the final element in the pattern (i.e., if pat does not contain a slash) and is not specified literally as '.' or '..', i.e., only if '.' or '..' was actually resolved from a glob pattern. After this change, '**/.*', '**/../.*', etc. do the right thing, showing all your hidden files and directories without undesirable '.' and '..' results; '.' and '..' are skipped as final elements, unless you literally specify '**/.', '**/..', '**/foo/bar/..', etc. src/cmd/ksh93/COMPATIBILITY: - Note the symlink/** globstar change. src/cmd/ksh93/sh.1: - Try to document the current globstar behaviour more exhausively. src/cmd/ksh93/tests/glob.sh: - Add tests. Try to cover all the corner cases. src/cmd/ksh93/tests/shtests: - Since tests in glob.sh do not use err_exit, they were not counted. Special-case glob.sh for counting the tests: count the lines starting with a test_* function call. Resolves: https://github.com/ksh93/ksh/issues/146 --- NEWS | 10 +++++++ src/cmd/ksh93/COMPATIBILITY | 4 +++ src/cmd/ksh93/include/version.h | 2 +- src/cmd/ksh93/sh.1 | 36 ++++++++++++++++++++----- src/cmd/ksh93/tests/glob.sh | 48 ++++++++++++++++++++++++++++++++- src/cmd/ksh93/tests/shtests | 7 +++-- src/lib/libast/include/glob.h | 3 ++- src/lib/libast/misc/glob.c | 13 ++++++--- 8 files changed, 108 insertions(+), 15 deletions(-) diff --git a/NEWS b/NEWS index 9a9e9570b..8ddc9ec84 100644 --- a/NEWS +++ b/NEWS @@ -3,6 +3,16 @@ For full details, see the git log at: https://github.com/ksh93/ksh Any uppercase BUG_* names are modernish shell bug IDs. +2021-03-07: + +- Fixed a bug with -G/--globstar introduced on 2020-08-09: patterns did not + match anything if any pathname component was '.' or '..', e.g. '**/./glob.c' + never matched. The 2020-08-09 fix does still apply to patterns like '.*'. + +- Enhancement to -G/--globstar: symbolic links to directories are now followed + if they match a normal (non-**) glob pattern. For example, if '/lnk' is a + symlink to a directory, '/lnk/**' and '/l?k/**' now work as you would expect. + 2021-03-06: - Fixed an old expansion bug: expansions of type ${var=value} and ${var:=value} diff --git a/src/cmd/ksh93/COMPATIBILITY b/src/cmd/ksh93/COMPATIBILITY index 7c7585c9e..ef8c156e6 100644 --- a/src/cmd/ksh93/COMPATIBILITY +++ b/src/cmd/ksh93/COMPATIBILITY @@ -92,6 +92,10 @@ For more details, see the NEWS file and for complete details, see the git log. 16. Unbalanced quotes and backticks now correctly produce a syntax error in -c scripts, 'eval', and backtick-style command substitutions. +17. -G/--globstar: Symbolic links to directories are now followed if they + match a normal (non-**) glob pattern. For example, if '/lnk' is a + symlink to a directory, '/lnk/**' and '/l?k/**' now work as expected. + ____________________________________________________________________________ KSH-93 VS. KSH-88 diff --git a/src/cmd/ksh93/include/version.h b/src/cmd/ksh93/include/version.h index 485454d96..6c6490bd5 100644 --- a/src/cmd/ksh93/include/version.h +++ b/src/cmd/ksh93/include/version.h @@ -20,7 +20,7 @@ #define SH_RELEASE_FORK "93u+m" /* only change if you develop a new ksh93 fork */ #define SH_RELEASE_SVER "1.0.0-alpha" /* semantic version number: https://semver.org */ -#define SH_RELEASE_DATE "2021-03-06" /* must be in this format for $((.sh.version)) */ +#define SH_RELEASE_DATE "2021-03-07" /* must be in this format for $((.sh.version)) */ #define SH_RELEASE_CPYR "(c) 2020-2021 Contributors to ksh " SH_RELEASE_FORK /* Scripts sometimes field-split ${.sh.version}, so don't change amount of whitespace. */ diff --git a/src/cmd/ksh93/sh.1 b/src/cmd/ksh93/sh.1 index ec0e48973..30fcbe597 100644 --- a/src/cmd/ksh93/sh.1 +++ b/src/cmd/ksh93/sh.1 @@ -6942,13 +6942,37 @@ Requires to truncate a file when turned on. .TP 8 .B \-G -Causes the pattern +Enables recursive file name generation. +This adds the double-star pattern .B ** -by itself to match files and zero or more directories and sub-directories -when used for file name generation. -If followed by a -.B / -only directories and sub-directories are matched. +to the file generation mechanism (see +.I "File Name Generation\^" +above). +By itself, it matches the recursive contents of the current directory, +which is to say, all files and directories in the current directory +and in all its subdirectories, sub-subdirectories, and so on. +If the pathname pattern ends in +.BR **/ , +only directories and subdirectories are matched, +including symbolic links that point to directories. +A prefixed directory name is not included in the results unless that +directory was itself found by a pattern. For example, +.B dir/** +matches the recursive contents of +.B dir +but not +.B dir +itself, whereas +.B di[r]/** +matches both +.B dir +itself and the recursive contents of +.BR dir . +Symbolic links to non-directories are not followed. +Symbolic links to directories are followed if they are specified literally +or match a pattern as described under +.IR "File Name Generation\^" , +but not if they result from a double-star pattern. .TP 8 .B \-H Enable \f3!\fP-style history expansion similar to diff --git a/src/cmd/ksh93/tests/glob.sh b/src/cmd/ksh93/tests/glob.sh index c96003d24..17c5cf4fd 100755 --- a/src/cmd/ksh93/tests/glob.sh +++ b/src/cmd/ksh93/tests/glob.sh @@ -66,7 +66,7 @@ function test_glob fi fi if [[ $got != "$expected" ]] - then 'err_exit' $lineno "glob -- expected '$expected', got '$got'" + then 'err_exit' $lineno "glob${ [[ -o globstar ]] && print star; } -- expected '$expected', got '$got'" fi } alias test_glob='test_glob $LINENO' @@ -377,4 +377,50 @@ test_sub '//@(!(a))/[\1]' '[aha]' test_sub '/@(!(aha))/[\1]' '[ah]a' test_sub '//@(!(aha))/[\1]' '[ah][a]' +# ====== +# Recursive double-star globbing (globstar) tests +set --glob --globstar +mkdir -p d_un/d_duo/d_tres/d_quatro d_un/d_duo/d_3/d_4 +touch d_un/d_duo/.tres +ln -s d_duo d_un/d_sym + +# As of commit 5312a59d, globstar failed to expand **/. or **/.. or **/./file or **/../file +# https://github.com/ksh93/ksh/issues/146#issuecomment-790845391 +test_glob \ + ' ' \ + d_un/**/. +test_glob \ + ' ' \ + d_un/**/.. +test_glob \ + ' ' \ + d_un/**/./d_* +test_glob \ + ' '\ +' ' \ + d_un/**/../d_* +test_glob '' d_un/**/.* +test_glob ' ' d_un/*/**/../.* +test_glob \ + ' ' \ + d_un/**/*/../.* +test_glob ' ' d_un/./**/./*/./.././.* + +# New in 93u+m 2021-03-06: follow symlink to directory if specified literally or matched by a regular glob pattern component +# https://github.com/ksh93/ksh/issues/146#issuecomment-792142794 +test_glob ' ' d_un/d_sym/** +test_glob ' ' d_un/d_sy[m]/** +test_glob '' d_un/d_sym/d_3/** +test_glob '' d_un/d_sy[m]/d_3/** +test_glob ' ' **/d_duo/** +test_glob ' ' **/d_sym/** +test_glob ' ' **/d_s[y]m/** +test_glob ' ' **/d_*ym/** +test_glob ' ' **/d_sym//** +test_glob ' ' **/d_[s]ym//** +test_glob ' ' **/d_*ym//** + +set --noglobstar + +# ====== exit $((Errors<125?Errors:125)) diff --git a/src/cmd/ksh93/tests/shtests b/src/cmd/ksh93/tests/shtests index de88b2192..922caa03f 100755 --- a/src/cmd/ksh93/tests/shtests +++ b/src/cmd/ksh93/tests/shtests @@ -8,7 +8,7 @@ valgrindflags='--xml=yes --log-file=/dev/null --track-origins=yes --read-var-inf USAGE=$' [-s8? -@(#)$Id: shtests (ksh 93u+m) 2021-02-04 $ +@(#)$Id: shtests (ksh 93u+m) 2021-03-06 $ ] [-author?David Korn ] [-author?Glenn Fowler ] @@ -341,7 +341,10 @@ do [[ $i == *.sh ]] || i+='.sh' (( ++total_e )) continue fi - t=$(grep -c err_exit $i) + t=$( case $i in + glob.sh) grep -c '^[[:blank:]]*test_[a-z]\{3,\}' $i ;; + *) grep -c err_exit $i ;; + esac ) if (( t > 2 )) then (( t = t - 2 )) fi diff --git a/src/lib/libast/include/glob.h b/src/lib/libast/include/glob.h index aa82c2acb..301ff531b 100644 --- a/src/lib/libast/include/glob.h +++ b/src/lib/libast/include/glob.h @@ -121,10 +121,11 @@ struct _glob_ /* gl_type return */ #define GLOB_NOTFOUND 0 /* does not exist */ -#define GLOB_DEV 1 /* exists but not DIR EXE REG */ #define GLOB_DIR 2 /* directory */ #define GLOB_EXE 3 /* executable regular file */ #define GLOB_REG 4 /* regular file */ +#define GLOB_SYM 5 /* symbolic link */ +#define GLOB_DEV 1 /* exists but none of the above */ /* error return values */ #define GLOB_ABORTED 1 diff --git a/src/lib/libast/misc/glob.c b/src/lib/libast/misc/glob.c index b9fcea38d..267283b2d 100644 --- a/src/lib/libast/misc/glob.c +++ b/src/lib/libast/misc/glob.c @@ -125,6 +125,8 @@ gl_type(glob_t* gp, const char* path, int flags) type = 0; else if (S_ISDIR(st.st_mode)) type = GLOB_DIR; + else if (S_ISLNK(st.st_mode)) + type = GLOB_SYM; else if (!S_ISREG(st.st_mode)) type = GLOB_DEV; else if (st.st_mode & (S_IXUSR|S_IXGRP|S_IXOTH)) @@ -479,7 +481,9 @@ skip: break; prefix = streq(dirname, ".") ? (char*)0 : dirname; } - if ((!starstar && !gp->gl_starstar || (*gp->gl_type)(gp, dirname, GLOB_STARSTAR) == GLOB_DIR) && (dirf = (*gp->gl_diropen)(gp, dirname))) + if ((!starstar && !gp->gl_starstar || (t1 = (*gp->gl_type)(gp, dirname, GLOB_STARSTAR)) == GLOB_DIR + || t1 == GLOB_SYM && pat[0]=='*' && pat[1]=='\0') /* follow symlinks to dirs for non-globstar components */ + && (dirf = (*gp->gl_diropen)(gp, dirname))) { if (!(gp->re_flags & REG_ICASE) && ((*gp->gl_attr)(gp, dirname, 0) & GLOB_ICASE)) { @@ -528,13 +532,14 @@ skip: *restore2 = gp->gl_delim; while ((name = (*gp->gl_dirnext)(gp, dirf)) && !*gp->gl_intr) { - if (name[0] == '.' && (!name[1] || name[1] == '.' && !name[2])) - continue; /* do not ever match '.' or '..' */ + if (!(matchdir && (pat[0] == '.' && (!pat[1] || pat[1] == '.' && !pat[2]) || strchr(pat,'/'))) + && name[0] == '.' && (!name[1] || name[1] == '.' && !name[2])) + continue; /* never match '.' or '..' as final element unless specified literally */ if (notdir = (gp->gl_status & GLOB_NOTDIR)) gp->gl_status &= ~GLOB_NOTDIR; if (ire && !regexec(ire, name, 0, NiL, 0)) continue; - if (matchdir && !notdir) + if (matchdir && (name[0] != '.' || name[1] && (name[1] != '.' || name[2])) && !notdir) addmatch(gp, prefix, name, matchdir, NiL, anymeta); if (!regexec(pre, name, 0, NiL, 0)) {