1
0
Fork 0
mirror of git://git.code.sf.net/p/cdesktopenv/code synced 2025-03-09 15:50:02 +00:00
cde/src/cmd/ksh93/sh/expand.c
Martijn Dekker 71934570bf Add --globcasedetect shell option for globbing and completion
One of the best-kept secrets of libast/ksh93 is that the code
includes support for case-insensitive file name generation (a.k.a.
pathname expansion, a.k.a. globbing) as well as case-insensitive
file name completion on interactive shells, depending on whether
the file system is case-insensitive or not. This is transparently
determined for each directory, so a path pattern that spans
multiple file systems can be part case-sensitive and part case-
insensitive. In more precise terms, each slash-separated path name
component pattern P is treated as ~(i:P) if its parent directory
exists on a case-insensitive file system. I recently discovered
this while dealing with <https://github.com/ksh93/ksh/issues/223>.

However, that support is dead code on almost all current systems.
It depends on pathconf(2) having a _PC_PATH_ATTRIBUTES selector.
The 'c' attribute is supposedly returned if the given directory is
on a case insensitive file system. There are other attributes as
well (at least 'l', see src/lib/libcmd/rm.c). However, I have been
unable to find any system, current or otherwise, that has
_PC_PATH_ATTRIBUTES. Google and mailing list searches yield no
relevant results at all. If anyone knows of such a system, please
add a comment to this commit on GitHub, or email me.

An exception is Cygwin/Windows, on which the "c" attribute was
simply hardcoded, so globbing/completion is always case-
insensitive. As of Windows 10, that is wrong, as it added the
possibility to mount case-sensitive file systems.

On the other hand, this was never activated on the Mac, even
though macOS has always used a case-insensitive file system like Windows.
But, being UNIX, it can also mount case-sensitive file systems.

Finally, Linux added the possibility to create individual case-
insensitive ext4 directories fairly recently, in version 5.2.
https://www.collabora.com/news-and-blog/blog/2020/08/27/using-the-linux-kernel-case-insensitive-feature-in-ext4/

So, since this functionality latently exists in the code base, and
three popular OSs now have relevant file system support, we might
as well make it usable on those systems. It's a nice idea, as it
intuitively makes sense for globbing and completion behaviour to
auto-adapt to file system case insensitivity on a per-directory
basis. No other shell does this, so it's a nice selling point, too.

However, the way it is coded, this is activated unconditionally on
supported systems. That is not a good idea. It will surprise users.
Since globbing is used with commands like 'rm', we do not want
surprises. So this commit makes it conditional upon a new shell
option called 'globcasedetect'. This option is only compiled into
ksh on systems where we can actually detect FS case insensitivity.

To implement this, libast needs some public API additions first.

*** libast changes ***

src/lib/libast/features/lib:
- Add probes for the linux/fs.h and sys/ioctl.h headers.
  Linux needs these to use ioctl(2) in pathicase(3) (see below).

src/lib/libast/path/pathicase.c,
src/lib/libast/include/ast.h,
src/lib/libast/man/path.3,
src/lib/libast/Mamfile:
- Add new pathicase(3) public API function. This uses whatever
  OS-specific method it can detect at compile time to determine if
  a particular path is on a case-insensitive file system. If no
  method is available, it only sets errno to ENOSYS and returns -1.
  Currently known to work on: macOS, Cygwin, Linux 5.2+, QNX 7.0+.
- On systems (if any) that have the mysterious _PC_PATH_ATTRIBUTES
  selector for pathconf(2), call astconf(3) and check for the 'c'
  attribute to determine case insensitivity. This should preserve
  compatibility with any such system.

src/lib/libast/port/astconf.c:
- dynamic[]: As case-insensitive globbing is now optional on all
  systems, do not set the 'c' attribute by default on _WINIX
  (Cygwin/Windows) systems.
- format(): On systems that do not have _PC_PATH_ATTRIBUTES, call
  pathicase(3) to determine the value for the "c" (case
  insensitive) attribute only. This is for compatibility as it is
  more efficient to call pathicase(3) directly.

src/lib/libast/misc/glob.c,
src/lib/libast/include/glob.h:
- Add new GLOB_DCASE public API flag to glob(3). This is like
  GLOB_ICASE (case-insensitive matching) except it only makes the
  match case-insensitive if the file system for the current
  pathname component is determined to be case-insensitive.
- gl_attr(): For efficiency, call pathicase(3) directly instead of
  via astconf(3).
- glob_dir(): Only call gl_attr() to determine file system case
  insensitivity if the GLOB_DCASE flag was passed. This makes case
  insensitive globbing optional on all systems.
- glob(): The options bitmask needs to be widened to fit the new
  GLOB_DCASE option. Define this centrally in a new GLOB_FLAGMASK
  macro so it is easy to change it along with GLOB_MAGIC (which
  uses the remaining bits for a sanity check bit pattern).

src/lib/libast/path/pathexists.c:
- For efficiency, call pathicase(3) directly instead of via
  astconf(3).

*** ksh changes ***

src/cmd/ksh93/features/options,
src/cmd/ksh93/SHOPT.sh:
- Add new SHOPT_GLOBCASEDET compile-time option. Set it to probe
  (empty) by default so that the shell option is compiled in on
  supported systems only, which is determined by a new iffe feature
  test that checks if pathicase(3) returns an ENOSYS error.

src/cmd/ksh93/data/options.c,
src/cmd/ksh93/include/shell.h:
- Add -o globcasedetect shell option if compiling with
  SHOPT_GLOBCASEDET.

src/cmd/ksh93/sh/expand.c: path_expand():
- Pass the new GLOB_DCASE flag to glob(3) if the
  globcasedetect/SH_GLOBCASEDET shell option is set.

src/cmd/ksh93/edit/completion.c:
- While file listing/completion is based on globbing and
  automatically becomes case-insensitive when globbing does, it
  needs some additional handling to make a string comparison
  case-insensitive in corresponding cases. Otherwise, partial
  completions may be deleted from the command line upon pressing
  tab. This code was already in ksh 93u+ and just needs to be
  made conditional upon SHOPT_GLOBCASEDET and globcasedetect.
- For efficiency, call pathicase(3) directly instead of via
  astconf(3).

src/cmd/ksh93/sh.1:
- Document the new globcasedetect shell option.
2021-03-22 18:45:19 +00:00

383 lines
8.4 KiB
C

/***********************************************************************
* *
* This software is part of the ast package *
* Copyright (c) 1982-2011 AT&T Intellectual Property *
* and is licensed under the *
* Eclipse Public License, Version 1.0 *
* by AT&T Intellectual Property *
* *
* A copy of the License is available at *
* http://www.eclipse.org/org/documents/epl-v10.html *
* (with md5 checksum b35adb5213ca9657e911e9befb180842) *
* *
* Information and Software Systems Research *
* AT&T Research *
* Florham Park NJ *
* *
* David Korn <dgk@research.att.com> *
* *
***********************************************************************/
#pragma prototyped
/*
* File name expansion
*
* David Korn
* AT&T Labs
*
*/
#if KSHELL
# include "defs.h"
# include "variables.h"
# include "test.h"
#else
# include <ast.h>
# include <ctype.h>
# include <setjmp.h>
#endif /* KSHELL */
#include <glob.h>
#include <ls.h>
#include <stak.h>
#include <ast_dir.h>
#include "io.h"
#include "path.h"
#if KSHELL
# define argbegin argnxt.cp
static const char *sufstr;
static int suflen;
static int scantree(Dt_t*,const char*, struct argnod**);
#else
# define sh_sigcheck(sig) (0)
# define sh_access access
# define suflen 0
#endif /* KSHELL */
/*
* This routine builds a list of files that match a given pathname
* Uses external routine strgrpmatch() to match each component
* A leading . must match explicitly
*
*/
#ifndef GLOB_AUGMENTED
# define GLOB_AUGMENTED 0
#endif
/*
 * Directory iterator handed to glob() through gp->gl_nextdir.
 *
 * The current position is kept in gp->gl_handle as a Pathcomp_t pointer.
 * A null <dir> starts the iteration with the list returned by
 * path_get(shp,""); a non-null <dir> advances to the following element.
 * Returns the name of the element now current, or 0 when the list is
 * exhausted.
 */
static char *nextdir(glob_t *gp, char *dir)
{
	Shell_t		*shp = sh_getinterp();
	Pathcomp_t	*comp = (Pathcomp_t*)gp->gl_handle;
	if(dir)
		comp = comp->next;
	else
		comp = path_get(shp,"");
	/* remember the position for the next call */
	gp->gl_handle = (void*)comp;
	if(!comp)
		return(0);
	return(comp->name);
}
/*
 * Expand <pattern> with glob() and link the results in front of the
 * argument list headed by *arghead.
 *
 * shp      the shell interpreter context
 * pattern  the NUL-terminated pattern to expand
 * arghead  in/out head of the argnod list (chained through argchn.ap)
 *
 * Shell options SH_MARKDIRS, SH_GLOBSTARS and (if compiled in)
 * SH_GLOBCASEDET are translated into the corresponding glob() flags.
 * In the SH_COMPLETE state, matching alias and function names are added
 * to the list as well and GLOB_NOCHECK is turned off.
 * A pending completion suffix set by path_complete() is passed to glob()
 * once and then reset.
 *
 * Returns the number of pathnames reported by glob() plus the number of
 * alias/function names added.
 */
int path_expand(Shell_t *shp,const char *pattern, struct argnod **arghead)
{
	glob_t			gdata;
	register struct argnod	*ap;
	register glob_t		*gp= &gdata;
	register int		flags,extra=0;
	sh_stats(STAT_GLOBS);
	memset(gp,0,sizeof(gdata));
	flags = GLOB_GROUP|GLOB_AUGMENTED|GLOB_NOCHECK|GLOB_NOSORT|GLOB_STACK|GLOB_LIST|GLOB_DISC;
	if(sh_isoption(SH_MARKDIRS))
		flags |= GLOB_MARK;
	if(sh_isoption(SH_GLOBSTARS))
		flags |= GLOB_STARSTAR;
#if SHOPT_GLOBCASEDET
	if(sh_isoption(SH_GLOBCASEDET))
		flags |= GLOB_DCASE;
#endif
	if(sh_isstate(SH_COMPLETE))
	{
#if KSHELL
		/* completion also offers matching alias and function names */
		extra += scantree(shp->alias_tree,pattern,arghead);
		extra += scantree(shp->fun_tree,pattern,arghead);
		gp->gl_nextdir = nextdir;
#endif /* KSHELL */
		flags |= GLOB_COMPLETE;
		flags &= ~GLOB_NOCHECK;
	}
	gp->gl_fignore = nv_getval(sh_scoped(shp,FIGNORENOD));
	if(suflen)
		gp->gl_suffix = sufstr;
	gp->gl_intr = &shp->trapnote;
	/* the suffix applies to this expansion only */
	suflen = 0;
	/*
	 * A pattern starting with "~(N" turns GLOB_NOCHECK off.
	 * strncmp() stops at the terminating NUL, so patterns shorter than
	 * three bytes are not read past their end (memcmp() would always
	 * compare three bytes).
	 */
	if(strncmp(pattern,"~(N",3)==0)
		flags &= ~GLOB_NOCHECK;
	glob(pattern, flags, 0, gp);
	sh_sigcheck(shp);
	/*
	 * glob() returns its list linked through argnxt.ap; re-link it
	 * through argchn.ap and hang the previous list off the last node.
	 */
	for(ap= (struct argnod*)gp->gl_list; ap; ap = ap->argnxt.ap)
	{
		ap->argchn.ap = ap->argnxt.ap;
		if(!ap->argnxt.ap)
			ap->argchn.ap = *arghead;
	}
	if(gp->gl_list)
		*arghead = (struct argnod*)gp->gl_list;
	return(gp->gl_pathc+extra);
}
#if KSHELL
/*
* scan tree and add each name that matches the given pattern
*/
/*
 * Walk <tree> from its first node and, for every node whose name matches
 * <pattern>, build a raw argnod on the stak holding that name and push it
 * onto the front of the list headed by *arghead.
 * The walk ends at the end of the tree or at the first node for which
 * nv_isnull() is true.
 * Returns the number of nodes added.
 */
static int scantree(Dt_t *tree, const char *pattern, struct argnod **arghead)
{
	Namval_t	*node;
	struct argnod	*arg;
	char		*name;
	int		found = 0;
	for(node = (Namval_t*)dtfirst(tree); node && !nv_isnull(node); node = (Namval_t*)dtnext(tree,node))
	{
		name = nv_name(node);
		if(!strmatch(name,pattern))
			continue;
		/* leave room for the argnod header, then copy the name */
		(void)stakseek(ARGVAL);
		stakputs(name);
		arg = (struct argnod*)stakfreeze(1);
		arg->argbegin = NIL(char*);
		arg->argchn.ap = *arghead;
		arg->argflag = ARG_RAW|ARG_MAKE;
		*arghead = arg;
		found++;
	}
	return(found);
}
/*
 * File name completion.
 * Records <suffix> and its length in the file-scope sufstr/suflen
 * variables and then expands <name> with path_expand(), which hands the
 * suffix to glob() and resets suflen.
 * The number of matches reported by path_expand() is returned.
 */
int path_complete(Shell_t *shp,const char *name,register const char *suffix, struct argnod **arghead)
{
	suflen = strlen(suffix);
	sufstr = suffix;
	return(path_expand(shp,name,arghead));
}
#endif
#if SHOPT_BRACEPAT
/*
 * Extension callback installed in Sffmt_t.extf by path_generate() for its
 * sfprintf(sfstdout,"%!",...) call.  It ignores all of its arguments and
 * always returns -1.
 * NOTE(review): presumably the negative return makes sfprintf stop after
 * parsing the conversion, so nothing is written -- confirm against the
 * sfio documentation.
 */
static int checkfmt(Sfio_t* sp, void* vp, Sffmt_t* fp)
{
	(void)sp;
	(void)vp;
	(void)fp;
	return(-1);
}
/*
 * Brace expansion.
 *
 * <todo> is a word to expand; it becomes the first entry of a work list
 * chained through argchn.ap.  Each entry is scanned for the first
 * top-level brace group that is either a comma list {a,b,...} or a range
 * {first..last[..incr][%fmt]} (numbers) / {c1..c2[..incr]} (letters of
 * the same case).  If one is found, a new work-list entry is created on
 * the stak for every alternative (prefix + alternative + remainder) and
 * scanning starts over.  Entries without such a group are pushed onto a
 * second list; once the work list is empty each of them is passed to
 * path_expand(), or linked onto *arghead unchanged when SH_NOGLOB is set.
 *
 * The strings of the work-list entries are modified in place.
 *
 * Returns the sum of the counts returned by path_expand(), counting 1 for
 * every word when SH_NOGLOB is set.
 */
int path_generate(Shell_t *shp,struct argnod *todo, struct argnod **arghead)
/*@
	assume todo!=0;
	return count satisfying count>=1;
@*/
{
	register char *cp;
	register int brace;
	register struct argnod *ap;
	struct argnod *top = 0;
	struct argnod *apin;
	char *pat, *rescan;
	char *format;
	char comma, range=0;
	int first, last, incr, count = 0;
	char tmp[32], end[1];
	todo->argchn.ap = 0;
again:
	/* take the next entry off the work list */
	apin = ap = todo;
	todo = ap->argchn.ap;
	cp = ap->argval;
	range = comma = brace = 0;
	/* first search for {...,...} */
	while(1) switch(*cp++)
	{
	    case '{':
		/* pat marks the first character inside the outermost brace */
		if(brace++==0)
			pat = cp;
		break;
	    case '}':
		if(--brace>0)
			break;
		/* a closed top-level group counts only if it had a comma and is not followed by '(' */
		if(brace==0 && comma && *cp!='(')
			goto endloop1;
		comma = brace = 0;
		break;
	    case '.':
		/* ".." directly inside the outermost brace: possible range */
		if(brace==1 && *cp=='.')
		{
			char *endc;
			incr = 1;
			if(isdigit(*pat) || *pat=='+' || *pat=='-')
			{
				first = strtol(pat,&endc,0);
				/* the number must extend exactly up to the ".." */
				if(endc==(cp-1))
				{
					last = strtol(cp+1,&endc,0);
					if(*endc=='.' && endc[1]=='.')
						incr = strtol(endc+2,&endc,0);
					else if(last<first)
						incr = -1;
					if(incr)
					{
						if(*endc=='%')
						{
							/*
							 * Let sfprintf parse the user's format via
							 * "%!"; only integer-style conversions
							 * without long long/long double flags are
							 * accepted.
							 */
							Sffmt_t	fmt;
							memset(&fmt, 0, sizeof(fmt));
							fmt.version = SFIO_VERSION;
							fmt.form = endc;
							fmt.extf = checkfmt;
							sfprintf(sfstdout, "%!", &fmt);
							if(!(fmt.flags&(SFFMT_LLONG|SFFMT_LDOUBLE)))
								switch (fmt.fmt)
								{
								    case 'c':
								    case 'd':
								    case 'i':
								    case 'o':
								    case 'u':
								    case 'x':
								    case 'X':
									format = endc;
									endc = fmt.form;
									break;
								}
						}
						else
							format = "%d";
						if(*endc=='}')
						{
							cp = endc+1;
							range = 2;
							goto endloop1;
						}
					}
				}
			}
			/*
			 * Letter range.  The letter test on cp[1] is evaluated
			 * first so that cp[2] and cp[3] are only examined when
			 * cp[1] is not the terminating NUL; the original order
			 * could read past the end of a word such as "{a..".
			 */
			else if(((*pat>='a' && *pat<='z' && cp[1]>='a' && cp[1]<='z') || (*pat>='A' && *pat<='Z' && cp[1]>='A' && cp[1]<='Z')) && (cp[2]=='}' || (cp[2]=='.' && cp[3]=='.')))
			{
				first = *pat;
				last = cp[1];
				cp += 2;
				if(*cp=='.')
				{
					incr = strtol(cp+2,&endc,0);
					cp = endc;
				}
				else if(first>last)
					incr = -1;
				if(incr && *cp=='}')
				{
					cp++;
					range = 1;
					goto endloop1;
				}
			}
			cp++;
		}
		break;
	    case ',':
		if(brace==1)
			comma = 1;
		break;
	    case '\\':
		/* skip the escaped character, but never step over the terminating NUL */
		if(*cp)
			cp++;
		break;
	    case 0:
		/* insert on stack */
		ap->argchn.ap = top;
		top = ap;
		if(todo)
			goto again;
		/* work list is empty: expand (or just link) every finished word */
		for(; ap; ap=apin)
		{
			apin = ap->argchn.ap;
			if(!sh_isoption(SH_NOGLOB))
				brace=path_expand(shp,ap->argval,arghead);
			else
			{
				ap->argchn.ap = *arghead;
				*arghead = ap;
				brace=1;
			}
			if(brace)
			{
				count += brace;
				(*arghead)->argflag |= ARG_MAKE;
			}
		}
		return(count);
	}
endloop1:
	/* rescan is the text after the closing brace; cut the word at the opening brace */
	rescan = cp;
	cp = pat-1;
	*cp = 0;
	while(1)
	{
		brace = 0;
		if(range)
		{
			if(range==1)
			{
				/* letter range: the alternative is the single character at pat[0] */
				pat[0] = first;
				cp = &pat[1];
			}
			else
			{
				/*
				 * Numeric range: temporarily end the string at the
				 * closing brace so a user format stops there, format
				 * the value into tmp, then restore the brace.
				 */
				*(rescan - 1) = 0;
				sfsprintf(pat=tmp,sizeof(tmp),format,first);
				*(rescan - 1) = '}';
				*(cp = end) = 0;
			}
			/* store '}' on the last value so the loop below terminates */
			if(incr*(first+incr) > last*incr)
				*cp = '}';
			else
				first += incr;
		}
		/* generate each pattern and put on the todo list */
		else while(1) switch(*++cp)
		{
		    case '\\':
			cp++;
			break;
		    case '{':
			brace++;
			break;
		    case ',':
			if(brace==0)
				goto endloop2;
			break;
		    case '}':
			if(--brace<0)
				goto endloop2;
		}
	endloop2:
		/* remember the delimiter and terminate the current alternative */
		brace = *cp;
		*cp = 0;
		sh_sigcheck(shp);
		/* build prefix + alternative + remainder as a new work-list entry */
		ap = (struct argnod*)stakseek(ARGVAL);
		ap->argflag = ARG_RAW;
		ap->argchn.ap = todo;
		stakputs(apin->argval);
		stakputs(pat);
		stakputs(rescan);
		todo = ap = (struct argnod*)stakfreeze(1);
		if(brace == '}')
			break;
		if(!range)
			pat = cp+1;
	}
	goto again;
}
#endif /* SHOPT_BRACEPAT */