1
0
Fork 0
mirror of git://git.code.sf.net/p/cdesktopenv/code synced 2025-02-15 04:32:24 +00:00

posix mode: disable effect of repeating whitespace char in $IFS

ksh has a little-known field splitting feature that conflicts with
POSIX: if a single-byte whitespace character (cf. isspace(3)) is
repeated in $IFS, then field splitting is done as if that character
wasn't a whitespace character. An example with the tab character:

  $ (IFS=$'\t'; val=$'\tone\t\ttwo\t'; set -- $val; echo $#)
  2
  $ (IFS=$'\t\t'; val=$'\tone\t\ttwo\t'; set -- $val; echo $#)
  4
The latter being the same as, for example
  $ (IFS=':'; val=':one::two:'; set -- $val; echo $#)
  4

However, this is incompatible with the POSIX spec and with every
other shell except zsh; in those other shells, repeating a character
in IFS does not have any effect. So the POSIX mode must disable this.

src/cmd/ksh93/include/defs.h,
src/cmd/ksh93/sh/init.c:
- Add sh_invalidate_ifs() function that invalidates the IFS state
  table by setting the ifsnp discipline struct member to NULL,
  which will cause the next get_ifs() call to regenerate it.
- get_ifs(): Treat a repeated char as S_DELIM even if whitespace,
  unless --posix is on.

src/cmd/ksh93/sh/args.c:
- sh_argopts(): Call sh_invalidate_ifs() when enabling or disabling
  the POSIX option. This is needed to make the change in field
  splitting behaviour take immediate effect instead of taking
  effect at the next assignment to IFS.
This commit is contained in:
Martijn Dekker 2022-03-08 20:20:57 +01:00
parent fae1932e62
commit 9e2a8c6925
5 changed files with 33 additions and 5 deletions

3
NEWS
View file

@ -7,6 +7,9 @@ Any uppercase BUG_* names are modernish shell bug IDs.
- The 'enum' command can now create more than one type per invocation.
- The POSIX compatibility mode has been amended to disable the special handling
of a repeated $IFS whitespace character as non-whitespace.
2022-02-23:
- When reading input from the keyboard, ksh now turns off nonblocking I/O

View file

@ -127,6 +127,7 @@ extern Sfdouble_t sh_arith(const char*);
extern void *sh_arithcomp(char*);
extern pid_t sh_fork(int,int*);
extern pid_t _sh_fork(pid_t, int ,int*);
extern void sh_invalidate_ifs(void);
extern char *sh_mactrim(char*,int);
extern int sh_macexpand(struct argnod*,struct argnod**,int);
extern int sh_macfun(const char*,int);

View file

@ -2033,7 +2033,7 @@ Each single occurrence of
an
.SM
.B IFS
character in the string to be split,
character in the string to be split
that is not in the \f2isspace\^\fP character class, and any
adjacent characters in
.SM
@ -2043,13 +2043,16 @@ One or more
characters in
.SM
.B IFS
that belong to the \f2isspace\^\fP character class,
that belong to the \f2isspace\^\fP character class
delimit a field.
In addition, if the same \f2isspace\^\fP character appears
consecutively inside
.BR IFS ,
.B IFS
and the
.B posix
shell option is not on,
this character is treated as if it were not in the \f2isspace\^\fP
class, so that if
class - for example, if
.BR IFS
consists of two
.B tab
@ -7685,6 +7688,12 @@ In addition, while on, the \fBposix\fR option
disables exporting variable type attributes to the environment for other ksh
processes to import;
.IP \[bu]
disables the special handling of repeated
.I isspace
class characters in the
.B IFS
variable;
.IP \[bu]
causes file descriptors > 2 to be left open when invoking another program;
.IP \[bu]
disables the \fB&>\fR redirection shorthand;

View file

@ -245,6 +245,7 @@ int sh_argopts(int argc,register char *argv[])
off_option(&newflags,SH_BRACEEXPAND);
#endif
on_option(&newflags,SH_LETOCTAL);
sh_invalidate_ifs();
}
on_option(&newflags,o);
off_option(&sh.offoptions,o);
@ -262,6 +263,7 @@ int sh_argopts(int argc,register char *argv[])
on_option(&newflags,SH_BRACEEXPAND);
#endif
off_option(&newflags,SH_LETOCTAL);
sh_invalidate_ifs();
}
if(o==SH_XTRACE)
trace = 0;

View file

@ -549,6 +549,18 @@ static void put_ifs(register Namval_t* np,const char *val,int flags,Namfun_t *fp
}
}
/*
 * Invalidate the IFS state table.
 *
 * Looks up the currently scoped IFS node and, if it has a discipline
 * struct attached, clears that struct's ifsnp member. A cleared ifsnp
 * causes the next get_ifs() call to regenerate the table.
 * Does nothing if there is no scoped IFS node or no discipline struct.
 */
void sh_invalidate_ifs(void)
{
	Namval_t *ifs_node = sh_scoped(IFSNOD);
	struct ifs *disc;
	if(!ifs_node)
		return;
	disc = (struct ifs*)ifs_node->nvfun;
	if(!disc)
		return;
	disc->ifsnp = 0;
}
/*
* This is the lookup function for IFS
* It keeps the sh.ifstable up to date
@ -574,7 +586,8 @@ static char* get_ifs(register Namval_t* np, Namfun_t *fp)
continue;
}
n = S_DELIM;
if(c== *cp)
/* Treat a repeated char as S_DELIM even if whitespace, unless --posix is on */
if(c==*cp && !sh_isoption(SH_POSIX))
cp++;
else if(c=='\n')
n = S_NL;