1
0
Fork 0
mirror of git://git.code.sf.net/p/cdesktopenv/code synced 2025-02-12 19:22:41 +00:00

Improve and document fast filescan loops (SHOPT_FILESCAN)

From README:

FILESCAN on  Experimental option that allows fast reading of files
             using while < file;do ...; done and allowing fields in
             each line to be accessed as positional parameters.

As SHOPT_FILESCAN has been enabled by default since ksh 93l
2001-06-01, the filescan loop is now documented in the manual page
and the compile-time option is no longer considered experimental.

We must disable this at runtime if --posix is active because it
breaks a portable use case: POSIXly, 'while <file; do stuff; done'
repeatedly executes 'stuff' while 'file' can successfully be opened
for reading, without actually reading from 'file'.

This also backports a bugfix from the 93v- beta. Reproducer:

$ echo 'one two three' >foo
$ while <foo; do printf '[%s] ' "$@"; echo; done
[one two three]

Expected output:
[one] [two] [three]

The bug is that "$@" acts like "$*", joining all the positional
parameters into one word though it should be generating one word
for each.

src/cmd/ksh93/sh/macro.c: varsub():
- Backport fix for the bug described above. I do not understand the
  opaque macro.c code well enough yet to usefully describe the fix.

src/cmd/ksh93/sh/xec.c: sh_exec():
- Improved sanity check for filescan loop: do not recognise it if
  the simple command includes variable assignments, more than one
  redirection, or an output or append redirection.
- Disable filescan loops if --posix is active.
- Another 93v- fix: handle interrupts (errno==EINTR) when closing
  the input file.
This commit is contained in:
Martijn Dekker 2022-02-15 17:54:45 +00:00
parent 82ff91e9d9
commit 95d695cb5a
9 changed files with 102 additions and 15 deletions

19
NEWS
View file

@ -3,6 +3,25 @@ For full details, see the git log at: https://github.com/ksh93/ksh/tree/1.0
Any uppercase BUG_* names are modernish shell bug IDs.
2022-02-15:
- A bug was fixed in fast filescan loops (like 'while <file; do ...; done',
enabled by default by the SHOPT_FILESCAN compile-time option) in which
"$@" incorrectly acted like "$*" within the do...done block.
- Improved sanity check for filescan loops: they are now only recognised if
'while' is followed by a single input redirection without any command
name/arguments or variable assignments.
- As SHOPT_FILESCAN has been enabled by default since ksh 93l 2001-06-01,
the fast filescan loop is now documented in the manual page and the
compile-time option is no longer considered experimental.
- Filescan loops are now disabled at runtime if the --posix option is active
as they break a portable use case: POSIXly, 'while <file; do stuff; done'
repeatedly executes 'stuff' while 'file' can successfully be opened for
reading, without actually reading from 'file'.
2022-02-12:
- In multibyte locales such as UTF-8, shell input is no longer corrupted when

View file

@ -74,9 +74,10 @@ The options have the following defaults and meanings:
ESH on Compile with emacs command line editing. The original
emacs line editor code was provided by Mike Veach at IH.
FILESCAN on Experimental option that allows fast reading of files
using while < file;do ...; done and allowing fields in
each line to be accessed as positional parameters.
FILESCAN on Allows fast reading of files using:
while < file; do ...; done
Each line is stored in $REPLY and fields in each
line can be accessed as positional parameters.
FIXEDARRAY on When using typeset, a name in the format NAME[N]
creates a fixed-size array and any attempt to access a

View file

@ -19,7 +19,7 @@ SHOPT DYNAMIC=1 # dynamic loading for builtins
SHOPT ECHOPRINT= # make echo equivalent to print
SHOPT EDPREDICT=0 # History pattern search menu (type #<pattern>, then ESC <number> TAB). Experimental.
SHOPT ESH=1 # emacs/gmacs edit mode
SHOPT FILESCAN=1 # fast file scan
SHOPT FILESCAN=1 # fast file scan: while <file; do [use $REPLY or positional parameters]; done
SHOPT FIXEDARRAY=1 # fixed dimension indexed array
SHOPT GLOBCASEDET= # -o globcasedetect: adapt globbing/completion to case-insensitive file systems
SHOPT HISTEXPAND=1 # csh-style history file expansions

View file

@ -357,7 +357,6 @@ struct Shell_s
Shinit_f userinit;
Shbltin_f bltinfun;
Shbltin_t bltindata;
char *cur_line;
int offsets[10];
Sfio_t **sftable;
unsigned char *fdstatus;
@ -384,6 +383,9 @@ struct Shell_s
char exittrap;
char errtrap;
char end_fn;
#if SHOPT_FILESCAN
char *cur_line;
#endif
#if !SHOPT_DEVFD
char *fifo; /* FIFO name for current process substitution */
Dt_t *fifo_tree; /* for cleaning up process substitution FIFOs */

View file

@ -21,7 +21,7 @@
#define SH_RELEASE_FORK "93u+m" /* only change if you develop a new ksh93 fork */
#define SH_RELEASE_SVER "1.0.0-beta.2" /* semantic version number: https://semver.org */
#define SH_RELEASE_DATE "2022-02-12" /* must be in this format for $((.sh.version)) */
#define SH_RELEASE_DATE "2022-02-15" /* must be in this format for $((.sh.version)) */
#define SH_RELEASE_CPYR "(c) 2020-2022 Contributors to ksh " SH_RELEASE_FORK
/* Scripts sometimes field-split ${.sh.version}, so don't change amount of whitespace. */

View file

@ -497,6 +497,44 @@ may be used in place of
to negate
the loop termination test.
.TP
.PD 0
\f3while\fP \f2inputredirection\^\fP \f3;do\fP \f2list\^\fP \f3;done\fP
.PD
Filescan loop. This is defined by a lone input redirection following
.B while
(see
.I Input/Output
below).
It is faster than using the
.B read
built-in command in a regular
.B while
loop.
The shell reads lines from the file or stream opened by
.I inputredirection\^
until the end is reached or the loop is broken.
For each line read,
the command
.I list\^
is executed with the line's contents assigned to the
.B REPLY
variable and the line's fields split into the positional parameters
(see
.I Field Splitting\^
and
.I Positional Parameters\^
below).
Within the
.IR list\^ ,
standard input is redirected to
.BR /dev/null .
If the
.B posix
compatibility shell option is on,
this loop type is disabled and
.I inputredirection\^
is processed like a lone redirection in any other context.
.TP
\f3((\fP\f2expression\^\fP\f3))\fP
.br
The
@ -7621,6 +7659,9 @@ causes file descriptors > 2 to be left open when invoking another program;
.IP \[bu]
disables the \fB&>\fR redirection shorthand;
.IP \[bu]
disables fast filescan loops of type
\f3while\fP \f2inputredirection\^\fP \f3;do\fP \f2list\^\fP \f3;done\fP;
.IP \[bu]
makes the \fB<>\fR redirection operator default to redirecting standard input
if no file descriptor number precedes it;
.IP \[bu]

View file

@ -1156,8 +1156,13 @@ retry1:
#if SHOPT_FILESCAN
if(sh.cur_line)
{
v = getdolarg(1,(int*)0);
dolmax = MAX_ARGN;
v = getdolarg(1,&vsize);
if(c=='*' || !mp->quoted)
{
dolmax = 1;
vsize = -1;
}
}
else
#endif /* SHOPT_FILESCAN */
@ -1914,8 +1919,6 @@ retry2:
#if SHOPT_FILESCAN
if(sh.cur_line)
{
if(dolmax==MAX_ARGN && isastchar(mode))
break;
if(!(v=getdolarg(dolg,&vsize)))
{
dolmax = dolg;

View file

@ -835,13 +835,14 @@ static void unset_instance(Namval_t *nq, Namval_t *node, struct Namref *nr,long
#if SHOPT_FILESCAN
static Sfio_t *openstream(struct ionod *iop, int *save)
{
int savein, fd = sh_redirect(iop,3);
int err = errno, savein, fd = sh_redirect(iop,3);
Sfio_t *sp;
savein = dup(0);
if(fd==0)
fd = savein;
sp = sfnew(NULL,NULL,SF_UNBOUND,fd,SF_READ);
close(0);
while(close(0)<0 && errno==EINTR)
errno = err;
open(e_devnull,O_RDONLY);
sh.offsets[0] = -1;
sh.offsets[1] = 0;
@ -2242,11 +2243,17 @@ int sh_exec(register const Shnode_t *t, int flags)
goto endwhile;
#endif /* SHOPT_OPTIMIZE */
#if SHOPT_FILESCAN
if(type==TWH && tt->tre.tretyp==TCOM && !tt->com.comarg && tt->com.comio)
/* Recognize filescan loop for a lone input redirection following 'while' */
if(type==TWH /* 'while' (not 'until') */
&& tt->tre.tretyp==TCOM /* one simple command follows 'while'... */
&& !tt->com.comarg /* ...with no command name or arguments... */
&& !tt->com.comset /* ...and no variable assignments list... */
&& tt->com.comio /* ...and one I/O redirection... */
&& !tt->com.comio->ionxt /* ...but not more than one... */
&& !(tt->com.comio->iofile & (IOPUT|IOAPP)) /* ...and not > or >> */
&& !sh_isoption(SH_POSIX)) /* not in POSIX compliance mode */
{
iop = openstream(tt->com.comio,&savein);
if(tt->com.comset)
nv_setlist(tt->com.comset,NV_IDENT|NV_ASSIGN,0);
}
#endif /* SHOPT_FILESCAN */
sh.st.loopcnt++;
@ -2291,8 +2298,10 @@ int sh_exec(register const Shnode_t *t, int flags)
#if SHOPT_FILESCAN
if(iop)
{
int err=errno;
sfclose(iop);
close(0);
while(close(0)<0 && errno==EINTR)
errno = err;
dup(savein);
sh.cur_line = 0;
}

View file

@ -961,5 +961,17 @@ got=$(
[[ $got == 'test' ]] || err_exit "File descriptor is unexpectedly closed after exec in shared-state command substitution" \
"(expected 'test', got $(printf %q "$got"))"
# ======
# Test positional parameters in filescan loop
# In 93u+, "$@" wrongly acted like "$*"; fix was backported from 93v- beta
# Run only when SHOPT_FILESCAN evaluates to nonzero
# (presumably mirrors the compile-time option; set outside this view -- TODO confirm).
if ((SHOPT_FILESCAN))
then
# Input file: a single line holding three '/'-separated fields.
echo 'one/two/three' >foo
# Inside a command substitution, with IFS set to '/', run a filescan loop over
# foo and print, each wrapped in [brackets]: $REPLY, the parameter count, the
# three individual fields, and then "$*", "$@", $* and $@.
got=$(IFS=/; while <foo; do printf '[%s] ' "$REPLY" "$#" "$1" "$2" "$3" "$*" "$@" $* $@; done)
# Expected: "$*" yields one word joined with '/', whereas "$@", $* and $@ each
# yield one word per field (three bracketed items apiece).
exp='[one/two/three] [3] [one] [two] [three] [one/two/three] [one] [two] [three] [one] [two] [three] [one] [two] [three] '
# Report a failure showing both strings shell-quoted via printf %q.
[[ $got == "$exp" ]] || err_exit '$REPLY or positional parameters incorrect in filescan loop' \
"(expected $(printf %q "$exp"), got $(printf %q "$got"))"
fi
# ======
exit $((Errors<125?Errors:125))