mirror of
git://git.code.sf.net/p/cdesktopenv/code
synced 2025-03-09 15:50:02 +00:00
Improve and document fast filescan loops (SHOPT_FILESCAN)
From README: FILESCAN on Experimental option that allows fast reading of files using while < file;do ...; done and allowing fields in each line to be accessed as positional parameters. As SHOPT_FILESCAN has been enabled by default since ksh 93l 2001-06-01, the filescan loop is now documented in the manual page and the compile-time option is no longer considered experimental. We must disable this at runtime if --posix is active because it breaks a portable use case: POSIXly, 'while <file; do stuff; done' repeatedly executes 'stuff' while 'file' can successfully be opened for reading, without actually reading from 'file'. This also backports a bugfix from the 93v- beta. Reproducer: $ echo 'one two three' >foo $ while <foo; do printf '[%s] ' "$@"; echo; done [one two three] Expected output: [one] [two] [three] The bug is that "$@" acts like "$*", joining all the positional parameters into one word though it should be generating one word for each. src/cmd/ksh93/sh/macro.c: varsub(): - Backport fix for the bug described above. I do not understand the opaque macro.c code well enough yet to usefully describe the fix. src/cmd/ksh93/sh/xec.c: sh_exec(): - Improved sanity check for filescan loop: do not recognise it if the simple command includes variable assignments, more than one redirection, or an output or append redirection. - Disable filescan loops if --posix is active. - Another 93v- fix: handle interrupts (errno==EINTR) when closing the input file.
This commit is contained in:
parent
82ff91e9d9
commit
95d695cb5a
9 changed files with 102 additions and 15 deletions
19
NEWS
19
NEWS
|
@ -3,6 +3,25 @@ For full details, see the git log at: https://github.com/ksh93/ksh/tree/1.0
|
||||||
|
|
||||||
Any uppercase BUG_* names are modernish shell bug IDs.
|
Any uppercase BUG_* names are modernish shell bug IDs.
|
||||||
|
|
||||||
|
2022-02-15:
|
||||||
|
|
||||||
|
- A bug was fixed in fast filescan loops (like 'while <file; do ...; done',
|
||||||
|
enabled by default by the SHOPT_FILESCAN compile-time option) in which
|
||||||
|
"$@" incorrectly acted like "$*" within the do...done block.
|
||||||
|
|
||||||
|
- Improved sanity check for filescan loops: they are now only recognised if
|
||||||
|
'while' is followed by a single input redirection without any command
|
||||||
|
name/arguments or variable assignments.
|
||||||
|
|
||||||
|
- As SHOPT_FILESCAN has been enabled by default since ksh 93l 2001-06-01,
|
||||||
|
the fast filescan loop is now documented in the manual page and the
|
||||||
|
compile-time option is no longer considered experimental.
|
||||||
|
|
||||||
|
- Filescan loops are now disabled at runtime if the --posix option is active
|
||||||
|
as they break a portable use case: POSIXly, 'while <file; do stuff; done'
|
||||||
|
repeatedly executes 'stuff' while 'file' can successfully be opened for
|
||||||
|
reading, without actually reading from 'file'.
|
||||||
|
|
||||||
2022-02-12:
|
2022-02-12:
|
||||||
|
|
||||||
- In multibyte locales such as UTF-8, shell input is no longer corrupted when
|
- In multibyte locales such as UTF-8, shell input is no longer corrupted when
|
||||||
|
|
|
@ -74,9 +74,10 @@ The options have the following defaults and meanings:
|
||||||
ESH on Compile with emacs command line editing. The original
|
ESH on Compile with emacs command line editing. The original
|
||||||
emacs line editor code was provided by Mike Veach at IH.
|
emacs line editor code was provided by Mike Veach at IH.
|
||||||
|
|
||||||
FILESCAN on Experimental option that allows fast reading of files
|
FILESCAN on Allows fast reading of files using:
|
||||||
using while < file;do ...; done and allowing fields in
|
while < file; do ...; done
|
||||||
each line to be accessed as positional parameters.
|
Each line is stored in $REPLY and fields in each
|
||||||
|
line can be accessed as positional parameters.
|
||||||
|
|
||||||
FIXEDARRAY on When using typeset, a name in the format NAME[N]
|
FIXEDARRAY on When using typeset, a name in the format NAME[N]
|
||||||
creates a fixed-size array and any attempt to access a
|
creates a fixed-size array and any attempt to access a
|
||||||
|
|
|
@ -19,7 +19,7 @@ SHOPT DYNAMIC=1 # dynamic loading for builtins
|
||||||
SHOPT ECHOPRINT= # make echo equivalent to print
|
SHOPT ECHOPRINT= # make echo equivalent to print
|
||||||
SHOPT EDPREDICT=0 # History pattern search menu (type #<pattern>, then ESC <number> TAB). Experimental.
|
SHOPT EDPREDICT=0 # History pattern search menu (type #<pattern>, then ESC <number> TAB). Experimental.
|
||||||
SHOPT ESH=1 # emacs/gmacs edit mode
|
SHOPT ESH=1 # emacs/gmacs edit mode
|
||||||
SHOPT FILESCAN=1 # fast file scan
|
SHOPT FILESCAN=1 # fast file scan: while <file; do [use $REPLY or positional parameters]; done
|
||||||
SHOPT FIXEDARRAY=1 # fixed dimension indexed array
|
SHOPT FIXEDARRAY=1 # fixed dimension indexed array
|
||||||
SHOPT GLOBCASEDET= # -o globcasedetect: adapt globbing/completion to case-insensitive file systems
|
SHOPT GLOBCASEDET= # -o globcasedetect: adapt globbing/completion to case-insensitive file systems
|
||||||
SHOPT HISTEXPAND=1 # csh-style history file expansions
|
SHOPT HISTEXPAND=1 # csh-style history file expansions
|
||||||
|
|
|
@ -357,7 +357,6 @@ struct Shell_s
|
||||||
Shinit_f userinit;
|
Shinit_f userinit;
|
||||||
Shbltin_f bltinfun;
|
Shbltin_f bltinfun;
|
||||||
Shbltin_t bltindata;
|
Shbltin_t bltindata;
|
||||||
char *cur_line;
|
|
||||||
int offsets[10];
|
int offsets[10];
|
||||||
Sfio_t **sftable;
|
Sfio_t **sftable;
|
||||||
unsigned char *fdstatus;
|
unsigned char *fdstatus;
|
||||||
|
@ -384,6 +383,9 @@ struct Shell_s
|
||||||
char exittrap;
|
char exittrap;
|
||||||
char errtrap;
|
char errtrap;
|
||||||
char end_fn;
|
char end_fn;
|
||||||
|
#if SHOPT_FILESCAN
|
||||||
|
char *cur_line;
|
||||||
|
#endif
|
||||||
#if !SHOPT_DEVFD
|
#if !SHOPT_DEVFD
|
||||||
char *fifo; /* FIFO name for current process substitution */
|
char *fifo; /* FIFO name for current process substitution */
|
||||||
Dt_t *fifo_tree; /* for cleaning up process substitution FIFOs */
|
Dt_t *fifo_tree; /* for cleaning up process substitution FIFOs */
|
||||||
|
|
|
@ -21,7 +21,7 @@
|
||||||
|
|
||||||
#define SH_RELEASE_FORK "93u+m" /* only change if you develop a new ksh93 fork */
|
#define SH_RELEASE_FORK "93u+m" /* only change if you develop a new ksh93 fork */
|
||||||
#define SH_RELEASE_SVER "1.0.0-beta.2" /* semantic version number: https://semver.org */
|
#define SH_RELEASE_SVER "1.0.0-beta.2" /* semantic version number: https://semver.org */
|
||||||
#define SH_RELEASE_DATE "2022-02-12" /* must be in this format for $((.sh.version)) */
|
#define SH_RELEASE_DATE "2022-02-15" /* must be in this format for $((.sh.version)) */
|
||||||
#define SH_RELEASE_CPYR "(c) 2020-2022 Contributors to ksh " SH_RELEASE_FORK
|
#define SH_RELEASE_CPYR "(c) 2020-2022 Contributors to ksh " SH_RELEASE_FORK
|
||||||
|
|
||||||
/* Scripts sometimes field-split ${.sh.version}, so don't change amount of whitespace. */
|
/* Scripts sometimes field-split ${.sh.version}, so don't change amount of whitespace. */
|
||||||
|
|
|
@ -497,6 +497,44 @@ may be used in place of
|
||||||
to negate
|
to negate
|
||||||
the loop termination test.
|
the loop termination test.
|
||||||
.TP
|
.TP
|
||||||
|
.PD 0
|
||||||
|
\f3while\fP \f2inputredirection\^\fP \f3;do\fP \f2list\^\fP \f3;done\fP
|
||||||
|
.PD
|
||||||
|
Filescan loop. This is defined by a lone input redirection following
|
||||||
|
.B while
|
||||||
|
(see
|
||||||
|
.I Input/Output
|
||||||
|
below).
|
||||||
|
It is faster than using the
|
||||||
|
.B read
|
||||||
|
built-in command in a regular
|
||||||
|
.B while
|
||||||
|
loop.
|
||||||
|
The shell reads lines from the file or stream opened by
|
||||||
|
.I inputredirection\^
|
||||||
|
until the end is reached or the loop is broken.
|
||||||
|
For each line read,
|
||||||
|
the command
|
||||||
|
.I list\^
|
||||||
|
is executed with the line's contents assigned to the
|
||||||
|
.B REPLY
|
||||||
|
variable and the line's fields split into the positional parameters
|
||||||
|
(see
|
||||||
|
.I Field Splitting\^
|
||||||
|
and
|
||||||
|
.I Positional Parameters\^
|
||||||
|
below).
|
||||||
|
Within the
|
||||||
|
.IR list\^ ,
|
||||||
|
standard input is redirected to
|
||||||
|
.BR /dev/null .
|
||||||
|
If the
|
||||||
|
.B posix
|
||||||
|
compatibility shell option is on,
|
||||||
|
this loop type is disabled and
|
||||||
|
.I inputredirection\^
|
||||||
|
is processed like a lone redirection in any other context.
|
||||||
|
.TP
|
||||||
\f3((\fP\f2expression\^\fP\f3))\fP
|
\f3((\fP\f2expression\^\fP\f3))\fP
|
||||||
.br
|
.br
|
||||||
The
|
The
|
||||||
|
@ -7621,6 +7659,9 @@ causes file descriptors > 2 to be left open when invoking another program;
|
||||||
.IP \[bu]
|
.IP \[bu]
|
||||||
disables the \fB&>\fR redirection shorthand;
|
disables the \fB&>\fR redirection shorthand;
|
||||||
.IP \[bu]
|
.IP \[bu]
|
||||||
|
disables fast filescan loops of type
|
||||||
|
\f3while\fP \f2inputredirection\^\fP \f3;do\fP \f2list\^\fP \f3;done\fP;
|
||||||
|
.IP \[bu]
|
||||||
makes the \fB<>\fR redirection operator default to redirecting standard input
|
makes the \fB<>\fR redirection operator default to redirecting standard input
|
||||||
if no file descriptor number precedes it;
|
if no file descriptor number precedes it;
|
||||||
.IP \[bu]
|
.IP \[bu]
|
||||||
|
|
|
@ -1156,8 +1156,13 @@ retry1:
|
||||||
#if SHOPT_FILESCAN
|
#if SHOPT_FILESCAN
|
||||||
if(sh.cur_line)
|
if(sh.cur_line)
|
||||||
{
|
{
|
||||||
v = getdolarg(1,(int*)0);
|
|
||||||
dolmax = MAX_ARGN;
|
dolmax = MAX_ARGN;
|
||||||
|
v = getdolarg(1,&vsize);
|
||||||
|
if(c=='*' || !mp->quoted)
|
||||||
|
{
|
||||||
|
dolmax = 1;
|
||||||
|
vsize = -1;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
#endif /* SHOPT_FILESCAN */
|
#endif /* SHOPT_FILESCAN */
|
||||||
|
@ -1914,8 +1919,6 @@ retry2:
|
||||||
#if SHOPT_FILESCAN
|
#if SHOPT_FILESCAN
|
||||||
if(sh.cur_line)
|
if(sh.cur_line)
|
||||||
{
|
{
|
||||||
if(dolmax==MAX_ARGN && isastchar(mode))
|
|
||||||
break;
|
|
||||||
if(!(v=getdolarg(dolg,&vsize)))
|
if(!(v=getdolarg(dolg,&vsize)))
|
||||||
{
|
{
|
||||||
dolmax = dolg;
|
dolmax = dolg;
|
||||||
|
|
|
@ -835,13 +835,14 @@ static void unset_instance(Namval_t *nq, Namval_t *node, struct Namref *nr,long
|
||||||
#if SHOPT_FILESCAN
|
#if SHOPT_FILESCAN
|
||||||
static Sfio_t *openstream(struct ionod *iop, int *save)
|
static Sfio_t *openstream(struct ionod *iop, int *save)
|
||||||
{
|
{
|
||||||
int savein, fd = sh_redirect(iop,3);
|
int err = errno, savein, fd = sh_redirect(iop,3);
|
||||||
Sfio_t *sp;
|
Sfio_t *sp;
|
||||||
savein = dup(0);
|
savein = dup(0);
|
||||||
if(fd==0)
|
if(fd==0)
|
||||||
fd = savein;
|
fd = savein;
|
||||||
sp = sfnew(NULL,NULL,SF_UNBOUND,fd,SF_READ);
|
sp = sfnew(NULL,NULL,SF_UNBOUND,fd,SF_READ);
|
||||||
close(0);
|
while(close(0)<0 && errno==EINTR)
|
||||||
|
errno = err;
|
||||||
open(e_devnull,O_RDONLY);
|
open(e_devnull,O_RDONLY);
|
||||||
sh.offsets[0] = -1;
|
sh.offsets[0] = -1;
|
||||||
sh.offsets[1] = 0;
|
sh.offsets[1] = 0;
|
||||||
|
@ -2242,11 +2243,17 @@ int sh_exec(register const Shnode_t *t, int flags)
|
||||||
goto endwhile;
|
goto endwhile;
|
||||||
#endif /* SHOPT_OPTIMIZE */
|
#endif /* SHOPT_OPTIMIZE */
|
||||||
#if SHOPT_FILESCAN
|
#if SHOPT_FILESCAN
|
||||||
if(type==TWH && tt->tre.tretyp==TCOM && !tt->com.comarg && tt->com.comio)
|
/* Recognize filescan loop for a lone input redirection following 'while' */
|
||||||
|
if(type==TWH /* 'while' (not 'until') */
|
||||||
|
&& tt->tre.tretyp==TCOM /* one simple command follows 'while'... */
|
||||||
|
&& !tt->com.comarg /* ...with no command name or arguments... */
|
||||||
|
&& !tt->com.comset /* ...and no variable assignments list... */
|
||||||
|
&& tt->com.comio /* ...and one I/O redirection... */
|
||||||
|
&& !tt->com.comio->ionxt /* ...but not more than one... */
|
||||||
|
&& !(tt->com.comio->iofile & (IOPUT|IOAPP)) /* ...and not > or >> */
|
||||||
|
&& !sh_isoption(SH_POSIX)) /* not in POSIX compilance mode */
|
||||||
{
|
{
|
||||||
iop = openstream(tt->com.comio,&savein);
|
iop = openstream(tt->com.comio,&savein);
|
||||||
if(tt->com.comset)
|
|
||||||
nv_setlist(tt->com.comset,NV_IDENT|NV_ASSIGN,0);
|
|
||||||
}
|
}
|
||||||
#endif /* SHOPT_FILESCAN */
|
#endif /* SHOPT_FILESCAN */
|
||||||
sh.st.loopcnt++;
|
sh.st.loopcnt++;
|
||||||
|
@ -2291,8 +2298,10 @@ int sh_exec(register const Shnode_t *t, int flags)
|
||||||
#if SHOPT_FILESCAN
|
#if SHOPT_FILESCAN
|
||||||
if(iop)
|
if(iop)
|
||||||
{
|
{
|
||||||
|
int err=errno;
|
||||||
sfclose(iop);
|
sfclose(iop);
|
||||||
close(0);
|
while(close(0)<0 && errno==EINTR)
|
||||||
|
errno = err;
|
||||||
dup(savein);
|
dup(savein);
|
||||||
sh.cur_line = 0;
|
sh.cur_line = 0;
|
||||||
}
|
}
|
||||||
|
|
|
@ -961,5 +961,17 @@ got=$(
|
||||||
[[ $got == 'test' ]] || err_exit "File descriptor is unexpectedly closed after exec in shared-state command substitution" \
|
[[ $got == 'test' ]] || err_exit "File descriptor is unexpectedly closed after exec in shared-state command substitution" \
|
||||||
"(expected 'test', got $(printf %q "$got"))"
|
"(expected 'test', got $(printf %q "$got"))"
|
||||||
|
|
||||||
|
# ======
|
||||||
|
# Test positional parameters in filescan loop
|
||||||
|
# In 93u+, "$@" wrongly acted like "$*"; fix was backported from 93v- beta
|
||||||
|
if ((SHOPT_FILESCAN))
|
||||||
|
then
|
||||||
|
echo 'one/two/three' >foo
|
||||||
|
got=$(IFS=/; while <foo; do printf '[%s] ' "$REPLY" "$#" "$1" "$2" "$3" "$*" "$@" $* $@; done)
|
||||||
|
exp='[one/two/three] [3] [one] [two] [three] [one/two/three] [one] [two] [three] [one] [two] [three] [one] [two] [three] '
|
||||||
|
[[ $got == "$exp" ]] || err_exit '$REPLY or positional parameters incorrect in filescan loop' \
|
||||||
|
"(expected $(printf %q "$exp"), got $(printf %q "$got"))"
|
||||||
|
fi
|
||||||
|
|
||||||
# ======
|
# ======
|
||||||
exit $((Errors<125?Errors:125))
|
exit $((Errors<125?Errors:125))
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue