From 7f4c113e57594f4e8c0cea60b6a8ab82f50f62aa Mon Sep 17 00:00:00 2001 From: winlin Date: Sun, 12 Oct 2014 20:57:59 +0800 Subject: [PATCH] add st-1.9 to research --- trunk/research/st-1.9/Makefile | 469 +++ trunk/research/st-1.9/README | 394 +++ trunk/research/st-1.9/common.h | 466 +++ trunk/research/st-1.9/docs/fig.gif | Bin 0 -> 5374 bytes trunk/research/st-1.9/docs/notes.html | 434 +++ trunk/research/st-1.9/docs/reference.html | 3120 +++++++++++++++++ trunk/research/st-1.9/docs/st.html | 504 +++ trunk/research/st-1.9/docs/timeout_heap.txt | 60 + trunk/research/st-1.9/event.c | 1449 ++++++++ trunk/research/st-1.9/examples/Makefile | 115 + trunk/research/st-1.9/examples/README | 98 + trunk/research/st-1.9/examples/error.c | 168 + trunk/research/st-1.9/examples/lookupdns.c | 103 + trunk/research/st-1.9/examples/proxy.c | 541 +++ trunk/research/st-1.9/examples/res.c | 305 ++ trunk/research/st-1.9/examples/server.c | 1025 ++++++ trunk/research/st-1.9/extensions/Makefile | 91 + trunk/research/st-1.9/extensions/README | 42 + trunk/research/st-1.9/extensions/common.h | 77 + trunk/research/st-1.9/extensions/dnscache.c | 190 + trunk/research/st-1.9/extensions/dnsres.c | 305 ++ trunk/research/st-1.9/extensions/lrucache.c | 343 ++ .../st-1.9/extensions/print_stk.patch | 367 ++ trunk/research/st-1.9/extensions/stx.h | 91 + trunk/research/st-1.9/extensions/stx_fileio.c | 197 ++ trunk/research/st-1.9/extensions/stx_fileio.h | 52 + trunk/research/st-1.9/extensions/testdns.c | 112 + trunk/research/st-1.9/io.c | 778 ++++ trunk/research/st-1.9/key.c | 121 + trunk/research/st-1.9/libst.def | 51 + trunk/research/st-1.9/md.S | 431 +++ trunk/research/st-1.9/md.h | 627 ++++ trunk/research/st-1.9/osguess.sh | 45 + trunk/research/st-1.9/public.h | 184 + trunk/research/st-1.9/sched.c | 672 ++++ trunk/research/st-1.9/st.pc.in | 10 + trunk/research/st-1.9/st.spec | 79 + trunk/research/st-1.9/stk.c | 173 + trunk/research/st-1.9/sync.c | 369 ++ 39 files changed, 14658 insertions(+) create 
mode 100644 trunk/research/st-1.9/Makefile create mode 100644 trunk/research/st-1.9/README create mode 100644 trunk/research/st-1.9/common.h create mode 100644 trunk/research/st-1.9/docs/fig.gif create mode 100644 trunk/research/st-1.9/docs/notes.html create mode 100644 trunk/research/st-1.9/docs/reference.html create mode 100644 trunk/research/st-1.9/docs/st.html create mode 100644 trunk/research/st-1.9/docs/timeout_heap.txt create mode 100644 trunk/research/st-1.9/event.c create mode 100644 trunk/research/st-1.9/examples/Makefile create mode 100644 trunk/research/st-1.9/examples/README create mode 100644 trunk/research/st-1.9/examples/error.c create mode 100644 trunk/research/st-1.9/examples/lookupdns.c create mode 100644 trunk/research/st-1.9/examples/proxy.c create mode 100644 trunk/research/st-1.9/examples/res.c create mode 100644 trunk/research/st-1.9/examples/server.c create mode 100644 trunk/research/st-1.9/extensions/Makefile create mode 100644 trunk/research/st-1.9/extensions/README create mode 100644 trunk/research/st-1.9/extensions/common.h create mode 100644 trunk/research/st-1.9/extensions/dnscache.c create mode 100644 trunk/research/st-1.9/extensions/dnsres.c create mode 100644 trunk/research/st-1.9/extensions/lrucache.c create mode 100644 trunk/research/st-1.9/extensions/print_stk.patch create mode 100644 trunk/research/st-1.9/extensions/stx.h create mode 100644 trunk/research/st-1.9/extensions/stx_fileio.c create mode 100644 trunk/research/st-1.9/extensions/stx_fileio.h create mode 100644 trunk/research/st-1.9/extensions/testdns.c create mode 100644 trunk/research/st-1.9/io.c create mode 100644 trunk/research/st-1.9/key.c create mode 100644 trunk/research/st-1.9/libst.def create mode 100644 trunk/research/st-1.9/md.S create mode 100644 trunk/research/st-1.9/md.h create mode 100644 trunk/research/st-1.9/osguess.sh create mode 100644 trunk/research/st-1.9/public.h create mode 100644 trunk/research/st-1.9/sched.c create mode 100644 
trunk/research/st-1.9/st.pc.in create mode 100644 trunk/research/st-1.9/st.spec create mode 100644 trunk/research/st-1.9/stk.c create mode 100644 trunk/research/st-1.9/sync.c diff --git a/trunk/research/st-1.9/Makefile b/trunk/research/st-1.9/Makefile new file mode 100644 index 000000000..a38ac2b62 --- /dev/null +++ b/trunk/research/st-1.9/Makefile @@ -0,0 +1,469 @@ +# The contents of this file are subject to the Mozilla Public +# License Version 1.1 (the "License"); you may not use this file +# except in compliance with the License. You may obtain a copy of +# the License at http://www.mozilla.org/MPL/ +# +# Software distributed under the License is distributed on an "AS +# IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or +# implied. See the License for the specific language governing +# rights and limitations under the License. +# +# The Original Code is the Netscape Portable Runtime library. +# +# The Initial Developer of the Original Code is Netscape +# Communications Corporation. Portions created by Netscape are +# Copyright (C) 1994-2000 Netscape Communications Corporation. All +# Rights Reserved. +# +# Contributor(s): Silicon Graphics, Inc. +# +# Portions created by SGI are Copyright (C) 2000-2001 Silicon +# Graphics, Inc. All Rights Reserved. +# +# Alternatively, the contents of this file may be used under the +# terms of the GNU General Public License Version 2 or later (the +# "GPL"), in which case the provisions of the GPL are applicable +# instead of those above. If you wish to allow use of your +# version of this file only under the terms of the GPL and not to +# allow others to use your version of this file under the MPL, +# indicate your decision by deleting the provisions above and +# replace them with the notice and other provisions required by +# the GPL. If you do not delete the provisions above, a recipient +# may use your version of this file under either the MPL or the +# GPL. 
+ +# This is the full version of the libst library - modify carefully +VERSION = 1.9 + +########################## +# Supported OSes: +# +#OS = AIX +#OS = CYGWIN +#OS = DARWIN +#OS = FREEBSD +#OS = HPUX +#OS = HPUX_64 +#OS = IRIX +#OS = IRIX_64 +#OS = LINUX +#OS = NETBSD +#OS = OPENBSD +#OS = OSF1 +#OS = SOLARIS +#OS = SOLARIS_64 + +# Please see the "Other possible defines" section below for +# possible compilation options. +########################## + +CC = cc +AR = ar +LD = ld +RANLIB = ranlib +LN = ln + +SHELL = /bin/sh +ECHO = /bin/echo + +BUILD = DBG +TARGETDIR = $(OS)_$(shell uname -r)_$(BUILD) + +DEFINES = -D$(OS) +CFLAGS = +SFLAGS = +ARFLAGS = -rv +LNFLAGS = -s +DSO_SUFFIX = so + +MAJOR = $(shell echo $(VERSION) | sed 's/^\([^\.]*\).*/\1/') +DESC = st.pc + +########################## +# Platform section. +# Possible targets: + +TARGETS = aix-debug aix-optimized \ + cygwin-debug cygwin-optimized \ + darwin-debug darwin-optimized \ + freebsd-debug freebsd-optimized \ + hpux-debug hpux-optimized \ + hpux-64-debug hpux-64-optimized \ + irix-n32-debug irix-n32-optimized \ + irix-64-debug irix-64-optimized \ + linux-debug linux-optimized \ + netbsd-debug netbsd-optimized \ + openbsd-debug openbsd-optimized \ + osf1-debug osf1-optimized \ + solaris-debug solaris-optimized \ + solaris-64-debug solaris-64-optimized + +# +# Platform specifics +# + +ifeq ($(OS), AIX) +AIX_VERSION = $(shell uname -v).$(shell uname -r) +TARGETDIR = $(OS)_$(AIX_VERSION)_$(BUILD) +CC = xlC +STATIC_ONLY = yes +ifeq ($(BUILD), OPT) +OTHER_FLAGS = -w +endif +ifneq ($(filter-out 4.1 4.2, $(AIX_VERSION)),) +DEFINES += -DMD_HAVE_SOCKLEN_T +endif +endif + +ifeq ($(OS), CYGWIN) +TARGETDIR = $(OS)_$(BUILD) +CC = gcc +LD = gcc +DSO_SUFFIX = dll +SLIBRARY = $(TARGETDIR)/libst.dll.a +DLIBRARY = $(TARGETDIR)/libst.dll +DEF_FILE = $(TARGETDIR)/libst.def +LDFLAGS = libst.def -shared --enable-auto-image-base -Wl,--output-def,$(DEF_FILE),--out-implib,$(SLIBRARY) +OTHER_FLAGS = -Wall +endif + +ifeq 
($(OS), DARWIN) +LD = cc +SFLAGS = -fPIC -fno-common +DSO_SUFFIX = dylib +RELEASE = $(shell uname -r | cut -d. -f1) +PPC = $(shell test $(RELEASE) -le 9 && echo yes) +INTEL = $(shell test $(RELEASE) -ge 9 && echo yes) +ifeq ($(PPC), yes) +CFLAGS += -arch ppc +LDFLAGS += -arch ppc +endif +ifeq ($(INTEL), yes) +CFLAGS += -arch i386 -arch x86_64 +LDFLAGS += -arch i386 -arch x86_64 +endif +LDFLAGS += -dynamiclib -install_name /sw/lib/libst.$(MAJOR).$(DSO_SUFFIX) -compatibility_version $(MAJOR) -current_version $(VERSION) +OTHER_FLAGS = -Wall +endif + +ifeq ($(OS), FREEBSD) +SFLAGS = -fPIC +LDFLAGS = -shared -soname=$(SONAME) -lc +OTHER_FLAGS = -Wall +ifeq ($(shell test -f /usr/include/sys/event.h && echo yes), yes) +DEFINES += -DMD_HAVE_KQUEUE +endif +endif + +ifeq (HPUX, $(findstring HPUX, $(OS))) +ifeq ($(OS), HPUX_64) +DEFINES = -DHPUX +CFLAGS = -Ae +DD64 +Z +else +CFLAGS = -Ae +DAportable +Z +endif +RANLIB = true +LDFLAGS = -b +DSO_SUFFIX = sl +endif + +ifeq (IRIX, $(findstring IRIX, $(OS))) +ifeq ($(OS), IRIX_64) +DEFINES = -DIRIX +ABIFLAG = -64 +else +ABIFLAG = -n32 +endif +RANLIB = true +CFLAGS = $(ABIFLAG) -mips3 +LDFLAGS = $(ABIFLAG) -shared +OTHER_FLAGS = -fullwarn +endif + +ifeq ($(OS), LINUX) +EXTRA_OBJS = $(TARGETDIR)/md.o +SFLAGS = -fPIC +LDFLAGS = -shared -soname=$(SONAME) -lc +OTHER_FLAGS = -Wall +ifeq ($(shell test -f /usr/include/sys/epoll.h && echo yes), yes) +DEFINES += -DMD_HAVE_EPOLL +endif +endif + +ifeq ($(OS), NETBSD) +SFLAGS = -fPIC +LDFLAGS = -shared -soname=$(SONAME) -lc +OTHER_FLAGS = -Wall +endif + +ifeq ($(OS), OPENBSD) +SFLAGS = -fPIC +LDFLAGS = -shared -soname=$(SONAME) -lc +OTHER_FLAGS = -Wall +ifeq ($(shell test -f /usr/include/sys/event.h && echo yes), yes) +DEFINES += -DMD_HAVE_KQUEUE +endif +endif + +ifeq ($(OS), OSF1) +RANLIB = true +LDFLAGS = -shared -all -expect_unresolved "*" +endif + +ifeq (SOLARIS, $(findstring SOLARIS, $(OS))) +TARGETDIR = $(OS)_$(shell uname -r | sed 's/^5/2/')_$(BUILD) +CC = gcc +LD = gcc +RANLIB = true 
+LDFLAGS = -G +OTHER_FLAGS = -Wall +ifeq ($(OS), SOLARIS_64) +DEFINES = -DSOLARIS +CFLAGS += -m64 +LDFLAGS += -m64 +endif +endif + +# +# End of platform section. +########################## + + +ifeq ($(BUILD), OPT) +OTHER_FLAGS += -O +else +OTHER_FLAGS += -g +DEFINES += -DDEBUG +endif + +########################## +# Other possible defines: +# To use poll(2) instead of select(2) for events checking: +# DEFINES += -DUSE_POLL +# You may prefer to use select for applications that have many threads +# using one file descriptor, and poll for applications that have many +# different file descriptors. With USE_POLL poll() is called with at +# least one pollfd per I/O-blocked thread, so 1000 threads sharing one +# descriptor will poll 1000 identical pollfds and select would be more +# efficient. But if the threads all use different descriptors poll() +# may be better depending on your operating system's implementation of +# poll and select. Really, it's up to you. Oh, and on some platforms +# poll() fails with more than a few dozen descriptors. +# +# Some platforms allow FD_SETSIZE to be defined (if select() is used), e.g.: +# DEFINES += -DFD_SETSIZE=4096 +# +# To use malloc(3) instead of mmap(2) for stack allocation: +# DEFINES += -DMALLOC_STACK +# +# To provision more than the default 16 thread-specific-data keys +# (but not too many!): +# DEFINES += -DST_KEYS_MAX=<n> +# +# To start with more than the default 64 initial pollfd slots +# (but the table grows dynamically anyway): +# DEFINES += -DST_MIN_POLLFDS_SIZE=<n> +# +# Note that you can also add these defines by specifying them as +# make/gmake arguments (without editing this Makefile). For example: +# +# make EXTRA_CFLAGS=-DUSE_POLL +# +# (replace make with gmake if needed). +# +# You can also modify the default selection of an alternative event +# notification mechanism. 
E.g., to enable kqueue(2) support (if it's not +# enabled by default): +# +# gmake EXTRA_CFLAGS=-DMD_HAVE_KQUEUE +# +# or to disable default epoll(4) support: +# +# make EXTRA_CFLAGS=-UMD_HAVE_EPOLL +# +########################## + +CFLAGS += $(DEFINES) $(OTHER_FLAGS) $(EXTRA_CFLAGS) + +OBJS = $(TARGETDIR)/sched.o \ + $(TARGETDIR)/stk.o \ + $(TARGETDIR)/sync.o \ + $(TARGETDIR)/key.o \ + $(TARGETDIR)/io.o \ + $(TARGETDIR)/event.o +OBJS += $(EXTRA_OBJS) +HEADER = $(TARGETDIR)/st.h +SLIBRARY = $(TARGETDIR)/libst.a +DLIBRARY = $(TARGETDIR)/libst.$(DSO_SUFFIX).$(VERSION) +EXAMPLES = examples + +LINKNAME = libst.$(DSO_SUFFIX) +SONAME = libst.$(DSO_SUFFIX).$(MAJOR) +FULLNAME = libst.$(DSO_SUFFIX).$(VERSION) + +ifeq ($(OS), CYGWIN) +SONAME = cygst.$(DSO_SUFFIX) +SLIBRARY = $(TARGETDIR)/libst.dll.a +DLIBRARY = $(TARGETDIR)/$(SONAME) +LINKNAME = +# examples directory does not compile under cygwin +EXAMPLES = +endif + +ifeq ($(OS), DARWIN) +LINKNAME = libst.$(DSO_SUFFIX) +SONAME = libst.$(MAJOR).$(DSO_SUFFIX) +FULLNAME = libst.$(VERSION).$(DSO_SUFFIX) +endif + +ifeq ($(STATIC_ONLY), yes) +LIBRARIES = $(SLIBRARY) +else +LIBRARIES = $(SLIBRARY) $(DLIBRARY) +endif + +ifeq ($(OS),) +ST_ALL = unknown +else +ST_ALL = $(TARGETDIR) $(LIBRARIES) $(HEADER) $(EXAMPLES) $(DESC) +endif + +all: $(ST_ALL) + +unknown: + @echo + @echo "Please specify one of the following targets:" + @echo + @for target in $(TARGETS); do echo $$target; done + @echo + +st.pc: st.pc.in + sed "s/@VERSION@/${VERSION}/g" < $< > $@ + +$(TARGETDIR): + if [ ! 
-d $(TARGETDIR) ]; then mkdir $(TARGETDIR); fi + +$(SLIBRARY): $(OBJS) + $(AR) $(ARFLAGS) $@ $(OBJS) + $(RANLIB) $@ + rm -f obj; $(LN) $(LNFLAGS) $(TARGETDIR) obj + +$(DLIBRARY): $(OBJS:%.o=%-pic.o) + $(LD) $(LDFLAGS) $^ -o $@ + if test "$(LINKNAME)"; then \ + cd $(TARGETDIR); \ + rm -f $(SONAME) $(LINKNAME); \ + $(LN) $(LNFLAGS) $(FULLNAME) $(SONAME); \ + $(LN) $(LNFLAGS) $(FULLNAME) $(LINKNAME); \ + fi + +$(HEADER): public.h + rm -f $@ + cp public.h $@ + +$(TARGETDIR)/md.o: md.S + $(CC) $(CFLAGS) -c $< -o $@ + +$(TARGETDIR)/%.o: %.c common.h md.h + $(CC) $(CFLAGS) -c $< -o $@ + +examples:: + @echo Making $@ + @cd $@; $(MAKE) CC="$(CC)" CFLAGS="$(CFLAGS)" OS="$(OS)" TARGETDIR="$(TARGETDIR)" + +clean: + rm -rf *_OPT *_DBG obj st.pc + +########################## +# Pattern rules: + +ifneq ($(SFLAGS),) +# Compile with shared library options if it's a C file +$(TARGETDIR)/%-pic.o: %.c common.h md.h + $(CC) $(CFLAGS) $(SFLAGS) -c $< -o $@ +endif + +# Compile assembly as normal or C as normal if no SFLAGS +%-pic.o: %.o + rm -f $@; $(LN) $(LNFLAGS) $(. Install them with: + # rpm -i libst*.rpm +Requires GNU automake and rpm 3.0.3 or later. + +Debian users: + If you run potato, please upgrade to woody. + If you run woody, "apt-get install libst-dev" will get you v1.3. + If you run testing/unstable, you will get the newest available version. + If you *must* have the newest libst in woody, you may follow these + not-recommended instructions: + 1. Add "deb-src unstable main" to your + /etc/apt/sources.list + 2. apt-get update + 3. apt-get source st + 4. cd st-1.4 (or whatever version you got) + 5. debuild + 6. dpkg -i ../*.deb + +If your application uses autoconf to search for dependencies and you +want to search for a given version of libst, you can simply add + PKG_CHECK_MODULES(MYAPP, st >= 1.3 mumble >= 0.2.23) +to your configure.ac/in. This will define @MYAPP_LIBS@ and +@MYAPP_CFLAGS@ which you may then use in your Makefile.am/in files to +link against mumble and st. 
+ + +LICENSE + +The State Threads library is a derivative of the Netscape Portable +Runtime library (NSPR). All source code in this directory is +distributed under the terms of the Mozilla Public License (MPL) version +1.1 or the GNU General Public License (GPL) version 2 or later. For +more information about these licenses please see +http://www.mozilla.org/MPL/ and http://www.gnu.org/copyleft/. + +All source code in the "examples" directory is distributed under the BSD +style license. + + +PLATFORMS + +Please see the "docs/notes.html" file for the list of currently +supported platforms. + + +DEBUGGER SUPPORT + +It's almost impossible to print SP and PC in a portable way. The only +way to see thread's stack platform-independently is to actually jump to +the saved context. That's what the _st_iterate_threads() function does. +Do the following to iterate over all threads: + +- set the _st_iterate_threads_flag to 1 in debugger +- set breakpoint at the _st_show_thread_stack() function + (which does nothing) +- call the _st_iterate_threads() function which jumps to the + next thread +- at each break you can explore thread's stack +- continue +- when iteration is complete, you return to the original + point (you can see thread id and a message as arguments of + the _st_show_thread_stack() function). + +You can call _st_iterate_threads() in three ways: + +- Insert it into your source code at the point you want to + go over threads. +- Just run application and this function will be called at + the first context switch. +- Call it directly from the debugger at any point. + +This works with gdb and dbx. + +Example using gdb: + +(gdb) set _st_iterate_threads_flag = 1 +(gdb) b _st_show_thread_stack +... +(gdb) call _st_iterate_threads() +... +(gdb) bt +... +(gdb) c +... +(gdb) bt +... +(gdb) c +... +and so on... + +_st_iterate_threads_flag will be set to 0 automatically +after iteration is over or you can set it to 0 at any time +to stop iteration. 
+ +Sometimes gdb complains about SIGSEGV when you call a function +directly at gdb command-line. It can be ignored -- just call the +same function right away again, it works just fine. For example: + +(gdb) set _st_iterate_threads_flag = 1 +(gdb) b _st_show_thread_stack +Breakpoint 1 at 0x809bbbb: file sched.c, line 856. +(gdb) call _st_iterate_threads() +Program received signal SIGSEGV, Segmentation fault. +.... +(gdb) # just call the function again: +(gdb) call _st_iterate_threads() +Breakpoint 1, _st_show_thread_stack (thread=0x4017aee4, messg=0x80ae7a2 +"Iteration started") at sched.c:856 +856 } +.... + +You can use simple gdb command-line scripting to display +all threads and their stack traces at once: + +(gdb) while _st_iterate_threads_flag + >bt + >c + >end +.... + +Another script to stop at the thread with the specific thread id +(e.g., 0x40252ee4): + +(gdb) # set the flag again: +(gdb) set _st_iterate_threads_flag = 1 +(gdb) call _st_iterate_threads() +Breakpoint 1, _st_show_thread_stack (thread=0x4017aee4, messg=0x80ae7a2 +"Iteration started") at sched.c:856 +856 } +.... +(gdb) while thread != 0x40252ee4 + >c + >end +.... +.... +Breakpoint 1, _st_show_thread_stack (thread=0x40252ee4, messg=0x0) at +sched.c:856 +856 } +(gdb) bt +.... +(gdb) # don't want to continue iteration, unset the flag: +(gdb) set _st_iterate_threads_flag = 0 +(gdb) c +Continuing. +Breakpoint 1, _st_show_thread_stack (thread=0x0, messg=0x80ae78e "Iteration +completed") + at sched.c:856 +856 } +(gdb) c +Continuing. +(gdb) return +Make selected stack frame return now? (y or n) y +#0 0x4011254e in __select () + from /lib/libc.so.6 +(gdb) detach + + +CHANGE LOG + +Changes from 1.8 to 1.9. +------------------------ +o Support 32-bit and 64-bit Intel Macs. + +o Added ST_VERSION string, and ST_VERSION_MAJOR and ST_VERSION_MINOR + [bug 1796801]. + +o Fixed some compiler warnings, based on a patch from Brian Wellington + [bug 1932741]. + + +Changes from 1.7 to 1.8. 
+-------------------------- +o Added support for kqueue and epoll on platforms that support them. + Added ability to choose the event notification system at program + startup. + +o Long-overdue public definitions of ST_UTIME_NO_TIMEOUT (-1ULL) and + ST_UTIME_NO_WAIT (0) [bug 1514436]. + +o Documentation patch for st_utime() [bug 1514484]. + +o Documentation patch for st_timecache_set() [bug 1514486]. + +o Documentation patch for st_netfd_serialize_accept() [bug 1514494]. + +o Added st_writev_resid() [rfe 1538344]. + +o Added st_readv_resid() [rfe 1538768] and, for symmetry, st_readv(). + + +Changes from 1.6 to 1.7. +------------------------ +o Support glibc 2.4, which breaks programs that manipulate jump buffers. + Replaced Linux IA64 special cases with new md.S that covers all + Linux. + + +Changes from 1.5.2 to 1.6. +-------------------------- +none + + +Changes from 1.5.1 to 1.5.2. +---------------------------- +o Alfred Perlstein's context switch callback feature. + +o Claus Assmann's st_recvmsg/st_sendmsg wrappers. + +o Extra stack padding for platforms that need it. + +o Ron Arts's timeout clarifications in the reference manual. + +o Raymond Bero and Anton Berezin's AMD64 FreeBSD port. + +o Claus Assmann's AMD64 SunOS 5.10 port. + +o Claus Assmann's AMD64 OpenBSD port. + +o Michael Abd-El-Malek's Mac OS X port. + +o Michael Abd-El-Malek's stack printing patch. + + +Changes from 1.5.0 to 1.5.1. +---------------------------- +o Andreas Gustafsson's USE_POLL fix. + +o Gene's st_set_utime_function() enhancement. + + +Changes from 1.4 to 1.5.0. +-------------------------- +o Andreas Gustafsson's performance patch. + +o New extensions: Improved DNS resolver, generic LRU cache, in-process + DNS cache, and a program to test the resolver and cache. + +o Support for AMD Opteron 64-bit CPUs under Linux. + +o Support for SPARC-64 under Solaris. + +o Andreas Gustafsson's support for VAX under NetBSD. + +o Changed unportable #warning directives in md.h to #error. 
+ + +Changes from 1.3 to 1.4. +------------------------ +o Andreas Gustafsson's NetBSD port. + +o Wesley W. Terpstra's Darwin (MacOS X) port. + +o Support for many CPU architectures under Linux and *BSD. + +o Renamed private typedefs so they don't conflict with public ones any + more. + +o common.h now includes public.h for strict prototyping. + +o Joshua Levy's recommendation to make st_connect() and st_sendto() + accept const struct sockaddr pointers, as the originals do. + +o Clarified the documentation regarding blocking vs. non-blocking I/O. + +o Cygwin support. + +o Created the extensions directory. + +o Fixed warnings from ia64asm.S. + + +Changes from 1.2 to 1.3. +------------------------ +o Added st_read_resid() and st_write_resid() to allow the caller to know + how much data was transferred before an error occurred. Updated + documentation. + +o Updated project link, copyrights, and documentation regarding + timeouts. Added comment to st_connect(). + +o Optimized the _st_add_sleep_q() function in sched.c. Now we walk the + sleep queue *backward* when inserting a thread into it. When you + have lots (hundreds) of threads and several timeout values, it takes + a while to insert a thread at the appropriate point in the sleep + queue. The idea is that often this appropriate point is closer to + the end of the queue rather than the beginning. Measurements show + performance improves with this change. In any case this change + should do no harm. + +o Added a hint of when to define USE_POLL and when not to, to the + Makefile. + +o Added debugging support (files common.h and sched.c). See above. + +o Decreased the number of reallocations of _ST_POLLFDS in sched.c. + Inspired by Lev Walkin. + +o Fixed st_usleep(-1) and st_sleep(-1), and added a warning to the + documentation about too-large timeouts. + +o Linux/*BSD Alpha port. + +o Wesley W. 
Terpstra modernized the build process: + - properly build relocatable libraries under bsd and linux + - use library versioning + - added rpm spec file + - added debian/ files + See above for build instructions. + + +Changes from 1.1 to 1.2. +------------------------ +o Added st_randomize_stacks(). + +o Added a patch contributed by Sascha Schumann. + + +Changes from 1.0 to 1.1. +------------------------ +o Relicensed under dual MPL-GPL. + +o OpenBSD port. + +o Compile-time option to use poll() instead of select() for + event polling (see Makefile). + This is useful if you want to support a large number of open + file descriptors (larger than FD_SETSIZE) within a single + process. + +o Linux IA-64 port. + Two issues make IA-64 different from other platforms: + + - Besides the traditional call stack in memory, IA-64 uses the + general register stack. Thus each thread needs a backing store + for the register stack in addition to the memory stack. + + - Current implementation of setjmp()/longjmp() can not be used + for thread context-switching since it assumes that only one + register stack exists. Using special assembly functions for + context-switching is unavoidable. + +o Thread stack capping on IRIX. + This allows some profiling tools (such as SpeedShop) to know when + to stop unwinding the stack. Without this libexc, used by SpeedShop, + traces right off the stack and crashes. + +o Miscellaneous documentation additions. + + +COPYRIGHTS + +Portions created by SGI are Copyright (C) 2000 Silicon Graphics, Inc. +All Rights Reserved. diff --git a/trunk/research/st-1.9/common.h b/trunk/research/st-1.9/common.h new file mode 100644 index 000000000..4df39e9e4 --- /dev/null +++ b/trunk/research/st-1.9/common.h @@ -0,0 +1,466 @@ +/* + * The contents of this file are subject to the Mozilla Public + * License Version 1.1 (the "License"); you may not use this file + * except in compliance with the License. 
You may obtain a copy of + * the License at http://www.mozilla.org/MPL/ + * + * Software distributed under the License is distributed on an "AS + * IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or + * implied. See the License for the specific language governing + * rights and limitations under the License. + * + * The Original Code is the Netscape Portable Runtime library. + * + * The Initial Developer of the Original Code is Netscape + * Communications Corporation. Portions created by Netscape are + * Copyright (C) 1994-2000 Netscape Communications Corporation. All + * Rights Reserved. + * + * Contributor(s): Silicon Graphics, Inc. + * + * Portions created by SGI are Copyright (C) 2000-2001 Silicon + * Graphics, Inc. All Rights Reserved. + * + * Alternatively, the contents of this file may be used under the + * terms of the GNU General Public License Version 2 or later (the + * "GPL"), in which case the provisions of the GPL are applicable + * instead of those above. If you wish to allow use of your + * version of this file only under the terms of the GPL and not to + * allow others to use your version of this file under the MPL, + * indicate your decision by deleting the provisions above and + * replace them with the notice and other provisions required by + * the GPL. If you do not delete the provisions above, a recipient + * may use your version of this file under either the MPL or the + * GPL. + */ + +/* + * This file is derived directly from Netscape Communications Corporation, + * and consists of extensive modifications made during the year(s) 1999-2000. 
+ */ + +#ifndef __ST_COMMON_H__ +#define __ST_COMMON_H__ + +#include <stddef.h> +#include <unistd.h> +#include <sys/types.h> +#include <sys/time.h> +#include <setjmp.h> + +/* Enable assertions only if DEBUG is defined */ +#ifndef DEBUG +#define NDEBUG +#endif +#include <assert.h> +#define ST_ASSERT(expr) assert(expr) + +#define ST_BEGIN_MACRO { +#define ST_END_MACRO } + +#ifdef DEBUG +#define ST_HIDDEN /*nothing*/ +#else +#define ST_HIDDEN static +#endif + +#include "public.h" +#include "md.h" + + +/***************************************** + * Circular linked list definitions + */ + +typedef struct _st_clist { + struct _st_clist *next; + struct _st_clist *prev; +} _st_clist_t; + +/* Insert element "_e" into the list, before "_l" */ +#define ST_INSERT_BEFORE(_e,_l) \ + ST_BEGIN_MACRO \ + (_e)->next = (_l); \ + (_e)->prev = (_l)->prev; \ + (_l)->prev->next = (_e); \ + (_l)->prev = (_e); \ + ST_END_MACRO + +/* Insert element "_e" into the list, after "_l" */ +#define ST_INSERT_AFTER(_e,_l) \ + ST_BEGIN_MACRO \ + (_e)->next = (_l)->next; \ + (_e)->prev = (_l); \ + (_l)->next->prev = (_e); \ + (_l)->next = (_e); \ + ST_END_MACRO + +/* Return the element following element "_e" */ +#define ST_NEXT_LINK(_e) ((_e)->next) + +/* Append an element "_e" to the end of the list "_l" */ +#define ST_APPEND_LINK(_e,_l) ST_INSERT_BEFORE(_e,_l) + +/* Insert an element "_e" at the head of the list "_l" */ +#define ST_INSERT_LINK(_e,_l) ST_INSERT_AFTER(_e,_l) + +/* Return the head/tail of the list */ +#define ST_LIST_HEAD(_l) (_l)->next +#define ST_LIST_TAIL(_l) (_l)->prev + +/* Remove the element "_e" from its circular list */ +#define ST_REMOVE_LINK(_e) \ + ST_BEGIN_MACRO \ + (_e)->prev->next = (_e)->next; \ + (_e)->next->prev = (_e)->prev; \ + ST_END_MACRO + +/* Return non-zero if the given circular list "_l" is empty, */ +/* zero if the circular list is not empty */ +#define ST_CLIST_IS_EMPTY(_l) \ + ((_l)->next == (_l)) + +/* Initialize a circular list */ +#define ST_INIT_CLIST(_l) \ + ST_BEGIN_MACRO \ + (_l)->next = (_l); \ + (_l)->prev = (_l); \ + 
ST_END_MACRO + +#define ST_INIT_STATIC_CLIST(_l) \ + {(_l), (_l)} + + +/***************************************** + * Basic types definitions + */ + +typedef void (*_st_destructor_t)(void *); + + +typedef struct _st_stack { + _st_clist_t links; + char *vaddr; /* Base of stack's allocated memory */ + int vaddr_size; /* Size of stack's allocated memory */ + int stk_size; /* Size of usable portion of the stack */ + char *stk_bottom; /* Lowest address of stack's usable portion */ + char *stk_top; /* Highest address of stack's usable portion */ + void *sp; /* Stack pointer from C's point of view */ +#ifdef __ia64__ + void *bsp; /* Register stack backing store pointer */ +#endif +} _st_stack_t; + + +typedef struct _st_cond { + _st_clist_t wait_q; /* Condition variable wait queue */ +} _st_cond_t; + + +typedef struct _st_thread _st_thread_t; + +struct _st_thread { + int state; /* Thread's state */ + int flags; /* Thread's flags */ + + void *(*start)(void *arg); /* The start function of the thread */ + void *arg; /* Argument of the start function */ + void *retval; /* Return value of the start function */ + + _st_stack_t *stack; /* Info about thread's stack */ + + _st_clist_t links; /* For putting on run/sleep/zombie queue */ + _st_clist_t wait_links; /* For putting on mutex/condvar wait queue */ +#ifdef DEBUG + _st_clist_t tlink; /* For putting on thread queue */ +#endif + + st_utime_t due; /* Wakeup time when thread is sleeping */ + _st_thread_t *left; /* For putting in timeout heap */ + _st_thread_t *right; /* -- see docs/timeout_heap.txt for details */ + int heap_index; + + void **private_data; /* Per thread private data */ + + _st_cond_t *term; /* Termination condition variable for join */ + + jmp_buf context; /* Thread's context */ +}; + + +typedef struct _st_mutex { + _st_thread_t *owner; /* Current mutex owner */ + _st_clist_t wait_q; /* Mutex wait queue */ +} _st_mutex_t; + + +typedef struct _st_pollq { + _st_clist_t links; /* For putting on io queue */ + 
_st_thread_t *thread; /* Polling thread */ + struct pollfd *pds; /* Array of poll descriptors */ + int npds; /* Length of the array */ + int on_ioq; /* Is it on ioq? */ +} _st_pollq_t; + + +typedef struct _st_eventsys_ops { + const char *name; /* Name of this event system */ + int val; /* Type of this event system */ + int (*init)(void); /* Initialization */ + void (*dispatch)(void); /* Dispatch function */ + int (*pollset_add)(struct pollfd *, int); /* Add descriptor set */ + void (*pollset_del)(struct pollfd *, int); /* Delete descriptor set */ + int (*fd_new)(int); /* New descriptor allocated */ + int (*fd_close)(int); /* Descriptor closed */ + int (*fd_getlimit)(void); /* Descriptor hard limit */ +} _st_eventsys_t; + + +typedef struct _st_vp { + _st_thread_t *idle_thread; /* Idle thread for this vp */ + st_utime_t last_clock; /* The last time we went into vp_check_clock() */ + + _st_clist_t run_q; /* run queue for this vp */ + _st_clist_t io_q; /* io queue for this vp */ + _st_clist_t zombie_q; /* zombie queue for this vp */ +#ifdef DEBUG + _st_clist_t thread_q; /* all threads of this vp */ +#endif + int pagesize; + + _st_thread_t *sleep_q; /* sleep queue for this vp */ + int sleepq_size; /* number of threads on sleep queue */ + +#ifdef ST_SWITCH_CB + st_switch_cb_t switch_out_cb; /* called when a thread is switched out */ + st_switch_cb_t switch_in_cb; /* called when a thread is switched in */ +#endif +} _st_vp_t; + + +typedef struct _st_netfd { + int osfd; /* Underlying OS file descriptor */ + int inuse; /* In-use flag */ + void *private_data; /* Per descriptor private data */ + _st_destructor_t destructor; /* Private data destructor function */ + void *aux_data; /* Auxiliary data for internal use */ + struct _st_netfd *next; /* For putting on the free list */ +} _st_netfd_t; + + +/***************************************** + * Current vp, thread, and event system + */ + +extern _st_vp_t _st_this_vp; +extern _st_thread_t *_st_this_thread; +extern 
_st_eventsys_t *_st_eventsys; + +#define _ST_CURRENT_THREAD() (_st_this_thread) +#define _ST_SET_CURRENT_THREAD(_thread) (_st_this_thread = (_thread)) + +#define _ST_LAST_CLOCK (_st_this_vp.last_clock) + +#define _ST_RUNQ (_st_this_vp.run_q) +#define _ST_IOQ (_st_this_vp.io_q) +#define _ST_ZOMBIEQ (_st_this_vp.zombie_q) +#ifdef DEBUG +#define _ST_THREADQ (_st_this_vp.thread_q) +#endif + +#define _ST_PAGE_SIZE (_st_this_vp.pagesize) + +#define _ST_SLEEPQ (_st_this_vp.sleep_q) +#define _ST_SLEEPQ_SIZE (_st_this_vp.sleepq_size) + +#define _ST_VP_IDLE() (*_st_eventsys->dispatch)() /* Idle by running the event system's dispatch hook */ + + +/***************************************** + * vp queues operations + */ + +#define _ST_ADD_IOQ(_pq) ST_APPEND_LINK(&(_pq).links, &_ST_IOQ) /* _pq is a _st_pollq_t object, not a pointer */ +#define _ST_DEL_IOQ(_pq) ST_REMOVE_LINK(&(_pq).links) /* _pq is a _st_pollq_t object, not a pointer */ + +#define _ST_ADD_RUNQ(_thr) ST_APPEND_LINK(&(_thr)->links, &_ST_RUNQ) +#define _ST_DEL_RUNQ(_thr) ST_REMOVE_LINK(&(_thr)->links) + +#define _ST_ADD_SLEEPQ(_thr, _timeout) _st_add_sleep_q((_thr), (_timeout)) +#define _ST_DEL_SLEEPQ(_thr) _st_del_sleep_q((_thr)) + +#define _ST_ADD_ZOMBIEQ(_thr) ST_APPEND_LINK(&(_thr)->links, &_ST_ZOMBIEQ) +#define _ST_DEL_ZOMBIEQ(_thr) ST_REMOVE_LINK(&(_thr)->links) + +#ifdef DEBUG +#define _ST_ADD_THREADQ(_thr) ST_APPEND_LINK(&(_thr)->tlink, &_ST_THREADQ) +#define _ST_DEL_THREADQ(_thr) ST_REMOVE_LINK(&(_thr)->tlink) +#endif + + +/***************************************** + * Thread states and flags + */ + +#define _ST_ST_RUNNING 0 +#define _ST_ST_RUNNABLE 1 +#define _ST_ST_IO_WAIT 2 +#define _ST_ST_LOCK_WAIT 3 +#define _ST_ST_COND_WAIT 4 +#define _ST_ST_SLEEPING 5 +#define _ST_ST_ZOMBIE 6 +#define _ST_ST_SUSPENDED 7 + +#define _ST_FL_PRIMORDIAL 0x01 +#define _ST_FL_IDLE_THREAD 0x02 +#define _ST_FL_ON_SLEEPQ 0x04 +#define _ST_FL_INTERRUPT 0x08 +#define _ST_FL_TIMEDOUT 0x10 + + +/***************************************** + * Pointer conversion + */ + +#ifndef offsetof +#define offsetof(type, identifier) ((size_t)&(((type *)0)->identifier)) /* Fallback when <stddef.h> did not provide it */ +#endif + +#define 
_ST_THREAD_PTR(_qp) \ + ((_st_thread_t *)((char *)(_qp) - offsetof(_st_thread_t, links))) + +#define _ST_THREAD_WAITQ_PTR(_qp) \ + ((_st_thread_t *)((char *)(_qp) - offsetof(_st_thread_t, wait_links))) + +#define _ST_THREAD_STACK_PTR(_qp) \ + ((_st_stack_t *)((char*)(_qp) - offsetof(_st_stack_t, links))) + +#define _ST_POLLQUEUE_PTR(_qp) \ + ((_st_pollq_t *)((char *)(_qp) - offsetof(_st_pollq_t, links))) + +#ifdef DEBUG +#define _ST_THREAD_THREADQ_PTR(_qp) \ + ((_st_thread_t *)((char *)(_qp) - offsetof(_st_thread_t, tlink))) +#endif + + +/***************************************** + * Constants + */ + +#ifndef ST_UTIME_NO_TIMEOUT +#define ST_UTIME_NO_TIMEOUT ((st_utime_t) -1LL) +#endif + +#ifndef __ia64__ +#define ST_DEFAULT_STACK_SIZE (64*1024) +#else +#define ST_DEFAULT_STACK_SIZE (128*1024) /* Includes register stack size */ +#endif + +#ifndef ST_KEYS_MAX +#define ST_KEYS_MAX 16 +#endif + +#ifndef ST_MIN_POLLFDS_SIZE +#define ST_MIN_POLLFDS_SIZE 64 +#endif + + +/***************************************** + * Threads context switching + */ + +#ifdef DEBUG +void _st_iterate_threads(void); +#define ST_DEBUG_ITERATE_THREADS() _st_iterate_threads() +#else +#define ST_DEBUG_ITERATE_THREADS() +#endif + +#ifdef ST_SWITCH_CB +#define ST_SWITCH_OUT_CB(_thread) /* No callback for the idle thread or a zombie thread */ \ + if (_st_this_vp.switch_out_cb != NULL && \ + (_thread) != _st_this_vp.idle_thread && \ + (_thread)->state != _ST_ST_ZOMBIE) { \ + _st_this_vp.switch_out_cb(); \ + } +#define ST_SWITCH_IN_CB(_thread) /* No callback for the idle thread or a zombie thread */ \ + if (_st_this_vp.switch_in_cb != NULL && \ + (_thread) != _st_this_vp.idle_thread && \ + (_thread)->state != _ST_ST_ZOMBIE) { \ + _st_this_vp.switch_in_cb(); \ + } +#else +#define ST_SWITCH_OUT_CB(_thread) +#define ST_SWITCH_IN_CB(_thread) +#endif + +/* + * Switch away from the current thread context by saving its state and + * calling the thread scheduler + */ +#define _ST_SWITCH_CONTEXT(_thread) \ + ST_BEGIN_MACRO \ + ST_SWITCH_OUT_CB(_thread); \ + if (!MD_SETJMP((_thread)->context)) { \ + _st_vp_schedule(); \ + } \ + 
ST_DEBUG_ITERATE_THREADS(); \ + ST_SWITCH_IN_CB(_thread); \ + ST_END_MACRO + +/* + * Restore a thread context that was saved by _ST_SWITCH_CONTEXT or + * initialized by _ST_INIT_CONTEXT + */ +#define _ST_RESTORE_CONTEXT(_thread) \ + ST_BEGIN_MACRO \ + _ST_SET_CURRENT_THREAD(_thread); \ + MD_LONGJMP((_thread)->context, 1); \ + ST_END_MACRO + +/* + * Initialize the thread context preparing it to execute _main + */ +#ifdef MD_INIT_CONTEXT +#define _ST_INIT_CONTEXT MD_INIT_CONTEXT +#else +#error Unknown OS +#endif + +/* + * Number of bytes reserved under the stack "bottom" + */ +#define _ST_STACK_PAD_SIZE MD_STACK_PAD_SIZE + + +/***************************************** + * Forward declarations + */ + +void _st_vp_schedule(void); +void _st_vp_check_clock(void); +void *_st_idle_thread_start(void *arg); +void _st_thread_main(void); +void _st_thread_cleanup(_st_thread_t *thread); +void _st_add_sleep_q(_st_thread_t *thread, st_utime_t timeout); +void _st_del_sleep_q(_st_thread_t *thread); +_st_stack_t *_st_stack_new(int stack_size); +void _st_stack_free(_st_stack_t *ts); +int _st_io_init(void); + +st_utime_t st_utime(void); +_st_cond_t *st_cond_new(void); +int st_cond_destroy(_st_cond_t *cvar); +int st_cond_timedwait(_st_cond_t *cvar, st_utime_t timeout); +int st_cond_signal(_st_cond_t *cvar); +ssize_t st_read(_st_netfd_t *fd, void *buf, size_t nbyte, st_utime_t timeout); +ssize_t st_write(_st_netfd_t *fd, const void *buf, size_t nbyte, + st_utime_t timeout); +int st_poll(struct pollfd *pds, int npds, st_utime_t timeout); +_st_thread_t *st_thread_create(void *(*start)(void *arg), void *arg, + int joinable, int stk_size); + +#endif /* !__ST_COMMON_H__ */ + diff --git a/trunk/research/st-1.9/docs/fig.gif b/trunk/research/st-1.9/docs/fig.gif new file mode 100644 index 0000000000000000000000000000000000000000..7265a05db4f516b44fcf37c3949922ff4f62999d GIT binary patch literal 5374 zcmd6m^;;9(+s6+fj8H~5N)H$!A+63y4h$3-t#pcjh%^XDj2hiF5Jtn0Zs|rqY9OIV 
z>BppTd;EOA&+|V#=a+L`=en=^ykD<#A8lPNWhE;pr5(i~`0r`}fXp1lEEFXqlqobe zHby3s0e}L)0D#z2zy`p10M-Ct0ssvFjv7qS0uTZ;0eIs9^qBw@4H$4hY)S;m#00#& zKt2}K;{UH2fGKK#0uVB2aR&(Sw*rj@I2`5F)W0{-0busu-cW#M1i1I$?=cfC#4DsF zA0TQYrXb+C7l2`bE&g)IGN9}RN(AV?t)Bx57!bCF2LKjYCJ^GaL5Y2@gdczg11fTZ zC@gcKHU>Pvnm}_6^eBI^rG+>D8f^q zOaOug2{-_m`YVln->BBEhvC!rba>_*{FnqGX*9lCNvt2!{Mf;rjSUl ztT61E;a}as08Ius7?|P$&&XgCCJHV=7(q)fKwv2}y{J~PY-9L7&_&QwFNz45LQX^q1wXUNT@I>N2d0~}4jGiC$2LG-|Ct1>BAfOU zE~=sYeT8wo^U6p=#eB`f z)G{@3D683ijc%eXi68fO|GF&j@)lcXTa!mu7i-tV^2z?o&-uDi`PVaj_xC0ZF2aM_ z{Xcw~ttZLk-{JN5{joimJXvkF?+{w{%0FM`_?p+j*I%oHlR@M_^0$UT{y^%^)SRGq zRGOx3O*l${BQrG)P9J5H#B@1hJ?@N-D?iatQY=4~@qDyh zs`<$KkEt4}#daAMY25Q6#Kg?kY4(jfD;aJB!^8};5vMIDuMcHg_Fm;JTlt~;blU}K z?{&8eJCCEcaUt9z+eLmV^gG4Rbx}L7-Ayn%B`!`QJEiu%^t*VgXw+_*c_wDJ99=fD zTS28oTnAffer2165M5>!UHNbRU4K@zo7*YBzlAFlN63D9CyG zh;G^pedvf-uX+gLa#N+}n8b{F{kYs{W&OmpEm!#|iBoljNlgX|`3e1R?Z3zG(OT3S z2q(iI_6a;YLl3bFGa9}(y0x$1NIe7hy7PyH;Xse6#+7e=&pTUDSC=#D*=-kL)fXfk z_Xj#29lm?~KDtzoU2PZlJKJ7pQ#S1=TK~SB$K0EctI6sC5h%|!IQ^7*)&OI0h1 zsOXoX{+zV6%4aw6?K5xLb=Vkx&z-V8mehF1kKBtvZQnR8Ivq#yA5fnaH{U2$D~`}h zIV+r$7g2w$rtNk1`kjo3f9<@@>NSm5`KAHLMTY8`gB%>w>W<52FS&~MPX~ief7KY0 ziVoGx&JGJc-@u*}`*D#^l)A&gcSCWhOZ9AymjPdG5YoC}nVRJ~WI92N$$V`z&7lXf zL~U1}g&k3i!k&42H`EweJNsFR{32Z(D$>VHGpH>I&#etR%(f(38z-JNK!Zu@Uka(2 zK(B!lCV~ULdfq}O@uK9N&`Xu;nv#*df^$=trk@u}4>mPgWZk0?wtOtwoE?WxP^Jdq z1ssXuELX0}Mtu^F=H-pjR7|stw+xTIP`ITgPmuBTnVs2h9qLfsjT6-zo9Cf;rG}iA z^o@_@;JZ6LBpd3Il)|?rkSv<4gL4VVD>mT26_Y64BWsmh`UHdV7 zSF+@R3xDr|mdeSsR~%(U40I0g8;1%_nLh%z)FsrY=I+!KDkvnG?WiYDw?~M$7LZ*8 zuHRj{x@hvCNc-G*eUD+bbk*>J&9`%gfC=X2t8At;$u(VT;c-*V zr?8c$WP({akA_(}UHlIoIhnob&9)WU#9xj@mdmy1_JKIYfR8R04~x-CPWR)(@_q?} z8O~eOZWAFpdpX}(E_XFn<%e+VuThcFLDGJSFDq=;`J9^HvWrb)ksBZMzrM)hYrGqV z@>zrOF7$KIT#39Hu+EULq$TY+8RM1nk^i)LaFIC#8>#*4r=)LBKX^2D<-&JTilgVIV%zb!dwhM)mj0F! 
z?b-wd43XU8`kAiRxX=26Le`-AM`XIM;6HxViIIC-ub`Qi5(I{@aYxoSlz233N<1?C z!`Ck4XU=d^(@6=(<#65&pQS}yeIjMC#`x0sMx+)~C9~_c_$%Yviw_z+wQ(-r?W6m7 zd@XB)H`OQ*OADRXYs%(1zw}VJFC@ObqFazVLKE4tDCFCS4W#GYGE$y=R(QcD zQcl%k0e`6didAoOOzW6%b!D9`zx|zZ@KC$0W!Y;;I|_zs8GS)|mKUblZ-7i|iz0m} zS!l0UvSjhnP!N=1IN7?saXflsL$2IE+N?!HMXqh-xsyC`)ZC$U;q`?^LgpWRE)|bg zcP<~~oLUUfT~J*$yY3@#hf9 zy7cz#p*L$pMj}sA)4v(+v2^+tUwmykwdAm?XQ(#UL@KCJvV5$VRxU8m-=zx)HYNMCyEOb+g{n}6u~ZoS#1<lz_AHv>M$_g`n zBB>K<@$%)BAE7SnVP80vShuCQ>OvblSrs5pt2e?>??Tm@S#QC@nmxm#r^8T=?2!II zMXp=r$Z$h>qjjZM551U;bl7eVv6|wwR;OPrzU8)RM%o;O!+ne&LEK7^5jP4=pJcc` z#YYtNgg5p}yGMmTi;9#th?Lj4YUB}7C@kq8rATT)vxnn#mOP^n+x#&{T5Ri43RdAJ z%+b5nQQoj9&F#q9`_d_np?dEkc_El=Wwx~KXmxzRJsr_f{KJw3Dfke3#x^{Q1+&f( zR19-!)-fBJj*+N~34_FT^^t_xg!i> zJ9cp!-R&87u`cGrP|Wg?&bm*0Dn8gMF}7AnVx!qFY9l^9HNK)5_9Nrrg~SA3bf|4% z{62*F%#nGM%WDhsUFd3K6Fl2zOGI?j_M}<@0x-Sl$>mI)9g&r?>c2* zOrVcd(u1Ref(U8x;+N|OQ$2?>9+{+Pv}Ejir}@%Z$Q)<*HYJ5yMW)fEV|3H)qSGtr zoXQ$Ajmpw+y6&1UxD-uNJ&)ZUEoN0oIMocN*X^Vy7-xHX#Me9V8Q)Ko(-mA4&xDKS z>QSB!iUhbGcXQa-!>V-f*r@%aAkV%`NK z=ir`Pox{A#9(lYk^Sib4Y3=gQ8wZ~4Y#p%`;ZfBa*wu zxtxaECr&l9#kA6brp#TtvUj9Ht0kkFr!t2}tfAytv9qVYv(qPovb@ig3Ycu9cvTHg zMQ~KWhpd>jR>xJ#(2Q<8P2Nc+kHN7T4_RCer3a2b+k&xUqqJIQh~QllIBf= zY6Z%_vRjKk8dF8utlw09^Wz}%yT=;^UVd3I-=U?328@%mL*@NZ9~<60BeftVrFv6h z&C@MBLXBXtS8t$KuFh-!NUef|ipyAk+P&0xF-Ayz&)HY%4ckFIFN=0K=NqT&yyJn! zkiACMk+j3c@HiJD#**T|0$Y8*1hsQD-U*Wml2>)lw>Q z$1=QHo1V7(6#YeP7K*^@-G3`6xvN?oD;sYU^J*u$-W@9!gCHzJ?gQRhX z9zo9mgks;(NJEx)xqN_=?djF)rGrn-1WmYy?h#5u19*|IXq`D64)+8RDJvE%T0I2{(zi;vYgg~pB9`f-ffIS zxwQ6Tv18{rhEEFz%`253FLh6=kI$Nq7iEY@orTB*l!Y(%JV%wxxrPLM8UKezW|D70 z)=6MA)8;)FPTa5iuycU0@VeQ~>W7u;ne zHm|aM@>G9XFWdfPY$A?PStGFj!uer*?6{@$)Z^UQ2RyxE<7t+>FO;`tOjauNEoS$> z%*wAgE9MG3m!3&coe1`u(C?dZxb3^_ToyJsS3NuC*)eZ(Ui9~wV8qJQFr$Fl*P;OJ z_inVlF#g%wO0yZlkw?qC?DIa;dXoWJ(^&xvnyNv2e37|d7jt};GI$5!9VLAYOWUr! 
nQO3mPTw-env3-Tu`G?rezx?*va-YTWK;ZIF?s5+c6sY|V5v;Zh literal 0 HcmV?d00001 diff --git a/trunk/research/st-1.9/docs/notes.html b/trunk/research/st-1.9/docs/notes.html new file mode 100644 index 000000000..5a24369e2 --- /dev/null +++ b/trunk/research/st-1.9/docs/notes.html @@ -0,0 +1,434 @@ + + +State Threads Library Programming Notes + + +

Programming Notes

+

+ +

+ +

+


+

+ +

Porting

+The State Threads library uses OS concepts that are available in some +form on most UNIX platforms, making the library very portable across +many flavors of UNIX. However, there are several parts of the library +that rely on platform-specific features. Here is the list of such parts: +

+

    +
  • Thread context initialization: Two ingredients of the +jmp_buf +data structure (the program counter and the stack pointer) have to be +manually set in the thread creation routine. The jmp_buf data +structure is defined in the setjmp.h header file and differs from +platform to platform. Usually the program counter is a structure member +with PC in the name and the stack pointer is a structure member +with SP in the name. One can also look in the +Netscape's NSPR library source +which already has this code for many UNIX-like platforms +(mozilla/nsprpub/pr/include/md/*.h files). +

    +Note that on some BSD-derived platforms _setjmp(3)/_longjmp(3) +calls should be used instead of setjmp(3)/longjmp(3) (that is +the calls that manipulate only the stack and registers and do not +save and restore the process's signal mask).

  • +

    +Starting with glibc 2.4 on Linux the opacity of the jmp_buf data +structure is enforced by setjmp(3)/longjmp(3) so the +jmp_buf ingredients cannot be accessed directly anymore (unless +special environmental variable LD_POINTER_GUARD is set before application +execution). To avoid dependency on custom environment, the State Threads +library provides setjmp/longjmp replacement functions for +all Intel CPU architectures. Other CPU architectures can also be easily +supported (the setjmp/longjmp source code is widely available for +many CPU architectures). +

    +

  • High resolution time function: Some platforms (IRIX, Solaris) +provide a high resolution time function based on the free running hardware +counter. This function returns the time counted since some arbitrary +moment in the past (usually machine power up time). It is not correlated in +any way to the time of day, and thus is not subject to resetting, +drifting, etc. This type of time is ideal for tasks where cheap, accurate +interval timing is required. If such a function is not available on a +particular platform, the gettimeofday(3) function can be used +(though on some platforms it involves a system call). +

    +

  • The stack growth direction: The library needs to know whether the +stack grows toward lower (down) or higher (up) memory addresses. +One can write a simple test program that detects the stack growth direction +on a particular platform.
  • +

    +

  • Non-blocking attribute inheritance: On some platforms (e.g. IRIX) +the socket created as a result of the accept(2) call inherits the +non-blocking attribute of the listening socket. One needs to consult the manual +pages or write a simple test program to see if this applies to a specific +platform.
  • +

    +

  • Anonymous memory mapping: The library allocates memory segments +for thread stacks by doing anonymous memory mapping (mmap(2)). This +mapping is somewhat different on SVR4 and BSD4.3 derived platforms. +

    +The memory mapping can be avoided altogether by using malloc(3) for +stack allocation. In this case the MALLOC_STACK macro should be +defined.

  • +
+

+All machine-dependent feature test macros should be defined in the +md.h header file. The assembly code for setjmp/longjmp +replacement functions for all CPU architectures should be placed in +the md.S file. +

+The current version of the library is ported to: +

    +
  • IRIX 6.x (both 32 and 64 bit)
  • +
  • Linux (kernel 2.x and glibc 2.x) on x86, Alpha, MIPS and MIPSEL, + SPARC, ARM, PowerPC, 68k, HPPA, S390, IA-64, and Opteron (AMD-64)
  • +
  • Solaris 2.x (SunOS 5.x) on x86, AMD64, SPARC, and SPARC-64
  • +
  • AIX 4.x
  • +
  • HP-UX 11 (both 32 and 64 bit)
  • +
  • Tru64/OSF1
  • +
  • FreeBSD on x86, AMD64, and Alpha
  • +
  • OpenBSD on x86, AMD64, Alpha, and SPARC
  • +
  • NetBSD on x86, Alpha, SPARC, and VAX
  • +
  • MacOS X (Darwin) on PowerPC (32 bit) and Intel (both 32 and 64 bit) [universal]
  • +
  • Cygwin
  • +
+

+ + +

Signals

+Signal handling in an application using State Threads should be treated the +same way as in a classical UNIX process application. There is no such +thing as a per-thread signal mask; all threads share the same signal handlers, +and only async-signal-safe functions can be used in signal handlers. +However, there is a way to process signals synchronously by converting a +signal event to an I/O event: a signal catching function does a write to +a pipe which will be processed synchronously by a dedicated signal handling +thread. The following code demonstrates this technique (error handling is +omitted for clarity): +
+
+/* Per-process pipe which is used as a signal queue. */
+/* Up to PIPE_BUF/sizeof(int) signals can be queued up. */
+int sig_pipe[2];
+
+/* Signal catching function. */
+/* Converts signal event to I/O event. */
+void sig_catcher(int signo)
+{
+  int err;
+
+  /* Save errno to restore it after the write() */
+  err = errno;
+  /* write() is reentrant/async-safe */
+  write(sig_pipe[1], &signo, sizeof(int));
+  errno = err;
+}
+
+/* Signal processing function. */
+/* This is the "main" function of the signal processing thread. */
+void *sig_process(void *arg)
+{
+  st_netfd_t nfd;
+  int signo;
+
+  nfd = st_netfd_open(sig_pipe[0]);
+
+  for ( ; ; ) {
+    /* Read the next signal from the pipe */
+    st_read(nfd, &signo, sizeof(int), ST_UTIME_NO_TIMEOUT);
+
+    /* Process signal synchronously */
+    switch (signo) {
+    case SIGHUP:
+      /* do something here - reread config files, etc. */
+      break;
+    case SIGTERM:
+      /* do something here - cleanup, etc. */
+      break;
+      /*      .
+              .
+         Other signals
+              .
+              .
+      */
+    }
+  }
+
+  return NULL;
+}
+
+int main(int argc, char *argv[])
+{
+  struct sigaction sa;
+        .
+        .
+        .
+
+  /* Create signal pipe */
+  pipe(sig_pipe);
+
+  /* Create signal processing thread */
+  st_thread_create(sig_process, NULL, 0, 0);
+
+  /* Install sig_catcher() as a signal handler */
+  sa.sa_handler = sig_catcher;
+  sigemptyset(&sa.sa_mask);
+  sa.sa_flags = 0;
+  sigaction(SIGHUP, &sa, NULL);
+
+  sa.sa_handler = sig_catcher;
+  sigemptyset(&sa.sa_mask);
+  sa.sa_flags = 0;
+  sigaction(SIGTERM, &sa, NULL);
+
+        .
+        .
+        .
+      
+}
+
+
+

+Note that if multiple processes are used (see below), the signal pipe should +be initialized after the fork(2) call so that each process has its +own private pipe. +

+ + +

Intra-Process Synchronization

+Due to the event-driven nature of the library scheduler, the thread context +switch (process state change) can only happen in a well-known set of +library functions. This set includes functions in which a thread may +"block": I/O functions (st_read(), st_write(), etc.), +sleep functions (st_sleep(), etc.), and thread synchronization +functions (st_thread_join(), st_cond_wait(), etc.). As a result, +process-specific global data need not be protected by locks since a thread +cannot be rescheduled while in a critical section (and only one thread at a +time can access the same memory location). By the same token, +non-thread-safe functions (in a traditional sense) can be safely used with +State Threads. The library's mutex facilities are practically useless +for a correctly written application (no blocking functions in critical +sections) and are provided mostly for completeness. This absence of locking +greatly simplifies an application design and provides a foundation for +scalability. +

+ + +

Inter-Process Synchronization

+The State Threads library makes it possible to multiplex a large number +of simultaneous connections onto a much smaller number of separate +processes, where each process uses a many-to-one user-level threading +implementation (N of M:1 mappings rather than one M:N +mapping used in native threading libraries on some platforms). This design +is key to the application's scalability. One can think about it as if a +set of all threads is partitioned into separate groups (processes) where +each group has a separate pool of resources (virtual address space, file +descriptors, etc.). An application designer has full control of how many +groups (processes) an application creates and what resources, if any, +are shared among different groups via standard UNIX inter-process +communication (IPC) facilities.

+There are several reasons for creating multiple processes: +

+

    +
  • To take advantage of multiple hardware entities (CPUs, disks, etc.) +available in the system (hardware parallelism).
  • +

    +

  • To reduce risk of losing a large number of user connections when one of +the processes crashes. For example, if C user connections (threads) +are multiplexed onto P processes and one of the processes crashes, +only a fraction (C/P) of all connections will be lost.
  • +

    +

  • To overcome per-process resource limitations imposed by the OS. For +example, if select(2) is used for event polling, the number of +simultaneous connections (threads) per process is +limited by the FD_SETSIZE parameter (see select(2)). +If FD_SETSIZE is equal to 1024 and each connection needs one file +descriptor, then an application should create 10 processes to support 10,000 +simultaneous connections.
  • +
+

+Ideally all user sessions are completely independent, so there is no need for +inter-process communication. It is always better to have several separate +smaller process-specific resources (e.g., data caches) than to have one large +resource shared (and modified) by all processes. Sometimes, however, there +is a need to share a common resource among different processes. In that case, +standard UNIX IPC facilities can be used. In addition to that, there is a way +to synchronize different processes so that only the thread accessing the +shared resource will be suspended (but not the entire process) if that resource +is unavailable. In the following code fragment a pipe is used as a counting +semaphore for inter-process synchronization: +

+#ifndef PIPE_BUF
+#define PIPE_BUF 512  /* POSIX */
+#endif
+
+/* Semaphore data structure */
+typedef struct ipc_sem {
+  st_netfd_t rdfd;  /* read descriptor */
+  st_netfd_t wrfd;  /* write descriptor */
+} ipc_sem_t;
+
+/* Create and initialize the semaphore. Should be called before fork(2). */
+/* 'value' must be less than PIPE_BUF. */
+/* If 'value' is 1, the semaphore works as mutex. */
+ipc_sem_t *ipc_sem_create(int value)
+{
+  ipc_sem_t *sem;
+  int p[2];
+  char b[PIPE_BUF];
+
+  /* Error checking is omitted for clarity */
+  sem = malloc(sizeof(ipc_sem_t));
+
+  /* Create the pipe */
+  pipe(p);
+  sem->rdfd = st_netfd_open(p[0]);
+  sem->wrfd = st_netfd_open(p[1]);
+
+  /* Initialize the semaphore: put 'value' bytes into the pipe */
+  write(p[1], b, value);
+
+  return sem;
+}
+
+/* Try to decrement the "value" of the semaphore. */
+/* If "value" is 0, the calling thread blocks on the semaphore. */
+int ipc_sem_wait(ipc_sem_t *sem)
+{
+  char c;
+
+  /* Read one byte from the pipe */
+  if (st_read(sem->rdfd, &c, 1, ST_UTIME_NO_TIMEOUT) != 1)
+    return -1;
+
+  return 0;
+}
+
+/* Increment the "value" of the semaphore. */
+int ipc_sem_post(ipc_sem_t *sem)
+{
+  char c;
+
+  if (st_write(sem->wrfd, &c, 1, ST_UTIME_NO_TIMEOUT) != 1)
+    return -1;
+
+  return 0;
+}
+
+
+

+ +Generally, the following steps should be followed when writing an application +using the State Threads library: +

+

    +
  1. Initialize the library (st_init()).
  2. +

    +

  3. Create resources that will be shared among different processes: + create and bind listening sockets, create shared memory segments, IPC + channels, synchronization primitives, etc.
  4. +

    +

  5. Create several processes (fork(2)). The parent process should + either exit or become a "watchdog" (e.g., it starts a new process when + an existing one crashes, does a cleanup upon application termination, + etc.).
  6. +

    +

  7. In each child process create a pool of threads + (st_thread_create()) to handle user connections.
  8. +
+

+ + +

Non-Network I/O

+ +The State Threads architecture uses non-blocking I/O on +st_netfd_t objects for concurrent processing of multiple user +connections. This architecture has a drawback: the entire process and +all its threads may block for the duration of a disk or other +non-network I/O operation, whether through State Threads I/O functions, +direct system calls, or standard I/O functions. (This is applicable +mostly to disk reads; disk writes are usually performed +asynchronously -- data goes to the buffer cache to be written to disk +later.) Fortunately, disk I/O (unlike network I/O) usually takes a +finite and predictable amount of time, but this may not be true for +special devices or user input devices (including stdin). Nevertheless, +such I/O reduces throughput of the system and increases response times. +There are several ways to design an application to overcome this +drawback: + +

+

+

+ + +

Timeouts

+ +The timeout parameter to st_cond_timedwait() and the +I/O functions, and the arguments to st_sleep() and +st_usleep() specify a maximum time to wait since the last +context switch, not since the beginning of the function call. + +

The State Threads' time resolution is actually the time interval +between context switches. That time interval may be large in some +situations, for example, when a single thread does a lot of work +continuously. Note that a steady, uninterrupted stream of network I/O +qualifies for this description; a context switch occurs only when a +thread blocks. + +

If a specified I/O timeout is less than the time interval between +context switches the function may return with a timeout error before +that amount of time has elapsed since the beginning of the function +call. For example, if eight milliseconds have passed since the last +context switch and an I/O function with a timeout of 10 milliseconds +blocks, causing a switch, the call may return with a timeout error as +little as two milliseconds after it was called. (On Linux, +select()'s timeout is an upper bound on the amount of +time elapsed before select returns.) Similarly, if 12 ms have passed +already, the function may return immediately. + +

In almost all cases I/O timeouts should be used only for detecting a +broken network connection or for preventing a peer from holding an idle +connection for too long. Therefore, for most applications, realistic I/O +timeouts should be on the order of seconds. Furthermore, there's +probably no point in retrying operations that time out. Rather than +retrying, simply use a larger timeout in the first place. + +

The largest valid timeout value is platform-dependent and may be +significantly less than INT_MAX seconds for select() +or INT_MAX milliseconds for poll(). Generally, you +should not use timeouts exceeding several hours. Use +ST_UTIME_NO_TIMEOUT (-1) as a special value to +indicate infinite timeout or indefinite sleep. Use +ST_UTIME_NO_WAIT (0) to indicate no waiting at all. + +

+


+

+ + + diff --git a/trunk/research/st-1.9/docs/reference.html b/trunk/research/st-1.9/docs/reference.html new file mode 100644 index 000000000..3c9c7bd78 --- /dev/null +++ b/trunk/research/st-1.9/docs/reference.html @@ -0,0 +1,3120 @@ + + +State Threads Library Reference + + + +

State Threads Library Reference

+ +
+
Types
+
st_thread_t
+
st_cond_t
+
st_mutex_t
+
st_utime_t
+
st_netfd_t
+
st_switch_cb_t
+

+

Error Handling
+

+

Library Initialization
+

+

st_init()
+
st_getfdlimit()
+
st_set_eventsys()
+
st_get_eventsys()
+
st_get_eventsys_name()
+
st_set_utime_function()
+
st_timecache_set()
+
st_randomize_stacks()
+

+

st_switch_cb_t type
+
st_set_switch_in_cb()
+
st_set_switch_out_cb()
+

+

Thread Control and Identification
+

+

st_thread_t type
+
st_thread_create()
+
st_thread_exit()
+
st_thread_join()
+
st_thread_self()
+
st_thread_interrupt()
+
st_sleep()
+
st_usleep()
+
st_randomize_stacks()
+

+

Per-Thread Private Data
+

+

st_key_create()
+
st_key_getlimit()
+
st_thread_setspecific()
+
st_thread_getspecific()
+

+

Synchronization
+

+

st_cond_t type
+
st_cond_new()
+
st_cond_destroy()
+
st_cond_wait()
+
st_cond_timedwait()
+
st_cond_signal()
+
st_cond_broadcast()
+

+

st_mutex_t type
+
st_mutex_new()
+
st_mutex_destroy()
+
st_mutex_lock()
+
st_mutex_trylock()
+
st_mutex_unlock()
+

+

Timing
+

+

st_utime_t type
+
st_utime()
+
st_set_utime_function()
+
st_timecache_set()
+
st_time()
+

+

I/O Functions
+

+

st_netfd_t type
+
st_netfd_open()
+
st_netfd_open_socket()
+
st_netfd_free()
+
st_netfd_close()
+
st_netfd_fileno()
+
st_netfd_setspecific()
+
st_netfd_getspecific()
+
st_netfd_serialize_accept()
+
+
st_netfd_poll()
+

+

st_accept()
+
st_connect()
+
st_read()
+
st_read_fully()
+
st_read_resid()
+
st_readv()
+
st_readv_resid()
+
st_write()
+
st_write_resid()
+
st_writev()
+
st_writev_resid()
+
st_recvfrom()
+
st_sendto()
+
st_recvmsg()
+
st_sendmsg()
+

+

st_open()
+
st_poll()
+

+

Program Structure
+

+

List of Blocking Functions
+

+

+

+


+

+ + + +

Types

+ +The State Threads library defines the following types in the st.h +header file: +

+

+
st_thread_t
+
st_cond_t
+
st_mutex_t
+
st_utime_t
+
st_netfd_t
+
+

+


+

+ + +

st_thread_t

+ +Thread type. +

+

Syntax
+ +
+#include <st.h>
+
+typedef void *  st_thread_t;
+
+

+

Description
+ +A thread is represented and identified by a pointer to an opaque data +structure. This pointer is a required parameter for most of the functions +that operate on threads. +

+The thread identifier remains valid until the thread returns from its root +function and, if the thread was created joinable, is joined. +

+


+

+ + +

st_cond_t

+ +Condition variable type. +

+

Syntax
+ +
+#include <st.h>
+
+typedef void *  st_cond_t;
+
+

+

Description
+ +A condition variable is an opaque object identified by a pointer. +Condition variables provide synchronization primitives to wait for or wake +up threads waiting for certain conditions to be satisfied. +

+In the State Threads library there is no need to lock a mutex before +waiting on a condition variable. +

+


+

+ + +

st_mutex_t

+ +Mutex type. +

+

Syntax
+ +
+#include <st.h>
+
+typedef void *  st_mutex_t;
+
+

+

Description
+ +A mutex is an opaque object identified by a pointer. +Mutual exclusion locks (mutexes) are used to serialize the execution of +threads through critical sections of code. +

+If an application using the State Threads library is written with no +I/O or control yielding in critical sections (that is, no +blocking functions in critical sections), then there is +no need for mutexes.

+These mutexes can only be used for intra-process thread synchronization. +They cannot be used for inter-process synchronization. +

+


+

+ + +

st_utime_t

+ +High resolution time type ("u" stands for "micro"). +

+

Syntax
+ +
+#include <st.h>
+
+typedef unsigned long long  st_utime_t;
+
+

+

Description
+ +This datatype (unsigned 64-bit integer) represents high-resolution real time +expressed in microseconds since some arbitrary time in the past. It is not +correlated in any way to the time of day. +

+


+

+ + +

st_netfd_t

+ +File descriptor type. +

+

Syntax
+ +
+#include <st.h>
+
+typedef void *  st_netfd_t;
+
+

+

Description
+ +This datatype typically represents any open end point of network +communication (socket, end point of a pipe, FIFO, etc.) but can +encapsulate any open file descriptor. Objects of this type are +identified by a pointer to an opaque data structure. + +

+


+

+ + +

st_switch_cb_t

+ +Context switch callback function type. +

+

Syntax
+ +
+#include <st.h>
+
+typedef void (*st_switch_cb_t)(void);
+
+

+

Description
+ +This datatype is a convenience type for describing a pointer +to a function that will be called when a thread is set to stop +or set to run. +This feature is available only when ST_SWITCH_CB is defined +in <st.h>. + +

+


+

+ + +

Error Handling

+ + +All State Threads library non-void functions return on success either a +non-negative integer or a pointer to a newly created object (constructor-type +functions). On failure they return either -1 or a NULL +pointer respectively and set global errno to indicate the error. +It is safe to use errno because it is set right before the function +return and only one thread at a time can modify its value.

+The perror(3) function can be used to produce an error message on the +standard error output. +

+


+

+ + +

Library Initialization

+ +

+

+
st_init()
+
st_getfdlimit()
+
st_set_eventsys()
+
st_get_eventsys()
+
st_get_eventsys_name()
+

+These functions operate on a callback function of type +st_switch_cb_t: +

st_set_switch_in_cb()
+
st_set_switch_out_cb()
+
+

+


+

+ + +

st_init()

+ +Initializes the runtime. +

+

Syntax
+ +
+#include <st.h>
+
+int st_init(void);
+
+

+

Parameters
+None. +

+

Returns
+Upon successful completion, a value of 0 is returned. +Otherwise, a value of -1 is returned and errno is set +to indicate the error. +

+

Description
+This function initializes the library runtime. It should be called near +the beginning of the application's main() function before any other +State Threads library function is called.

+Among other things, this function limits the number of open file descriptors +to the OS imposed per-process maximum number or, if select(2) is +used, to FD_SETSIZE, whichever is less (getrlimit(2)). +This limit can be +retrieved by st_getfdlimit(). It also sets the +disposition of the SIGPIPE signal to SIG_IGN (to be ignored) +(signal(5)). +

+Unlike POSIX threads, a new process created by the fork(2) system +call is an exact copy of the calling process and all state threads +which are running in the parent do exist in the child. That means that +st_init() may be called either before or after multiple processes +are created by fork(2). +

+If the library runtime is not properly initialized (e.g., st_init() +is accidentally omitted), then the process will receive either an arithmetic +exception (SIGFPE or SIGTRAP) or segmentation fault (SIGSEGV) signal upon +new thread creation or the first context switch, respectively. +

+


+

+ +

st_getfdlimit()

+ +Returns the maximum number of file descriptors that the calling process +can open. +

+

Syntax
+ +
+#include <st.h>
+
+int st_getfdlimit(void);
+
+

+

Parameters
+None. +

+

Returns
+The maximum number of file descriptors that the calling process can open. +If this function is called before the library is successfully initialized by +st_init(), a value of -1 is returned. +

+

Description
+This function returns the limit on the number of open file descriptors which +is set by the st_init() function. +

+


+

+ + +

st_set_eventsys()

+ +Sets the event notification mechanism. +

+

Syntax
+ +
+#include <st.h>
+
+int st_set_eventsys(int eventsys);
+
+

+

Parameters
+st_set_eventsys() has the following parameter:

+eventsys

+An integer value identifying selected event notification mechanism. The +following values are defined in the st.h header file: +

+ + + + + + + + + + + + + + + +
ST_EVENTSYS_DEFAULTUse default event notification mechanism. Usually it's select(2) +but if the library was compiled with the USE_POLL macro defined +then the default is poll(2).
ST_EVENTSYS_SELECTUse select(2) as an event notification mechanism.
ST_EVENTSYS_POLLUse poll(2) as an event notification mechanism.
ST_EVENTSYS_ALTUse an alternative event notification mechanism. The actual +mechanism selected depends on OS support. For example, epoll(4) +will be used on Linux if supported and kqueue(2) will be used +on FreeBSD/OpenBSD. If the OS supports no alternative event +notification mechanism, setting ST_EVENTSYS_ALT has no effect +and the ST_EVENTSYS_DEFAULT mechanism will be used.
+

+

Returns
+Upon successful completion, a value of 0 is returned. +Otherwise, a value of -1 is returned and errno is set +to indicate the error:

+ + + + + +
EINVAL +The supplied eventsys parameter has an invalid value. +
EBUSY +The event notification mechanism has already been set. +
+

+

Description
+This function sets the event notification mechanism that will be used by +the State Threads library. To have any effect, it must be called +before the st_init() function which performs +the actual initialization. If st_set_eventsys() is not called, +st_init() will set the ST_EVENTSYS_DEFAULT +mechanism. The mechanism cannot be changed once set. +

+There are no strict rules for selecting an event notification +mechanism. The "best" one depends on how your application behaves. +Try a few to see which one works best for you. As a rule of +thumb, you should use the ST_EVENTSYS_ALT mechanism if your +application deals with a very large number of network connections of +which only a few are active at once. +

+


+

+ +

st_get_eventsys()

+ +Returns the integer value identifying the event notification mechanism +being used by the State Threads library. +

+

Syntax
+ +
+#include <st.h>
+
+int st_get_eventsys(void);
+
+

+

Parameters
+None. +

+

Returns
+The integer value identifying the current event notification mechanism. +This value can be one of the following (see st_set_eventsys()): +ST_EVENTSYS_SELECT, ST_EVENTSYS_POLL, or +ST_EVENTSYS_ALT. Future versions of the library may return other +values. If a mechanism hasn't been set yet, a value of -1 is returned. +

+

Description
+This function returns the integer value identifying the event notification +mechanism which is actually being used by the State Threads library. +

+


+

+ +

st_get_eventsys_name()

+ +Returns the name of the event notification mechanism being used by the +State Threads library. +

+

Syntax
+ +
+#include <st.h>
+
+const char *st_get_eventsys_name(void);
+
+

+

Parameters
+None. +

+

Returns
+The string identifying the current event notification mechanism. If a +mechanism hasn't been set yet (see st_set_eventsys()), an empty string is +returned. Possible return values are "select", +"poll", "kqueue", or "epoll". Future versions +of the library may return other values. +

+

Description
+This function returns the string identifying the event notification +mechanism which is actually being used by the State Threads library. +

+


+

+ + +

st_set_switch_in_cb()

+ + +

st_set_switch_out_cb()

+
+Set the optional callback function for thread switches. +

+

Syntax
+ +
+#include <st.h>
+
+st_switch_cb_t st_set_switch_in_cb(st_switch_cb_t cb);
+st_switch_cb_t st_set_switch_out_cb(st_switch_cb_t cb);
+
+

+

Parameters
+st_set_switch_in_cb() and st_set_switch_out_cb() have the +following parameter:

+cb

+A function to be called when a thread is resumed and stopped respectively.

+

Returns
+The previous callback function pointer. +

+

Description
+These functions set the callback for when a thread is resumed and stopped +respectively. Once a callback has been set, every thread switch will invoke it. +Use a NULL pointer to disable the callback (this is the default). +Use st_thread_self() or thread +specific data to differentiate between threads.

+These functions can be called at any time.

+This feature is available only when ST_SWITCH_CB is defined +in <st.h>. +

+


+

+ + +

Thread Control and Identification

+ +

+These functions operate on a thread object of type +st_thread_t. +

+

+
st_thread_create()
+
st_thread_exit()
+
st_thread_join()
+
st_thread_self()
+
st_thread_interrupt()
+
st_sleep()
+
st_usleep()
+
st_randomize_stacks()
+
+

+


+

+ +

st_thread_create()

+ +Creates a new thread. +

+

Syntax
+ +
+#include <st.h>
+
+st_thread_t st_thread_create(void *(*start)(void *arg), void *arg,
+                             int joinable, int stack_size);
+
+
+

+

Parameters
+st_thread_create() has the following parameters:

+start

+A pointer to the thread's start function, which is called as the root of the +new thread. Return from this function terminates a thread.

+arg

+A pointer to the root function's only parameter.

+joinable

+Specifies whether the thread is joinable or unjoinable. If this parameter +is zero, the thread is unjoinable. Otherwise, it is joinable. +See also st_thread_join().

+stack_size

+Specifies your preference for the size of the stack, in bytes, associated +with the newly created thread. If you pass zero in this parameter, the +default stack size will be used. The default stack size is 128 KB on IA-64 +and 64 KB on all other platforms. On IA-64 only half of the stack_size +bytes are used for the memory stack. The other half is used for the register +stack backing store. +

+

Returns
+Upon successful completion, a new thread identifier is returned (this +identifier remains valid until the thread returns from its start function). +Otherwise, NULL is returned and errno is set +to indicate the error. +

+

Description
+This function creates a new thread. Note that the total number of threads +created by the application is limited by the amount of swap space available. +Upon thread creation, stack_size bytes are reserved on the swap +space. The stack pages are not actually used (valid) until touched by the +application. +

+


+

+ +

st_thread_exit()

+ +Terminates the calling thread. +

+

Syntax
+ +
+#include <st.h>
+
+void st_thread_exit(void *retval);
+
+

+

Parameters
+st_thread_exit() has the following parameters:

+retval

+If the thread is joinable, then the value retval may be retrieved +by st_thread_join(). If a thread returns from its +start function, it acts as if it had called st_thread_exit() with +retval as the value returned. +

+

Returns
+Nothing. +

+

Description
+This function terminates the calling thread. When a thread exits, per-thread +private data is destroyed by invoking the destructor function for any +non-NULL thread specific values associated with active keys (see +st_key_create()). This function is implicitly called +when a thread returns from its start function.

+When the last thread terminates the process exits with a zero status value. +

+


+

+ +

st_thread_join()

+ +Blocks the calling thread until a specified thread terminates. +

+

Syntax
+ +
+#include <st.h>
+
+int st_thread_join(st_thread_t thread, void **retvalp);
+
+

+

Parameters
+st_thread_join() has the following parameters:

+thread

+A valid identifier for the thread that is to be joined.

+retvalp

+If this parameter is not NULL, then the exit value of the +thread will be placed in the location referenced by this parameter +(see st_thread_exit()). +

+

Returns
+Upon successful completion, a value of 0 is returned. +Otherwise, a value of -1 is returned and errno is set +to indicate the error:

+ + + + + +
EINVALTarget thread is unjoinable.
EINVALAnother thread is already waiting on the same +joinable thread.
EDEADLKTarget thread is the same as the +calling thread.
EINTRCurrent thread was interrupted by +st_thread_interrupt().
+

+

Description
+This function is used to synchronize the termination of a thread and possibly +retrieve its exit value. Several threads cannot wait for the same thread +to complete - one of the calling threads operates successfully, and the others +terminate with the error. The calling thread is not blocked if the target +thread has already terminated. +

+


+

+ +

st_thread_self()

+ +Identifies the calling thread. +

+

Syntax
+ +
+#include <st.h>
+
+st_thread_t st_thread_self(void);
+
+

+

Parameters
+None. +

+

Returns
+Always returns a valid reference to the calling thread - a self-identity. +

+

Description
+This function identifies the calling thread. This is the same identifier +that the creating thread obtains from +st_thread_create(). +

+


+

+ +

st_thread_interrupt()

+ +Interrupts a target thread. +

+

Syntax
+ +
+#include <st.h>
+
+void st_thread_interrupt(st_thread_t thread);
+
+

+

Parameters
+st_thread_interrupt() has the following parameters:

+thread

+A valid identifier for the thread being interrupted. +

+

Returns
+Nothing. +

+

Description
+This function interrupts (unblocks) a target thread that is blocked in one +of the blocking functions. A function that was interrupted +returns an error and sets errno to EINTR. It is up to +the target thread to act upon an interrupt (e.g., it may exit or just +abort the current transaction).

+Note: State Threads library functions are never interrupted by a +caught signal. A blocking library function returns an error and sets +errno to EINTR only if the current thread was +interrupted via st_thread_interrupt(). +

+If a target thread is already runnable or running (e.g., it is a newly +created thread or calling thread itself), this function will prevent it +from subsequent blocking. In other words, the interrupt will be "delivered" +only when a target thread is about to block. +

+


+

+ +

st_sleep(), st_usleep()

+ +Suspends current thread for a specified amount of time. +

+

Syntax
+ +
+#include <st.h>
+
+int st_sleep(int secs);
+
+int st_usleep(st_utime_t usecs);
+
+

+

Parameters
+st_sleep() has the following parameters:

+secs

+The number of seconds you want the thread to sleep for. +

+st_usleep() has the following parameters:

+usecs

+The number of microseconds you want the thread to sleep for. This parameter +is a variable of type st_utime_t. +

+

Returns
+Upon successful completion, a value of 0 is returned. +Otherwise, a value of -1 is returned and errno is set +to indicate the error:

+ + +
EINTRThe current thread was interrupted by +st_thread_interrupt().
+

+

Description
+These functions suspend the calling thread from execution for a specified +number of seconds (st_sleep()) or microseconds (st_usleep()). +

+ +If zero is passed as a parameter to st_sleep(), or +ST_UTIME_NO_WAIT (0) is passed to +st_usleep(), the calling thread yields, thus potentially +allowing another thread to run. + +

+ +If -1 is passed as a parameter to st_sleep(), or +ST_UTIME_NO_TIMEOUT (-1) is passed to +st_usleep(), the calling thread will be suspended permanently. +It can be resumed again by interrupting it via st_thread_interrupt(). + +

+


+

+ +

st_randomize_stacks()

+ +Turns stack base address randomization on or off. +

+

Syntax
+ +
+#include <st.h>
+
+int st_randomize_stacks(int on);
+
+

+

Parameters
+st_randomize_stacks() has the following parameters:

+on

+If this parameter has a non-zero value, the State Threads library +randomizes the base addresses of stacks allocated for threads created +after this call. Otherwise new threads' stacks are typically page +aligned. +

+

Returns
+The previous state of stack randomization (a value of 0 if it +was off and a non-zero value otherwise). +

+

Description
+Randomizing state threads' stack bases may improve cache performance on +some systems when large numbers of state threads all perform roughly the +same work, as when they all start from the same root function. On many +modern systems the performance increase is negligible. You should +compare your application's performance with this feature on and off to +see if you really need it. +

+When randomization is enabled, new stacks are allocated one page larger +to accommodate the randomization. +

+This call affects only threads created afterward. It has no effect on +existing threads. +

+


+

+ + +

Per-Thread Private Data

+ +These functions allow private data to be associated with each of the threads in +a process. +

+

+
st_key_create()
+
st_key_getlimit()
+
st_thread_setspecific()
+
st_thread_getspecific()
+
+

+


+

+ +

st_key_create()

+ +Creates a key (non-negative integer) that can be used by all +threads in the process to get and set thread-specific data. +

+

Syntax
+ +
+#include <st.h>
+
+int st_key_create(int *keyp, void (*destructor)(void *));
+
+

+

Parameters
+st_key_create() has the following parameters:

+keyp

+The newly created key is returned in the memory pointed to by this parameter. +The new key can be used with +st_thread_setspecific() and +st_thread_getspecific().

+destructor

+Specifies an optional destructor function for the private data associated +with the key. This function can be specified as NULL. +Upon thread exit (see st_thread_exit()), if a key +has a non-NULL destructor and has a non-NULL value +associated with that key, then the destructor function will be +called with the associated value. +

+

Returns
+Upon successful completion, a value of 0 is returned. +Otherwise, a value of -1 is returned and errno is set +to indicate the error:

+ + +
EAGAINThe limit on the total number of keys per +process has been exceeded (see st_key_getlimit()). +
+

+

Description
+If this function is successful, every thread in the same process is capable +of associating private data with the new key. After a new key is created, all +active threads have the value NULL associated with that key. +After a new thread is created, the value NULL is associated with +all keys for that thread. If a non-NULL destructor function is +registered with a new key, it will be called at one of two times, as long as +the private data is not NULL: +when replacement private data is set with st_thread_setspecific(), +or when the thread exits (see st_thread_exit()). +

+The key maintains independent data values for each binding thread. A thread +can get access only to its own thread-specific data. There is no way to +deallocate a private data key once it is allocated. +

+


+

+ +

st_key_getlimit()

+ +Returns the key limit. +

+

Syntax
+ +
+#include <st.h>
+
+int st_key_getlimit(void);
+
+

+

Parameters
+None. +

+

Returns
+The limit on the total number of keys per process. +

+

Description
+This function can be used to obtain the limit on the total number of keys +per process (see st_key_create()). +

+


+

+ +

st_thread_setspecific()

+ +Sets per-thread private data. +

+

Syntax
+ +
+#include <st.h>
+
+int st_thread_setspecific(int key, void *value);
+
+

+

Parameters
+st_thread_setspecific() has the following parameters:

+key

+This parameter represents a key with which thread-specific data is associated. +

+value

+The per-thread private data, or more likely, a pointer to the data which is +associated with key. +

+

Returns
+Upon successful completion, a value of 0 is returned. +Otherwise, a value of -1 is returned and errno is set +to indicate the error:

+ + + +
EINVALThe specified key is invalid.
+

+

Description
+This function associates a thread-specific value with key. +Different threads may bind different values to the same key.

+If the thread already has non-NULL private data associated with +key, and if the destructor function for that key is not +NULL, this destructor function will be called before setting the +new data value. +

+


+

+ +

st_thread_getspecific()

+ +Retrieves the per-thread private data for the current thread. +

+

Syntax
+ +
+#include <st.h>
+
+void *st_thread_getspecific(int key);
+
+

+

Parameters
+st_thread_getspecific() has the following parameters:

+key

+This parameter represents a key with which thread-specific data is associated. +

+

Returns
+The thread-specific data associated with key. If no data is +associated with key, then NULL is returned. +

+

Description
+This function returns the calling thread's value that is bound to the +specified key (see +st_thread_setspecific()). +

+


+

+ + +

Synchronization

+ +

+These functions operate on condition variables +and mutual exclusion locks (mutexes).

+Functions are provided to wait on a condition variable and to wake up +(signal) threads that are waiting on the condition variable. +

+

+
st_cond_new()
+
st_cond_destroy()
+
st_cond_wait()
+
st_cond_timedwait()
+
st_cond_signal()
+
st_cond_broadcast()
+

+

st_mutex_new()
+
st_mutex_destroy()
+
st_mutex_lock()
+
st_mutex_trylock()
+
st_mutex_unlock()
+
+

+


+

+ +

st_cond_new()

+ +Creates a new condition variable. +

+

Syntax
+ +
+#include <st.h>
+
+st_cond_t st_cond_new(void);
+
+

+

Parameters
+None. +

+

Returns
+Upon successful completion, a new condition variable identifier is returned. +Otherwise, NULL is returned and errno is set +to indicate the error. +

+

Description
+This function creates a new condition variable. +

+


+

+ +

st_cond_destroy()

+ +Destroys a condition variable. +

+

Syntax
+ +
+#include <st.h>
+
+int st_cond_destroy(st_cond_t cvar);
+
+

+

Parameters
+st_cond_destroy() has the following parameters:

+cvar

+An identifier of the condition variable object to be destroyed. +

+

Returns
+Upon successful completion, a value of 0 is returned. +Otherwise, a value of -1 is returned and errno is set +to indicate the error:

+ + + +
EBUSYThe condition variable is currently being +used by one or more threads.
+

+

Description
+This function destroys a condition variable. The caller is responsible for +ensuring that the condition variable is no longer in use. +

+


+

+ +

st_cond_wait()

+ +Waits on a condition. +

+

Syntax
+ +
+#include <st.h>
+
+int st_cond_wait(st_cond_t cvar);
+
+

+

Parameters
+st_cond_wait() has the following parameters:

+cvar

+The condition variable on which to wait. +

+

Returns
+Upon successful completion, a value of 0 is returned. +Otherwise, a value of -1 is returned and errno is set +to indicate the error:

+ + +
EINTRThe current thread was interrupted by +st_thread_interrupt().
+

+

Description
+This function is used to block on a condition variable. A return from this +function does not guarantee that the condition or event for which the caller +was waiting actually occurred. It is the responsibility of the caller +to recheck the condition wait predicate before proceeding.

+Note: The State Threads library scheduling guarantees that the +condition cannot change between the checking and blocking, therefore there +is no need for mutex protection. You must not call any +blocking functions between the condition checking and +the st_cond_wait() call. +

+


+

+ +

st_cond_timedwait()

+ +Waits on a condition with a timeout. +

+

Syntax
+ +
+#include <st.h>
+
+int st_cond_timedwait(st_cond_t cvar, st_utime_t timeout);
+
+

+

Parameters
+st_cond_timedwait() has the following parameters:

+cvar

+The condition variable on which to wait.

+timeout

+If the number of microseconds specified by this parameter passes before the +waiting thread is signalled, an error is returned. This parameter is a +variable of type st_utime_t. Note that this +time value is a time delta; it is not an absolute time. +Also note that timeouts are measured since +the last context switch. +

+

Returns
+Upon successful completion, a value of 0 is returned. +Otherwise, a value of -1 is returned and errno is set +to indicate the error:

+ + + +
EINTRThe current thread was interrupted by +st_thread_interrupt().
ETIMEThe timeout occurred before the thread was +awakened by st_cond_signal() or +st_cond_broadcast().
+

+

Description
+This function works the same way as st_cond_wait(), +except that an error is returned if the number of microseconds specified by +timeout passes before the waiting thread is signalled. +

+


+

+ +

st_cond_signal()

+ +Unblocks a thread waiting on a condition variable. +

+

Syntax
+ +
+#include <st.h>
+
+int st_cond_signal(st_cond_t cvar);
+
+

+

Parameters
+st_cond_signal() has the following parameters:

+cvar

+The condition variable to signal. +

+

Returns
+Always zero. +

+

Description
+This function unblocks (signals) one of the threads that are blocked on +cvar at the time of the call. If no thread is waiting on the +condition variable, the signal operation is a no-op. +

+


+

+ +

st_cond_broadcast()

+ +Unblocks all threads waiting on a condition variable. +

+

Syntax
+ +
+#include <st.h>
+
+int st_cond_broadcast(st_cond_t cvar);
+
+

+

Parameters
+st_cond_broadcast() has the following parameters:

+cvar

+The condition variable to broadcast. +

+

Returns
+Always zero. +

+

Description
+This function unblocks all threads blocked on the specified condition +variable at the time of the call. If no threads are waiting, this operation +is a no-op. +

+


+

+ + +

st_mutex_new()

+ +Creates a new mutual exclusion lock (mutex). +

+

Syntax
+ +
+#include <st.h>
+
+st_mutex_t st_mutex_new(void);
+
+

+

Parameters
+None. +

+

Returns
+Upon successful completion, a new mutex identifier is returned. +Otherwise, NULL is returned and errno is set to +indicate the error. +

+

Description
+This function creates a new opaque mutual exclusion lock (see +st_mutex_t). +

+


+

+ +

st_mutex_destroy()

+ +Destroys a specified mutex object. +

+

Syntax
+ +
+#include <st.h>
+
+int st_mutex_destroy(st_mutex_t lock);
+
+

+

Parameters
+st_mutex_destroy() has the following parameters:

+lock

+An identifier of the mutex object to be destroyed. +

+

Returns
+Upon successful completion, a value of 0 is returned. +Otherwise, a value of -1 is returned and errno is set +to indicate the error:

+ + + +
EBUSYThe mutex is currently being used by +other threads.
+

+

Description
+This function destroys a mutex. The caller is responsible for ensuring +that the mutex is no longer in use. +

+


+

+ +

st_mutex_lock()

+ +Locks a specified mutex object. +

+

Syntax
+ +
+#include <st.h>
+
+int st_mutex_lock(st_mutex_t lock);
+
+

+

Parameters
+st_mutex_lock() has the following parameters:

+lock

+An identifier of the mutex object to be locked. +

+

Returns
+Upon successful completion, a value of 0 is returned. +Otherwise, a value of -1 is returned and errno is set +to indicate the error:

+ + + +
EDEADLKThe current thread already owns the mutex. +
EINTRThe current thread was interrupted by +st_thread_interrupt().
+

+

Description
+A thread that calls this function will block until it can gain exclusive +ownership of a mutex, and retains ownership until it calls +st_mutex_unlock(). +

+


+

+ +

st_mutex_trylock()

+ +Attempts to acquire a mutex. +

+

Syntax
+ +
+#include <st.h>
+
+int st_mutex_trylock(st_mutex_t lock);
+
+

+

Parameters
+st_mutex_trylock() has the following parameters:

+lock

+An identifier of the mutex object to be locked. +

+

Returns
+Upon successful completion, a value of 0 is returned. +Otherwise, a value of -1 is returned and errno is set +to indicate the error:

+ + +
EBUSYThe mutex is currently held by another +thread.
+

+

Description
+This function attempts to acquire a mutex. If the mutex object is locked +(by any thread, including the current thread), the call returns immediately +with an error. +

+


+

+ +

st_mutex_unlock()

+ +Releases a specified mutex object. +

+

Syntax
+ +
+#include <st.h>
+
+int st_mutex_unlock(st_mutex_t lock);
+
+

+

Parameters
+st_mutex_unlock() has the following parameters:

+lock

+An identifier of the mutex object to be unlocked. +

+

Returns
+Upon successful completion, a value of 0 is returned. +Otherwise, a value of -1 is returned and errno is set +to indicate the error:

+ + +
EPERMThe current thread does not own the mutex. +
+

+

Description
+This function releases a specified mutex object previously acquired by +st_mutex_lock() or +st_mutex_trylock(). Only the thread that locked +a mutex should unlock it. +

+


+

+ + +

Timing

+ +

+

+
st_utime()
+
st_set_utime_function()
+
st_timecache_set()
+
st_time()
+
+

+


+

+ +

st_utime()

+ +Returns current high-resolution time. +

+

Syntax
+ +
+#include <st.h>
+
+st_utime_t st_utime(void);
+
+

+

Parameters
+None. +

+

Returns
+Current high-resolution time value of type +st_utime_t. +

+

Description
+This function returns the current high-resolution time. Time is +expressed as microseconds since some arbitrary time in the past. It is +not correlated in any way to the time of day. See also st_utime_t and st_time(). +

+


+

+ +

st_set_utime_function()

+ +Set high-resolution time function. +

+

Syntax
+ +
+#include <st.h>
+
+int st_set_utime_function(st_utime_t (*func)(void));
+
+

+

Parameters
+st_set_utime_function() has the following parameters:

+func

+This function will be called to get high-resolution time instead of the +default st_utime() function. It must return +the number of microseconds since some arbitrary time in the past. +

+

Returns
+Upon successful completion, a value of 0 is returned. +Otherwise, a value of -1 is returned and errno is set +to EINVAL to indicate the error. +

+

Description
+This function may be called to replace the default implementation of the +st_utime() function. It must be called before the ST +library has been initialized (see st_init()). +The user-provided function func will be invoked whenever +st_utime() is called to obtain the current high-resolution time. +Replacing the default implementation may be useful, for example, for taking +advantage of high performance CPU cycle counters. +

+


+

+ +

st_timecache_set()

+ +Turns the time caching on or off. +

+

Syntax
+ +
+#include <st.h>
+
+int st_timecache_set(int on);
+
+

+

Parameters
+st_timecache_set() has the following parameters:

+on

+If this parameter has a non-zero value, the time caching is turned on +(enabled). Otherwise, the time caching is turned off (disabled). +By default time caching is disabled. +

+

Returns
+The previous state of time caching (a value of 0 if it was off and +a value of 1 otherwise). +

+

Description
+The State Threads library has the ability to "cache" the time value that is +reported by the time(2) system call. If the time caching is enabled +by calling this function with a non-zero argument, then the result value +of time(2) will be stored and updated at most once per second. The +cached time can be retrieved by st_time(). +By default time caching is disabled. +You may enable or disable time caching at any time but generally +you enable it once (if desired) during program initialization.

+Note: There are some pathological cases (e.g., very heavy loads during +application benchmarking) when a single thread runs for a long time without +giving up control and the cached time value is not updated properly. If you +always need "real-time" time values, don't enable the time caching. +

+


+

+ +

st_time()

+ +Returns the value of time in seconds since 00:00:00 UTC, January 1, 1970. +

+

Syntax
+ +
+#include <st.h>
+
+time_t st_time(void);
+
+

+

Parameters
+None. +

+

Returns
+The value of time in seconds since 00:00:00 UTC, January 1, 1970 as reported +by the time(2) system call. +

+

Description
+If the time caching was enabled by +st_timecache_set(), then this function returns +the cached result. Otherwise, it just calls time(2). +

+


+

+ + +

I/O Functions

+ +

+Most State Threads library I/O functions look like corresponding C library +functions with two exceptions: +

    +
  • They operate on file descriptor objects of type +st_netfd_t.
  • +
  • They take an additional argument of type +st_utime_t which represents an inactivity +timeout: if no I/O is possible during this amount of time, I/O functions +return an error code and set errno to ETIME. + +The boundary values ST_UTIME_NO_WAIT (0) and +ST_UTIME_NO_TIMEOUT (-1) for this argument indicate +that the thread should wait no time (function returns immediately) or +wait forever (never time out), respectively. + +Note that timeouts are measured since the +last context switch. +
  • +
+

+

+
st_netfd_open()
+
st_netfd_open_socket()
+
st_netfd_free()
+
st_netfd_close()
+
st_netfd_fileno()
+
st_netfd_setspecific()
+
st_netfd_getspecific()
+
st_netfd_serialize_accept()
+
st_netfd_poll()
+

+

st_accept()
+
st_connect()
+
st_read()
+
st_read_fully()
+
st_read_resid()
+
st_readv()
+
st_readv_resid()
+
st_write()
+
st_write_resid()
+
st_writev()
+
st_writev_resid()
+
st_recvfrom()
+
st_sendto()
+
st_recvmsg()
+
st_sendmsg()
+
st_open()
+
st_poll()
+
+

+


+

+ +

st_netfd_open()

+ +Creates a new file descriptor object. +

+

Syntax
+ +
+#include <st.h>
+
+st_netfd_t st_netfd_open(int osfd);
+
+

+

Parameters
+st_netfd_open() has the following parameters:

+osfd

+ +Any open OS file descriptor; can be obtained from calls to +functions including, but not restricted to, pipe(2), socket(3), +socketpair(3), fcntl(2), dup(2), etc. + + +

+

Returns
+Upon successful completion, a new file descriptor object identifier is +returned. Otherwise, NULL is returned and errno is set +to indicate the error. +

+

Description
+This function creates a new file descriptor object of type +st_netfd_t.

+ +Note: Among other things, this function sets a non-blocking +flag on the underlying OS file descriptor. You should not modify this +flag directly. Also, once an st_netfd_t +has been created with a given file descriptor, you should avoid +passing that descriptor to normal I/O or stdio functions. Since the +O_NONBLOCK flag is shared across dup(2), this applies to +dup()'ed file descriptors as well - for instance, if you pass +standard output or standard input to st_netfd_open(), then +you should use st_write() instead of write +or fprintf when writing to standard error as well - since all +three descriptors could point to the same terminal. If necessary, you +can still use write directly if you remember to check +errno for EAGAIN, but fprintf and other +stdio functions should be avoided completely because, at least on +Linux, the stdio library cannot be made to work reliably with +non-blocking files. (This only applies to file descriptors which are +passed to st_netfd_open() or st_netfd_open_socket(), or which are +related to such descriptors through dup(); other file +descriptors are untouched by State Threads.) +

+


+

+ +

st_netfd_open_socket()

+ +Creates a new file descriptor object from a socket. +

+

Syntax
+ +
+#include <st.h>
+
+st_netfd_t st_netfd_open_socket(int osfd);
+
+

+

Parameters
+st_netfd_open_socket() has the following parameters:

+osfd

+An open OS file descriptor which is a socket initially obtained from a +socket(3) or socketpair(3) call. +

+

Returns
+Upon successful completion, a new file descriptor object identifier is +returned. Otherwise, NULL is returned and errno is set +to indicate the error. +

+

Description
+This function creates a new file descriptor object of type +st_netfd_t which represents an open end +point of network communication.

+Unlike the st_netfd_open() function which may be used +on OS file descriptors of any origin, st_netfd_open_socket() must +be used only on sockets. It is slightly more efficient than +st_netfd_open().

+Note: Among other things, this function sets a non-blocking flag +on the underlying OS socket. You should not modify this flag directly. +See st_netfd_open(). +

+


+

+ +

st_netfd_free()

+ +Frees a file descriptor object without closing the underlying OS file +descriptor. +

+

Syntax
+ +
+#include <st.h>
+
+void st_netfd_free(st_netfd_t fd);
+
+

+

Parameters
+st_netfd_free() has the following parameters:

+fd

+A file descriptor object identifier (see +st_netfd_t). +

+

Returns
+Nothing. +

+

Description
+This function frees the memory and other resources identified by the +fd parameter without closing the underlying OS file descriptor. +Any non-NULL descriptor-specific data is destroyed by invoking +the specified destructor function (see st_netfd_setspecific()).

A thread should +not free file descriptor objects that are in use by other threads +because it may lead to unpredictable results (e.g., a freed file +descriptor may be reused without other threads knowing that). +

+


+

+ +

st_netfd_close()

+ +Closes a file descriptor. +

+

Syntax
+ +
+#include <st.h>
+
+int st_netfd_close(st_netfd_t fd);
+
+

+

Parameters
+st_netfd_close() has the following parameters:

+fd

+A file descriptor object identifier (see +st_netfd_t). +

+

Returns
+Upon successful completion, a value of 0 is returned. +Otherwise, a value of -1 is returned and errno is set +to indicate the error. +

+

Description
+This function closes the underlying OS file descriptor, frees the memory and +other resources identified by the fd parameter. Any non-NULL +descriptor-specific data is destroyed by invoking the specified destructor +function (see st_netfd_setspecific()).

+A thread should not close file descriptor objects that are in use by other +threads because it may lead to unpredictable results (e.g., a closed +file descriptor may be reused without other threads knowing that). +

+


+

+ +

st_netfd_fileno()

+ +Returns an underlying OS file descriptor. +

+

Syntax
+ +
+#include <st.h>
+
+int st_netfd_fileno(st_netfd_t fd);
+
+

+

Parameters
+st_netfd_fileno() has the following parameters:

+fd

+A file descriptor object identifier (see +st_netfd_t). +

+

Returns
+An underlying OS file descriptor. +

+

Description
+This function returns the integer OS file descriptor associated with the named +file descriptor object. +

+


+

+ +

st_netfd_setspecific()

+ +Sets per-descriptor private data. +

+

Syntax
+ +
+#include <st.h>
+
+void st_netfd_setspecific(st_netfd_t fd, void *value,
+                          void (*destructor)(void *));
+
+

+

Parameters
+st_netfd_setspecific() has the following parameters:

+fd

+A valid file descriptor object identifier (see +st_netfd_t). +

+value

+The per-descriptor private data, or more likely, a pointer to the data which +is being associated with the named file descriptor object. +

+destructor

+Specifies an optional destructor function for the private data associated +with fd. This function can be specified as NULL. +If value is not NULL, then this destructor function will +be called with value as an argument upon freeing the file descriptor +object (see st_netfd_free() and +st_netfd_close()). +

+

Returns
+Nothing. +

+

Description
+This function allows any data to be associated with the specified file +descriptor object (network connection). If a non-NULL destructor +function is registered, it will be called at one of two times, as long as +the associated data is not NULL: +
    +
  • when private data is replaced by calling +st_netfd_setspecific() again +
  • upon freeing the file descriptor object (see +st_netfd_free() and +st_netfd_close()) +
+

+


+

+ +

st_netfd_getspecific()

+ +Retrieves the per-descriptor private data. +

+

Syntax
+ +
+#include <st.h>
+
+void *st_netfd_getspecific(st_netfd_t fd);
+
+

+

Parameters
+st_netfd_getspecific() has the following parameters:

+fd

+A valid file descriptor object identifier (see +st_netfd_t). +

+

Returns
+The data associated with the named file descriptor object. If no data is +associated with fd, then NULL is returned. +

+

Description
+This function retrieves the data that was associated with the +specified file descriptor object (see +st_netfd_setspecific()). +

+


+

+ +

st_netfd_serialize_accept()

+ +Serializes all subsequent accept(3) calls on a specified file +descriptor object. +

+

Syntax
+ +
+#include <st.h>
+
+int st_netfd_serialize_accept(st_netfd_t fd);
+
+

+

Parameters
+st_netfd_serialize_accept() has the following parameters:

+fd

+A file descriptor object identifier (see +st_netfd_t) which has been successfully created +from a valid listening socket by st_netfd_open() or +st_netfd_open_socket(). +

+

Returns
+Upon successful completion, a value of 0 is returned. +Otherwise, a value of -1 is returned and errno is set +to indicate the error. +

+

Description
+On some platforms (e.g., Solaris 2.5 and possibly other SVR4 implementations) +accept(3) calls from different processes on +the same listening socket (see bind(3), listen(3)) must be +serialized. This function causes all subsequent accept(3) calls +made by st_accept() on the specified file descriptor +object to be serialized. +

+st_netfd_serialize_accept() must be called before +creating multiple server processes via fork(2). If the application +does not create multiple processes to accept network connections on +the same listening socket, there is no need to call this function. +

+Deciding whether or not to serialize accepts is tricky. On some +platforms (IRIX, Linux) it's not needed at all and +st_netfd_serialize_accept() is a no-op. On other platforms +it depends on the version of the OS (Solaris 2.6 doesn't need it but +earlier versions do). Serializing accepts does incur a slight +performance penalty so you want to enable it only if necessary. Read +your system's manual pages for accept(2) and select(2) +to see if accept serialization is necessary on your system. +

+st_netfd_serialize_accept() allocates resources that are +freed upon freeing of the specified file descriptor object (see +st_netfd_free() and +st_netfd_close()). +

+


+

+ +

st_netfd_poll()

+ +Waits for I/O on a single file descriptor object. +

+

Syntax
+ +
+#include <st.h>
+
+int st_netfd_poll(st_netfd_t fd, int how, st_utime_t timeout);
+
+

+

Parameters
+st_netfd_poll() has the following parameters:

+fd

+A file descriptor object identifier (see +st_netfd_t). +

+how

+Specifies I/O events of interest. This parameter can be constructed by +OR-ing any combination of the following event flags which are defined +in the poll.h header file:

+ + + + + +
POLLINfd is readable.
POLLOUTfd is writable.
POLLPRIfd has an exception condition.
+

+timeout

+Amount of time in microseconds the call will block waiting for I/O +to become ready. This parameter is a variable of type +st_utime_t. If this time expires without any +I/O becoming ready, st_netfd_poll() returns an error and sets +errno to ETIME. +Note that timeouts are measured since the +last context switch. +

+

Returns
+If the named file descriptor object is ready for I/O within the specified +amount of time, a value of 0 is returned. Otherwise, a value +of -1 is returned and errno is set to indicate the error: +

+ + + + +
EBADFThe underlying OS file descriptor is invalid. +
EINTRThe current thread was interrupted by +st_thread_interrupt().
ETIMEThe timeout occurred without any I/O +becoming ready.
+

+

Description
+This function returns as soon as I/O is ready on the named file +descriptor object or the specified amount of time expires. The +how parameter should be set to the I/O events (readable, +writable, exception, or some combination) that the caller is interested +in. If the value of timeout is ST_UTIME_NO_TIMEOUT +(-1), this function blocks until a requested I/O event occurs +or until the call is interrupted by st_thread_interrupt().

+Despite having an interface like poll(2), this function uses +the same event notification mechanism as the rest of the library. For +instance, if an alternative event notification mechanism was set using st_set_eventsys(), this function uses that +mechanism to check for events.

+Note: if kqueue(2) is used as an alternative event +notification mechanism (see st_set_eventsys()), the POLLPRI +event flag is not supported and st_netfd_poll() will return an error +if it's set (errno will be set to EINVAL). +

+


+

+ +

st_accept()

+ +Accepts a connection on a specified file descriptor object. +

+

Syntax
+ +
+#include <st.h>
+
+st_netfd_t st_accept(st_netfd_t fd, struct sockaddr *addr, int *addrlen,
+                     st_utime_t timeout);
+
+

+

Parameters
+st_accept() has the following parameters:

+fd

+A file descriptor object identifier (see +st_netfd_t) representing the rendezvous socket +on which the caller is willing to accept new connections. This object has been +created from a valid listening socket by +st_netfd_open() or +st_netfd_open_socket().

+addr

+If this value is non-zero, it is a result parameter that is filled +in with the address of the connecting entity, as known to the communications +layer (see accept(3)).

+addrlen

+This parameter should initially contain the amount of space pointed to by +addr; on return it will contain the actual length (in bytes) of the +address returned (see accept(3)).

+timeout

+A value of type st_utime_t specifying the time +limit in microseconds for completion of the accept operation. +Note that timeouts are measured since the +last context switch. +

+

Returns
+Upon successful completion, a new file descriptor object identifier +representing the newly accepted connection is returned. Otherwise, +NULL is returned and errno is set to indicate the error. +Possible errno values are the same as set by the accept(3) +call with two exceptions:

+ + + +
EINTRThe current thread was interrupted by +st_thread_interrupt().
ETIMEThe timeout occurred and no pending +connection was accepted.
+

+

Description
+This function accepts the first connection from the queue of pending +connections and creates a new file descriptor object for the newly +accepted connection. The rendezvous socket can still be used to accept +more connections.

+st_accept() blocks the calling thread until either a new connection +is successfully accepted or an error occurs. If no pending connection can +be accepted before the time limit, this function returns NULL +and sets errno to ETIME. +

+


+

+ +

st_connect()

+ +Initiates a connection on a specified file descriptor object. +

+

Syntax
+ +
+#include <st.h>
+
+int st_connect(st_netfd_t fd, struct sockaddr *addr, int addrlen,
+               st_utime_t timeout);
+
+

+

Parameters
+st_connect() has the following parameters:

+fd

+A file descriptor object identifier (see +st_netfd_t) representing a socket.

+addr

+A pointer to the address of the peer to which the socket is to be connected. +

+addrlen

+This parameter specifies the amount of space pointed to by addr. +

+timeout

+A value of type st_utime_t specifying the time +limit in microseconds for completion of the connect operation. +Note that timeouts are measured since the +last context switch. +

+

Returns
+Upon successful completion, a value of 0 is returned. +Otherwise, a value of -1 is returned and errno is set +to indicate the error. Possible errno values are the same as set +by the connect(3) call with two exceptions:

+ + + +
EINTRThe current thread was interrupted by +st_thread_interrupt().
ETIMEThe timeout occurred and connection setup +was not completed.
+

+

Description
+This function is usually invoked on a file descriptor object representing +a TCP socket. Upon completion it establishes a TCP connection to the peer. +If the underlying OS socket is not bound, it will be bound to an arbitrary +local address (see connect(3)).

+st_connect() blocks the calling thread until either the connection +is successfully established or an error occurs. If the connection setup +cannot complete before the specified time limit, this function fails with +errno set to ETIME. +

+


+

+ +

st_read()

+ +Reads data from a specified file descriptor object. +

+

Syntax
+ +
+#include <st.h>
+
+ssize_t st_read(st_netfd_t fd, void *buf, size_t nbyte, st_utime_t timeout);
+
+

+

Parameters
+st_read() has the following parameters:

+fd

+A file descriptor object identifier (see +st_netfd_t).

+buf

+A pointer to a buffer to hold the data read in. On output the buffer +contains the data.

+nbyte

+The size of buf in bytes.

+timeout

+A value of type st_utime_t specifying the time +limit in microseconds for completion of the read operation. +Note that timeouts are measured since the +last context switch. +

+

Returns
+On success a non-negative integer indicating the number of bytes actually +read is returned (a value of 0 means the network connection is +closed or end of file is reached). Otherwise, a value of -1 is +returned and errno is set to indicate the error. +Possible errno values are the same as set by the read(2) +call with two exceptions:

+ + + +
EINTRThe current thread was interrupted by +st_thread_interrupt().
ETIMEThe timeout occurred and no data was read. +
+

+

Description
+This function blocks the calling thread until it encounters an end-of-stream +indication, some positive number of bytes (but no more than nbyte +bytes) are read in, a timeout occurs, or an error occurs. +

+


+

+ +

st_read_fully()

+ +Reads the specified amount of data in full from a file descriptor object. +

+

Syntax
+ +
+#include <st.h>
+
+ssize_t st_read_fully(st_netfd_t fd, void *buf, size_t nbyte,
+                      st_utime_t timeout);
+
+

+

Parameters
+st_read_fully() has the following parameters:

+fd

+A file descriptor object identifier (see +st_netfd_t).

+buf

+A pointer to a buffer to hold the data read in. On output the buffer +contains the data.

+nbyte

+The amount of data to be read in full (in bytes). It must not exceed the +size of buf.

+timeout

+A value of type st_utime_t specifying the +inactivity timeout (in microseconds). +Note that timeouts are measured since the +last context switch. +

+

Returns
+On success a non-negative integer indicating the number of bytes actually +read is returned (a value less than nbyte means the network +connection is closed or end of file is reached). Otherwise, a value of +-1 is returned and errno is set to indicate the error. +Possible errno values are the same as set by the read(2) +call with two exceptions:

+ + + +
EINTRThe current thread was interrupted by +st_thread_interrupt().
ETIMEThe timeout occurred. +
+

+

Description
+This function blocks the calling thread until the specified amount of data +is read in full, it encounters an end-of-stream indication, a timeout occurs, +or an error occurs. +

+


+

+ +

st_read_resid()

+ +Reads the specified amount of data in full from a file descriptor object, +reporting the amount of data left unread. +

+

Syntax
+ +
+#include <st.h>
+
+int st_read_resid(st_netfd_t fd, void *buf, size_t *resid,
+		  st_utime_t timeout);
+
+

+

Parameters
+st_read_resid() has the following parameters:

+fd

+A file descriptor object identifier (see +st_netfd_t).

+buf

+A pointer to a buffer to hold the data read in. On output the buffer +contains the data.

+resid

+A pointer to a number of bytes. +On entry, the amount of data to be read in full. +It must not exceed the size of buf. +On return, the amount of data remaining to be read. +(A non-zero returned value means some but not all of the data was read.)

+timeout

+A value of type st_utime_t specifying the +inactivity timeout (in microseconds). +Note that timeouts are measured since the +last context switch. +

+

Returns
+On success, zero is returned. *resid may be zero, indicating +a complete read, or non-zero, indicating the network +connection is closed or end of file is reached. +

+Otherwise, a value of -1 is returned, *resid is non-zero, +and errno is set to indicate the error. +Possible errno values are the same as set by the read(2) +call with two exceptions:

+ + + +
EINTRThe current thread was interrupted by +st_thread_interrupt().
ETIMEThe timeout occurred. +
+

+

Description
+This function blocks the calling thread until the specified amount of data +is read in full, it encounters an end-of-stream indication, a timeout occurs, +or an error occurs. It differs from st_read_fully() only in that +it allows the caller to know how many bytes were transferred before an error +occurred. +

+


+

+ +

st_readv()

+ +Reads data from a specified file descriptor object into multiple buffers. +

+

Syntax
+ +
+#include <st.h>
+
+ssize_t st_readv(st_netfd_t fd, const struct iovec *iov, int iov_size,
+		 st_utime_t timeout);
+
+

+

Parameters
+st_readv() has the following parameters:

+fd

+A file descriptor object identifier (see +st_netfd_t).

+iov

+An array of iovec structures that identify the buffers for holding +the data read in. +On return the buffers contain the data.

+iov_size

+The number of iovec structures in the iov array.

+timeout

+A value of type st_utime_t specifying the time +limit in microseconds for completion of the read operation. +Note that timeouts are measured since the +last context switch. +

+

Returns
+On success a non-negative integer indicating the number of bytes actually +read is returned (a value of 0 means the network connection is +closed or end of file is reached). Otherwise, a value of -1 is +returned and errno is set to indicate the error. +Possible errno values are the same as set by the readv(2) +call with two exceptions:

+ + + +
EINTRThe current thread was interrupted by +st_thread_interrupt().
ETIMEThe timeout occurred and no data was read. +
+

+

Description
+This function blocks the calling thread until it encounters an end-of-stream +indication, some positive number of bytes (but no more than fit in the buffers) +are read in, a timeout occurs, or an error occurs. +

+


+

+ +

st_readv_resid()

+ +Reads the specified amount of data in full from a file descriptor object +into multiple buffers. +

+

Syntax
+ +
+#include <st.h>
+
+int st_readv_resid(st_netfd_t fd, struct iovec **iov, int *iov_size,
+		   st_utime_t timeout);
+
+

+

Parameters
+st_readv_resid() has the following parameters:

+fd

+A file descriptor object identifier (see +st_netfd_t).

+iov

+A pointer to an array of iovec structures. +On entry, the iovecs identify the buffers for holding the data read in. +On return, the incomplete iovecs. +This function modifies both the pointer and the array to which it points.

+iov_size

+A pointer to a number of iovec structures. +On entry, the number of iovec structures pointed to by *iov. +On return, the number of incomplete or unused iovec structures. +(A non-zero returned value means some but not all of the data was read.)

+timeout

+A value of type st_utime_t specifying the +inactivity timeout (in microseconds). +Note that timeouts are measured since the +last context switch. +

+

Returns
+On success, zero is returned. *iov_size may be zero, indicating +a complete read, or non-zero, indicating the network connection is +closed or end of file is reached. *iov points to the first +iovec after the end of the original array on a complete read, or to the +first incomplete iovec on an incomplete read. +

+Otherwise, a value of -1 is returned, *iov_size is non-zero, +and errno is set to indicate the error. *iov points to the +first unused iovec. +Possible errno values are the same as set by the readv(2) +call with two exceptions:

+ + + +
EINTRThe current thread was interrupted by +st_thread_interrupt().
ETIMEThe timeout occurred. +
+

All of the iovecs before *iov are modified such that +iov_base points to the end of the original buffer and +iov_len is zero. +

+

Description
+This function blocks the calling thread until the specified amount of data +is read in full, it encounters an end-of-stream indication, a timeout occurs, +or an error occurs. Like st_read_resid() it blocks the thread until +all of the requested data is read or an error occurs. Use +st_readv() to read up to the requested amount of data. +

+


+

+ +

st_write()

+ +Writes a buffer of data to a specified file descriptor object. +

+

Syntax
+ +
+#include <st.h>
+
+ssize_t st_write(st_netfd_t fd, const void *buf, size_t nbyte,
+                 st_utime_t timeout);
+
+

+

Parameters
+st_write() has the following parameters:

+fd

+A file descriptor object identifier (see +st_netfd_t).

+buf

+A pointer to the buffer holding the data to be written.

+nbyte

+The amount of data in bytes to be written from the buffer.

+timeout

+A value of type st_utime_t specifying the +inactivity timeout (in microseconds). +Note that timeouts are measured since the +last context switch. +

+

Returns
+On success a non-negative integer equal to nbyte is returned. +Otherwise, a value of -1 is returned and errno is set +to indicate the error. Possible errno values are the same as set +by the write(2) call with two exceptions:

+ + + +
EINTRThe current thread was interrupted by +st_thread_interrupt().
ETIMEThe timeout occurred. +
+

+

Description
+This function blocks the calling thread until all the data is written, +a timeout occurs, or the write operation fails. The return value is equal to +either nbyte (on success) or -1 (on failure). Note that if +st_write() returns -1, some data (less than nbyte +bytes) may have been written before an error occurred. +

+


+

+ +

st_write_resid()

+ +Writes a buffer of data to a specified file descriptor object, +reporting the amount of data left unwritten. +

+

Syntax
+ +
+#include <st.h>
+
+int st_write_resid(st_netfd_t fd, const void *buf, size_t *resid,
+                   st_utime_t timeout);
+
+

+

Parameters
+st_write_resid() has the following parameters:

+fd

+A file descriptor object identifier (see +st_netfd_t).

+buf

+A pointer to the buffer holding the data to be written.

+resid

+A pointer to a number of bytes. +On entry, the amount of data to be written from the buffer. +On return, the amount of data remaining to be written. +(A non-zero returned value means some but not all of the data was written.)

+timeout

+A value of type st_utime_t specifying the +inactivity timeout (in microseconds). +Note that timeouts are measured since the +last context switch. +

+

Returns
+On success, zero is returned and *resid is zero. +Otherwise, a value of -1 is returned, *resid is non-zero, +and errno is set +to indicate the error. Possible errno values are the same as set +by the write(2) call with two exceptions:

+ + + +
EINTRThe current thread was interrupted by +st_thread_interrupt().
ETIMEThe timeout occurred. +
+

+

Description
+This function blocks the calling thread until all the data is written, +a timeout occurs, or the write operation fails. It differs from +st_write() only in that it allows the caller to know how many bytes +were transferred before an error occurred. +

+


+

+ +

st_writev()

+ +Writes data to a specified file descriptor object from multiple buffers. +

+

Syntax
+ +
+#include <st.h>
+
+ssize_t st_writev(st_netfd_t fd, const struct iovec *iov, int iov_size,
+                  st_utime_t timeout);
+
+

+

Parameters
+st_writev() has the following parameters:

+fd

+A file descriptor object identifier (see +st_netfd_t).

+iov

+An array of iovec structures that describe the buffers to write +from (see writev(2)).

+iov_size

+Number of iovec structures in the iov array.

+timeout

+A value of type st_utime_t specifying the +inactivity timeout (in microseconds). +Note that timeouts are measured since the +last context switch. +

+

Returns
+On success a non-negative integer equal to the sum of all the buffer lengths +is returned. Otherwise, a value of -1 is returned and errno +is set to indicate the error. Possible errno values are the same as +set by the writev(2) call with two exceptions:

+ + + +
EINTRThe current thread was interrupted by +st_thread_interrupt().
ETIMEThe timeout occurred. +
+

+

Description
+This function blocks the calling thread until all the data is written, +a timeout occurs, or the write operation fails. The return value is equal to +either the sum of all the buffer lengths (on success) or -1 (on +failure). Note that if st_writev() returns -1, part of the +data may have been written before an error occurred. +

+


+

+ +

st_writev_resid()

+ +Writes multiple buffers of data to a specified file descriptor object. +

+

Syntax
+ +
+#include <st.h>
+
+int st_writev_resid(st_netfd_t fd, struct iovec **iov, int *iov_size,
+		    st_utime_t timeout);
+
+

+

Parameters
+st_writev_resid() has the following parameters:

+fd

+A file descriptor object identifier (see +st_netfd_t).

+iov

+A pointer to an array of iovec structures. +On entry, the iovecs identify the buffers holding the data to write. +On return, the incomplete iovecs. +This function modifies both the pointer and the array to which it points.

+iov_size

+A pointer to a number of iovec structures. +On entry, the number of iovec structures pointed to by *iov. +On return, the number of incomplete or unused iovec structures. +(A non-zero returned value means some but not all of the data was written.)

+timeout

+A value of type st_utime_t specifying the +inactivity timeout (in microseconds). +Note that timeouts are measured since the +last context switch. +

+

Returns
+On success, zero is returned, *iov_size is zero, and *iov +points to the first iovec after the end of the original array. +Otherwise, a value of -1 is returned, *iov_size is non-zero, +*iov points to the first incomplete iovec, and errno is set +to indicate the error. Possible errno values are the same as set +by the writev(2) call with two exceptions:

+ + + +
EINTRThe current thread was interrupted by +st_thread_interrupt().
ETIMEThe timeout occurred. +
+

+All of the iovecs before *iov are modified such that +iov_base points to the end of the original buffer and +iov_len is zero. +

+

Description
+This function blocks the calling thread until all the data is written, +a timeout occurs, or the write operation fails. It differs from +st_writev() only in that it allows the caller to know how many bytes +were transferred before an error occurred. +

+


+

+ +

st_recvfrom()

+ +Receives bytes from a file descriptor object and stores the sending peer's +address. +

+

Syntax
+ +
+#include <st.h>
+
+int st_recvfrom(st_netfd_t fd, void *buf, int len, struct sockaddr *from,
+                int *fromlen, st_utime_t timeout);
+
+

+

Parameters
+st_recvfrom() has the following parameters:

+fd

+A file descriptor object identifier (see +st_netfd_t) representing a UDP socket.

+buf

+A pointer to a buffer to hold the data received.

+len

+The size of buf in bytes.

+from

+If this parameter is not a NULL pointer, the source address of the +message is filled in (see recvfrom(3)).

+fromlen

+This is a value-result parameter, initialized to the size of the buffer +associated with from, and modified on return to indicate the actual +size of the address stored there.

+timeout

+A value of type st_utime_t specifying the time +limit in microseconds for completion of the receive operation. +Note that timeouts are measured since the +last context switch. +

+

Returns
+On success a non-negative integer indicating the length of the received +message in bytes is returned. Otherwise, a value of -1 is returned +and errno is set to indicate the error. Possible errno +values are the same as set by the recvfrom(3) call with two +exceptions:

+ + + +
EINTRThe current thread was interrupted by +st_thread_interrupt().
ETIMEThe timeout occurred and no data was received. +
+

+

Description
+This function receives up to a specified number of bytes from the specified +file descriptor object representing a UDP socket.

+st_recvfrom() blocks the calling thread until one or more bytes are +transferred, a timeout has occurred, or there is an error. No more than +len bytes will be transferred. +

+


+

+ +

st_sendto()

+ +Sends bytes to a specified destination. +

+

Syntax
+ +
+#include <st.h>
+
+int st_sendto(st_netfd_t fd, const void *msg, int len, struct sockaddr *to,
+              int tolen, st_utime_t timeout);
+
+

+

Parameters
+st_sendto() has the following parameters:

+fd

+A file descriptor object identifier (see +st_netfd_t) representing a UDP socket.

+msg

+A pointer to a buffer containing the message to be sent.

+len

+The length of the message to be sent (in bytes).

+to

+A pointer to the address of the destination (see sendto(3)).

+tolen

+This parameter specifies the size of the destination address.

+timeout

+A value of type st_utime_t specifying the time +limit in microseconds for completion of the send operation. +Note that timeouts are measured since the +last context switch. +

+

Returns
+On success a non-negative integer indicating the number of bytes sent is +returned. Otherwise, a value of -1 is returned and errno is +set to indicate the error. Possible errno values are the same as +set by the sendto(3) call with two exceptions:

+ + + +
EINTRThe current thread was interrupted by +st_thread_interrupt().
ETIMEThe timeout occurred and no data was sent. +
+

+

Description
+This function sends a specified number of bytes from a file descriptor +object representing a UDP socket to the specified destination address. +If no buffer space is available at the underlying OS socket to hold the +message to be transmitted, then st_sendto() blocks the calling +thread until the space becomes available, a timeout occurs, or an error +occurs. +

+


+

+ +

st_recvmsg()

+ +Receives a message from a file descriptor object. +

+

Syntax
+ +
+#include <st.h>
+
+int st_recvmsg(st_netfd_t fd, struct msghdr *msg, int flags,
+               st_utime_t timeout);
+
+

+

Parameters
+st_recvmsg() has the following parameters:

+fd

+A file descriptor object identifier (see +st_netfd_t) representing a UDP socket.

+msg

+A pointer to a msghdr structure to describe the data received.

+flags

+Control flags for recvmsg(3).

+timeout

+A value of type st_utime_t specifying the time +limit in microseconds for completion of the receive operation. +Note that timeouts are measured since the +last context switch. +

+

Returns
+On success a non-negative integer indicating the number of bytes received +is returned. Otherwise, a value of -1 is returned +and errno is set to indicate the error. Possible errno +values are the same as set by the recvmsg(3) call with two +exceptions:

+ + + +
EINTRThe current thread was interrupted by +st_thread_interrupt().
ETIMEThe timeout occurred and no data was received. +
+

+

Description
+This function receives bytes from the specified file descriptor object +representing a UDP socket. The operation is controlled by the in/out +msg parameter.

+st_recvmsg() blocks the calling thread until one or more bytes are +transferred, a timeout has occurred, or there is an error. +

+


+

+ +

st_sendmsg()

+ +Sends a message to a file descriptor object. +

+

Syntax
+ +
+#include <st.h>
+
+int st_sendmsg(st_netfd_t fd, const struct msghdr *msg, int flags,
+               st_utime_t timeout);
+
+

+

Parameters
+st_sendmsg() has the following parameters:

+fd

+A file descriptor object identifier (see +st_netfd_t) representing a UDP socket.

+msg

+A pointer to a msghdr structure describing the message to be sent.

+flags

+Control flags for sendmsg(3).

+timeout

+A value of type st_utime_t specifying the time +limit in microseconds for completion of the send operation. +Note that timeouts are measured since the +last context switch. +

+

Returns
+On success a non-negative integer indicating the number of bytes sent is +returned. Otherwise, a value of -1 is returned and errno is +set to indicate the error. Possible errno values are the same as +set by the sendmsg(3) call with two exceptions:

+ + + +
EINTR: The current thread was interrupted by +st_thread_interrupt().
ETIME: The timeout occurred and no data was sent. +
+

+

Description
+This function sends bytes to a file descriptor object representing a UDP +socket. The operation is controlled by the msg parameter. +If no buffer space is available at the underlying OS socket to hold the +message to be transmitted, then st_sendmsg() blocks the calling +thread until the space becomes available, a timeout occurs, or an error +occurs. +

+


+

+ +

st_open()

+ +Opens a file for reading, writing, or both. +

+

Syntax
+ +
+#include <st.h>
+
+st_netfd_t st_open(const char *path, int oflags, mode_t mode);
+
+

+

Parameters
+st_open() has the following parameters:

+path

+The pathname of the file to be opened.

+oflags

+File status flags. These are the same flags that are used by the +open(2) system call.

+mode

+Access permission bits of the file mode, if the file is created when +O_CREAT is set in oflags (see open(2)). +

+

Returns
+Upon successful completion, a new file descriptor object identifier is +returned. Otherwise, NULL is returned and errno is set +to indicate the error. +

+

Description
+This function creates a new file descriptor object of type +st_netfd_t for the file with the pathname +path. This object can be freed by +st_netfd_free() or +st_netfd_close().

+The primary purpose of this function is to open FIFOs (named pipes) or +other special files in order to create an end point of communication. +However, it can be used on regular files as well.

+Among other things, this function always sets a non-blocking flag on the +underlying OS file descriptor, so there is no need to include that flag in +oflags. +

+


+

+ +

st_poll()

+ +Detects when I/O is ready for a set of OS file descriptors. +

+

Syntax
+ +
+#include <st.h>
+
+int st_poll(struct pollfd *pds, int npds, st_utime_t timeout);
+
+

+

Parameters
+st_poll() has the following parameters:

+pds

+A pointer to an array of pollfd structures (see poll(2)). +

+npds

+The number of elements in the pds array.

+timeout

+A value of type st_utime_t specifying the +amount of time in microseconds the call will block waiting for I/O +to become ready. If this time expires without any I/O becoming ready, +st_poll() returns zero. +Note that timeouts are measured since the +last context switch. +

+

Returns
+Upon successful completion, a non-negative value is returned. A positive +value indicates the total number of OS file descriptors in pds +that have events. A value of 0 indicates that the call timed out. +Upon failure, a value of -1 is returned and errno is set +to indicate the error:

+ + +
EINTR: The current thread was interrupted by +st_thread_interrupt().
+

+If an alternative event notification mechanism has been set by +st_set_eventsys(), other values of +errno could be set upon failure as well. The values +depend on the specific mechanism in use. +

+

Description
+This function returns as soon as I/O is ready on one or more of the specified +OS file descriptors. A count of the number of ready descriptors is returned +unless a timeout occurs, in which case zero is returned.

+The pollfd structure is defined in the poll.h header file +and contains the following members:

+

+    int fd;             /* OS file descriptor */
+    short events;       /* requested events   */
+    short revents;      /* returned events    */
+
+The events field should be set to the I/O events (readable, +writable, exception, or some combination) that the caller is interested in. +On return, the revents field is set to indicate what kind of I/O +is ready on the respective descriptor.

+The events and revents fields are constructed by OR-ing +any combination of the following event flags (defined in poll.h): +

+ + + + + + +
POLLIN: fd is readable.
POLLOUT: fd is writable.
POLLPRI: fd has an exception condition.
POLLNVAL: fd is bad.
+

+The POLLNVAL flag is only valid in the revents field; +it is not used in the events field.

+Despite having an interface like poll(2), this function uses +the same event notification mechanism as the rest of the library. For +instance, if an alternative event notification mechanism was set using st_set_eventsys(), this function uses that +mechanism to check for events.

+Note that unlike the poll(2) call, this function has the +timeout parameter expressed in microseconds. If the value of +timeout is ST_UTIME_NO_TIMEOUT +(-1), this function blocks until a requested I/O +event occurs or until the call is interrupted by +st_thread_interrupt(). +

+Note: if kqueue(2) is used as an alternative event +notification mechanism (see st_set_eventsys()), the POLLPRI +event flag is not supported and st_poll() will return an error +if it's set (errno will be set to EINVAL). +

+


+

+ + +

Program Structure

+ +

+Generally, the following steps should be followed when writing an application +using the State Threads library: +

+

    +
  1. Configure the library by calling these pre-init functions, if desired. + +
  2. +

    +

  3. Initialize the library by calling st_init().
  4. +

    +

  5. Configure the library by calling these post-init functions, if desired. + +
  6. +

    +

  7. Create resources that will be shared among different processes: + create and bind listening sockets (see socket(3), + bind(3), listen(3), + st_netfd_open_socket(), and possibly + st_netfd_serialize_accept()), + create shared memory segments, inter-process communication (IPC) + channels and synchronization primitives (if any).
  8. +

    +

  9. Create several processes via fork(2). The parent process should + either exit or become a "watchdog" (e.g., it starts a new process when + an existing one crashes, does a cleanup upon application termination, + etc.).
  10. +

    +

  11. In each child process create a pool of threads (see + st_thread_create()) to handle user + connections. Each thread in the pool may accept client connections + (st_accept()), connect to other servers + (st_connect()), perform various network I/O + (st_read(), st_write(), etc.).
  12. +
+

+Note that only State Threads library I/O functions should +be used for network I/O: any other I/O calls may block the calling process +indefinitely. For example, standard I/O functions (fgets(3), +fread(3), fwrite(3), fprintf(3), etc.) call +read(2) and write(2) directly and therefore should not be +used on sockets or pipes. +

+Also note that for short timeouts to work the program +should do context switches (for example by calling +st_usleep()) on a regular basis. +

+


+

+ + +

List of Blocking Functions

+ +

+The thread context switch (process state change) can only happen +in a well-known set of blocking functions. +Only the following functions can block the calling thread: +

+

+
st_thread_join()
+
st_sleep()
+
st_usleep()
+
st_cond_wait()
+
st_cond_timedwait()
+
st_mutex_lock()
+
st_netfd_poll()
+
st_accept()
+
st_connect()
+
st_read()
+
st_read_fully()
+
st_read_resid()
+
st_readv()
+
st_readv_resid()
+
st_write()
+
st_write_resid()
+
st_writev()
+
st_writev_resid()
+
st_recvfrom()
+
st_sendto()
+
st_recvmsg()
+
st_sendmsg()
+
st_poll()
+
+

+


+

+ + + + diff --git a/trunk/research/st-1.9/docs/st.html b/trunk/research/st-1.9/docs/st.html new file mode 100644 index 000000000..a6b932a81 --- /dev/null +++ b/trunk/research/st-1.9/docs/st.html @@ -0,0 +1,504 @@ + + +State Threads for Internet Applications + + +

State Threads for Internet Applications

+

Introduction

+

+State Threads is an application library which provides a +foundation for writing fast and highly scalable Internet Applications +on UNIX-like platforms. It combines the simplicity of the multithreaded +programming paradigm, in which one thread supports each simultaneous +connection, with the performance and scalability of an event-driven +state machine architecture.

+ +

1. Definitions

+

+ +

1.1 Internet Applications

+ +

+An Internet Application (IA) is either a server or client network +application that accepts connections from clients and may or may not +connect to servers. In an IA the arrival or departure of network data +often controls processing (that is, IA is a data-driven application). +For each connection, an IA does some finite amount of work +involving data exchange with its peer, where its peer may be either +a client or a server. +The typical transaction steps of an IA are to accept a connection, +read a request, do some finite and predictable amount of work to +process the request, then write a response to the peer that sent the +request. One example of an IA is a Web server; +the most general example of an IA is a proxy server, because it both +accepts connections from clients and connects to other servers.

+

+We assume that the performance of an IA is constrained by available CPU +cycles rather than network bandwidth or disk I/O (that is, CPU +is a bottleneck resource). +

+ + +

1.2 Performance and Scalability

+ +

+The performance of an IA is usually evaluated as its +throughput measured in transactions per second or bytes per second (one +can be converted to the other, given the average transaction size). There are +several benchmarks that can be used to measure throughput of Web serving +applications for specific workloads (such as +SPECweb96, +WebStone, +WebBench). +Although there is no common definition for scalability, in general it +expresses the ability of an application to sustain its performance when some +external condition changes. For IAs this external condition is either the +number of clients (also known as "users," "simultaneous connections," or "load +generators") or the underlying hardware system size (number of CPUs, memory +size, and so on). Thus there are two types of scalability: load +scalability and system scalability, respectively. +

+The figure below shows how the throughput of an idealized IA changes with +the increasing number of clients (solid blue line). Initially the throughput +grows linearly (the slope represents the maximal throughput that one client +can provide). Within this initial range, the IA is underutilized and CPUs are +partially idle. Further increase in the number of clients leads to a system +saturation, and the throughput gradually stops growing as all CPUs become fully +utilized. After that point, the throughput stays flat because there are no +more CPU cycles available. +In the real world, however, each simultaneous connection +consumes some computational and memory resources, even when idle, and this +overhead grows with the number of clients. Therefore, the throughput of the +real world IA starts dropping after some point (dashed blue line in the figure +below). The rate at which the throughput drops depends, among other things, on +application design. +

+We say that an application has a good load scalability if it can +sustain its throughput over a wide range of loads. +Interestingly, the SPECweb99 +benchmark somewhat reflects the Web server's load scalability because it +measures the number of clients (load generators) given a mandatory minimal +throughput per client (that is, it measures the server's capacity). +This is unlike SPECweb96 and +other benchmarks that use the throughput as their main metric (see the figure +below). +

+

Figure: Throughput vs. Number of clients +
+

+System scalability is the ability of an application to sustain its +performance per hardware unit (such as a CPU) with the increasing number of +these units. In other words, good system scalability means that doubling the +number of processors will roughly double the application's throughput (dashed +green line). We assume here that the underlying operating system also scales +well. Good system scalability allows you to initially run an application on +the smallest system possible, while retaining the ability to move that +application to a larger system if necessary, without excessive effort or +expense. That is, an application need not be rewritten or even undergo a +major porting effort when changing system size. +

+Although scalability and performance are more important in the case of server +IAs, they should also be considered for some client applications (such as +benchmark load generators). +

+ + +

1.3 Concurrency

+ +

+Concurrency reflects the parallelism in a system. The two unrelated types +are virtual concurrency and real concurrency. +

    +
  • Virtual (or apparent) concurrency is the number of simultaneous +connections that a system supports. +

    +
  • Real concurrency is the number of hardware devices, including +CPUs, network cards, and disks, that actually allow a system to perform +tasks in parallel. +
+

+An IA must provide virtual concurrency in order to serve many users +simultaneously. +To achieve maximum performance and scalability in doing so, the number of +programming entities that an IA creates to be scheduled by the OS kernel +should be +kept close to (within an order of magnitude of) the real concurrency found on +the system. These programming entities scheduled by the kernel are known as +kernel execution vehicles. Examples of kernel execution vehicles +include Solaris lightweight processes and IRIX kernel threads. +In other words, the number of kernel execution vehicles should be dictated by +the system size and not by the number of simultaneous connections. +

+ +

2. Existing Architectures

+

+There are a few different architectures that are commonly used by IAs. +These include the Multi-Process, +Multi-Threaded, and Event-Driven State Machine +architectures. +

+ +

2.1 Multi-Process Architecture

+ +

+In the Multi-Process (MP) architecture, an individual process is +dedicated to each simultaneous connection. +A process performs all of a transaction's initialization steps +and services a connection completely before moving on to service +a new connection. +

+User sessions in IAs are relatively independent; therefore, no +synchronization between processes handling different connections is +necessary. Because each process has its own private address space, +this architecture is very robust. If a process serving one of the connections +crashes, the other sessions will not be affected. However, to serve many +concurrent connections, an equal number of processes must be employed. +Because processes are kernel entities (and are in fact the heaviest ones), +the number of kernel entities will be at least as large as the number of +concurrent sessions. On most systems, good performance will not be achieved +when more than a few hundred processes are created because of the high +context-switching overhead. In other words, MP applications have poor load +scalability. +

+On the other hand, MP applications have very good system scalability, because +no resources are shared among different processes and there is no +synchronization overhead. +

+The Apache Web Server 1.x ([Reference 1]) uses the MP +architecture on UNIX systems. +

+ +

2.2 Multi-Threaded Architecture

+ +

+In the Multi-Threaded (MT) architecture, multiple independent threads +of control are employed within a single shared address space. Like a +process in the MP architecture, each thread performs all of a +transaction's initialization steps and services a connection completely +before moving on to service a new connection. +

+Many modern UNIX operating systems implement a many-to-few model when +mapping user-level threads to kernel entities. In this model, an +arbitrarily large number of user-level threads is multiplexed onto a +lesser number of kernel execution vehicles. Kernel execution +vehicles are also known as virtual processors. Whenever a user-level +thread makes a blocking system call, the kernel execution vehicle it is using +will become blocked in the kernel. If there are no other non-blocked kernel +execution vehicles and there are other runnable user-level threads, a new +kernel execution vehicle will be created automatically. This prevents the +application from blocking when it can continue to make useful forward +progress. +

+Because IAs are by nature network I/O driven, all concurrent sessions block on +network I/O at various points. As a result, the number of virtual processors +created in the kernel grows close to the number of user-level threads +(or simultaneous connections). When this occurs, the many-to-few model +effectively degenerates to a one-to-one model. Again, like in +the MP architecture, the number of kernel execution vehicles is dictated by +the number of simultaneous connections rather than by number of CPUs. This +reduces an application's load scalability. However, because kernel threads +(lightweight processes) use fewer resources and are more light-weight than +traditional UNIX processes, an MT application should scale better with load +than an MP application. +

+Unexpectedly, the small number of virtual processors sharing the same address +space in the MT architecture destroys an application's system scalability +because of contention among the threads on various locks. Even if an +application itself is carefully +optimized to avoid lock contention around its own global data (a non-trivial +task), there are still standard library functions and system calls +that use common resources hidden from the application. For example, +on many platforms thread safety of memory allocation routines +(malloc(3), free(3), and so on) is achieved by using a single +global lock. Another example is a per-process file descriptor table. +This common resource table is shared by all kernel execution vehicles within +the same process and must be protected when one modifies it via +certain system calls (such as open(2), close(2), and so on). +In addition to that, maintaining the caches coherent +among CPUs on multiprocessor systems hurts performance when different threads +running on different CPUs modify data items on the same cache line. +

+In order to improve load scalability, some applications employ a different +type of MT architecture: they create one or more thread(s) per task +rather than one thread per connection. For example, one small group +of threads may be responsible for accepting client connections, another +for request processing, and yet another for serving responses. The main +advantage of this architecture is that it eliminates the tight coupling +between the number of threads and number of simultaneous connections. However, +in this architecture, different task-specific thread groups must share common +work queues that must be protected by mutual exclusion locks (a typical +producer-consumer problem). This adds synchronization overhead that causes an +application to perform badly on multiprocessor systems. In other words, in +this architecture, the application's system scalability is sacrificed for the +sake of load scalability. +

+Of course, the usual nightmares of threaded programming, including data +corruption, deadlocks, and race conditions, also make the MT architecture (in any +form) far from simple to use. +

+ + +

2.3 Event-Driven State Machine Architecture

+ +

+In the Event-Driven State Machine (EDSM) architecture, a single process +is employed to concurrently process multiple connections. The basics of this +architecture are described in Comer and Stevens +[Reference 2]. +The EDSM architecture performs one basic data-driven step associated with +a particular connection at a time, thus multiplexing many concurrent +connections. The process operates as a state machine that receives an event +and then reacts to it. +

+In the idle state the EDSM calls select(2) or poll(2) to +wait for network I/O events. When a particular file descriptor is ready for +I/O, the EDSM completes the corresponding basic step (usually by invoking a +handler function) and starts the next one. This architecture uses +non-blocking system calls to perform asynchronous network I/O operations. +For more details on non-blocking I/O see Stevens +[Reference 3]. +

+To take advantage of hardware parallelism (real concurrency), multiple +identical processes may be created. This is called Symmetric Multi-Process +EDSM and is used, for example, in the Zeus Web Server +([Reference 4]). To more efficiently multiplex disk I/O, +special "helper" processes may be created. This is called Asymmetric +Multi-Process EDSM and was proposed for Web servers by Druschel +and others [Reference 5]. +

+EDSM is probably the most scalable architecture for IAs. +Because the number of simultaneous connections (virtual concurrency) is +completely decoupled from the number of kernel execution vehicles (processes), +this architecture has very good load scalability. It requires only minimal +user-level resources to create and maintain an additional connection. +

+Like MP applications, Multi-Process EDSM has very good system scalability +because no resources are shared among different processes and there is no +synchronization overhead. +

+Unfortunately, the EDSM architecture is monolithic rather than based on the +concept of threads, so new applications generally need to be implemented from +the ground up. In effect, the EDSM architecture simulates threads and their +stacks the hard way. +

+ + +

3. State Threads Library

+ +

+The State Threads library combines the advantages of all of the above +architectures. The interface preserves the programming simplicity of thread +abstraction, allowing each simultaneous connection to be treated as a separate +thread of execution within a single process. The underlying implementation is +close to the EDSM architecture as the state of each particular concurrent +session is saved in a separate memory segment. +

+ +

3.1 State Changes and Scheduling

+

+The state of each concurrent session includes its stack environment +(stack pointer, program counter, CPU registers) and its stack. Conceptually, +a thread context switch can be viewed as a process changing its state. There +are no kernel entities involved other than processes. +Unlike other general-purpose threading libraries, the State Threads library +is fully deterministic. The thread context switch (process state change) can +only happen in a well-known set of functions (at I/O points or at explicit +synchronization points). As a result, process-specific global data does not +have to be protected by mutual exclusion locks in most cases. The entire +application is free to use all the static variables and non-reentrant library +functions it wants, greatly simplifying programming and debugging while +increasing performance. This is somewhat similar to a co-routine model +(co-operatively multitasked threads), except that no explicit yield is needed +-- +sooner or later, a thread performs a blocking I/O operation and thus surrenders +control. All threads of execution (simultaneous connections) have the +same priority, so scheduling is non-preemptive, like in the EDSM architecture. +Because IAs are data-driven (processing is limited by the size of network +buffers and data arrival rates), scheduling is non-time-slicing. +

+Only two types of external events are handled by the library's +scheduler, because only these events can be detected by +select(2) or poll(2): I/O events (a file descriptor is ready +for I/O) and time events +(some timeout has expired). However, other types of events (such as +a signal sent to a process) can also be handled by converting them to I/O +events. For example, a signal handling function can perform a write to a pipe +(write(2) is reentrant/asynchronous-safe), thus converting a signal +event to an I/O event. +

+To take advantage of hardware parallelism, as in the EDSM architecture, +multiple processes can be created in either a symmetric or asymmetric manner. +Process management is not in the library's scope but instead is left up to the +application. +

+There are several general-purpose threading libraries that implement a +many-to-one model (many user-level threads to one kernel execution +vehicle), using the same basic techniques as the State Threads library +(non-blocking I/O, event-driven scheduler, and so on). For an example, see GNU +Portable Threads ([Reference 6]). Because they are +general-purpose, these libraries have different objectives than the State +Threads library. The State Threads library is not a general-purpose +threading library, +but rather an application library that targets only certain types of +applications (IAs) in order to achieve the highest possible performance and +scalability for those applications. +

+ +

3.2 Scalability

+

+State threads are very lightweight user-level entities, and therefore creating +and maintaining user connections requires minimal resources. An application +using the State Threads library scales very well with the increasing number +of connections. +

+On multiprocessor systems an application should create multiple processes +to take advantage of hardware parallelism. Using multiple separate processes +is the only way to achieve the highest possible system scalability. +This is because duplicating per-process resources is the only way to avoid +significant synchronization overhead on multiprocessor systems. Creating +separate UNIX processes naturally offers resource duplication. Again, +as in the EDSM architecture, there is no connection between the number of +simultaneous connections (which may be very large and changes within a wide +range) and the number of kernel entities (which is usually small and constant). +In other words, the State Threads library makes it possible to multiplex a +large number of simultaneous connections onto a much smaller number of +separate processes, thus allowing an application to scale well with both +the load and system size. +

+ +

3.3 Performance

+

+Performance is one of the library's main objectives. The State Threads +library is implemented to minimize the number of system calls and +to make thread creation and context switching as fast as possible. +For example, per-thread signal mask does not exist (unlike +POSIX threads), so there is no need to save and restore a process's +signal mask on every thread context switch. This eliminates two system +calls per context switch. Signal events can be handled much more +efficiently by converting them to I/O events (see above). +

+ +

3.4 Portability

+

+The library uses the same general, underlying concepts as the EDSM +architecture, including non-blocking I/O, file descriptors, and +I/O multiplexing. These concepts are available in some form on most +UNIX platforms, making the library very portable across many +flavors of UNIX. There are only a few platform-dependent sections in the +source. +

+ +

3.5 State Threads and NSPR

+

+The State Threads library is a derivative of the Netscape Portable +Runtime library (NSPR) [Reference 7]. The primary goal of +NSPR is to provide a platform-independent layer for system facilities, +where system facilities include threads, thread synchronization, and I/O. +Performance and scalability are not the main concern of NSPR. The +State Threads library addresses performance and scalability while +remaining much smaller than NSPR. It is contained in 8 source files +as opposed to more than 400, but provides all the functionality that +is needed to write efficient IAs on UNIX-like platforms. +

+ + + + + + + + + + + + + + + + + + + + + + + + + + + +
                                        NSPR        State Threads
Lines of code                           ~150,000    ~3000
Dynamic library size (debug version):
    IRIX                                ~700 KB     ~60 KB
    Linux                               ~900 KB     ~70 KB
+

+ +

Conclusion

+

+State Threads is an application library which provides a foundation for +writing Internet Applications. To summarize, it has the +following advantages: +

+

    +
  • It allows the design of fast and highly scalable applications. An +application will scale well with both load and number of CPUs. +

    +

  • It greatly simplifies application programming and debugging because, as a +rule, no mutual exclusion locking is necessary and the entire application is +free to use static variables and non-reentrant library functions. +
+

+The library's main limitation: +

+

    +
  • All I/O operations on sockets must use the State Threads library's I/O +functions because only those functions perform thread scheduling and prevent +the application's processes from blocking. +
+

+ +

References

+
    + +
  1. Apache Software Foundation, +http://www.apache.org. + +
  2. Douglas E. Comer, David L. Stevens, Internetworking With TCP/IP, +Vol. III: Client-Server Programming And Applications, Second Edition, +Ch. 8, 12. + +
  3. W. Richard Stevens, UNIX Network Programming, Second Edition, +Vol. 1, Ch. 15. + +
  4. Zeus Technology Limited, +http://www.zeus.co.uk. + +
  5. Peter Druschel, Vivek S. Pai, Willy Zwaenepoel, + +Flash: An Efficient and Portable Web Server. In Proceedings of the +USENIX 1999 Annual Technical Conference, Monterey, CA, June 1999. + +
  6. GNU Portable Threads, +http://www.gnu.org/software/pth/. + +
  7. Netscape Portable Runtime, +http://www.mozilla.org/docs/refList/refNSPR/. +
+ +

Other resources covering various architectural issues in IAs

+
    +
  1. Dan Kegel, The C10K problem, +http://www.kegel.com/c10k.html. +
  2. +
  3. James C. Hu, Douglas C. Schmidt, Irfan Pyarali, JAWS: Understanding +High Performance Web Systems, +http://www.cs.wustl.edu/~jxh/research/research.html.
  4. +
+

+


+

+ +

Portions created by SGI are Copyright © 2000 +Silicon Graphics, Inc. All rights reserved.
+

+ + + + diff --git a/trunk/research/st-1.9/docs/timeout_heap.txt b/trunk/research/st-1.9/docs/timeout_heap.txt new file mode 100644 index 000000000..1582dc129 --- /dev/null +++ b/trunk/research/st-1.9/docs/timeout_heap.txt @@ -0,0 +1,60 @@ +How the timeout heap works + +As of version 1.5, the State Threads Library represents the queue of +sleeping threads using a heap data structure rather than a sorted +linked list. This improves performance when there is a large number +of sleeping threads, since insertion into a heap takes O(log N) time +while insertion into a sorted list takes O(N) time. For example, in +one test 1000 threads were created, each thread called st_usleep() +with a random time interval, and then all the threads were +immediately interrupted and joined before the sleeps had a chance to +finish. The whole process was repeated 1000 times, for a total of a +million sleep queue insertions and removals. With the old list-based +sleep queue, this test took 100 seconds; now it takes only 12 seconds. + +Heap data structures are typically based on dynamically resized +arrays. However, since the existing ST code base was very nicely +structured around linking the thread objects into pointer-based lists +without the need for any auxiliary data structures, implementing the +heap using a similar nodes-and-pointers based approach seemed more +appropriate for ST than introducing a separate array. + +Thus, the new ST timeout heap works by organizing the existing +_st_thread_t objects in a balanced binary tree, just as they were +previously organized into a doubly-linked, sorted list. The global +_ST_SLEEPQ variable, formerly a linked list head, is now simply a +pointer to the root of this tree, and the root node of the tree is the +thread with the earliest timeout. Each thread object has two child +pointers, "left" and "right", pointing to threads with later timeouts. 
+ +Each node in the tree is numbered with an integer index, corresponding +to the array index in an array-based heap, and the tree is kept fully +balanced and left-adjusted at all times. In other words, the tree +consists of any number of fully populated top levels, followed by a +single bottom level which may be partially populated, such that any +existing nodes form a contiguous block to the left and the spaces for +missing nodes form a contiguous block to the right. For example, if +there are nine threads waiting for a timeout, they are numbered and +arranged in a tree exactly as follows: + + 1 + / \ + 2 3 + / \ / \ + 4 5 6 7 + / \ + 8 9 + +Each node has either no children, only a left child, or both a left +and a right child. Children always time out later than their parents +(this is called the "heap invariant"), but when a node has two +children, their mutual order is unspecified - the left child may time +out before or after the right child. If a node is numbered N, its +left child is numbered 2N, and its right child is numbered 2N+1. + +There is no pointer from a child to its parent; all pointers point +downward. Additions and deletions both work by starting at the root +and traversing the tree towards the leaves, going left or right +according to the binary digits forming the index of the destination +node. As nodes are added or deleted, existing nodes are rearranged to +maintain the heap invariant. diff --git a/trunk/research/st-1.9/event.c b/trunk/research/st-1.9/event.c new file mode 100644 index 000000000..cb14aed5a --- /dev/null +++ b/trunk/research/st-1.9/event.c @@ -0,0 +1,1449 @@ +/* + * The contents of this file are subject to the Mozilla Public + * License Version 1.1 (the "License"); you may not use this file + * except in compliance with the License. 
You may obtain a copy of + * the License at http://www.mozilla.org/MPL/ + * + * Software distributed under the License is distributed on an "AS + * IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or + * implied. See the License for the specific language governing + * rights and limitations under the License. + * + * The Original Code is the Netscape Portable Runtime library. + * + * The Initial Developer of the Original Code is Netscape + * Communications Corporation. Portions created by Netscape are + * Copyright (C) 1994-2000 Netscape Communications Corporation. All + * Rights Reserved. + * + * Contributor(s): Silicon Graphics, Inc. + * Yahoo! Inc. + * + * Alternatively, the contents of this file may be used under the + * terms of the GNU General Public License Version 2 or later (the + * "GPL"), in which case the provisions of the GPL are applicable + * instead of those above. If you wish to allow use of your + * version of this file only under the terms of the GPL and not to + * allow others to use your version of this file under the MPL, + * indicate your decision by deleting the provisions above and + * replace them with the notice and other provisions required by + * the GPL. If you do not delete the provisions above, a recipient + * may use your version of this file under either the MPL or the + * GPL. 
+ */ + +#include +#include +#include +#include +#include +#include +#include "common.h" + +#ifdef MD_HAVE_KQUEUE +#include +#endif +#ifdef MD_HAVE_EPOLL +#include +#endif + +#if defined(USE_POLL) && !defined(MD_HAVE_POLL) +/* Force poll usage if explicitly asked for it */ +#define MD_HAVE_POLL +#endif + + +static struct _st_seldata { + fd_set fd_read_set, fd_write_set, fd_exception_set; + int fd_ref_cnts[FD_SETSIZE][3]; + int maxfd; +} *_st_select_data; + +#define _ST_SELECT_MAX_OSFD (_st_select_data->maxfd) +#define _ST_SELECT_READ_SET (_st_select_data->fd_read_set) +#define _ST_SELECT_WRITE_SET (_st_select_data->fd_write_set) +#define _ST_SELECT_EXCEP_SET (_st_select_data->fd_exception_set) +#define _ST_SELECT_READ_CNT(fd) (_st_select_data->fd_ref_cnts[fd][0]) +#define _ST_SELECT_WRITE_CNT(fd) (_st_select_data->fd_ref_cnts[fd][1]) +#define _ST_SELECT_EXCEP_CNT(fd) (_st_select_data->fd_ref_cnts[fd][2]) + + +#ifdef MD_HAVE_POLL +static struct _st_polldata { + struct pollfd *pollfds; + int pollfds_size; + int fdcnt; +} *_st_poll_data; + +#define _ST_POLL_OSFD_CNT (_st_poll_data->fdcnt) +#define _ST_POLLFDS (_st_poll_data->pollfds) +#define _ST_POLLFDS_SIZE (_st_poll_data->pollfds_size) +#endif /* MD_HAVE_POLL */ + + +#ifdef MD_HAVE_KQUEUE +typedef struct _kq_fd_data { + int rd_ref_cnt; + int wr_ref_cnt; + int revents; +} _kq_fd_data_t; + +static struct _st_kqdata { + _kq_fd_data_t *fd_data; + struct kevent *evtlist; + struct kevent *addlist; + struct kevent *dellist; + int fd_data_size; + int evtlist_size; + int addlist_size; + int addlist_cnt; + int dellist_size; + int dellist_cnt; + int kq; + pid_t pid; +} *_st_kq_data; + +#ifndef ST_KQ_MIN_EVTLIST_SIZE +#define ST_KQ_MIN_EVTLIST_SIZE 64 +#endif + +#define _ST_KQ_READ_CNT(fd) (_st_kq_data->fd_data[fd].rd_ref_cnt) +#define _ST_KQ_WRITE_CNT(fd) (_st_kq_data->fd_data[fd].wr_ref_cnt) +#define _ST_KQ_REVENTS(fd) (_st_kq_data->fd_data[fd].revents) +#endif /* MD_HAVE_KQUEUE */ + + +#ifdef MD_HAVE_EPOLL +typedef struct 
_epoll_fd_data { + int rd_ref_cnt; + int wr_ref_cnt; + int ex_ref_cnt; + int revents; +} _epoll_fd_data_t; + +static struct _st_epolldata { + _epoll_fd_data_t *fd_data; + struct epoll_event *evtlist; + int fd_data_size; + int evtlist_size; + int evtlist_cnt; + int fd_hint; + int epfd; + pid_t pid; +} *_st_epoll_data; + +#ifndef ST_EPOLL_EVTLIST_SIZE +/* Not a limit, just a hint */ +#define ST_EPOLL_EVTLIST_SIZE 4096 +#endif + +#define _ST_EPOLL_READ_CNT(fd) (_st_epoll_data->fd_data[fd].rd_ref_cnt) +#define _ST_EPOLL_WRITE_CNT(fd) (_st_epoll_data->fd_data[fd].wr_ref_cnt) +#define _ST_EPOLL_EXCEP_CNT(fd) (_st_epoll_data->fd_data[fd].ex_ref_cnt) +#define _ST_EPOLL_REVENTS(fd) (_st_epoll_data->fd_data[fd].revents) + +#define _ST_EPOLL_READ_BIT(fd) (_ST_EPOLL_READ_CNT(fd) ? EPOLLIN : 0) +#define _ST_EPOLL_WRITE_BIT(fd) (_ST_EPOLL_WRITE_CNT(fd) ? EPOLLOUT : 0) +#define _ST_EPOLL_EXCEP_BIT(fd) (_ST_EPOLL_EXCEP_CNT(fd) ? EPOLLPRI : 0) +#define _ST_EPOLL_EVENTS(fd) \ + (_ST_EPOLL_READ_BIT(fd)|_ST_EPOLL_WRITE_BIT(fd)|_ST_EPOLL_EXCEP_BIT(fd)) + +#endif /* MD_HAVE_EPOLL */ + +_st_eventsys_t *_st_eventsys = NULL; + + +/***************************************** + * select event system + */ + +ST_HIDDEN int _st_select_init(void) +{ + _st_select_data = (struct _st_seldata *) malloc(sizeof(*_st_select_data)); + if (!_st_select_data) + return -1; + + memset(_st_select_data, 0, sizeof(*_st_select_data)); + _st_select_data->maxfd = -1; + + return 0; +} + +ST_HIDDEN int _st_select_pollset_add(struct pollfd *pds, int npds) +{ + struct pollfd *pd; + struct pollfd *epd = pds + npds; + + /* Do checks up front */ + for (pd = pds; pd < epd; pd++) { + if (pd->fd < 0 || pd->fd >= FD_SETSIZE || !pd->events || + (pd->events & ~(POLLIN | POLLOUT | POLLPRI))) { + errno = EINVAL; + return -1; + } + } + + for (pd = pds; pd < epd; pd++) { + if (pd->events & POLLIN) { + FD_SET(pd->fd, &_ST_SELECT_READ_SET); + _ST_SELECT_READ_CNT(pd->fd)++; + } + if (pd->events & POLLOUT) { + FD_SET(pd->fd, 
&_ST_SELECT_WRITE_SET); + _ST_SELECT_WRITE_CNT(pd->fd)++; + } + if (pd->events & POLLPRI) { + FD_SET(pd->fd, &_ST_SELECT_EXCEP_SET); + _ST_SELECT_EXCEP_CNT(pd->fd)++; + } + if (_ST_SELECT_MAX_OSFD < pd->fd) + _ST_SELECT_MAX_OSFD = pd->fd; + } + + return 0; +} + +ST_HIDDEN void _st_select_pollset_del(struct pollfd *pds, int npds) +{ + struct pollfd *pd; + struct pollfd *epd = pds + npds; + + for (pd = pds; pd < epd; pd++) { + if (pd->events & POLLIN) { + if (--_ST_SELECT_READ_CNT(pd->fd) == 0) + FD_CLR(pd->fd, &_ST_SELECT_READ_SET); + } + if (pd->events & POLLOUT) { + if (--_ST_SELECT_WRITE_CNT(pd->fd) == 0) + FD_CLR(pd->fd, &_ST_SELECT_WRITE_SET); + } + if (pd->events & POLLPRI) { + if (--_ST_SELECT_EXCEP_CNT(pd->fd) == 0) + FD_CLR(pd->fd, &_ST_SELECT_EXCEP_SET); + } + } +} + +ST_HIDDEN void _st_select_find_bad_fd(void) +{ + _st_clist_t *q; + _st_pollq_t *pq; + int notify; + struct pollfd *pds, *epds; + int pq_max_osfd, osfd; + short events; + + _ST_SELECT_MAX_OSFD = -1; + + for (q = _ST_IOQ.next; q != &_ST_IOQ; q = q->next) { + pq = _ST_POLLQUEUE_PTR(q); + notify = 0; + epds = pq->pds + pq->npds; + pq_max_osfd = -1; + + for (pds = pq->pds; pds < epds; pds++) { + osfd = pds->fd; + pds->revents = 0; + if (pds->events == 0) + continue; + if (fcntl(osfd, F_GETFL, 0) < 0) { + pds->revents = POLLNVAL; + notify = 1; + } + if (osfd > pq_max_osfd) { + pq_max_osfd = osfd; + } + } + + if (notify) { + ST_REMOVE_LINK(&pq->links); + pq->on_ioq = 0; + /* + * Decrement the count of descriptors for each descriptor/event + * because this I/O request is being removed from the ioq + */ + for (pds = pq->pds; pds < epds; pds++) { + osfd = pds->fd; + events = pds->events; + if (events & POLLIN) { + if (--_ST_SELECT_READ_CNT(osfd) == 0) { + FD_CLR(osfd, &_ST_SELECT_READ_SET); + } + } + if (events & POLLOUT) { + if (--_ST_SELECT_WRITE_CNT(osfd) == 0) { + FD_CLR(osfd, &_ST_SELECT_WRITE_SET); + } + } + if (events & POLLPRI) { + if (--_ST_SELECT_EXCEP_CNT(osfd) == 0) { + FD_CLR(osfd, 
&_ST_SELECT_EXCEP_SET); + } + } + } + + if (pq->thread->flags & _ST_FL_ON_SLEEPQ) + _ST_DEL_SLEEPQ(pq->thread); + pq->thread->state = _ST_ST_RUNNABLE; + _ST_ADD_RUNQ(pq->thread); + } else { + if (_ST_SELECT_MAX_OSFD < pq_max_osfd) + _ST_SELECT_MAX_OSFD = pq_max_osfd; + } + } +} + +ST_HIDDEN void _st_select_dispatch(void) +{ + struct timeval timeout, *tvp; + fd_set r, w, e; + fd_set *rp, *wp, *ep; + int nfd, pq_max_osfd, osfd; + _st_clist_t *q; + st_utime_t min_timeout; + _st_pollq_t *pq; + int notify; + struct pollfd *pds, *epds; + short events, revents; + + /* + * Assignment of fd_sets + */ + r = _ST_SELECT_READ_SET; + w = _ST_SELECT_WRITE_SET; + e = _ST_SELECT_EXCEP_SET; + + rp = &r; + wp = &w; + ep = &e; + + if (_ST_SLEEPQ == NULL) { + tvp = NULL; + } else { + min_timeout = (_ST_SLEEPQ->due <= _ST_LAST_CLOCK) ? 0 : + (_ST_SLEEPQ->due - _ST_LAST_CLOCK); + timeout.tv_sec = (int) (min_timeout / 1000000); + timeout.tv_usec = (int) (min_timeout % 1000000); + tvp = &timeout; + } + + /* Check for I/O operations */ + nfd = select(_ST_SELECT_MAX_OSFD + 1, rp, wp, ep, tvp); + + /* Notify threads that are associated with the selected descriptors */ + if (nfd > 0) { + _ST_SELECT_MAX_OSFD = -1; + for (q = _ST_IOQ.next; q != &_ST_IOQ; q = q->next) { + pq = _ST_POLLQUEUE_PTR(q); + notify = 0; + epds = pq->pds + pq->npds; + pq_max_osfd = -1; + + for (pds = pq->pds; pds < epds; pds++) { + osfd = pds->fd; + events = pds->events; + revents = 0; + if ((events & POLLIN) && FD_ISSET(osfd, rp)) { + revents |= POLLIN; + } + if ((events & POLLOUT) && FD_ISSET(osfd, wp)) { + revents |= POLLOUT; + } + if ((events & POLLPRI) && FD_ISSET(osfd, ep)) { + revents |= POLLPRI; + } + pds->revents = revents; + if (revents) { + notify = 1; + } + if (osfd > pq_max_osfd) { + pq_max_osfd = osfd; + } + } + if (notify) { + ST_REMOVE_LINK(&pq->links); + pq->on_ioq = 0; + /* + * Decrement the count of descriptors for each descriptor/event + * because this I/O request is being removed from the ioq + */ + 
for (pds = pq->pds; pds < epds; pds++) { + osfd = pds->fd; + events = pds->events; + if (events & POLLIN) { + if (--_ST_SELECT_READ_CNT(osfd) == 0) { + FD_CLR(osfd, &_ST_SELECT_READ_SET); + } + } + if (events & POLLOUT) { + if (--_ST_SELECT_WRITE_CNT(osfd) == 0) { + FD_CLR(osfd, &_ST_SELECT_WRITE_SET); + } + } + if (events & POLLPRI) { + if (--_ST_SELECT_EXCEP_CNT(osfd) == 0) { + FD_CLR(osfd, &_ST_SELECT_EXCEP_SET); + } + } + } + + if (pq->thread->flags & _ST_FL_ON_SLEEPQ) + _ST_DEL_SLEEPQ(pq->thread); + pq->thread->state = _ST_ST_RUNNABLE; + _ST_ADD_RUNQ(pq->thread); + } else { + if (_ST_SELECT_MAX_OSFD < pq_max_osfd) + _ST_SELECT_MAX_OSFD = pq_max_osfd; + } + } + } else if (nfd < 0) { + /* + * It can happen when a thread closes file descriptor + * that is being used by some other thread -- BAD! + */ + if (errno == EBADF) + _st_select_find_bad_fd(); + } +} + +ST_HIDDEN int _st_select_fd_new(int osfd) +{ + if (osfd >= FD_SETSIZE) { + errno = EMFILE; + return -1; + } + + return 0; +} + +ST_HIDDEN int _st_select_fd_close(int osfd) +{ + if (_ST_SELECT_READ_CNT(osfd) || _ST_SELECT_WRITE_CNT(osfd) || + _ST_SELECT_EXCEP_CNT(osfd)) { + errno = EBUSY; + return -1; + } + + return 0; +} + +ST_HIDDEN int _st_select_fd_getlimit(void) +{ + return FD_SETSIZE; +} + +static _st_eventsys_t _st_select_eventsys = { + "select", + ST_EVENTSYS_SELECT, + _st_select_init, + _st_select_dispatch, + _st_select_pollset_add, + _st_select_pollset_del, + _st_select_fd_new, + _st_select_fd_close, + _st_select_fd_getlimit +}; + + +#ifdef MD_HAVE_POLL +/***************************************** + * poll event system + */ + +ST_HIDDEN int _st_poll_init(void) +{ + _st_poll_data = (struct _st_polldata *) malloc(sizeof(*_st_poll_data)); + if (!_st_poll_data) + return -1; + + _ST_POLLFDS = (struct pollfd *) malloc(ST_MIN_POLLFDS_SIZE * + sizeof(struct pollfd)); + if (!_ST_POLLFDS) { + free(_st_poll_data); + _st_poll_data = NULL; + return -1; + } + _ST_POLLFDS_SIZE = ST_MIN_POLLFDS_SIZE; + 
_ST_POLL_OSFD_CNT = 0; + + return 0; +} + +ST_HIDDEN int _st_poll_pollset_add(struct pollfd *pds, int npds) +{ + struct pollfd *pd; + struct pollfd *epd = pds + npds; + + for (pd = pds; pd < epd; pd++) { + if (pd->fd < 0 || !pd->events) { + errno = EINVAL; + return -1; + } + } + + _ST_POLL_OSFD_CNT += npds; + + return 0; +} + +/* ARGSUSED */ +ST_HIDDEN void _st_poll_pollset_del(struct pollfd *pds, int npds) +{ + _ST_POLL_OSFD_CNT -= npds; + ST_ASSERT(_ST_POLL_OSFD_CNT >= 0); +} + +ST_HIDDEN void _st_poll_dispatch(void) +{ + int timeout, nfd; + _st_clist_t *q; + st_utime_t min_timeout; + _st_pollq_t *pq; + struct pollfd *pds, *epds, *pollfds; + + /* + * Build up the array of struct pollfd to wait on. + * If existing array is not big enough, release it and allocate a new one. + */ + ST_ASSERT(_ST_POLL_OSFD_CNT >= 0); + if (_ST_POLL_OSFD_CNT > _ST_POLLFDS_SIZE) { + free(_ST_POLLFDS); + _ST_POLLFDS = (struct pollfd *) malloc((_ST_POLL_OSFD_CNT + 10) * + sizeof(struct pollfd)); + ST_ASSERT(_ST_POLLFDS != NULL); + _ST_POLLFDS_SIZE = _ST_POLL_OSFD_CNT + 10; + } + pollfds = _ST_POLLFDS; + + /* Gather all descriptors into one array */ + for (q = _ST_IOQ.next; q != &_ST_IOQ; q = q->next) { + pq = _ST_POLLQUEUE_PTR(q); + memcpy(pollfds, pq->pds, sizeof(struct pollfd) * pq->npds); + pollfds += pq->npds; + } + ST_ASSERT(pollfds <= _ST_POLLFDS + _ST_POLLFDS_SIZE); + + if (_ST_SLEEPQ == NULL) { + timeout = -1; + } else { + min_timeout = (_ST_SLEEPQ->due <= _ST_LAST_CLOCK) ? 
0 : + (_ST_SLEEPQ->due - _ST_LAST_CLOCK); + timeout = (int) (min_timeout / 1000); + } + + /* Check for I/O operations */ + nfd = poll(_ST_POLLFDS, _ST_POLL_OSFD_CNT, timeout); + + /* Notify threads that are associated with the selected descriptors */ + if (nfd > 0) { + pollfds = _ST_POLLFDS; + for (q = _ST_IOQ.next; q != &_ST_IOQ; q = q->next) { + pq = _ST_POLLQUEUE_PTR(q); + epds = pollfds + pq->npds; + for (pds = pollfds; pds < epds; pds++) { + if (pds->revents) + break; + } + if (pds < epds) { + memcpy(pq->pds, pollfds, sizeof(struct pollfd) * pq->npds); + ST_REMOVE_LINK(&pq->links); + pq->on_ioq = 0; + + if (pq->thread->flags & _ST_FL_ON_SLEEPQ) + _ST_DEL_SLEEPQ(pq->thread); + pq->thread->state = _ST_ST_RUNNABLE; + _ST_ADD_RUNQ(pq->thread); + + _ST_POLL_OSFD_CNT -= pq->npds; + ST_ASSERT(_ST_POLL_OSFD_CNT >= 0); + } + pollfds = epds; + } + } +} + +/* ARGSUSED */ +ST_HIDDEN int _st_poll_fd_new(int osfd) +{ + return 0; +} + +/* ARGSUSED */ +ST_HIDDEN int _st_poll_fd_close(int osfd) +{ + /* + * We don't maintain I/O counts for poll event system + * so nothing to check here. + */ + return 0; +} + +ST_HIDDEN int _st_poll_fd_getlimit(void) +{ + /* zero means no specific limit */ + return 0; +} + +static _st_eventsys_t _st_poll_eventsys = { + "poll", + ST_EVENTSYS_POLL, + _st_poll_init, + _st_poll_dispatch, + _st_poll_pollset_add, + _st_poll_pollset_del, + _st_poll_fd_new, + _st_poll_fd_close, + _st_poll_fd_getlimit +}; +#endif /* MD_HAVE_POLL */ + + +#ifdef MD_HAVE_KQUEUE +/***************************************** + * kqueue event system + */ + +ST_HIDDEN int _st_kq_init(void) +{ + int err = 0; + int rv = 0; + + _st_kq_data = (struct _st_kqdata *) calloc(1, sizeof(*_st_kq_data)); + if (!_st_kq_data) + return -1; + + if ((_st_kq_data->kq = kqueue()) < 0) { + err = errno; + rv = -1; + goto cleanup_kq; + } + fcntl(_st_kq_data->kq, F_SETFD, FD_CLOEXEC); + _st_kq_data->pid = getpid(); + + /* + * Allocate file descriptor data array. 
+ * FD_SETSIZE looks like good initial size. + */ + _st_kq_data->fd_data_size = FD_SETSIZE; + _st_kq_data->fd_data = (_kq_fd_data_t *)calloc(_st_kq_data->fd_data_size, + sizeof(_kq_fd_data_t)); + if (!_st_kq_data->fd_data) { + err = errno; + rv = -1; + goto cleanup_kq; + } + + /* Allocate event lists */ + _st_kq_data->evtlist_size = ST_KQ_MIN_EVTLIST_SIZE; + _st_kq_data->evtlist = (struct kevent *)malloc(_st_kq_data->evtlist_size * + sizeof(struct kevent)); + _st_kq_data->addlist_size = ST_KQ_MIN_EVTLIST_SIZE; + _st_kq_data->addlist = (struct kevent *)malloc(_st_kq_data->addlist_size * + sizeof(struct kevent)); + _st_kq_data->dellist_size = ST_KQ_MIN_EVTLIST_SIZE; + _st_kq_data->dellist = (struct kevent *)malloc(_st_kq_data->dellist_size * + sizeof(struct kevent)); + if (!_st_kq_data->evtlist || !_st_kq_data->addlist || + !_st_kq_data->dellist) { + err = ENOMEM; + rv = -1; + } + + cleanup_kq: + if (rv < 0) { + if (_st_kq_data->kq >= 0) + close(_st_kq_data->kq); + free(_st_kq_data->fd_data); + free(_st_kq_data->evtlist); + free(_st_kq_data->addlist); + free(_st_kq_data->dellist); + free(_st_kq_data); + _st_kq_data = NULL; + errno = err; + } + + return rv; +} + +ST_HIDDEN int _st_kq_fd_data_expand(int maxfd) +{ + _kq_fd_data_t *ptr; + int n = _st_kq_data->fd_data_size; + + while (maxfd >= n) + n <<= 1; + + ptr = (_kq_fd_data_t *)realloc(_st_kq_data->fd_data, + n * sizeof(_kq_fd_data_t)); + if (!ptr) + return -1; + + memset(ptr + _st_kq_data->fd_data_size, 0, + (n - _st_kq_data->fd_data_size) * sizeof(_kq_fd_data_t)); + + _st_kq_data->fd_data = ptr; + _st_kq_data->fd_data_size = n; + + return 0; +} + +ST_HIDDEN int _st_kq_addlist_expand(int avail) +{ + struct kevent *ptr; + int n = _st_kq_data->addlist_size; + + while (avail > n - _st_kq_data->addlist_cnt) + n <<= 1; + + ptr = (struct kevent *)realloc(_st_kq_data->addlist, + n * sizeof(struct kevent)); + if (!ptr) + return -1; + + _st_kq_data->addlist = ptr; + _st_kq_data->addlist_size = n; + + /* + * Try to expand 
the result event list too + * (although we don't have to do it). + */ + ptr = (struct kevent *)realloc(_st_kq_data->evtlist, + n * sizeof(struct kevent)); + if (ptr) { + _st_kq_data->evtlist = ptr; + _st_kq_data->evtlist_size = n; + } + + return 0; +} + +ST_HIDDEN void _st_kq_addlist_add(const struct kevent *kev) +{ + ST_ASSERT(_st_kq_data->addlist_cnt < _st_kq_data->addlist_size); + memcpy(_st_kq_data->addlist + _st_kq_data->addlist_cnt, kev, + sizeof(struct kevent)); + _st_kq_data->addlist_cnt++; +} + +ST_HIDDEN void _st_kq_dellist_add(const struct kevent *kev) +{ + int n = _st_kq_data->dellist_size; + + if (_st_kq_data->dellist_cnt >= n) { + struct kevent *ptr; + + n <<= 1; + ptr = (struct kevent *)realloc(_st_kq_data->dellist, + n * sizeof(struct kevent)); + if (!ptr) { + /* See comment in _st_kq_pollset_del() */ + return; + } + + _st_kq_data->dellist = ptr; + _st_kq_data->dellist_size = n; + } + + memcpy(_st_kq_data->dellist + _st_kq_data->dellist_cnt, kev, + sizeof(struct kevent)); + _st_kq_data->dellist_cnt++; +} + +ST_HIDDEN int _st_kq_pollset_add(struct pollfd *pds, int npds) +{ + struct kevent kev; + struct pollfd *pd; + struct pollfd *epd = pds + npds; + + /* + * Pollset adding is "atomic". That is, either it succeeded for + * all descriptors in the set or it failed. It means that we + * need to do all the checks up front so we don't have to + * "unwind" if adding of one of the descriptors failed. + */ + for (pd = pds; pd < epd; pd++) { + /* POLLIN and/or POLLOUT must be set, but nothing else */ + if (pd->fd < 0 || !pd->events || (pd->events & ~(POLLIN | POLLOUT))) { + errno = EINVAL; + return -1; + } + if (pd->fd >= _st_kq_data->fd_data_size && + _st_kq_fd_data_expand(pd->fd) < 0) + return -1; + } + + /* + * Make sure we have enough room in the addlist for twice as many + * descriptors as in the pollset (for both READ and WRITE filters). 
+ */ + npds <<= 1; + if (npds > _st_kq_data->addlist_size - _st_kq_data->addlist_cnt && + _st_kq_addlist_expand(npds) < 0) + return -1; + + for (pd = pds; pd < epd; pd++) { + if ((pd->events & POLLIN) && (_ST_KQ_READ_CNT(pd->fd)++ == 0)) { + memset(&kev, 0, sizeof(kev)); + kev.ident = pd->fd; + kev.filter = EVFILT_READ; +#ifdef NOTE_EOF + /* Make it behave like select() and poll() */ + kev.fflags = NOTE_EOF; +#endif + kev.flags = (EV_ADD | EV_ONESHOT); + _st_kq_addlist_add(&kev); + } + if ((pd->events & POLLOUT) && (_ST_KQ_WRITE_CNT(pd->fd)++ == 0)) { + memset(&kev, 0, sizeof(kev)); + kev.ident = pd->fd; + kev.filter = EVFILT_WRITE; + kev.flags = (EV_ADD | EV_ONESHOT); + _st_kq_addlist_add(&kev); + } + } + + return 0; +} + +ST_HIDDEN void _st_kq_pollset_del(struct pollfd *pds, int npds) +{ + struct kevent kev; + struct pollfd *pd; + struct pollfd *epd = pds + npds; + + /* + * It's OK if deleting fails because a descriptor will either be + * closed or fire only once (we set EV_ONESHOT flag). + */ + _st_kq_data->dellist_cnt = 0; + for (pd = pds; pd < epd; pd++) { + if ((pd->events & POLLIN) && (--_ST_KQ_READ_CNT(pd->fd) == 0)) { + memset(&kev, 0, sizeof(kev)); + kev.ident = pd->fd; + kev.filter = EVFILT_READ; + kev.flags = EV_DELETE; + _st_kq_dellist_add(&kev); + } + if ((pd->events & POLLOUT) && (--_ST_KQ_WRITE_CNT(pd->fd) == 0)) { + memset(&kev, 0, sizeof(kev)); + kev.ident = pd->fd; + kev.filter = EVFILT_WRITE; + kev.flags = EV_DELETE; + _st_kq_dellist_add(&kev); + } + } + + if (_st_kq_data->dellist_cnt > 0) { + /* + * We do "synchronous" kqueue deletes to avoid deleting + * closed descriptors and other possible problems. 
+ */ + int rv; + do { + /* This kevent() won't block since result list size is 0 */ + rv = kevent(_st_kq_data->kq, _st_kq_data->dellist, + _st_kq_data->dellist_cnt, NULL, 0, NULL); + } while (rv < 0 && errno == EINTR); + } +} + +ST_HIDDEN void _st_kq_dispatch(void) +{ + struct timespec timeout, *tsp; + struct kevent kev; + st_utime_t min_timeout; + _st_clist_t *q; + _st_pollq_t *pq; + struct pollfd *pds, *epds; + int nfd, i, osfd, notify, filter; + short events, revents; + + if (_ST_SLEEPQ == NULL) { + tsp = NULL; + } else { + min_timeout = (_ST_SLEEPQ->due <= _ST_LAST_CLOCK) ? 0 : + (_ST_SLEEPQ->due - _ST_LAST_CLOCK); + timeout.tv_sec = (time_t) (min_timeout / 1000000); + timeout.tv_nsec = (long) ((min_timeout % 1000000) * 1000); + tsp = &timeout; + } + + retry_kevent: + /* Check for I/O operations */ + nfd = kevent(_st_kq_data->kq, + _st_kq_data->addlist, _st_kq_data->addlist_cnt, + _st_kq_data->evtlist, _st_kq_data->evtlist_size, tsp); + + _st_kq_data->addlist_cnt = 0; + + if (nfd > 0) { + for (i = 0; i < nfd; i++) { + osfd = _st_kq_data->evtlist[i].ident; + filter = _st_kq_data->evtlist[i].filter; + + if (filter == EVFILT_READ) { + _ST_KQ_REVENTS(osfd) |= POLLIN; + } else if (filter == EVFILT_WRITE) { + _ST_KQ_REVENTS(osfd) |= POLLOUT; + } + if (_st_kq_data->evtlist[i].flags & EV_ERROR) { + if (_st_kq_data->evtlist[i].data == EBADF) { + _ST_KQ_REVENTS(osfd) |= POLLNVAL; + } else { + _ST_KQ_REVENTS(osfd) |= POLLERR; + } + } + } + + _st_kq_data->dellist_cnt = 0; + + for (q = _ST_IOQ.next; q != &_ST_IOQ; q = q->next) { + pq = _ST_POLLQUEUE_PTR(q); + notify = 0; + epds = pq->pds + pq->npds; + + for (pds = pq->pds; pds < epds; pds++) { + osfd = pds->fd; + events = pds->events; + revents = (short)(_ST_KQ_REVENTS(osfd) & ~(POLLIN | POLLOUT)); + if ((events & POLLIN) && (_ST_KQ_REVENTS(osfd) & POLLIN)) { + revents |= POLLIN; + } + if ((events & POLLOUT) && (_ST_KQ_REVENTS(osfd) & POLLOUT)) { + revents |= POLLOUT; + } + pds->revents = revents; + if (revents) { + notify 
= 1; + } + } + if (notify) { + ST_REMOVE_LINK(&pq->links); + pq->on_ioq = 0; + for (pds = pq->pds; pds < epds; pds++) { + osfd = pds->fd; + events = pds->events; + /* + * We set EV_ONESHOT flag so we only need to delete + * descriptor if it didn't fire. + */ + if ((events & POLLIN) && (--_ST_KQ_READ_CNT(osfd) == 0) && + ((_ST_KQ_REVENTS(osfd) & POLLIN) == 0)) { + memset(&kev, 0, sizeof(kev)); + kev.ident = osfd; + kev.filter = EVFILT_READ; + kev.flags = EV_DELETE; + _st_kq_dellist_add(&kev); + } + if ((events & POLLOUT) && (--_ST_KQ_WRITE_CNT(osfd) == 0) + && ((_ST_KQ_REVENTS(osfd) & POLLOUT) == 0)) { + memset(&kev, 0, sizeof(kev)); + kev.ident = osfd; + kev.filter = EVFILT_WRITE; + kev.flags = EV_DELETE; + _st_kq_dellist_add(&kev); + } + } + + if (pq->thread->flags & _ST_FL_ON_SLEEPQ) + _ST_DEL_SLEEPQ(pq->thread); + pq->thread->state = _ST_ST_RUNNABLE; + _ST_ADD_RUNQ(pq->thread); + } + } + + if (_st_kq_data->dellist_cnt > 0) { + int rv; + do { + /* This kevent() won't block since result list size is 0 */ + rv = kevent(_st_kq_data->kq, _st_kq_data->dellist, + _st_kq_data->dellist_cnt, NULL, 0, NULL); + } while (rv < 0 && errno == EINTR); + } + + for (i = 0; i < nfd; i++) { + osfd = _st_kq_data->evtlist[i].ident; + _ST_KQ_REVENTS(osfd) = 0; + } + + } else if (nfd < 0) { + if (errno == EBADF && _st_kq_data->pid != getpid()) { + /* We probably forked, reinitialize kqueue */ + if ((_st_kq_data->kq = kqueue()) < 0) { + /* There is nothing we can do here, will retry later */ + return; + } + fcntl(_st_kq_data->kq, F_SETFD, FD_CLOEXEC); + _st_kq_data->pid = getpid(); + /* Re-register all descriptors on ioq with new kqueue */ + memset(_st_kq_data->fd_data, 0, + _st_kq_data->fd_data_size * sizeof(_kq_fd_data_t)); + for (q = _ST_IOQ.next; q != &_ST_IOQ; q = q->next) { + pq = _ST_POLLQUEUE_PTR(q); + _st_kq_pollset_add(pq->pds, pq->npds); + } + goto retry_kevent; + } + } +} + +ST_HIDDEN int _st_kq_fd_new(int osfd) +{ + if (osfd >= _st_kq_data->fd_data_size && 
_st_kq_fd_data_expand(osfd) < 0) + return -1; + + return 0; +} + +ST_HIDDEN int _st_kq_fd_close(int osfd) +{ + if (_ST_KQ_READ_CNT(osfd) || _ST_KQ_WRITE_CNT(osfd)) { + errno = EBUSY; + return -1; + } + + return 0; +} + +ST_HIDDEN int _st_kq_fd_getlimit(void) +{ + /* zero means no specific limit */ + return 0; +} + +static _st_eventsys_t _st_kq_eventsys = { + "kqueue", + ST_EVENTSYS_ALT, + _st_kq_init, + _st_kq_dispatch, + _st_kq_pollset_add, + _st_kq_pollset_del, + _st_kq_fd_new, + _st_kq_fd_close, + _st_kq_fd_getlimit +}; +#endif /* MD_HAVE_KQUEUE */ + + +#ifdef MD_HAVE_EPOLL +/***************************************** + * epoll event system + */ + +ST_HIDDEN int _st_epoll_init(void) +{ + int fdlim; + int err = 0; + int rv = 0; + + _st_epoll_data = + (struct _st_epolldata *) calloc(1, sizeof(*_st_epoll_data)); + if (!_st_epoll_data) + return -1; + + fdlim = st_getfdlimit(); + _st_epoll_data->fd_hint = (fdlim > 0 && fdlim < ST_EPOLL_EVTLIST_SIZE) ? + fdlim : ST_EPOLL_EVTLIST_SIZE; + + if ((_st_epoll_data->epfd = epoll_create(_st_epoll_data->fd_hint)) < 0) { + err = errno; + rv = -1; + goto cleanup_epoll; + } + fcntl(_st_epoll_data->epfd, F_SETFD, FD_CLOEXEC); + _st_epoll_data->pid = getpid(); + + /* Allocate file descriptor data array */ + _st_epoll_data->fd_data_size = _st_epoll_data->fd_hint; + _st_epoll_data->fd_data = + (_epoll_fd_data_t *)calloc(_st_epoll_data->fd_data_size, + sizeof(_epoll_fd_data_t)); + if (!_st_epoll_data->fd_data) { + err = errno; + rv = -1; + goto cleanup_epoll; + } + + /* Allocate event lists */ + _st_epoll_data->evtlist_size = _st_epoll_data->fd_hint; + _st_epoll_data->evtlist = + (struct epoll_event *)malloc(_st_epoll_data->evtlist_size * + sizeof(struct epoll_event)); + if (!_st_epoll_data->evtlist) { + err = errno; + rv = -1; + } + + cleanup_epoll: + if (rv < 0) { + if (_st_epoll_data->epfd >= 0) + close(_st_epoll_data->epfd); + free(_st_epoll_data->fd_data); + free(_st_epoll_data->evtlist); + free(_st_epoll_data); + _st_epoll_data = 
NULL; + errno = err; + } + + return rv; +} + +ST_HIDDEN int _st_epoll_fd_data_expand(int maxfd) +{ + _epoll_fd_data_t *ptr; + int n = _st_epoll_data->fd_data_size; + + while (maxfd >= n) + n <<= 1; + + ptr = (_epoll_fd_data_t *)realloc(_st_epoll_data->fd_data, + n * sizeof(_epoll_fd_data_t)); + if (!ptr) + return -1; + + memset(ptr + _st_epoll_data->fd_data_size, 0, + (n - _st_epoll_data->fd_data_size) * sizeof(_epoll_fd_data_t)); + + _st_epoll_data->fd_data = ptr; + _st_epoll_data->fd_data_size = n; + + return 0; +} + +ST_HIDDEN void _st_epoll_evtlist_expand(void) +{ + struct epoll_event *ptr; + int n = _st_epoll_data->evtlist_size; + + while (_st_epoll_data->evtlist_cnt > n) + n <<= 1; + + ptr = (struct epoll_event *)realloc(_st_epoll_data->evtlist, + n * sizeof(struct epoll_event)); + if (ptr) { + _st_epoll_data->evtlist = ptr; + _st_epoll_data->evtlist_size = n; + } +} + +ST_HIDDEN void _st_epoll_pollset_del(struct pollfd *pds, int npds) +{ + struct epoll_event ev; + struct pollfd *pd; + struct pollfd *epd = pds + npds; + int old_events, events, op; + + /* + * It's more or less OK if deleting fails because a descriptor + * will either be closed or deleted in dispatch function after + * it fires. + */ + for (pd = pds; pd < epd; pd++) { + old_events = _ST_EPOLL_EVENTS(pd->fd); + + if (pd->events & POLLIN) + _ST_EPOLL_READ_CNT(pd->fd)--; + if (pd->events & POLLOUT) + _ST_EPOLL_WRITE_CNT(pd->fd)--; + if (pd->events & POLLPRI) + _ST_EPOLL_EXCEP_CNT(pd->fd)--; + + events = _ST_EPOLL_EVENTS(pd->fd); + /* + * The _ST_EPOLL_REVENTS check below is needed so we can use + * this function inside dispatch(). Outside of dispatch() + * _ST_EPOLL_REVENTS is always zero for all descriptors. + */ + if (events != old_events && _ST_EPOLL_REVENTS(pd->fd) == 0) { + op = events ? 
EPOLL_CTL_MOD : EPOLL_CTL_DEL; + ev.events = events; + ev.data.fd = pd->fd; + if (epoll_ctl(_st_epoll_data->epfd, op, pd->fd, &ev) == 0 && + op == EPOLL_CTL_DEL) { + _st_epoll_data->evtlist_cnt--; + } + } + } +} + +ST_HIDDEN int _st_epoll_pollset_add(struct pollfd *pds, int npds) +{ + struct epoll_event ev; + int i, fd; + int old_events, events, op; + + /* Do as many checks as possible up front */ + for (i = 0; i < npds; i++) { + fd = pds[i].fd; + if (fd < 0 || !pds[i].events || + (pds[i].events & ~(POLLIN | POLLOUT | POLLPRI))) { + errno = EINVAL; + return -1; + } + if (fd >= _st_epoll_data->fd_data_size && + _st_epoll_fd_data_expand(fd) < 0) + return -1; + } + + for (i = 0; i < npds; i++) { + fd = pds[i].fd; + old_events = _ST_EPOLL_EVENTS(fd); + + if (pds[i].events & POLLIN) + _ST_EPOLL_READ_CNT(fd)++; + if (pds[i].events & POLLOUT) + _ST_EPOLL_WRITE_CNT(fd)++; + if (pds[i].events & POLLPRI) + _ST_EPOLL_EXCEP_CNT(fd)++; + + events = _ST_EPOLL_EVENTS(fd); + if (events != old_events) { + op = old_events ? EPOLL_CTL_MOD : EPOLL_CTL_ADD; + ev.events = events; + ev.data.fd = fd; + if (epoll_ctl(_st_epoll_data->epfd, op, fd, &ev) < 0 && + (op != EPOLL_CTL_ADD || errno != EEXIST)) + break; + if (op == EPOLL_CTL_ADD) { + _st_epoll_data->evtlist_cnt++; + if (_st_epoll_data->evtlist_cnt > _st_epoll_data->evtlist_size) + _st_epoll_evtlist_expand(); + } + } + } + + if (i < npds) { + /* Error */ + int err = errno; + /* Unroll the state */ + _st_epoll_pollset_del(pds, i + 1); + errno = err; + return -1; + } + + return 0; +} + +ST_HIDDEN void _st_epoll_dispatch(void) +{ + st_utime_t min_timeout; + _st_clist_t *q; + _st_pollq_t *pq; + struct pollfd *pds, *epds; + struct epoll_event ev; + int timeout, nfd, i, osfd, notify; + int events, op; + short revents; + + if (_ST_SLEEPQ == NULL) { + timeout = -1; + } else { + min_timeout = (_ST_SLEEPQ->due <= _ST_LAST_CLOCK) ? 
0 : + (_ST_SLEEPQ->due - _ST_LAST_CLOCK); + timeout = (int) (min_timeout / 1000); + } + + if (_st_epoll_data->pid != getpid()) { + /* We probably forked, reinitialize epoll set */ + close(_st_epoll_data->epfd); + _st_epoll_data->epfd = epoll_create(_st_epoll_data->fd_hint); + if (_st_epoll_data->epfd < 0) { + /* There is nothing we can do here, will retry later */ + return; + } + fcntl(_st_epoll_data->epfd, F_SETFD, FD_CLOEXEC); + _st_epoll_data->pid = getpid(); + + /* Put all descriptors on ioq into new epoll set */ + memset(_st_epoll_data->fd_data, 0, + _st_epoll_data->fd_data_size * sizeof(_epoll_fd_data_t)); + _st_epoll_data->evtlist_cnt = 0; + for (q = _ST_IOQ.next; q != &_ST_IOQ; q = q->next) { + pq = _ST_POLLQUEUE_PTR(q); + _st_epoll_pollset_add(pq->pds, pq->npds); + } + } + + /* Check for I/O operations */ + nfd = epoll_wait(_st_epoll_data->epfd, _st_epoll_data->evtlist, + _st_epoll_data->evtlist_size, timeout); + + if (nfd > 0) { + for (i = 0; i < nfd; i++) { + osfd = _st_epoll_data->evtlist[i].data.fd; + _ST_EPOLL_REVENTS(osfd) = _st_epoll_data->evtlist[i].events; + if (_ST_EPOLL_REVENTS(osfd) & (EPOLLERR | EPOLLHUP)) { + /* Also set I/O bits on error */ + _ST_EPOLL_REVENTS(osfd) |= _ST_EPOLL_EVENTS(osfd); + } + } + + for (q = _ST_IOQ.next; q != &_ST_IOQ; q = q->next) { + pq = _ST_POLLQUEUE_PTR(q); + notify = 0; + epds = pq->pds + pq->npds; + + for (pds = pq->pds; pds < epds; pds++) { + if (_ST_EPOLL_REVENTS(pds->fd) == 0) { + pds->revents = 0; + continue; + } + osfd = pds->fd; + events = pds->events; + revents = 0; + if ((events & POLLIN) && (_ST_EPOLL_REVENTS(osfd) & EPOLLIN)) + revents |= POLLIN; + if ((events & POLLOUT) && (_ST_EPOLL_REVENTS(osfd) & EPOLLOUT)) + revents |= POLLOUT; + if ((events & POLLPRI) && (_ST_EPOLL_REVENTS(osfd) & EPOLLPRI)) + revents |= POLLPRI; + if (_ST_EPOLL_REVENTS(osfd) & EPOLLERR) + revents |= POLLERR; + if (_ST_EPOLL_REVENTS(osfd) & EPOLLHUP) + revents |= POLLHUP; + + pds->revents = revents; + if (revents) { + notify = 1; 
+ } + } + if (notify) { + ST_REMOVE_LINK(&pq->links); + pq->on_ioq = 0; + /* + * Here we will only delete/modify descriptors that + * didn't fire (see comments in _st_epoll_pollset_del()). + */ + _st_epoll_pollset_del(pq->pds, pq->npds); + + if (pq->thread->flags & _ST_FL_ON_SLEEPQ) + _ST_DEL_SLEEPQ(pq->thread); + pq->thread->state = _ST_ST_RUNNABLE; + _ST_ADD_RUNQ(pq->thread); + } + } + + for (i = 0; i < nfd; i++) { + /* Delete/modify descriptors that fired */ + osfd = _st_epoll_data->evtlist[i].data.fd; + _ST_EPOLL_REVENTS(osfd) = 0; + events = _ST_EPOLL_EVENTS(osfd); + op = events ? EPOLL_CTL_MOD : EPOLL_CTL_DEL; + ev.events = events; + ev.data.fd = osfd; + if (epoll_ctl(_st_epoll_data->epfd, op, osfd, &ev) == 0 && + op == EPOLL_CTL_DEL) { + _st_epoll_data->evtlist_cnt--; + } + } + } +} + +ST_HIDDEN int _st_epoll_fd_new(int osfd) +{ + if (osfd >= _st_epoll_data->fd_data_size && + _st_epoll_fd_data_expand(osfd) < 0) + return -1; + + return 0; +} + +ST_HIDDEN int _st_epoll_fd_close(int osfd) +{ + if (_ST_EPOLL_READ_CNT(osfd) || _ST_EPOLL_WRITE_CNT(osfd) || + _ST_EPOLL_EXCEP_CNT(osfd)) { + errno = EBUSY; + return -1; + } + + return 0; +} + +ST_HIDDEN int _st_epoll_fd_getlimit(void) +{ + /* zero means no specific limit */ + return 0; +} + +/* + * Check if epoll functions are just stubs. 
+ */
+ST_HIDDEN int _st_epoll_is_supported(void)
+{
+    struct epoll_event ev;
+
+    ev.events = EPOLLIN;
+    ev.data.ptr = NULL;
+    /* Guaranteed to fail */
+    epoll_ctl(-1, EPOLL_CTL_ADD, -1, &ev);
+
+    /*
+     * Only the errno left behind by the probe matters: ENOSYS means
+     * "not supported" (returns 0), any other value returns non-zero.
+     */
+    return (errno != ENOSYS);
+}
+
+/*
+ * Descriptor of the epoll event system: its name, the selector value
+ * (ST_EVENTSYS_ALT) and the hook functions defined in this section.
+ */
+static _st_eventsys_t _st_epoll_eventsys = {
+    "epoll",
+    ST_EVENTSYS_ALT,
+    _st_epoll_init,
+    _st_epoll_dispatch,
+    _st_epoll_pollset_add,
+    _st_epoll_pollset_del,
+    _st_epoll_fd_new,
+    _st_epoll_fd_close,
+    _st_epoll_fd_getlimit
+};
+#endif  /* MD_HAVE_EPOLL */
+
+
+/*****************************************
+ * Public functions
+ */
+
+/*
+ * Select the event notification mechanism.
+ *
+ * Returns -1 with errno set to EBUSY when a mechanism has already been
+ * selected, -1 with errno set to EINVAL when eventsys matches no case
+ * that was compiled in (ST_EVENTSYS_POLL is only compiled with
+ * MD_HAVE_POLL), and 0 otherwise.
+ *
+ * NOTE(review): for ST_EVENTSYS_ALT the function returns 0 without
+ * selecting anything when neither MD_HAVE_KQUEUE nor MD_HAVE_EPOLL is
+ * defined, or when _st_epoll_is_supported() reports failure.  Callers
+ * presumably rely on a later default selection -- confirm against
+ * st_init().
+ */
+int st_set_eventsys(int eventsys)
+{
+    if (_st_eventsys) {
+        errno = EBUSY;
+        return -1;
+    }
+
+    switch (eventsys) {
+    case ST_EVENTSYS_DEFAULT:
+        /* The default is poll() when built with USE_POLL, else select() */
+#ifdef USE_POLL
+        _st_eventsys = &_st_poll_eventsys;
+#else
+        _st_eventsys = &_st_select_eventsys;
+#endif
+        break;
+    case ST_EVENTSYS_SELECT:
+        _st_eventsys = &_st_select_eventsys;
+        break;
+#ifdef MD_HAVE_POLL
+    case ST_EVENTSYS_POLL:
+        _st_eventsys = &_st_poll_eventsys;
+        break;
+#endif
+    case ST_EVENTSYS_ALT:
+#if defined (MD_HAVE_KQUEUE)
+        _st_eventsys = &_st_kq_eventsys;
+#elif defined (MD_HAVE_EPOLL)
+        if (_st_epoll_is_supported())
+            _st_eventsys = &_st_epoll_eventsys;
+#endif
+        break;
+    default:
+        errno = EINVAL;
+        return -1;
+    }
+
+    return 0;
+}
+
+/*
+ * Return the selector value of the current event system, or -1 when
+ * none has been selected yet.
+ */
+int st_get_eventsys(void)
+{
+    return _st_eventsys ? _st_eventsys->val : -1;
+}
+
+/*
+ * Return the name of the current event system, or an empty string when
+ * none has been selected yet.
+ */
+const char *st_get_eventsys_name(void)
+{
+    return _st_eventsys ? _st_eventsys->name : "";
+}
+
diff --git a/trunk/research/st-1.9/examples/Makefile b/trunk/research/st-1.9/examples/Makefile
new file mode 100644
index 000000000..31c0a6e24
--- /dev/null
+++ b/trunk/research/st-1.9/examples/Makefile
@@ -0,0 +1,115 @@
+#
+# Portions created by SGI are Copyright (C) 2000 Silicon Graphics, Inc.
+# All Rights Reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# 1.
Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# 3. Neither the name of Silicon Graphics, Inc. nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# HOLDERS AND CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED +# TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +########################## +# Supported OSes: +# +# AIX +# FREEBSD +# HPUX +# HPUX_64 +# IRIX +# IRIX_64 +# LINUX +# LINUX_IA64 +# NETBSD +# OPENBSD +# OSF1 +# SOLARIS +# SOLARIS_64 + +########################## + +CC = cc + +SHELL = /bin/sh +ECHO = /bin/echo + +DEPTH = .. +BUILD = +TARGETDIR = + +DEFINES = +CFLAGS = +OTHER_FLAGS = + +OBJDIR = $(DEPTH)/$(TARGETDIR) +INCDIR = $(DEPTH)/$(TARGETDIR) +LIBST = $(OBJDIR)/libst.a +HEADER = $(INCDIR)/st.h + +LIBRESOLV = +EXTRALIBS = + +ifeq ($(OS),) +EXAMPLES = unknown +else +EXAMPLES = $(OBJDIR)/lookupdns $(OBJDIR)/proxy $(OBJDIR)/server +endif + + +########################## +# Platform section. 
+#
+
+ifeq (DARWIN, $(findstring DARWIN, $(OS)))
+LIBRESOLV = -lresolv
+endif
+
+ifeq (LINUX, $(findstring LINUX, $(OS)))
+LIBRESOLV = -lresolv
+endif
+
+ifeq (SOLARIS, $(findstring SOLARIS, $(OS)))
+LIBRESOLV = -lresolv
+EXTRALIBS = -lsocket -lnsl
+endif
+
+#
+# End of platform section.
+##########################
+
+
+all: $(EXAMPLES)
+
+$(OBJDIR)/lookupdns: lookupdns.c $(OBJDIR)/res.o $(LIBST) $(HEADER)
+	$(CC) $(CFLAGS) -I$(INCDIR) lookupdns.c $(OBJDIR)/res.o $(LIBST) $(LIBRESOLV) $(EXTRALIBS) -o $@
+
+$(OBJDIR)/proxy: proxy.c $(LIBST) $(HEADER)
+	$(CC) $(CFLAGS) -I$(INCDIR) proxy.c $(LIBST) $(EXTRALIBS) -o $@
+
+$(OBJDIR)/server: server.c $(OBJDIR)/error.o $(LIBST) $(HEADER)
+	$(CC) $(CFLAGS) -I$(INCDIR) server.c $(OBJDIR)/error.o $(LIBST) $(EXTRALIBS) -o $@
+
+$(OBJDIR)/%.o: %.c
+	$(CC) $(CFLAGS) -I$(INCDIR) -c $< -o $@
+
+.DEFAULT:
+	@cd $(DEPTH); $(MAKE) $@
+
diff --git a/trunk/research/st-1.9/examples/README b/trunk/research/st-1.9/examples/README
new file mode 100644
index 000000000..646d4f623
--- /dev/null
+++ b/trunk/research/st-1.9/examples/README
@@ -0,0 +1,98 @@
+Portions created by SGI are Copyright (C) 2000 Silicon Graphics, Inc.
+All Rights Reserved.
+
+
+This directory contains three example programs.
+
+
+---------------------------------------------------------------------------
+
+PROGRAM
+
+    lookupdns
+
+FILES
+
+    lookupdns.c
+    res.c
+
+USAGE
+
+    lookupdns <hostname1> [<hostname2>] ...
+
+DESCRIPTION
+
+    This program performs asynchronous DNS host name resolution and reports
+    IP address for each <hostname> specified as a command line argument.
+    One ST thread is created for each host name.  All threads do host name
+    resolution concurrently.
+
+
+---------------------------------------------------------------------------
+
+PROGRAM
+
+    proxy
+
+FILES
+
+    proxy.c
+
+USAGE
+
+    proxy -l <[host]:port> -r <host:port> [-p <num_processes>] [-S]
+
+    -l <[host]:port>      bind to local address specified as [<host>]:<port>
+    -r <host:port>        connect to remote address specified as <host>:<port>
+    -p <num_processes>    create specified number of processes
+    -S                    serialize accept() calls from different processes
+                          on the same listening socket (if needed).
+
+DESCRIPTION
+
+    This program acts as a generic gateway.  It listens for connections to a
+    local address.  Upon accepting a client connection, it connects to the
+    specified remote address and then just pumps the data through without any
+    modification.
+
+
+---------------------------------------------------------------------------
+
+PROGRAM
+
+    server
+
+FILES
+
+    server.c
+    error.c
+
+USAGE
+
+    server -l <log_directory> [<options>]
+
+    -l <log_directory>    open all log files in specified directory.
+
+    Possible options:
+
+    -b <host>:<port>      bind to specified address (multiple addresses
+                          are permitted)
+    -p <num_processes>    create specified number of processes
+    -t <min_thr>:<max_thr>  specify thread limits per listening socket
+                          across all processes
+    -u <user>             change server's user id to specified value
+    -q <backlog>          set max length of pending connections queue
+    -a                    enable access logging
+    -i                    run in interactive mode (useful for debugging)
+    -S                    serialize accept() calls from different processes
+                          on the same listening socket (if needed).
+
+DESCRIPTION
+
+    This program is a general server example.  It accepts a client connection
+    and outputs a short HTML page.  It can be easily adapted to provide
+    other services.
+
+
+---------------------------------------------------------------------------
+
diff --git a/trunk/research/st-1.9/examples/error.c b/trunk/research/st-1.9/examples/error.c
new file mode 100644
index 000000000..0b2e77287
--- /dev/null
+++ b/trunk/research/st-1.9/examples/error.c
@@ -0,0 +1,168 @@
+/*
+ * Portions created by SGI are Copyright (C) 2000 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of Silicon Graphics, Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * HOLDERS AND CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED + * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include +#include +#include "st.h" + +/* + * Simple error reporting functions. + * Suggested in W. Richard Stevens' "Advanced Programming in UNIX + * Environment". + */ + +#define MAXLINE 4096 /* max line length */ + +static void err_doit(int, int, const char *, va_list); + + +/* + * Nonfatal error related to a system call. + * Print a message and return. + */ +void err_sys_report(int fd, const char *fmt, ...) 
+{ + va_list ap; + + va_start(ap, fmt); + err_doit(fd, 1, fmt, ap); + va_end(ap); +} + + +/* + * Fatal error related to a system call. + * Print a message and terminate. + */ +void err_sys_quit(int fd, const char *fmt, ...) +{ + va_list ap; + + va_start(ap, fmt); + err_doit(fd, 1, fmt, ap); + va_end(ap); + exit(1); +} + + +/* + * Fatal error related to a system call. + * Print a message, dump core, and terminate. + */ +void err_sys_dump(int fd, const char *fmt, ...) +{ + va_list ap; + + va_start(ap, fmt); + err_doit(fd, 1, fmt, ap); + va_end(ap); + abort(); /* dump core and terminate */ + exit(1); /* shouldn't get here */ +} + + +/* + * Nonfatal error unrelated to a system call. + * Print a message and return. + */ +void err_report(int fd, const char *fmt, ...) +{ + va_list ap; + + va_start(ap, fmt); + err_doit(fd, 0, fmt, ap); + va_end(ap); +} + + +/* + * Fatal error unrelated to a system call. + * Print a message and terminate. + */ +void err_quit(int fd, const char *fmt, ...) +{ + va_list ap; + + va_start(ap, fmt); + err_doit(fd, 0, fmt, ap); + va_end(ap); + exit(1); +} + + +/* + * Return a pointer to a string containing current time. + */ +char *err_tstamp(void) +{ + static char *months[] = { "Jan", "Feb", "Mar", "Apr", "May", "Jun", + "Jul", "Aug", "Sep", "Oct", "Nov", "Dec" }; + static char str[32]; + static time_t lastt = 0; + struct tm *tmp; + time_t currt = st_time(); + + if (currt == lastt) + return str; + + tmp = localtime(&currt); + sprintf(str, "[%02d/%s/%d:%02d:%02d:%02d] ", tmp->tm_mday, + months[tmp->tm_mon], 1900 + tmp->tm_year, tmp->tm_hour, + tmp->tm_min, tmp->tm_sec); + lastt = currt; + + return str; +} + + +/* + * Print a message and return to caller. + * Caller specifies "errnoflag". 
+ */ +static void err_doit(int fd, int errnoflag, const char *fmt, va_list ap) +{ + int errno_save; + char buf[MAXLINE]; + + errno_save = errno; /* value caller might want printed */ + strcpy(buf, err_tstamp()); /* prepend a message with time stamp */ + vsprintf(buf + strlen(buf), fmt, ap); + if (errnoflag) + sprintf(buf + strlen(buf), ": %s\n", strerror(errno_save)); + else + strcat(buf, "\n"); + write(fd, buf, strlen(buf)); + errno = errno_save; +} + diff --git a/trunk/research/st-1.9/examples/lookupdns.c b/trunk/research/st-1.9/examples/lookupdns.c new file mode 100644 index 000000000..98f6ec5d8 --- /dev/null +++ b/trunk/research/st-1.9/examples/lookupdns.c @@ -0,0 +1,103 @@ +/* + * Portions created by SGI are Copyright (C) 2000 Silicon Graphics, Inc. + * All Rights Reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of Silicon Graphics, Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * HOLDERS AND CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED + * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "st.h" + +#if !defined(NETDB_INTERNAL) && defined(h_NETDB_INTERNAL) +#define NETDB_INTERNAL h_NETDB_INTERNAL +#endif + +/* Resolution timeout (in microseconds) */ +#define TIMEOUT (2*1000000LL) + +/* External function defined in the res.c file */ +int dns_getaddr(const char *host, struct in_addr *addr, st_utime_t timeout); + + +void *do_resolve(void *host) +{ + struct in_addr addr; + + /* Use dns_getaddr() instead of gethostbyname(3) to get IP address */ + if (dns_getaddr(host, &addr, TIMEOUT) < 0) { + fprintf(stderr, "dns_getaddr: can't resolve %s: ", (char *)host); + if (h_errno == NETDB_INTERNAL) + perror(""); + else + herror(""); + } else + printf("%-40s %s\n", (char *)host, inet_ntoa(addr)); + + return NULL; +} + + +/* + * Asynchronous DNS host name resolution. This program creates one + * ST thread for each host name (specified as command line arguments). + * All threads do host name resolution concurrently. 
+ */ +int main(int argc, char *argv[]) +{ + int i; + + if (argc < 2) { + fprintf(stderr, "Usage: %s [] ...\n", argv[0]); + exit(1); + } + + if (st_init() < 0) { + perror("st_init"); + exit(1); + } + + for (i = 1; i < argc; i++) { + /* Create a separate thread for each host name */ + if (st_thread_create(do_resolve, argv[i], 0, 0) == NULL) { + perror("st_thread_create"); + exit(1); + } + } + + st_thread_exit(NULL); + + /* NOTREACHED */ + return 1; +} + diff --git a/trunk/research/st-1.9/examples/proxy.c b/trunk/research/st-1.9/examples/proxy.c new file mode 100644 index 000000000..2f4636d6b --- /dev/null +++ b/trunk/research/st-1.9/examples/proxy.c @@ -0,0 +1,541 @@ +/* + * Portions created by SGI are Copyright (C) 2000 Silicon Graphics, Inc. + * All Rights Reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of Silicon Graphics, Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * HOLDERS AND CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED + * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "st.h" + +#define IOBUFSIZE (16*1024) + +#define IOV_LEN 256 +#define IOV_COUNT (IOBUFSIZE / IOV_LEN) + +#ifndef INADDR_NONE +#define INADDR_NONE 0xffffffff +#endif + +static char *prog; /* Program name */ +static struct sockaddr_in rmt_addr; /* Remote address */ + +static unsigned long testing; +#define TESTING_VERBOSE 0x1 +#define TESTING_READV 0x2 +#define TESTING_READ_RESID 0x4 +#define TESTING_WRITEV 0x8 +#define TESTING_WRITE_RESID 0x10 + +static void read_address(const char *str, struct sockaddr_in *sin); +static void start_daemon(void); +static int cpu_count(void); +static void set_concurrency(int nproc); +static void *handle_request(void *arg); +static void print_sys_error(const char *msg); + + +/* + * This program acts as a generic gateway. It listens for connections + * to a local address ('-l' option). Upon accepting a client connection, + * it connects to the specified remote address ('-r' option) and then + * just pumps the data through without any modification. 
+ */ +int main(int argc, char *argv[]) +{ + extern char *optarg; + int opt, sock, n; + int laddr, raddr, num_procs, alt_ev, one_process; + int serialize_accept = 0; + struct sockaddr_in lcl_addr, cli_addr; + st_netfd_t cli_nfd, srv_nfd; + + prog = argv[0]; + num_procs = laddr = raddr = alt_ev = one_process = 0; + + /* Parse arguments */ + while((opt = getopt(argc, argv, "l:r:p:Saht:X")) != EOF) { + switch (opt) { + case 'a': + alt_ev = 1; + break; + case 'l': + read_address(optarg, &lcl_addr); + laddr = 1; + break; + case 'r': + read_address(optarg, &rmt_addr); + if (rmt_addr.sin_addr.s_addr == INADDR_ANY) { + fprintf(stderr, "%s: invalid remote address: %s\n", prog, optarg); + exit(1); + } + raddr = 1; + break; + case 'p': + num_procs = atoi(optarg); + if (num_procs < 1) { + fprintf(stderr, "%s: invalid number of processes: %s\n", prog, optarg); + exit(1); + } + break; + case 'S': + /* + * Serialization decision is tricky on some platforms. For example, + * Solaris 2.6 and above has kernel sockets implementation, so supposedly + * there is no need for serialization. The ST library may be compiled + * on one OS version, but used on another, so the need for serialization + * should be determined at run time by the application. Since it's just + * an example, the serialization decision is left up to user. + * Only on platforms where the serialization is never needed on any OS + * version st_netfd_serialize_accept() is a no-op. 
+ */ + serialize_accept = 1; + break; + case 't': + testing = strtoul(optarg, NULL, 0); + break; + case 'X': + one_process = 1; + break; + case 'h': + case '?': + fprintf(stderr, "Usage: %s [options] -l <[host]:port> -r \n", + prog); + fprintf(stderr, "options are:\n"); + fprintf(stderr, " -p number of parallel processes\n"); + fprintf(stderr, " -S serialize accepts\n"); + fprintf(stderr, " -a use alternate event system\n"); +#ifdef DEBUG + fprintf(stderr, " -t mask testing/debugging mode\n"); + fprintf(stderr, " -X one process, don't daemonize\n"); +#endif + exit(1); + } + } + if (!laddr) { + fprintf(stderr, "%s: local address required\n", prog); + exit(1); + } + if (!raddr) { + fprintf(stderr, "%s: remote address required\n", prog); + exit(1); + } + if (num_procs == 0) + num_procs = cpu_count(); + + fprintf(stderr, "%s: starting proxy daemon on %s:%d\n", prog, + inet_ntoa(lcl_addr.sin_addr), ntohs(lcl_addr.sin_port)); + + /* Start the daemon */ + if (one_process) + num_procs = 1; + else + start_daemon(); + + if (alt_ev) + st_set_eventsys(ST_EVENTSYS_ALT); + + /* Initialize the ST library */ + if (st_init() < 0) { + print_sys_error("st_init"); + exit(1); + } + + /* Create and bind listening socket */ + if ((sock = socket(PF_INET, SOCK_STREAM, 0)) < 0) { + print_sys_error("socket"); + exit(1); + } + n = 1; + if (setsockopt(sock, SOL_SOCKET, SO_REUSEADDR, (char *)&n, sizeof(n)) < 0) { + print_sys_error("setsockopt"); + exit(1); + } + if (bind(sock, (struct sockaddr *)&lcl_addr, sizeof(lcl_addr)) < 0) { + print_sys_error("bind"); + exit(1); + } + listen(sock, 128); + if ((srv_nfd = st_netfd_open_socket(sock)) == NULL) { + print_sys_error("st_netfd_open"); + exit(1); + } + /* See the comment regarding serialization decision above */ + if (num_procs > 1 && serialize_accept && st_netfd_serialize_accept(srv_nfd) + < 0) { + print_sys_error("st_netfd_serialize_accept"); + exit(1); + } + + /* Start server processes */ + if (!one_process) + set_concurrency(num_procs); + + 
for ( ; ; ) { + n = sizeof(cli_addr); + cli_nfd = st_accept(srv_nfd, (struct sockaddr *)&cli_addr, &n, + ST_UTIME_NO_TIMEOUT); + if (cli_nfd == NULL) { + print_sys_error("st_accept"); + exit(1); + } + if (st_thread_create(handle_request, cli_nfd, 0, 0) == NULL) { + print_sys_error("st_thread_create"); + exit(1); + } + } + + /* NOTREACHED */ + return 1; +} + + +static void read_address(const char *str, struct sockaddr_in *sin) +{ + char host[128], *p; + struct hostent *hp; + unsigned short port; + + strcpy(host, str); + if ((p = strchr(host, ':')) == NULL) { + fprintf(stderr, "%s: invalid address: %s\n", prog, host); + exit(1); + } + *p++ = '\0'; + port = (unsigned short) atoi(p); + if (port < 1) { + fprintf(stderr, "%s: invalid port: %s\n", prog, p); + exit(1); + } + + memset(sin, 0, sizeof(struct sockaddr_in)); + sin->sin_family = AF_INET; + sin->sin_port = htons(port); + if (host[0] == '\0') { + sin->sin_addr.s_addr = INADDR_ANY; + return; + } + sin->sin_addr.s_addr = inet_addr(host); + if (sin->sin_addr.s_addr == INADDR_NONE) { + /* not dotted-decimal */ + if ((hp = gethostbyname(host)) == NULL) { + fprintf(stderr, "%s: can't resolve address: %s\n", prog, host); + exit(1); + } + memcpy(&sin->sin_addr, hp->h_addr, hp->h_length); + } +} + +#ifdef DEBUG +static void show_iov(const struct iovec *iov, int niov) +{ + int i; + size_t total; + + printf("iov %p has %d entries:\n", iov, niov); + total = 0; + for (i = 0; i < niov; i++) { + printf("iov[%3d] iov_base=%p iov_len=0x%lx(%lu)\n", + i, iov[i].iov_base, (unsigned long) iov[i].iov_len, + (unsigned long) iov[i].iov_len); + total += iov[i].iov_len; + } + printf("total 0x%lx(%ld)\n", (unsigned long) total, (unsigned long) total); +} + +/* + * This version is tricked out to test all the + * st_(read|write)v?(_resid)? variants. Use the non-DEBUG version for + * anything serious. st_(read|write) are all this function really + * needs. 
+ */ +static int pass(st_netfd_t in, st_netfd_t out) +{ + char buf[IOBUFSIZE]; + struct iovec iov[IOV_COUNT]; + int ioviter, nw, nr; + + if (testing & TESTING_READV) { + for (ioviter = 0; ioviter < IOV_COUNT; ioviter++) { + iov[ioviter].iov_base = &buf[ioviter * IOV_LEN]; + iov[ioviter].iov_len = IOV_LEN; + } + if (testing & TESTING_VERBOSE) { + printf("readv(%p)...\n", in); + show_iov(iov, IOV_COUNT); + } + if (testing & TESTING_READ_RESID) { + struct iovec *riov = iov; + int riov_cnt = IOV_COUNT; + if (st_readv_resid(in, &riov, &riov_cnt, ST_UTIME_NO_TIMEOUT) == 0) { + if (testing & TESTING_VERBOSE) { + printf("resid\n"); + show_iov(riov, riov_cnt); + printf("full\n"); + show_iov(iov, IOV_COUNT); + } + nr = 0; + for (ioviter = 0; ioviter < IOV_COUNT; ioviter++) + nr += iov[ioviter].iov_len; + nr = IOBUFSIZE - nr; + } else + nr = -1; + } else + nr = (int) st_readv(in, iov, IOV_COUNT, ST_UTIME_NO_TIMEOUT); + } else { + if (testing & TESTING_READ_RESID) { + size_t resid = IOBUFSIZE; + if (st_read_resid(in, buf, &resid, ST_UTIME_NO_TIMEOUT) == 0) + nr = IOBUFSIZE - resid; + else + nr = -1; + } else + nr = (int) st_read(in, buf, IOBUFSIZE, ST_UTIME_NO_TIMEOUT); + } + if (testing & TESTING_VERBOSE) + printf("got 0x%x(%d) E=%d\n", nr, nr, errno); + + if (nr <= 0) + return 0; + + if (testing & TESTING_WRITEV) { + for (nw = 0, ioviter = 0; nw < nr; + nw += iov[ioviter].iov_len, ioviter++) { + iov[ioviter].iov_base = &buf[nw]; + iov[ioviter].iov_len = nr - nw; + if (iov[ioviter].iov_len > IOV_LEN) + iov[ioviter].iov_len = IOV_LEN; + } + if (testing & TESTING_VERBOSE) { + printf("writev(%p)...\n", out); + show_iov(iov, ioviter); + } + if (testing & TESTING_WRITE_RESID) { + struct iovec *riov = iov; + int riov_cnt = ioviter; + if (st_writev_resid(out, &riov, &riov_cnt, ST_UTIME_NO_TIMEOUT) == 0) { + if (testing & TESTING_VERBOSE) { + printf("resid\n"); + show_iov(riov, riov_cnt); + printf("full\n"); + show_iov(iov, ioviter); + } + nw = 0; + while (--ioviter >= 0) + nw += 
iov[ioviter].iov_len; + nw = nr - nw; + } else + nw = -1; + } else + nw = st_writev(out, iov, ioviter, ST_UTIME_NO_TIMEOUT); + } else { + if (testing & TESTING_WRITE_RESID) { + size_t resid = nr; + if (st_write_resid(out, buf, &resid, ST_UTIME_NO_TIMEOUT) == 0) + nw = nr - resid; + else + nw = -1; + } else + nw = st_write(out, buf, nr, ST_UTIME_NO_TIMEOUT); + } + if (testing & TESTING_VERBOSE) + printf("put 0x%x(%d) E=%d\n", nw, nw, errno); + + if (nw != nr) + return 0; + + return 1; +} +#else /* DEBUG */ +/* + * This version is the simple one suitable for serious use. + */ +static int pass(st_netfd_t in, st_netfd_t out) +{ + char buf[IOBUFSIZE]; + int nw, nr; + + nr = (int) st_read(in, buf, IOBUFSIZE, ST_UTIME_NO_TIMEOUT); + if (nr <= 0) + return 0; + + nw = st_write(out, buf, nr, ST_UTIME_NO_TIMEOUT); + if (nw != nr) + return 0; + + return 1; +} +#endif + +static void *handle_request(void *arg) +{ + struct pollfd pds[2]; + st_netfd_t cli_nfd, rmt_nfd; + int sock; + + cli_nfd = (st_netfd_t) arg; + pds[0].fd = st_netfd_fileno(cli_nfd); + pds[0].events = POLLIN; + + /* Connect to remote host */ + if ((sock = socket(PF_INET, SOCK_STREAM, 0)) < 0) { + print_sys_error("socket"); + goto done; + } + if ((rmt_nfd = st_netfd_open_socket(sock)) == NULL) { + print_sys_error("st_netfd_open_socket"); + close(sock); + goto done; + } + if (st_connect(rmt_nfd, (struct sockaddr *)&rmt_addr, + sizeof(rmt_addr), ST_UTIME_NO_TIMEOUT) < 0) { + print_sys_error("st_connect"); + st_netfd_close(rmt_nfd); + goto done; + } + pds[1].fd = sock; + pds[1].events = POLLIN; + + /* + * Now just pump the data through. + * XXX This should use one thread for each direction for true full-duplex. 
+ */ + for ( ; ; ) { + pds[0].revents = 0; + pds[1].revents = 0; + + if (st_poll(pds, 2, ST_UTIME_NO_TIMEOUT) <= 0) { + print_sys_error("st_poll"); + break; + } + + if (pds[0].revents & POLLIN) { + if (!pass(cli_nfd, rmt_nfd)) + break; + } + + if (pds[1].revents & POLLIN) { + if (!pass(rmt_nfd, cli_nfd)) + break; + } + } + st_netfd_close(rmt_nfd); + +done: + + st_netfd_close(cli_nfd); + + return NULL; +} + +static void start_daemon(void) +{ + pid_t pid; + + /* Start forking */ + if ((pid = fork()) < 0) { + print_sys_error("fork"); + exit(1); + } + if (pid > 0) + exit(0); /* parent */ + + /* First child process */ + setsid(); /* become session leader */ + + if ((pid = fork()) < 0) { + print_sys_error("fork"); + exit(1); + } + if (pid > 0) /* first child */ + exit(0); + + chdir("/"); + umask(022); +} + +/* + * Create separate processes ("virtual processors"). Since it's just an + * example, there is no watchdog - the parent just exits leaving children + * on their own. + */ +static void set_concurrency(int nproc) +{ + pid_t pid; + int i; + + if (nproc < 1) + nproc = 1; + + for (i = 0; i < nproc; i++) { + if ((pid = fork()) < 0) { + print_sys_error("fork"); + exit(1); + } + /* Child returns */ + if (pid == 0) + return; + } + + /* Parent just exits */ + exit(0); +} + +static int cpu_count(void) +{ + int n; + +#if defined (_SC_NPROCESSORS_ONLN) + n = (int) sysconf(_SC_NPROCESSORS_ONLN); +#elif defined (_SC_NPROC_ONLN) + n = (int) sysconf(_SC_NPROC_ONLN); +#elif defined (HPUX) +#include + n = mpctl(MPC_GETNUMSPUS, 0, 0); +#else + n = -1; + errno = ENOSYS; +#endif + + return n; +} + +static void print_sys_error(const char *msg) +{ + fprintf(stderr, "%s: %s: %s\n", prog, msg, strerror(errno)); +} + diff --git a/trunk/research/st-1.9/examples/res.c b/trunk/research/st-1.9/examples/res.c new file mode 100644 index 000000000..14ecd8c92 --- /dev/null +++ b/trunk/research/st-1.9/examples/res.c @@ -0,0 +1,305 @@ +/* + * Copyright (c) 1985, 1988, 1993 + * The Regents of the 
University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * Portions created by SGI are Copyright (C) 2000 Silicon Graphics, Inc. + * All Rights Reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of Silicon Graphics, Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * HOLDERS AND CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED + * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */
+
+#if defined (DARWIN)
+#define BIND_8_COMPAT
+#endif
+
+/*
+ * NOTE(review): the header names were missing from the #include lines in
+ * this copy of the patch.  They are restored here from the symbols this
+ * file uses; confirm against the upstream st-1.9 examples/res.c.
+ */
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <arpa/nameser.h>
+#include <resolv.h>
+#include <netdb.h>
+#include <errno.h>
+#include "st.h"
+
+#define MAXPACKET 1024
+
+#if !defined(NETDB_INTERNAL) && defined(h_NETDB_INTERNAL)
+#define NETDB_INTERNAL h_NETDB_INTERNAL
+#endif
+
+/* New in Solaris 7 */
+#if !defined(_getshort) && defined(ns_get16)
+#define _getshort(cp) ns_get16(cp)
+#endif
+
+/* Fixed part of a resource record: type(2) + class(2) + ttl(4) + rdlength(2) */
+#if !defined(RRFIXEDSZ)
+#define RRFIXEDSZ 10
+#endif
+
+/* A DNS message, addressable either as a parsed header or as raw bytes */
+typedef union {
+  HEADER hdr;
+  u_char buf[MAXPACKET];
+} querybuf_t;
+
+
+/*
+ * Extract the first IPv4 address from a DNS reply.
+ *
+ *   ans  - reply buffer; hdr.qdcount and hdr.ancount must already be in
+ *          host byte order (both counters are consumed by this function)
+ *   len  - number of valid bytes in ans->buf
+ *   addr - receives the address on success
+ *
+ * Returns 0 on success.  Returns -1 with h_errno set to TRY_AGAIN when the
+ * reply is malformed or contains no usable A record.
+ *
+ * Every length taken from the packet is validated against the end of the
+ * reply before it is used, and only an A record whose data is exactly
+ * sizeof(struct in_addr) bytes long is copied into *addr.
+ */
+static int parse_answer(querybuf_t *ans, int len, struct in_addr *addr)
+{
+  char buf[MAXPACKET];
+  HEADER *ahp;
+  u_char *cp, *eoa;
+  int type, n;
+
+  ahp = &ans->hdr;
+  eoa = ans->buf + len;
+  cp = ans->buf + sizeof(HEADER);
+
+  /* Skip the question section */
+  while (ahp->qdcount > 0) {
+    ahp->qdcount--;
+    /* dn_skipname() returns -1 on a malformed name */
+    if ((n = dn_skipname(cp, eoa)) < 0 || eoa - cp < n + QFIXEDSZ) {
+      h_errno = TRY_AGAIN;
+      return -1;
+    }
+    cp += n + QFIXEDSZ;
+  }
+
+  /* Walk the answer section looking for an address record */
+  while (ahp->ancount > 0 && cp < eoa) {
+    ahp->ancount--;
+    if ((n = dn_expand(ans->buf, eoa, cp, buf, sizeof(buf))) < 0)
+      break;
+    cp += n;
+    if (eoa - cp < RRFIXEDSZ)
+      break;                    /* truncated record header */
+    type = _getshort(cp);
+    cp += 8;                    /* skip type, class and ttl */
+    n = _getshort(cp);          /* length of the record data */
+    cp += 2;
+    if (eoa - cp < n)
+      break;                    /* record data runs past the reply */
+    if (type == T_A && n == (int)sizeof(*addr)) {
+      memcpy(addr, cp, sizeof(*addr));
+      return 0;
+    }
+    /* CNAME (or any other record we cannot use): skip its data */
+    cp += n;
+  }
+
+  h_errno = TRY_AGAIN;
+  return -1;
+}
+
+
+/*
+ * Ask each configured name server in turn for the A record of "name".
+ *
+ *   nfd     - UDP socket used for both the query and the reply
+ *   name    - domain name to look up
+ *   addr    - receives the address on success
+ *   timeout - passed unchanged to every st_sendto()/st_recvfrom() call
+ *
+ * Returns 0 on success and -1 on failure with h_errno set.  When an I/O
+ * call is interrupted (errno == EINTR with h_errno == NETDB_INTERNAL) the
+ * function returns immediately instead of trying the next server.
+ */
+static int query_domain(st_netfd_t nfd, const char *name, struct in_addr *addr,
+                        st_utime_t timeout)
+{
+  querybuf_t qbuf;
+  u_char *buf = qbuf.buf;
+  HEADER *hp = &qbuf.hdr;
+  int blen = sizeof(qbuf);
+  int i, len, id;
+
+  for (i = 0; i < _res.nscount; i++) {
+    /* The reply overwrites the buffer, so rebuild the query each time */
+    len = res_mkquery(QUERY, name, C_IN, T_A, NULL, 0, NULL, buf, blen);
+    if (len <= 0) {
+      h_errno = NO_RECOVERY;
+      return -1;
+    }
+    id = hp->id;
+
+    if (st_sendto(nfd, buf, len, (struct sockaddr *)&(_res.nsaddr_list[i]),
+                  sizeof(struct sockaddr), timeout) != len) {
+      h_errno = NETDB_INTERNAL;
+      /* EINTR means interrupt by other thread, NOT by a caught signal */
+      if (errno == EINTR)
+        return -1;
+      continue;
+    }
+
+    /* Wait for reply, discarding datagrams whose id is not ours */
+    do {
+      len = st_recvfrom(nfd, buf, blen, NULL, NULL, timeout);
+      if (len <= 0)
+        break;
+    } while (id != hp->id);
+
+    if (len < HFIXEDSZ) {
+      h_errno = NETDB_INTERNAL;
+      if (len >= 0)
+        errno = EMSGSIZE;       /* reply shorter than a DNS header */
+      else if (errno == EINTR)  /* see the comment above */
+        return -1;
+      continue;
+    }
+
+    hp->ancount = ntohs(hp->ancount);
+    hp->qdcount = ntohs(hp->qdcount);
+    if ((hp->rcode != NOERROR) || (hp->ancount == 0)) {
+      switch (hp->rcode) {
+      case NXDOMAIN:
+        h_errno = HOST_NOT_FOUND;
+        break;
+      case SERVFAIL:
+        h_errno = TRY_AGAIN;
+        break;
+      case NOERROR:
+        h_errno = NO_DATA;
+        break;
+      case FORMERR:
+      case NOTIMP:
+      case REFUSED:
+      default:
+        h_errno = NO_RECOVERY;
+      }
+      continue;
+    }
+
+    if (parse_answer(&qbuf, len, addr) == 0)
+      return 0;
+  }
+
+  return -1;
+}
+
+
+/*
+ * Close the query socket and return "ret" from the enclosing function,
+ * preserving errno across st_netfd_close().  Uses the caller's local
+ * variables "n" (as scratch space) and "nfd".
+ */
+#define CLOSE_AND_RETURN(ret) \
+  do {                        \
+    n = errno;                \
+    st_netfd_close(nfd);      \
+    errno = n;                \
+    return (ret);             \
+  } while (0)
+
+
+/*
+ * Resolve "host" to an IPv4 address over UDP without blocking the process.
+ *
+ *   host    - name to resolve; must be a non-empty string
+ *   addr    - receives the address on success
+ *   timeout - timeout passed to each network operation
+ *
+ * Returns 0 on success and -1 on failure with h_errno set (errno is also
+ * meaningful when h_errno is NETDB_INTERNAL).  Fails with ENOSYS when the
+ * resolver is configured with RES_USEVC.
+ */
+int dns_getaddr(const char *host, struct in_addr *addr, st_utime_t timeout)
+{
+  char name[MAXDNAME], **domain;
+  const char *cp;
+  size_t dlen;
+  int s, n, maxlen, dots;
+  int trailing_dot, tried_as_is;
+  st_netfd_t nfd;
+
+  if ((_res.options & RES_INIT) == 0 && res_init() == -1) {
+    h_errno = NETDB_INTERNAL;
+    return -1;
+  }
+  if (_res.options & RES_USEVC) {
+    h_errno = NETDB_INTERNAL;
+    errno = ENOSYS;
+    return -1;
+  }
+  if (!host || *host == '\0') {
+    h_errno = HOST_NOT_FOUND;
+    return -1;
+  }
+
+  /* Create UDP socket */
+  if ((s = socket(PF_INET, SOCK_DGRAM, 0)) < 0) {
+    h_errno = NETDB_INTERNAL;
+    return -1;
+  }
+  if ((nfd = st_netfd_open_socket(s)) == NULL) {
+    h_errno = NETDB_INTERNAL;
+    n = errno;
+    close(s);
+    errno = n;
+    return -1;
+  }
+
+  maxlen = sizeof(name) - 1;
+  n = 0;
+  dots = 0;
+  trailing_dot = 0;
+  tried_as_is = 0;
+
+  /* Copy the host name (at most maxlen characters) and count its dots */
+  for (cp = host; *cp && n < maxlen; cp++) {
+    dots += (*cp == '.');
+    name[n++] = *cp;
+  }
+  /* n >= 1 here because an empty host was rejected above */
+  if (name[n - 1] == '.')
+    trailing_dot = 1;
+
+  /*
+   * If there are dots in the name already, let's just give it a try
+   * 'as is'. The threshold can be set with the "ndots" option.
+   */
+  if (dots >= _res.ndots) {
+    if (query_domain(nfd, host, addr, timeout) == 0)
+      CLOSE_AND_RETURN(0);
+    if (h_errno == NETDB_INTERNAL && errno == EINTR)
+      CLOSE_AND_RETURN(-1);
+    tried_as_is = 1;
+  }
+
+  /*
+   * We do at least one level of search if
+   *     - there is no dot and RES_DEFNAMES is set, or
+   *     - there is at least one dot, there is no trailing dot,
+   *       and RES_DNSRCH is set.
+   * The search is skipped when the host name leaves no room in name[]
+   * for the separating dot.
+   */
+  if (n < maxlen &&
+      ((!dots && (_res.options & RES_DEFNAMES)) ||
+       (dots && !trailing_dot && (_res.options & RES_DNSRCH)))) {
+    name[n++] = '.';
+    for (domain = _res.dnsrch; *domain; domain++) {
+      dlen = strlen(*domain);
+      /* Only query names that fit completely, terminator included */
+      if (dlen <= (size_t)(maxlen - n)) {
+        memcpy(name + n, *domain, dlen + 1);
+        if (query_domain(nfd, name, addr, timeout) == 0)
+          CLOSE_AND_RETURN(0);
+        if (h_errno == NETDB_INTERNAL && errno == EINTR)
+          CLOSE_AND_RETURN(-1);
+      }
+      if (!(_res.options & RES_DNSRCH))
+        break;
+    }
+  }
+
+  /*
+   * If we have not already tried the name "as is", do that now.
+   * Note that we do this regardless of how many dots were in the
+   * name or whether it ends with a dot.
+   */
+  if (!tried_as_is) {
+    if (query_domain(nfd, host, addr, timeout) == 0)
+      CLOSE_AND_RETURN(0);
+  }
+
+  CLOSE_AND_RETURN(-1);
+}
+
diff --git a/trunk/research/st-1.9/examples/server.c b/trunk/research/st-1.9/examples/server.c
new file mode 100644
index 000000000..5d5aa6d72
--- /dev/null
+++ b/trunk/research/st-1.9/examples/server.c
@@ -0,0 +1,1025 @@
+/*
+ * Portions created by SGI are Copyright (C) 2000 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. 
Neither the name of Silicon Graphics, Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * HOLDERS AND CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED + * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */
+
+/*
+ * NOTE(review): the header names were missing from the #include lines in
+ * this copy of the patch.  They are restored here on the assumption that
+ * this file matches the upstream st-1.9 examples/server.c; confirm before
+ * relying on the exact list.
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <string.h>
+#include <time.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/socket.h>
+#include <sys/wait.h>
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#include <netdb.h>
+#include <fcntl.h>
+#include <signal.h>
+#include <pwd.h>
+#include "st.h"
+
+
+/******************************************************************
+ * Server configuration parameters
+ */
+
+/* Log files */
+#define PID_FILE    "pid"
+#define ERRORS_FILE "errors"
+#define ACCESS_FILE "access"
+
+/* Default server port */
+#define SERV_PORT_DEFAULT 8000
+
+/* Socket listen queue size */
+#define LISTENQ_SIZE_DEFAULT 256
+
+/* Max number of listening sockets ("hardware virtual servers") */
+#define MAX_BIND_ADDRS 16
+
+/* Max number of "spare" threads per process per socket */
+#define MAX_WAIT_THREADS_DEFAULT 8
+
+/* Number of file descriptors needed to handle one client session */
+#define FD_PER_THREAD 2
+
+/* Access log buffer flushing interval (in seconds) */
+#define ACCLOG_FLUSH_INTERVAL 30
+
+/* Request read timeout (in seconds) */
+#define REQUEST_TIMEOUT 30
+
+
+/******************************************************************
+ * Global data
+ */
+
+struct socket_info {
+  st_netfd_t nfd;               /* Listening socket                     */
+  char *addr;                   /* Bind address                         */
+  unsigned int port;            /* Port                                 */
+  int wait_threads;             /* Number of threads waiting to accept  */
+  int busy_threads;             /* Number of threads processing request */
+  int rqst_count;               /* Total number of processed requests   */
+} srv_socket[MAX_BIND_ADDRS];   /* Array of listening sockets           */
+
+static int sk_count = 0;        /* Number of listening sockets          */
+
+static int vp_count = 0;        /* Number of server processes (VPs)     */
+static pid_t *vp_pids;          /* Array of VP pids                     */
+
+static int my_index = -1;       /* Current process index */
+static pid_t my_pid = -1;       /* Current process pid   */
+
+static st_netfd_t sig_pipe[2];  /* Signal pipe           */
+
+/*
+ * Configuration flags/parameters
+ */
+static int interactive_mode = 0;
+static int serialize_accept = 0;
+static int log_access       = 0;
+static char *logdir     = NULL;
+static char *username   = NULL;
+static int listenq_size = LISTENQ_SIZE_DEFAULT;
+static int errfd        = STDERR_FILENO;
+
+/*
+ * Thread throttling parameters (all numbers are per listening socket).
+ * Zero values mean use default.
+ */
+static int max_threads = 0;       /* Max number of threads         */
+static int max_wait_threads = 0;  /* Max number of "spare" threads */
+static int min_wait_threads = 2;  /* Min number of "spare" threads */
+
+
+/******************************************************************
+ * Useful macros
+ */
+
+#ifndef INADDR_NONE
+#define INADDR_NONE 0xffffffff
+#endif
+
+/* Seconds to microseconds; the LL suffix forces long long arithmetic */
+#define SEC2USEC(s) ((s)*1000000LL)
+
+/* Accessors for the per-socket counters of srv_socket[i] */
+#define WAIT_THREADS(i)  (srv_socket[i].wait_threads)
+#define BUSY_THREADS(i)  (srv_socket[i].busy_threads)
+#define TOTAL_THREADS(i) (WAIT_THREADS(i) + BUSY_THREADS(i))
+#define RQST_COUNT(i)    (srv_socket[i].rqst_count)
+
+
+/******************************************************************
+ * Forward declarations
+ */
+
+static void usage(const char *progname);
+static void parse_arguments(int argc, char *argv[]);
+static void start_daemon(void);
+static void set_thread_throttling(void);
+static void create_listeners(void);
+static void change_user(void);
+static void open_log_files(void);
+static void start_processes(void);
+static void wdog_sighandler(int signo);
+static void child_sighandler(int signo);
+static void install_sighandlers(void);
+static void start_threads(void);
+static void *process_signals(void *arg);
+static void *flush_acclog_buffer(void *arg);
+static void *handle_connections(void *arg);
+static void dump_server_info(void);
+
+static void Signal(int sig, void (*handler)(int));
+static int cpu_count(void);
+
+extern void handle_session(long srv_socket_index, st_netfd_t cli_nfd);
+extern void load_configs(void);
+extern void logbuf_open(void);
+extern void logbuf_flush(void);
+extern void logbuf_close(void);
+
+/* Error reporting functions defined in the error.c file */
+extern void err_sys_report(int fd, const char *fmt, ...);
+extern void err_sys_quit(int fd, const char *fmt, ...);
+extern void err_sys_dump(int fd, const char *fmt, ...);
+extern void err_report(int fd, const char *fmt, ...);
+extern void err_quit(int fd, const char *fmt, ...);
+
+
+/*
+ * General server example: accept a client connection and do something.
+ * This program just outputs a short HTML page, but can be easily adapted
+ * to do other things.
+ *
+ * This server creates a constant number of processes ("virtual processors"
+ * or VPs) and replaces them when they die. Each virtual processor manages
+ * its own independent set of state threads (STs), the number of which varies
+ * with load against the server. Each state thread listens to exactly one
+ * listening socket. The initial process becomes the watchdog, waiting for
+ * children (VPs) to die or for a signal requesting termination or restart.
+ * Upon receiving a restart signal (SIGHUP), all VPs close and then reopen
+ * log files and reload configuration. All currently active connections remain
+ * active. It is assumed that new configuration affects only request
+ * processing and not the general server parameters such as number of VPs,
+ * thread limits, bind addresses, etc. Those are specified as command line
+ * arguments, so the server has to be stopped and then started again in order
+ * to change them.
+ *
+ * Each state thread loops processing connections from a single listening
+ * socket. Only one ST runs on a VP at a time, and VPs do not share memory,
+ * so no mutual exclusion locking is necessary on any data, and the entire
+ * server is free to use all the static variables and non-reentrant library
+ * functions it wants, greatly simplifying programming and debugging and
+ * increasing performance (for example, it is safe to ++ and -- all global
+ * counters or call inet_ntoa(3) without any mutexes). The current thread on
+ * each VP maintains equilibrium on that VP, starting a new thread or
+ * terminating itself if the number of spare threads exceeds the lower or
+ * upper limit.
+ *
+ * All I/O operations on sockets must use the State Thread library's I/O
+ * functions because only those functions prevent blocking of the entire VP
+ * process and perform state thread scheduling.
+ */
+int main(int argc, char *argv[])
+{
+  /* Parse command-line options */
+  parse_arguments(argc, argv);
+
+  /*
+   * Allocate array of server pids.
+   * NOTE(review): vp_count is initialized to 0 at file scope; presumably
+   * parse_arguments() gives it a positive value before this point - confirm.
+   */
+  if ((vp_pids = calloc(vp_count, sizeof(*vp_pids))) == NULL)
+    err_sys_quit(errfd, "ERROR: calloc failed");
+
+  /* Start the daemon */
+  if (!interactive_mode)
+    start_daemon();
+
+  /* Initialize the ST library */
+  if (st_init() < 0)
+    err_sys_quit(errfd, "ERROR: initialization failed: st_init");
+
+  /* Set thread throttling parameters */
+  set_thread_throttling();
+
+  /* Create listening sockets */
+  create_listeners();
+
+  /* Change the user */
+  if (username)
+    change_user();
+
+  /* Open log files */
+  open_log_files();
+
+  /* Start server processes (VPs) */
+  start_processes();
+
+  /* Turn time caching on */
+  st_timecache_set(1);
+
+  /* Install signal handlers */
+  install_sighandlers();
+
+  /* Load configuration from config files */
+  load_configs();
+
+  /* Start all threads */
+  start_threads();
+
+  /* Become a signal processing thread */
+  process_signals(NULL);
+
+  /* NOTREACHED */
+  return 1;
+}
+
+
+/******************************************************************/
+
+static void usage(const char *progname)
+{
+  fprintf(stderr, "Usage: %s -l []\n\n"
+	  "Possible options:\n\n"
+	  "\t-b : Bind to specified address. 
Multiple" + " addresses\n" + "\t are permitted.\n" + "\t-p Create specified number of processes.\n" + "\t-t : Specify thread limits per listening" + " socket\n" + "\t across all processes.\n" + "\t-u Change server's user id to specified" + " value.\n" + "\t-q Set max length of pending connections" + " queue.\n" + "\t-a Enable access logging.\n" + "\t-i Run in interactive mode.\n" + "\t-S Serialize all accept() calls.\n" + "\t-h Print this message.\n", + progname); + exit(1); +} + + +/******************************************************************/ + +static void parse_arguments(int argc, char *argv[]) +{ + extern char *optarg; + int opt; + char *c; + + while ((opt = getopt(argc, argv, "b:p:l:t:u:q:aiSh")) != EOF) { + switch (opt) { + case 'b': + if (sk_count >= MAX_BIND_ADDRS) + err_quit(errfd, "ERROR: max number of bind addresses (%d) exceeded", + MAX_BIND_ADDRS); + if ((c = strdup(optarg)) == NULL) + err_sys_quit(errfd, "ERROR: strdup"); + srv_socket[sk_count++].addr = c; + break; + case 'p': + vp_count = atoi(optarg); + if (vp_count < 1) + err_quit(errfd, "ERROR: invalid number of processes: %s", optarg); + break; + case 'l': + logdir = optarg; + break; + case 't': + max_wait_threads = (int) strtol(optarg, &c, 10); + if (*c++ == ':') + max_threads = atoi(c); + if (max_wait_threads < 0 || max_threads < 0) + err_quit(errfd, "ERROR: invalid number of threads: %s", optarg); + break; + case 'u': + username = optarg; + break; + case 'q': + listenq_size = atoi(optarg); + if (listenq_size < 1) + err_quit(errfd, "ERROR: invalid listen queue size: %s", optarg); + break; + case 'a': + log_access = 1; + break; + case 'i': + interactive_mode = 1; + break; + case 'S': + /* + * Serialization decision is tricky on some platforms. For example, + * Solaris 2.6 and above has kernel sockets implementation, so supposedly + * there is no need for serialization. 
The ST library may be compiled + * on one OS version, but used on another, so the need for serialization + * should be determined at run time by the application. Since it's just + * an example, the serialization decision is left up to user. + * Only on platforms where the serialization is never needed on any OS + * version st_netfd_serialize_accept() is a no-op. + */ + serialize_accept = 1; + break; + case 'h': + case '?': + usage(argv[0]); + } + } + + if (logdir == NULL && !interactive_mode) { + err_report(errfd, "ERROR: logging directory is required\n"); + usage(argv[0]); + } + + if (getuid() == 0 && username == NULL) + err_report(errfd, "WARNING: running as super-user!"); + + if (vp_count == 0 && (vp_count = cpu_count()) < 1) + vp_count = 1; + + if (sk_count == 0) { + sk_count = 1; + srv_socket[0].addr = "0.0.0.0"; + } +} + + +/******************************************************************/ + +static void start_daemon(void) +{ + pid_t pid; + + /* Start forking */ + if ((pid = fork()) < 0) + err_sys_quit(errfd, "ERROR: fork"); + if (pid > 0) + exit(0); /* parent */ + + /* First child process */ + setsid(); /* become session leader */ + + if ((pid = fork()) < 0) + err_sys_quit(errfd, "ERROR: fork"); + if (pid > 0) /* first child */ + exit(0); + + umask(022); + + if (chdir(logdir) < 0) + err_sys_quit(errfd, "ERROR: can't change directory to %s: chdir", logdir); +} + + +/****************************************************************** + * For simplicity, the minimal size of thread pool is considered + * as a maximum number of spare threads (max_wait_threads) that + * will be created upon server startup. The pool size can grow up + * to the max_threads value. Note that this is a per listening + * socket limit. It is also possible to limit the total number of + * threads for all sockets rather than impose a per socket limit. + */ + +static void set_thread_throttling(void) +{ + /* + * Calculate total values across all processes. 
+ * All numbers are per listening socket. + */ + if (max_wait_threads == 0) + max_wait_threads = MAX_WAIT_THREADS_DEFAULT * vp_count; + /* Assuming that each client session needs FD_PER_THREAD file descriptors */ + if (max_threads == 0) + max_threads = (st_getfdlimit() * vp_count) / FD_PER_THREAD / sk_count; + if (max_wait_threads > max_threads) + max_wait_threads = max_threads; + + /* + * Now calculate per-process values. + */ + if (max_wait_threads % vp_count) + max_wait_threads = max_wait_threads / vp_count + 1; + else + max_wait_threads = max_wait_threads / vp_count; + if (max_threads % vp_count) + max_threads = max_threads / vp_count + 1; + else + max_threads = max_threads / vp_count; + + if (min_wait_threads > max_wait_threads) + min_wait_threads = max_wait_threads; +} + + +/******************************************************************/ + +static void create_listeners(void) +{ + int i, n, sock; + char *c; + struct sockaddr_in serv_addr; + struct hostent *hp; + unsigned short port; + + for (i = 0; i < sk_count; i++) { + port = 0; + if ((c = strchr(srv_socket[i].addr, ':')) != NULL) { + *c++ = '\0'; + port = (unsigned short) atoi(c); + } + if (srv_socket[i].addr[0] == '\0') + srv_socket[i].addr = "0.0.0.0"; + if (port == 0) + port = SERV_PORT_DEFAULT; + + /* Create server socket */ + if ((sock = socket(PF_INET, SOCK_STREAM, 0)) < 0) + err_sys_quit(errfd, "ERROR: can't create socket: socket"); + n = 1; + if (setsockopt(sock, SOL_SOCKET, SO_REUSEADDR, (char *)&n, sizeof(n)) < 0) + err_sys_quit(errfd, "ERROR: can't set SO_REUSEADDR: setsockopt"); + memset(&serv_addr, 0, sizeof(serv_addr)); + serv_addr.sin_family = AF_INET; + serv_addr.sin_port = htons(port); + serv_addr.sin_addr.s_addr = inet_addr(srv_socket[i].addr); + if (serv_addr.sin_addr.s_addr == INADDR_NONE) { + /* not dotted-decimal */ + if ((hp = gethostbyname(srv_socket[i].addr)) == NULL) + err_quit(errfd, "ERROR: can't resolve address: %s", + srv_socket[i].addr); + memcpy(&serv_addr.sin_addr, 
hp->h_addr, hp->h_length); + } + srv_socket[i].port = port; + + /* Do bind and listen */ + if (bind(sock, (struct sockaddr *)&serv_addr, sizeof(serv_addr)) < 0) + err_sys_quit(errfd, "ERROR: can't bind to address %s, port %hu", + srv_socket[i].addr, port); + if (listen(sock, listenq_size) < 0) + err_sys_quit(errfd, "ERROR: listen"); + + /* Create file descriptor object from OS socket */ + if ((srv_socket[i].nfd = st_netfd_open_socket(sock)) == NULL) + err_sys_quit(errfd, "ERROR: st_netfd_open_socket"); + /* + * On some platforms (e.g. IRIX, Linux) accept() serialization is never + * needed for any OS version. In that case st_netfd_serialize_accept() + * is just a no-op. Also see the comment above. + */ + if (serialize_accept && st_netfd_serialize_accept(srv_socket[i].nfd) < 0) + err_sys_quit(errfd, "ERROR: st_netfd_serialize_accept"); + } +} + + +/******************************************************************/ + +static void change_user(void) +{ + struct passwd *pw; + + if ((pw = getpwnam(username)) == NULL) + err_quit(errfd, "ERROR: can't find user '%s': getpwnam failed", username); + + if (setgid(pw->pw_gid) < 0) + err_sys_quit(errfd, "ERROR: can't change group id: setgid"); + if (setuid(pw->pw_uid) < 0) + err_sys_quit(errfd, "ERROR: can't change user id: setuid"); + + err_report(errfd, "INFO: changed process user id to '%s'", username); +} + + +/******************************************************************/ + +static void open_log_files(void) +{ + int fd; + char str[32]; + + if (interactive_mode) + return; + + /* Open access log */ + if (log_access) + logbuf_open(); + + /* Open and write pid to pid file */ + if ((fd = open(PID_FILE, O_CREAT | O_WRONLY | O_TRUNC, 0644)) < 0) + err_sys_quit(errfd, "ERROR: can't open pid file: open"); + sprintf(str, "%d\n", (int)getpid()); + if (write(fd, str, strlen(str)) != strlen(str)) + err_sys_quit(errfd, "ERROR: can't write to pid file: write"); + close(fd); + + /* Open error log file */ + if ((fd = open(ERRORS_FILE, 
O_CREAT | O_WRONLY | O_APPEND, 0644)) < 0)
+    err_sys_quit(errfd, "ERROR: can't open error log file: open");
+  errfd = fd;
+
+  err_report(errfd, "INFO: starting the server...");
+}
+
+
+/******************************************************************/
+
+/*
+ * Fork the virtual processors (VPs) and turn the caller into the watchdog.
+ *
+ * Interactive mode: nothing is forked; the caller becomes VP 0 and returns.
+ * Otherwise vp_count children are forked. Each child stores its index and
+ * pid in my_index/my_pid and returns to main(). The parent never returns:
+ * it installs wdog_sighandler() for SIGTERM, SIGHUP and SIGUSR1, sleeps in
+ * wait(), and forks a replacement whenever a VP terminates. A replacement
+ * child returns to main() with those three signals still blocked.
+ */
+static void start_processes(void)
+{
+  int i, status;
+  pid_t pid;
+  sigset_t mask, omask;
+
+  if (interactive_mode) {
+    my_index = 0;
+    my_pid = getpid();
+    return;
+  }
+
+  for (i = 0; i < vp_count; i++) {
+    if ((pid = fork()) < 0) {
+      err_sys_report(errfd, "ERROR: can't create process: fork");
+      if (i == 0)
+        exit(1);
+      err_report(errfd, "WARN: started only %d processes out of %d", i,
+                 vp_count);
+      /* Run with the VPs that did start */
+      vp_count = i;
+      break;
+    }
+    if (pid == 0) {
+      my_index = i;
+      my_pid = getpid();
+      /* Child returns to continue in main() */
+      return;
+    }
+    vp_pids[i] = pid;
+  }
+
+  /*
+   * Parent process becomes a "watchdog" and never returns to main().
+   */
+
+  /* Install signal handlers */
+  Signal(SIGTERM, wdog_sighandler);  /* terminate */
+  Signal(SIGHUP,  wdog_sighandler);  /* restart   */
+  Signal(SIGUSR1, wdog_sighandler);  /* dump info */
+
+  /* Now go to sleep waiting for a child termination or a signal */
+  for ( ; ; ) {
+    if ((pid = wait(&status)) < 0) {
+      if (errno == EINTR)
+        continue;
+      err_sys_quit(errfd, "ERROR: watchdog: wait");
+    }
+    /* Find index of the exited child */
+    for (i = 0; i < vp_count; i++) {
+      if (vp_pids[i] == pid)
+        break;
+    }
+
+    /* Block signals while printing and forking */
+    sigemptyset(&mask);
+    sigaddset(&mask, SIGTERM);
+    sigaddset(&mask, SIGHUP);
+    sigaddset(&mask, SIGUSR1);
+    sigprocmask(SIG_BLOCK, &mask, &omask);
+
+    if (i == vp_count) {
+      /*
+       * The reaped pid is not in vp_pids[], so there is no slot to refill.
+       * Falling through would store the new pid at vp_pids[vp_count],
+       * which is outside the valid slots of the table.
+       */
+      err_report(errfd, "WARN: watchdog: reaped unknown child (pid %d),"
+                 " not restarting", (int)pid);
+      sigprocmask(SIG_SETMASK, &omask, NULL);
+      continue;
+    }
+
+    if (WIFEXITED(status))
+      err_report(errfd, "WARN: watchdog: process %d (pid %d) exited"
+                 " with status %d", i, pid, WEXITSTATUS(status));
+    else if (WIFSIGNALED(status))
+      err_report(errfd, "WARN: watchdog: process %d (pid %d) terminated"
+                 " by signal %d", i, pid, WTERMSIG(status));
+    else if (WIFSTOPPED(status))
+      err_report(errfd, "WARN: watchdog: process %d (pid %d) stopped"
+                 " by signal %d", i, pid, WSTOPSIG(status));
+    else
+      err_report(errfd, "WARN: watchdog: process %d (pid %d) terminated:"
+                 " unknown termination reason", i, pid);
+
+    /* Fork another VP */
+    if ((pid = fork()) < 0) {
+      err_sys_report(errfd, "ERROR: watchdog: can't create process: fork");
+    } else if (pid == 0) {
+      my_index = i;
+      my_pid = getpid();
+      /* Child returns to continue in main() */
+      return;
+    }
+    /* On fork failure this stores -1, which wdog_sighandler() skips */
+    vp_pids[i] = pid;
+
+    /* Restore the signal mask */
+    sigprocmask(SIG_SETMASK, &omask, NULL);
+  }
+}
+
+
+/******************************************************************/
+
+/*
+ * Watchdog signal handler (SIGTERM, SIGHUP, SIGUSR1).
+ * Forwards the signal to every VP with a positive pid in vp_pids[], then:
+ *   SIGHUP  - reopens the access log (if enabled) and the error log
+ *   SIGTERM - removes the pid file and exits
+ *   SIGUSR1 - only logs that the signal was caught
+ * errno is saved on entry and restored before returning.
+ */
+static void wdog_sighandler(int signo)
+{
+  int i, err;
+
+  /* Save errno */
+  err = errno;
+  /* Forward the signal to all children */
+  for (i = 0; i < vp_count; i++) {
+    if (vp_pids[i] > 0)
+      kill(vp_pids[i], signo);
+  }
+  /*
+   * It is safe to do pretty much everything here because process is
+   * sleeping in wait() which is async-safe.
+   */
+  switch (signo) {
+  case SIGHUP:
+    err_report(errfd, "INFO: watchdog: caught SIGHUP");
+    /* Reopen log files - needed for log rotation */
+    if (log_access) {
+      logbuf_close();
+      logbuf_open();
+    }
+    close(errfd);
+    if ((errfd = open(ERRORS_FILE, O_CREAT | O_WRONLY | O_APPEND, 0644)) < 0)
+      err_sys_quit(STDERR_FILENO, "ERROR: watchdog: open");
+    break;
+  case SIGTERM:
+    /* Non-graceful termination */
+    err_report(errfd, "INFO: watchdog: caught SIGTERM, terminating");
+    unlink(PID_FILE);
+    exit(0);
+  case SIGUSR1:
+    err_report(errfd, "INFO: watchdog: caught SIGUSR1");
+    break;
+  default:
+    err_report(errfd, "INFO: watchdog: caught signal %d", signo);
+  }
+  /* Restore errno */
+  errno = err;
+}
+
+
+/******************************************************************/
+
+static void install_sighandlers(void)
+{
+  sigset_t mask;
+  int p[2];
+
+  /* Create signal pipe */
+  if (pipe(p) < 0)
+    err_sys_quit(errfd, "ERROR: process %d (pid %d): can't create"
+                 " signal pipe: pipe", my_index, my_pid);
+  if ((sig_pipe[0] = st_netfd_open(p[0])) == NULL || +
(sig_pipe[1] = st_netfd_open(p[1])) == NULL)
+    err_sys_quit(errfd, "ERROR: process %d (pid %d): can't create"
+                 " signal pipe: st_netfd_open", my_index, my_pid);
+
+  /* Install signal handlers */
+  Signal(SIGTERM, child_sighandler);  /* terminate */
+  Signal(SIGHUP,  child_sighandler);  /* restart   */
+  Signal(SIGUSR1, child_sighandler);  /* dump info */
+
+  /*
+   * Unblock signals: a VP forked by the watchdog as a replacement starts
+   * with SIGTERM, SIGHUP and SIGUSR1 blocked (see start_processes()).
+   */
+  sigemptyset(&mask);
+  sigaddset(&mask, SIGTERM);
+  sigaddset(&mask, SIGHUP);
+  sigaddset(&mask, SIGUSR1);
+  sigprocmask(SIG_UNBLOCK, &mask, NULL);
+}
+
+
+/******************************************************************/
+
+/*
+ * Signal handler installed by install_sighandlers() for SIGTERM, SIGHUP
+ * and SIGUSR1. It only records the signal: the signal number is written
+ * to the write end of the signal pipe, from which process_signals() reads
+ * it. errno is saved on entry and restored before returning.
+ * NOTE(review): err_sys_quit() is defined in error.c and not visible here;
+ * confirm that calling it from a signal handler is acceptable.
+ */
+static void child_sighandler(int signo)
+{
+  int err, fd;
+
+  err = errno;
+  /* Raw OS descriptor behind the pipe's write end */
+  fd = st_netfd_fileno(sig_pipe[1]);
+
+  /* write() is async-safe */
+  if (write(fd, &signo, sizeof(int)) != sizeof(int))
+    err_sys_quit(errfd, "ERROR: process %d (pid %d): child's signal"
+                 " handler: write", my_index, my_pid);
+  errno = err;
+}
+
+
+/******************************************************************
+ * The "main" function of the signal processing thread.
+ */
+
+/*
+ * Never returns. Each pass reads one signal number from the read end of
+ * the signal pipe (written there by child_sighandler()) and acts on it:
+ *   SIGHUP  - reopen the log files unless running interactively, then
+ *             reload the configuration
+ *   SIGTERM - flush the access log if it is enabled and exit
+ *   SIGUSR1 - dump server info
+ * Any other value is only logged. "arg" is not used.
+ */
+/* ARGSUSED */
+static void *process_signals(void *arg)
+{
+  int signo;
+
+  while (1) {
+    /* Wait, with no timeout, for the next complete signal number */
+    if (st_read(sig_pipe[0], &signo, sizeof(signo),
+                ST_UTIME_NO_TIMEOUT) != sizeof(signo))
+      err_sys_quit(errfd, "ERROR: process %d (pid %d): signal processor:"
+                   " st_read", my_index, my_pid);
+
+    if (signo == SIGHUP) {
+      err_report(errfd, "INFO: process %d (pid %d): caught SIGHUP,"
+                 " reloading configuration", my_index, my_pid);
+      if (!interactive_mode) {
+        /* Log rotation: close and reopen the access and error logs */
+        if (log_access) {
+          logbuf_flush();
+          logbuf_close();
+          logbuf_open();
+        }
+        close(errfd);
+        errfd = open(ERRORS_FILE, O_CREAT | O_WRONLY | O_APPEND, 0644);
+        if (errfd < 0)
+          err_sys_quit(STDERR_FILENO, "ERROR: process %d (pid %d): signal"
+                       " processor: open", my_index, my_pid);
+      }
+      /* Done in both interactive and daemon mode */
+      load_configs();
+    } else if (signo == SIGTERM) {
+      /*
+       * No graceful shutdown: there is no telling how long the client
+       * sessions still in progress would take to complete.
+       */
+      err_report(errfd, "INFO: process %d (pid %d): caught SIGTERM,"
+                 " terminating", my_index, my_pid);
+      if (log_access)
+        logbuf_flush();
+      exit(0);
+    } else if (signo == SIGUSR1) {
+      err_report(errfd, "INFO: process %d (pid %d): caught SIGUSR1",
+                 my_index, my_pid);
+      /* The dump itself goes to stderr */
+      dump_server_info();
+    } else {
+      err_report(errfd, "INFO: process %d (pid %d): caught signal %d",
+                 my_index, my_pid, signo);
+    }
+  }
+
+  /* NOTREACHED */
+  return NULL;
+}
+
+
+/******************************************************************
+ * The "main" function of the access log flushing thread.
+ */
+
+/*
+ * Loops forever: sleeps ACCLOG_FLUSH_INTERVAL, then flushes the access
+ * log buffer. "arg" is not used.
+ */
+/* ARGSUSED */
+static void *flush_acclog_buffer(void *arg)
+{
+  for ( ; ; ) {
+    st_sleep(ACCLOG_FLUSH_INTERVAL);
+    logbuf_flush();
+  }
+
+  /* NOTREACHED */
+  return NULL;
+}
+
+
+/******************************************************************/
+
+/*
+ * Create the initial threads of this process: the access log flushing
+ * thread when access logging is enabled, and max_wait_threads connection
+ * handling threads for every listening socket. Exits the process if no
+ * handler thread at all could be created for some socket.
+ */
+static void start_threads(void)
+{
+  long i, n;
+
+  /* Create access log flushing thread */
+  if (log_access && st_thread_create(flush_acclog_buffer, NULL, 0, 0) == NULL)
+    err_sys_quit(errfd, "ERROR: process %d (pid %d): can't create"
+                 " log flushing thread", my_index, my_pid);
+
+  /* Create connections handling threads */
+  for (i = 0; i < sk_count; i++) {
+    err_report(errfd, "INFO: process %d (pid %d): starting %d threads"
+               " on %s:%u", my_index, my_pid, max_wait_threads,
+               srv_socket[i].addr, srv_socket[i].port);
+    WAIT_THREADS(i) = 0;
+    BUSY_THREADS(i) = 0;
+    RQST_COUNT(i) = 0;
+    for (n = 0; n < max_wait_threads; n++) {
+      /* The socket index travels to the thread inside the void * argument */
+      if (st_thread_create(handle_connections, (void *)i, 0, 0) != NULL)
+        WAIT_THREADS(i)++;
+      else
+        err_sys_report(errfd, "ERROR: process %d (pid %d): can't create"
+                       " thread", my_index, my_pid);
+    }
+    if (WAIT_THREADS(i) == 0)
+      exit(1);
+  }
+}
+
+
+/******************************************************************/
+
+/*
+ * Thread function serving one listening socket; "arg" carries the index
+ * of that socket in srv_socket[]. Accepts connections and passes each one
+ * to handle_session(), keeping the waiting/busy thread counters of the
+ * socket up to date. After accepting, one more spare thread is started if
+ * fewer than min_wait_threads are waiting and the socket is below
+ * max_threads. The thread ends once more than max_wait_threads threads
+ * are waiting on the socket.
+ */
+static void *handle_connections(void *arg)
+{
+  st_netfd_t srv_nfd, cli_nfd;
+  struct sockaddr_in from;
+  int fromlen;
+  long i = (long) arg;
+
+  srv_nfd = srv_socket[i].nfd;
+
+  while (WAIT_THREADS(i) <= max_wait_threads) {
+    /*
+     * The address length is a value-result argument: reset it before
+     * every call so that the length stored by one accept is never taken
+     * as the buffer size of the next.
+     */
+    fromlen = sizeof(from);
+    cli_nfd = st_accept(srv_nfd, (struct sockaddr *)&from, &fromlen,
+                        ST_UTIME_NO_TIMEOUT);
+    if (cli_nfd == NULL) {
+      err_sys_report(errfd, "ERROR: can't accept connection: st_accept");
+      continue;
+    }
+    /*
+     * Save peer address, so we can retrieve it later. The pointer refers
+     * to this function's local "from".
+     */
+    st_netfd_setspecific(cli_nfd, &from.sin_addr, NULL);
+
+    WAIT_THREADS(i)--;
+    BUSY_THREADS(i)++;
+    if (WAIT_THREADS(i) < min_wait_threads && TOTAL_THREADS(i) < max_threads) {
+      /* Create another spare thread */
+      if (st_thread_create(handle_connections, (void *)i, 0, 0) != NULL)
+        WAIT_THREADS(i)++;
+      else
+        err_sys_report(errfd, "ERROR: process %d (pid %d): can't create"
+                       " thread", my_index, my_pid);
+    }
+
+    handle_session(i, cli_nfd);
+
+    st_netfd_close(cli_nfd);
+    WAIT_THREADS(i)++;
+    BUSY_THREADS(i)--;
+  }
+
+  /* This thread is leaving the pool of waiting threads */
+  WAIT_THREADS(i)--;
+  return NULL;
+}
+
+
+/******************************************************************/
+
+/*
+ * Write a report for this process to stderr: process index and pid, then
+ * address, thread limits, thread counters and request count of every
+ * listening socket. The text is built in one heap buffer sized at 512
+ * bytes per socket and written with a single write().
+ */
+static void dump_server_info(void)
+{
+  char *buf;
+  int i, len;
+
+  if ((buf = malloc(sk_count * 512)) == NULL) {
+    err_sys_report(errfd, "ERROR: malloc failed");
+    return;
+  }
+
+  len = sprintf(buf, "\n\nProcess #%d (pid %d):\n", my_index, (int)my_pid);
+  for (i = 0; i < sk_count; i++) {
+    len += sprintf(buf + len, "\nListening Socket #%d:\n"
+                   "-------------------------\n"
+                   "Address %s:%u\n"
+                   "Thread limits (min/max) %d/%d\n"
+                   "Waiting threads %d\n"
+                   "Busy threads %d\n"
+                   "Requests served %d\n",
+                   i, srv_socket[i].addr, srv_socket[i].port,
+                   max_wait_threads, max_threads,
+                   WAIT_THREADS(i), BUSY_THREADS(i), RQST_COUNT(i));
+  }
+
+  write(STDERR_FILENO, buf, len);
+  free(buf);
+}
+
+
+/******************************************************************
+ * Stubs
+ */
+
+/*
+ * Session handling function stub. Just dumps small HTML page.
+ */
+void handle_session(long srv_socket_index, st_netfd_t cli_nfd)
+{
+  static char resp[] = "HTTP/1.0 200 OK\r\nContent-type: text/html\r\n"
+                       "Connection: close\r\n\r\n

It worked!

\n"; + char buf[512]; + int n = sizeof(resp) - 1; + struct in_addr *from = st_netfd_getspecific(cli_nfd); + + if (st_read(cli_nfd, buf, sizeof(buf), SEC2USEC(REQUEST_TIMEOUT)) < 0) { + err_sys_report(errfd, "WARN: can't read request from %s: st_read", + inet_ntoa(*from)); + return; + } + if (st_write(cli_nfd, resp, n, ST_UTIME_NO_TIMEOUT) != n) { + err_sys_report(errfd, "WARN: can't write response to %s: st_write", + inet_ntoa(*from)); + return; + } + + RQST_COUNT(srv_socket_index)++; +} + + +/* + * Configuration loading function stub. + */ +void load_configs(void) +{ + err_report(errfd, "INFO: process %d (pid %d): configuration loaded", + my_index, my_pid); +} + + +/* + * Buffered access logging methods. + * Note that stdio functions (fopen(3), fprintf(3), fflush(3), etc.) cannot + * be used if multiple VPs are created since these functions can flush buffer + * at any point and thus write only partial log record to disk. + * Also, it is completely safe for all threads of the same VP to write to + * the same log buffer without any mutex protection (one buffer per VP, of + * course). 
+ */ +void logbuf_open(void) +{ + +} + + +void logbuf_flush(void) +{ + +} + + +void logbuf_close(void) +{ + +} + + +/****************************************************************** + * Small utility functions + */ + +static void Signal(int sig, void (*handler)(int)) +{ + struct sigaction sa; + + sa.sa_handler = handler; + sigemptyset(&sa.sa_mask); + sa.sa_flags = 0; + sigaction(sig, &sa, NULL); +} + +static int cpu_count(void) +{ + int n; + +#if defined (_SC_NPROCESSORS_ONLN) + n = (int) sysconf(_SC_NPROCESSORS_ONLN); +#elif defined (_SC_NPROC_ONLN) + n = (int) sysconf(_SC_NPROC_ONLN); +#elif defined (HPUX) +#include + n = mpctl(MPC_GETNUMSPUS, 0, 0); +#else + n = -1; + errno = ENOSYS; +#endif + + return n; +} + +/******************************************************************/ + diff --git a/trunk/research/st-1.9/extensions/Makefile b/trunk/research/st-1.9/extensions/Makefile new file mode 100644 index 000000000..fc6634f93 --- /dev/null +++ b/trunk/research/st-1.9/extensions/Makefile @@ -0,0 +1,91 @@ +# +# Portions created by SGI are Copyright (C) 2000 Silicon Graphics, Inc. +# All Rights Reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# 3. Neither the name of Silicon Graphics, Inc. nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. 
+# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# HOLDERS AND CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED +# TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +CC = cc + +SHELL = /bin/sh +ECHO = /bin/echo + +DEPTH = .. +BUILD = +TARGETDIR = obj + +DEFINES = +OTHER_FLAGS = +CFLAGS = + +OBJDIR = $(DEPTH)/$(TARGETDIR) +INCDIR = $(DEPTH)/$(TARGETDIR) + +LIBRESOLV = +EXTRALIBS = + +SLIBRARY = $(OBJDIR)/libstx.a +OBJS = $(OBJDIR)/dnscache.o $(OBJDIR)/dnsres.o $(OBJDIR)/lrucache.o + + +CFLAGS += -Wall -I$(INCDIR) +AR = ar +ARFLAGS = rv +RANLIB = ranlib + + +########################## +# Platform section. +# + +ifeq (LINUX, $(findstring LINUX, $(OS))) +LIBRESOLV = -lresolv +endif + +ifeq ($(OS), SOLARIS) +LIBRESOLV = -lresolv +EXTRALIBS = -lsocket -lnsl +endif + +# +# End of platform section. 
+########################## + + +all: $(SLIBRARY) + +$(SLIBRARY): $(OBJS) + $(AR) $(ARFLAGS) $@ $(OBJS) + $(RANLIB) $@ + +$(OBJDIR)/%.o: %.c stx.h common.h + $(CC) $(CFLAGS) -c $< -o $@ + +clean: + rm -rf $(OBJS) $(SLIBRARY) + +#.DEFAULT: +# @cd $(DEPTH); $(MAKE) $@ + diff --git a/trunk/research/st-1.9/extensions/README b/trunk/research/st-1.9/extensions/README new file mode 100644 index 000000000..f768aa712 --- /dev/null +++ b/trunk/research/st-1.9/extensions/README @@ -0,0 +1,42 @@ +This directory contains extensions to the core State Threads Library +that were contributed by users. All files hereunder are not part of the +State Threads Library itself. They are provided as-is, without warranty +or support, and under whatever license terms their authors provided. To +contribute your own extensions, just mail them to the project +administrators or to one of the project's mailing lists; see +state-threads.sourceforge.net. Please indicate the license terms under +which the project may distribute your contribution. + +======================================================================== + +stx_fileio +---------- +Contributed by Jeff , 4 Nov 2002. + +Provides non-blocking random access file reading capability for +programs using the State Threads library. There is one public function: + +ssize_t stx_file_read(st_netfd_t fd, off_t offset, + void *buf, size_t nbytes, st_utime_t timeout); + +The implementation is not optimal in that the data is copied at least once +more than should be necessary. Its usefulness is limited to cases where +random access to a file is required and where starvation of other threads +is unacceptable. + +The particular application which motivated this implementation was a UDP +file transfer protocol. Because the OS does very little buffering of UDP +traffic it is important that UDP transmission threads are not starved for +periods of time which are long relative to the interval required to +maintain a steady send rate. 
+
+Licensed under the same dual MPL/GPL as core State Threads.
+
+========================================================================
+
+stx_dns
+-------
+
+Documentation coming.
+
+========================================================================
diff --git a/trunk/research/st-1.9/extensions/common.h b/trunk/research/st-1.9/extensions/common.h
new file mode 100644
index 000000000..f6298ba09
--- /dev/null
+++ b/trunk/research/st-1.9/extensions/common.h
@@ -0,0 +1,77 @@
+#ifndef _STX_COMMON_H_
+#define _STX_COMMON_H_
+
+#include
+#include
+
+
+#define STX_BEGIN_MACRO {
+#define STX_END_MACRO }
+
+
+/*****************************************
+ * Circular linked list definitions
+ */
+
+typedef struct _stx_clist {
+  struct _stx_clist *next;
+  struct _stx_clist *prev;
+} stx_clist_t;
+
+/* Insert element "_e" into the list, before "_l" */
+#define STX_CLIST_INSERT_BEFORE(_e,_l) \
+  STX_BEGIN_MACRO \
+    (_e)->next = (_l); \
+    (_e)->prev = (_l)->prev; \
+    (_l)->prev->next = (_e); \
+    (_l)->prev = (_e); \
+  STX_END_MACRO
+
+/* Insert element "_e" into the list, after "_l" */
+#define STX_CLIST_INSERT_AFTER(_e,_l) \
+  STX_BEGIN_MACRO \
+    (_e)->next = (_l)->next; \
+    (_e)->prev = (_l); \
+    (_l)->next->prev = (_e); \
+    (_l)->next = (_e); \
+  STX_END_MACRO
+
+/* Append an element "_e" to the end of the list "_l" */
+#define STX_CLIST_APPEND_LINK(_e,_l) STX_CLIST_INSERT_BEFORE(_e,_l)
+
+/* Remove the element "_e" from its circular list */
+#define STX_CLIST_REMOVE_LINK(_e) \
+  STX_BEGIN_MACRO \
+    (_e)->prev->next = (_e)->next; \
+    (_e)->next->prev = (_e)->prev; \
+  STX_END_MACRO
+
+/* Return the head/tail of the list */
+#define STX_CLIST_HEAD(_l) (_l)->next
+#define STX_CLIST_TAIL(_l) (_l)->prev
+
+/* Return non-zero if the given circular list "_l" is empty, */
+/* zero if the circular list is not empty */
+#define STX_CLIST_IS_EMPTY(_l) \
+  ((_l)->next == (_l))
+
+/* Initialize a circular list */
+#define STX_CLIST_INIT_CLIST(_l) \
+  STX_BEGIN_MACRO \ +
(_l)->next = (_l); \
+    (_l)->prev = (_l); \
+  STX_END_MACRO
+
+
+/*****************************************
+ * Useful macros
+ */
+
+#ifndef offsetof
+#define offsetof(type, identifier) ((size_t)&(((type *)0)->identifier))
+#endif
+
+#define STX_MIN(a, b) (((a) < (b)) ? (a) : (b))
+
+#endif /* !_STX_COMMON_H_ */
+
diff --git a/trunk/research/st-1.9/extensions/dnscache.c b/trunk/research/st-1.9/extensions/dnscache.c
new file mode 100644
index 000000000..ac49166a1
--- /dev/null
+++ b/trunk/research/st-1.9/extensions/dnscache.c
@@ -0,0 +1,233 @@
+#include "stx.h"
+#include "common.h"
+
+
+/*****************************************
+ * Basic types definitions
+ */
+
+/* Cached resolution result for one host name */
+typedef struct _stx_dns_data {
+  struct in_addr *addrs;   /* malloc'ed array of num_addrs addresses */
+  int num_addrs;
+  int cur;                 /* next index handed out when rotating */
+  time_t expires;          /* entry is used while st_time() <= expires */
+} stx_dns_data_t;
+
+
+#define MAX_HOST_ADDRS 1024
+
+static struct in_addr addr_list[MAX_HOST_ADDRS];
+
+stx_cache_t *_stx_dns_cache = NULL;
+
+extern int _stx_dns_ttl;
+extern int _stx_dns_getaddrlist(const char *hostname, struct in_addr *addrs,
+                                int *num_addrs, st_utime_t timeout);
+
+
+/* Cache hash callback: "key" is a NUL-terminated host name */
+static unsigned long hash_hostname(const void *key)
+{
+  const char *name = (const char *)key;
+  unsigned long hash = 0;
+
+  while (*name)
+    hash = (hash << 4) - hash + *name++; /* hash = hash * 15 + *name++ */
+
+  return hash;
+}
+
+/*
+ * Cache key comparison callback. strcmp() returns int, so calling it
+ * through a pointer to a function returning long is undefined behavior;
+ * this wrapper has the type strcmp() used to be cast to.
+ */
+static long compare_hostname(const void *a, const void *b)
+{
+  return (long)strcmp((const char *)a, (const char *)b);
+}
+
+/* Cache cleanup callback: frees the key, the address array and the data */
+static void cleanup_entry(void *key, void *data)
+{
+  if (key)
+    free(key);
+
+  if (data) {
+    if (((stx_dns_data_t *)data)->addrs)
+      free(((stx_dns_data_t *)data)->addrs);
+    free(data);
+  }
+}
+
+/*
+ * Look "host" up in the cache. On a hit that has not expired, copy the
+ * address(es) into "addrs", set *num_addrs unless it is 1, and return 1.
+ * Return 0 on a miss; an expired entry is deleted from the cache first.
+ */
+static int lookup_entry(const char *host, struct in_addr *addrs,
+                        int *num_addrs, int rotate)
+{
+  stx_cache_entry_t *entry;
+  stx_dns_data_t *data;
+  int n;
+
+  entry = stx_cache_entry_lookup(_stx_dns_cache, host);
+  if (entry) {
+    data = (stx_dns_data_t *)stx_cache_entry_getdata(entry);
+    if (st_time() <= data->expires) {
+      if (*num_addrs == 1) {
+        if (rotate) {
+          *addrs = data->addrs[data->cur++];
+          if (data->cur >= data->num_addrs)
+            data->cur = 0;
+        } else {
+          *addrs = data->addrs[0];
+        }
+      } else {
+        n = STX_MIN(*num_addrs, data->num_addrs);
+        memcpy(addrs, data->addrs, n * sizeof(*addrs));
+        *num_addrs = n;
+      }
+
+      stx_cache_entry_release(_stx_dns_cache, entry);
+      return 1;
+    }
+
+    /*
+     * Cache entry expired: decrement its refcount and purge it from cache.
+     */
+    stx_cache_entry_release(_stx_dns_cache, entry);
+    stx_cache_entry_delete(_stx_dns_cache, entry);
+  }
+
+  return 0;
+}
+
+/*
+ * Cache "count" addresses for "host", expiring at st_time() + _stx_dns_ttl.
+ * Does nothing when _stx_dns_ttl is not positive. Failures are silent: the
+ * entry (or the key and data) is discarded and the result is not cached.
+ */
+static void insert_entry(const char *host, struct in_addr *addrs, int count)
+{
+  stx_cache_entry_t *entry;
+  stx_dns_data_t *data;
+  char *key;
+  size_t n;
+
+  if (_stx_dns_ttl > 0) {
+    key = strdup(host);
+    data = (stx_dns_data_t *)malloc(sizeof(stx_dns_data_t));
+    n = count * sizeof(*addrs);
+    if (data) {
+      data->addrs = (struct in_addr *)malloc(n);
+      if (data->addrs)
+        memcpy(data->addrs, addrs, n);
+      data->num_addrs = count;
+      data->cur = 0;
+      data->expires = st_time() + _stx_dns_ttl;
+    }
+    entry = stx_cache_entry_create(key, data, strlen(host) + 1 +
+                                   sizeof(stx_dns_data_t) + n +
+                                   stx_cache_entry_sizeof());
+    if (key && data && data->addrs && entry &&
+        stx_cache_entry_insert(_stx_dns_cache, entry) == 0) {
+      stx_cache_entry_release(_stx_dns_cache, entry);
+      return;
+    }
+
+    if (entry)
+      stx_cache_entry_delete(_stx_dns_cache, entry);
+    else
+      cleanup_entry(key, data);
+  }
+}
+
+
+
+/*
+ * Resolve "hostname", going through the cache when one has been created;
+ * without a cache the call is passed straight to _stx_dns_getaddrlist().
+ * With a cache, *num_addrs is the capacity of "addrs" on entry; 0 is
+ * returned on success and -1 when the resolver reports an error.
+ * "rotate" is handed to lookup_entry().
+ */
+int _stx_dns_cache_getaddrlist(const char *hostname, struct in_addr *addrs,
+                               int *num_addrs, st_utime_t timeout,
+                               int rotate)
+{
+  char host[128];
+  int n, count;
+
+  if (!_stx_dns_cache)
+    return _stx_dns_getaddrlist(hostname, addrs, num_addrs, timeout);
+
+  /* Cache keys are lower-cased host names */
+  for (n = 0; n < (int)sizeof(host) - 1 && hostname[n]; n++) {
+    /* tolower() is only defined for unsigned char values and EOF */
+    host[n] = (char)tolower((unsigned char)hostname[n]);
+  }
+  host[n] = '\0';
+
+  /*
+   * A name too long for host[] would be cut short and resolved as a
+   * different name: resolve it as given and leave the cache alone.
+   */
+  if (hostname[n] != '\0')
+    return _stx_dns_getaddrlist(hostname, addrs, num_addrs, timeout);
+
+  if (lookup_entry(host, addrs, num_addrs, rotate))
+    return 0;
+
+  count = MAX_HOST_ADDRS;
+  if (_stx_dns_getaddrlist(host, addr_list, &count, timeout) < 0)
+    return -1;
+  n = STX_MIN(*num_addrs, count);
+  memcpy(addrs, addr_list, n * sizeof(*addrs));
+  *num_addrs = n;
+
+  insert_entry(host, addr_list, count);
+  return 0;
+}
+
+
+/* Create the DNS cache; returns 0 on success, -1 if creation fails */
+int stx_dns_cache_init(size_t max_size, size_t max_bytes, size_t hash_size)
+{
+  _stx_dns_cache = stx_cache_create(max_size, max_bytes, hash_size,
+                                    hash_hostname,
+                                    compare_hostname,
+                                    cleanup_entry);
+  if (!_stx_dns_cache)
+    return -1;
+
+  return 0;
+}
+
+/* Fill "info" from the DNS cache, or zero it when no cache exists */
+void stx_dns_cache_getinfo(stx_cache_info_t *info)
+{
+  if (_stx_dns_cache)
+    stx_cache_getinfo(_stx_dns_cache, info);
+  else
+    memset(info, 0, sizeof(stx_cache_info_t));
+}
+
+/* Cached lookup of an address list, without rotation */
+int stx_dns_getaddrlist(const char *hostname, struct in_addr *addrs,
+                        int *num_addrs, st_utime_t timeout)
+{
+  return _stx_dns_cache_getaddrlist(hostname, addrs, num_addrs, timeout, 0);
+}
+
+/* Cached lookup of one address, rotating through the cached ones */
+int stx_dns_getaddr(const char *hostname, struct in_addr *addr,
+                    st_utime_t timeout)
+{
+  int n = 1;
+
+  return _stx_dns_cache_getaddrlist(hostname, addr, &n, timeout, 1);
+}
+
diff --git a/trunk/research/st-1.9/extensions/dnsres.c b/trunk/research/st-1.9/extensions/dnsres.c
new file mode 100644
index 000000000..04a91ccaf
--- /dev/null
+++ b/trunk/research/st-1.9/extensions/dnsres.c
@@ -0,0 +1,305 @@
+/*
+ * Copyright (c) 1985, 1988, 1993
+ *    The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *    This product includes software developed by the University of
+ *    California, Berkeley and its contributors.
+ * 4.
Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * Portions created by SGI are Copyright (C) 2000 Silicon Graphics, Inc. + * All Rights Reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of Silicon Graphics, Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * HOLDERS AND CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED + * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "stx.h" + +#define MAXPACKET 1024 + +#if !defined(NETDB_INTERNAL) && defined(h_NETDB_INTERNAL) +#define NETDB_INTERNAL h_NETDB_INTERNAL +#endif + +/* New in Solaris 7 */ +#if !defined(_getshort) && defined(ns_get16) +#define _getshort(cp) ns_get16(cp) +#define _getlong(cp) ns_get32(cp) +#endif + +typedef union { + HEADER hdr; + u_char buf[MAXPACKET]; +} querybuf_t; + +int _stx_dns_ttl; + + +static int parse_answer(querybuf_t *ans, int len, struct in_addr *addrs, + int *num_addrs) +{ + char buf[MAXPACKET]; + HEADER *ahp; + u_char *cp, *eoa; + int type, n, i; + + ahp = &ans->hdr; + eoa = ans->buf + len; + cp = ans->buf + sizeof(HEADER); + h_errno = TRY_AGAIN; + _stx_dns_ttl = -1; + i = 0; + + while (ahp->qdcount > 0) { + ahp->qdcount--; + cp += dn_skipname(cp, eoa) + QFIXEDSZ; + } + while (ahp->ancount > 0 && cp < eoa && i < *num_addrs) { + ahp->ancount--; + if ((n = dn_expand(ans->buf, eoa, cp, buf, sizeof(buf))) < 0) + return -1; + cp += n; + if (cp + 4 + 4 + 2 >= eoa) + return -1; + type = _getshort(cp); + cp += 4; + if (type == T_A) + _stx_dns_ttl = _getlong(cp); + cp += 4; + n = _getshort(cp); + cp += 2; + if (type == T_A) { + if (n > sizeof(*addrs) || 
cp + n > eoa) + return -1; + memcpy(&addrs[i++], cp, n); + } + cp += n; + } + + *num_addrs = i; + return 0; +} + + +static int query_domain(st_netfd_t nfd, const char *name, + struct in_addr *addrs, int *num_addrs, + st_utime_t timeout) +{ + querybuf_t qbuf; + u_char *buf = qbuf.buf; + HEADER *hp = &qbuf.hdr; + int blen = sizeof(qbuf); + int i, len, id; + + for (i = 0; i < _res.nscount; i++) { + len = res_mkquery(QUERY, name, C_IN, T_A, NULL, 0, NULL, buf, blen); + if (len <= 0) { + h_errno = NO_RECOVERY; + return -1; + } + id = hp->id; + + if (st_sendto(nfd, buf, len, (struct sockaddr *)&(_res.nsaddr_list[i]), + sizeof(struct sockaddr), timeout) != len) { + h_errno = NETDB_INTERNAL; + /* EINTR means interrupt by other thread, NOT by a caught signal */ + if (errno == EINTR) + return -1; + continue; + } + + /* Wait for reply */ + do { + len = st_recvfrom(nfd, buf, blen, NULL, NULL, timeout); + if (len <= 0) + break; + } while (id != hp->id); + + if (len < HFIXEDSZ) { + h_errno = NETDB_INTERNAL; + if (len >= 0) + errno = EMSGSIZE; + else if (errno == EINTR) /* see the comment above */ + return -1; + continue; + } + + hp->ancount = ntohs(hp->ancount); + hp->qdcount = ntohs(hp->qdcount); + if ((hp->rcode != NOERROR) || (hp->ancount == 0)) { + switch (hp->rcode) { + case NXDOMAIN: + h_errno = HOST_NOT_FOUND; + break; + case SERVFAIL: + h_errno = TRY_AGAIN; + break; + case NOERROR: + h_errno = NO_DATA; + break; + case FORMERR: + case NOTIMP: + case REFUSED: + default: + h_errno = NO_RECOVERY; + } + continue; + } + + if (parse_answer(&qbuf, len, addrs, num_addrs) == 0) + return 0; + } + + return -1; +} + + +#define CLOSE_AND_RETURN(ret) \ + { \ + n = errno; \ + st_netfd_close(nfd); \ + errno = n; \ + return (ret); \ + } + + +int _stx_dns_getaddrlist(const char *host, struct in_addr *addrs, + int *num_addrs, st_utime_t timeout) +{ + char name[MAXDNAME], **domain; + const char *cp; + int s, n, maxlen, dots; + int trailing_dot, tried_as_is; + st_netfd_t nfd; + + if 
((_res.options & RES_INIT) == 0 && res_init() == -1) { + h_errno = NETDB_INTERNAL; + return -1; + } + if (_res.options & RES_USEVC) { + h_errno = NETDB_INTERNAL; + errno = ENOSYS; + return -1; + } + if (!host || *host == '\0') { + h_errno = HOST_NOT_FOUND; + return -1; + } + + /* Create UDP socket */ + if ((s = socket(PF_INET, SOCK_DGRAM, 0)) < 0) { + h_errno = NETDB_INTERNAL; + return -1; + } + if ((nfd = st_netfd_open_socket(s)) == NULL) { + h_errno = NETDB_INTERNAL; + n = errno; + close(s); + errno = n; + return -1; + } + + maxlen = sizeof(name) - 1; + n = 0; + dots = 0; + trailing_dot = 0; + tried_as_is = 0; + + for (cp = host; *cp && n < maxlen; cp++) { + dots += (*cp == '.'); + name[n++] = *cp; + } + if (name[n - 1] == '.') + trailing_dot = 1; + + /* + * If there are dots in the name already, let's just give it a try + * 'as is'. The threshold can be set with the "ndots" option. + */ + if (dots >= _res.ndots) { + if (query_domain(nfd, host, addrs, num_addrs, timeout) == 0) + CLOSE_AND_RETURN(0); + if (h_errno == NETDB_INTERNAL && errno == EINTR) + CLOSE_AND_RETURN(-1); + tried_as_is = 1; + } + + /* + * We do at least one level of search if + * - there is no dot and RES_DEFNAMES is set, or + * - there is at least one dot, there is no trailing dot, + * and RES_DNSRCH is set. + */ + if ((!dots && (_res.options & RES_DEFNAMES)) || + (dots && !trailing_dot && (_res.options & RES_DNSRCH))) { + name[n++] = '.'; + for (domain = _res.dnsrch; *domain; domain++) { + strncpy(name + n, *domain, maxlen - n); + if (query_domain(nfd, name, addrs, num_addrs, timeout) == 0) + CLOSE_AND_RETURN(0); + if (h_errno == NETDB_INTERNAL && errno == EINTR) + CLOSE_AND_RETURN(-1); + if (!(_res.options & RES_DNSRCH)) + break; + } + } + + /* + * If we have not already tried the name "as is", do that now. + * Note that we do this regardless of how many dots were in the + * name or whether it ends with a dot.
+ */ + if (!tried_as_is) { + if (query_domain(nfd, host, addrs, num_addrs, timeout) == 0) + CLOSE_AND_RETURN(0); + } + + CLOSE_AND_RETURN(-1); +} + diff --git a/trunk/research/st-1.9/extensions/lrucache.c b/trunk/research/st-1.9/extensions/lrucache.c new file mode 100644 index 000000000..33494fee6 --- /dev/null +++ b/trunk/research/st-1.9/extensions/lrucache.c @@ -0,0 +1,343 @@ +#include "stx.h" +#include "common.h" + + +/***************************************** + * Basic types definitions + */ + +struct _stx_centry { + void *key; /* key for doing lookups */ + void *data; /* data in the cache */ + size_t weight; /* "weight" of this entry */ + struct _stx_centry *next; /* next entry */ + struct _stx_centry **pthis; + stx_clist_t lru_link; /* for putting this entry on LRU list */ + int ref_count; /* use count for this entry */ + int delete_pending; /* pending delete flag */ +}; + +struct _stx_cache { + size_t max_size; /* max size of cache */ + size_t cur_size; /* current size of cache */ + + size_t max_weight; /* cache capacity */ + size_t cur_weight; /* current total "weight" of all entries */ + + size_t hash_size; /* size of hash table */ + stx_cache_entry_t **table; /* hash table for this cache */ + + stx_clist_t lru_list; /* least-recently-used list */ + + /* Cache stats */ + unsigned long hits; /* num cache hits */ + unsigned long lookups; /* num cache lookups */ + unsigned long inserts; /* num inserts */ + unsigned long deletes; /* num deletes */ + + /* Functions */ + unsigned long (*key_hash_fn)(const void *); + long (*key_cmp_fn)(const void *, const void *); + void (*cleanup_fn)(void *, void *); +}; + + +#define STX_CACHE_ENTRY_PTR(_qp) \ + ((stx_cache_entry_t *)((char *)(_qp) - offsetof(stx_cache_entry_t, lru_link))) + + +/***************************************** + * Cache methods + */ + +stx_cache_t *stx_cache_create(size_t max_size, size_t max_weight, + size_t hash_size, + unsigned long (*key_hash_fn)(const void *key), + long (*key_cmp_fn)(const void 
*key1, + const void *key2), + void (*cleanup_fn)(void *key, void *data)) +{ + stx_cache_t *newcache; + + newcache = (stx_cache_t *)calloc(1, sizeof(stx_cache_t)); + if (newcache == NULL) + return NULL; + newcache->table = (stx_cache_entry_t **)calloc(hash_size, + sizeof(stx_cache_entry_t *)); + if (newcache->table == NULL) { + free(newcache); + return NULL; + } + + newcache->max_size = max_size; + newcache->max_weight = max_weight; + newcache->hash_size = hash_size; + STX_CLIST_INIT_CLIST(&(newcache->lru_list)); + newcache->key_hash_fn = key_hash_fn; + newcache->key_cmp_fn = key_cmp_fn; + newcache->cleanup_fn = cleanup_fn; + + return newcache; +} + + +void stx_cache_empty(stx_cache_t *cache) +{ + size_t i; + stx_cache_entry_t *entry, *next_entry; + + for (i = 0; i < cache->hash_size; i++) { + entry = cache->table[i]; + while (entry) { + next_entry = entry->next; + stx_cache_entry_delete(cache, entry); + entry = next_entry; + } + } +} + + +void stx_cache_traverse(stx_cache_t *cache, + void (*callback)(void *key, void *data)) +{ + size_t i; + stx_cache_entry_t *entry; + + for (i = 0; i < cache->hash_size; i++) { + for (entry = cache->table[i]; entry; entry = entry->next) { + if (!entry->delete_pending) + (*callback)(entry->key, entry->data); + } + } +} + + +void stx_cache_traverse_lru(stx_cache_t *cache, + void (*callback)(void *key, void *data), + unsigned int n) +{ + stx_clist_t *q; + stx_cache_entry_t *entry; + + for (q = STX_CLIST_HEAD(&cache->lru_list); q != &cache->lru_list && n; + q = q->next, n--) { + entry = STX_CACHE_ENTRY_PTR(q); + (*callback)(entry->key, entry->data); + } +} + + +void stx_cache_traverse_mru(stx_cache_t *cache, + void (*callback)(void *key, void *data), + unsigned int n) +{ + stx_clist_t *q; + stx_cache_entry_t *entry; + + for (q = STX_CLIST_TAIL(&cache->lru_list); q != &cache->lru_list && n; + q = q->prev, n--) { + entry = STX_CACHE_ENTRY_PTR(q); + (*callback)(entry->key, entry->data); + } +} + + +size_t stx_cache_getsize(stx_cache_t 
*cache) +{ + return cache->cur_size; +} + + +size_t stx_cache_getweight(stx_cache_t *cache) +{ + return cache->cur_weight; +} + + +void stx_cache_getinfo(stx_cache_t *cache, stx_cache_info_t *info) +{ + info->max_size = cache->max_size; + info->max_weight = cache->max_weight; + info->hash_size = cache->hash_size; + info->cur_size = cache->cur_size; + info->cur_weight = cache->cur_weight; + info->hits = cache->hits; + info->lookups = cache->lookups; + info->inserts = cache->inserts; + info->deletes = cache->deletes; +} + + +/***************************************** + * Cache entry methods + */ + +stx_cache_entry_t *stx_cache_entry_create(void *key, void *data, + size_t weight) +{ + stx_cache_entry_t *newentry; + + newentry = (stx_cache_entry_t *)calloc(1, sizeof(stx_cache_entry_t)); + if (newentry == NULL) + return NULL; + + newentry->key = key; + newentry->data = data; + newentry->weight = weight; + + return newentry; +} + + +void stx_cache_entry_delete(stx_cache_t *cache, stx_cache_entry_t *entry) +{ + entry->delete_pending = 1; + + if (entry->ref_count > 0) + return; + + if (entry->pthis) { + *entry->pthis = entry->next; + if (entry->next) + entry->next->pthis = entry->pthis; + + cache->cur_size--; + cache->cur_weight -= entry->weight; + cache->deletes++; + STX_CLIST_REMOVE_LINK(&(entry->lru_link)); + } + + if (cache->cleanup_fn) + cache->cleanup_fn(entry->key, entry->data); + + entry->pthis = NULL; + entry->key = NULL; + entry->data = NULL; + free(entry); +} + + +stx_cache_entry_t *stx_cache_entry_lookup(stx_cache_t *cache, const void *key) +{ + unsigned long bucket; + stx_cache_entry_t *entry; + + cache->lookups++; + bucket = cache->key_hash_fn(key) % cache->hash_size; + for (entry = cache->table[bucket]; entry; entry = entry->next) { + if (!entry->delete_pending && cache->key_cmp_fn(key, entry->key) == 0) + break; + } + if (entry) { + cache->hits++; + if (entry->ref_count == 0) + STX_CLIST_REMOVE_LINK(&(entry->lru_link)); + entry->ref_count++; + } + + return 
entry; +} + + +void stx_cache_entry_release(stx_cache_t *cache, stx_cache_entry_t *entry) +{ + if (entry->ref_count == 0) + return; + + entry->ref_count--; + + if (entry->ref_count == 0) { + STX_CLIST_APPEND_LINK(&(entry->lru_link), &(cache->lru_list)); + if (entry->delete_pending) + stx_cache_entry_delete(cache, entry); + } +} + + +int stx_cache_entry_insert(stx_cache_t *cache, stx_cache_entry_t *entry) +{ + stx_cache_entry_t *old_entry; + unsigned long bucket; + + /* + * If cache capacity is exceeded, try to remove LRU entries till there is + * enough room or LRU list is empty. + */ + while (cache->cur_weight + entry->weight > cache->max_weight) { + old_entry = stx_cache_entry_getlru(cache); + if (!old_entry) { + /* cache capacity is exceeded and all entries are in use */ + return -1; + } + stx_cache_entry_delete(cache, old_entry); + } + + /* If cache size is exceeded, remove LRU entry */ + if (cache->cur_size >= cache->max_size) { + old_entry = stx_cache_entry_getlru(cache); + if (!old_entry) { + /* cache size is exceeded and all entries are in use */ + return -1; + } + stx_cache_entry_delete(cache, old_entry); + } + + /* Don't add duplicate entries in the cache */ + bucket = cache->key_hash_fn(entry->key) % cache->hash_size; + for (old_entry = cache->table[bucket]; old_entry; + old_entry = old_entry->next) { + if (!old_entry->delete_pending && + cache->key_cmp_fn(entry->key, old_entry->key) == 0) + break; + } + if (old_entry) + stx_cache_entry_delete(cache, old_entry); + + /* Insert in the hash table */ + entry->next = cache->table[bucket]; + cache->table[bucket] = entry; + entry->pthis = &cache->table[bucket]; + if (entry->next) + entry->next->pthis = &entry->next; + entry->ref_count++; + + cache->inserts++; + cache->cur_size++; + cache->cur_weight += entry->weight; + + return 0; +} + + +stx_cache_entry_t *stx_cache_entry_getlru(stx_cache_t *cache) +{ + if (STX_CLIST_IS_EMPTY(&(cache->lru_list))) + return NULL; + + return 
STX_CACHE_ENTRY_PTR(STX_CLIST_HEAD(&(cache->lru_list))); +} + + +int stx_cache_entry_sizeof(void) +{ + return (int)sizeof(stx_cache_entry_t); +} + + +void *stx_cache_entry_getdata(stx_cache_entry_t *entry) +{ + return entry->data; +} + + +void *stx_cache_entry_getkey(stx_cache_entry_t *entry) +{ + return entry->key; +} + + +size_t stx_cache_entry_getweight(stx_cache_entry_t *entry) +{ + return entry->weight; +} + diff --git a/trunk/research/st-1.9/extensions/print_stk.patch b/trunk/research/st-1.9/extensions/print_stk.patch new file mode 100644 index 000000000..f7451c7b0 --- /dev/null +++ b/trunk/research/st-1.9/extensions/print_stk.patch @@ -0,0 +1,367 @@ +Michael Abd-El-Malek contributed this patch. He wrote: +---------------------------------------- +Hello, + +This is a patch that enables programmatically dumping the stack of +every thread. This has been useful in debugging deadlocks, etc... +Our usage model is that the SIGUSR2 handler calls the new +_st_print_thread_stacks function, which dumps the stack for all +threads. A convenient feature is that for thread stacks that are the +same (which is common for application with a lot of worker threads +waiting for work), only one stack trace is printed, along with a +count of how many threads have that same stack. + +I use the glibc backtrace function to get the backtrace, and then use +popen to execute addr2line and convert memory addresses to file +names, function names, and line numbers. If glibc isn't available, +_st_print_thread_stacks just prints a warning. And this feature is +only available if DEBUG is turned on. + +We've found this feature extremely helpful when debugging. + +The patch can be a bit more robust (it assumes addr2line exists). +But I didn't want to go through the hassle of doing this, if the +StateThreads community doesn't want to use this patch. (In our +environment, addr2line will always be there.) 
+ +Cheers, +Mike +---------------------------------------- +Invoking complex functions from a signal handler is not recommended, +plus this patch changes the behavior of existing API hooks. It will +not become part of State Threads proper but you may find it useful +nonetheless. This patch applies to st-1.5.2. + +diff -Nur Makefile.1.5.2 Makefile +--- Makefile.1.5.2 Wed Sep 7 14:19:50 2005 ++++ Makefile Wed Sep 7 14:33:08 2005 +@@ -255,7 +255,8 @@ + $(TARGETDIR)/stk.o \ + $(TARGETDIR)/sync.o \ + $(TARGETDIR)/key.o \ +- $(TARGETDIR)/io.o ++ $(TARGETDIR)/io.o \ ++ $(TARGETDIR)/backtrace.o + OBJS += $(EXTRA_OBJS) + HEADER = $(TARGETDIR)/st.h + SLIBRARY = $(TARGETDIR)/libst.a +diff -Nur backtrace.c.1.5.2 backtrace.c +--- backtrace.c.1.5.2 Wed Dec 31 16:00:00 1969 ++++ backtrace.c Wed Sep 7 13:40:21 2005 +@@ -0,0 +1,211 @@ ++/* ++ * The contents of this file are subject to the Mozilla Public ++ * License Version 1.1 (the "License"); you may not use this file ++ * except in compliance with the License. You may obtain a copy of ++ * the License at http://www.mozilla.org/MPL/ ++ * ++ * Software distributed under the License is distributed on an "AS ++ * IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or ++ * implied. See the License for the specific language governing ++ * rights and limitations under the License. ++ * ++ * Contributor(s): Michael Abd-El-Malek (mabdelmalek@cmu.edu) ++ * Carnegie Mellon University ++ * ++ * Alternatively, the contents of this file may be used under the ++ * terms of the GNU General Public License Version 2 or later (the ++ * "GPL"), in which case the provisions of the GPL are applicable ++ * instead of those above. If you wish to allow use of your ++ * version of this file only under the terms of the GPL and not to ++ * allow others to use your version of this file under the MPL, ++ * indicate your decision by deleting the provisions above and ++ * replace them with the notice and other provisions required by ++ * the GPL. 
If you do not delete the provisions above, a recipient ++ * may use your version of this file under either the MPL or the ++ * GPL. ++ */ ++ ++ ++ ++/* ++ * This file contains routines for printing a stack trace of all threads. ++ * Only works when DEBUG is defined and where glibc is available, since it ++ * provides the backtrace() function. ++ */ ++ ++#define _GNU_SOURCE /* to get program_invocation_name */ ++ ++#include ++#include ++ ++ ++#if defined(DEBUG) && defined(__GLIBC__) ++ ++#include ++#include "common.h" ++#include ++#include ++#include ++ ++ ++/* The maximum number of frames to get a stack trace for. If a thread has more ++ * frames than this, then we only show the latest X frames. */ ++#define MAX_NUM_FRAMES 64 ++ ++ ++typedef struct thread_stack_s { ++ uint32_t num_frames; ++ void* addresses[MAX_NUM_FRAMES]; /* frame pointers */ ++ char* locations[MAX_NUM_FRAMES]; /* file/function/line numbers */ ++ uint32_t num_matches; ++ ++ struct thread_stack_s* next; ++} thread_stack_t; ++ ++static thread_stack_t* stacks = NULL; ++ ++ ++/* Converts the function's memory addresses to function names, file names, and ++ * line numbers. Calls binutil's addr2line program. 
*/ ++static void get_symbol_names(thread_stack_t *stack) ++{ ++ char program_to_run[1024], function[256], filename_lineno[256], temp[19]; ++ FILE* output; ++ int num_bytes_left; ++ uint32_t i; ++ ++ /* Construct the arguments to addr2line */ ++ num_bytes_left = sizeof(program_to_run); ++ num_bytes_left -= snprintf(program_to_run, sizeof(program_to_run), ++ "addr2line -fCe %s", program_invocation_name); ++ for (i = 0; i < stack->num_frames && num_bytes_left > 0; ++i) { ++ num_bytes_left -= snprintf(temp, sizeof(temp), " %p", stack->addresses[i]); ++ strncat(program_to_run, temp, num_bytes_left); ++ } ++ ++ /* Use popen to execute addr2line and read its output */ ++ output = popen(program_to_run, "r"); ++ for (i = 0; i < stack->num_frames; ++i) { ++ char* function_listing = (char*) malloc(512); ++ fscanf(output, "%255s\n", function); ++ fscanf(output, "%255s\n", filename_lineno); ++ snprintf(function_listing, 512, "%s at %s", function, filename_lineno); ++ stack->locations[i] = function_listing; ++ } ++ pclose(output); ++} ++ ++ ++static void print_stack(thread_stack_t* stack) ++{ ++ int skip_offset = 0, cmp_len; ++ uint32_t i; ++ ++ /* Get the function names/filenames/line numbers */ ++ get_symbol_names(stack); ++ ++ cmp_len = strlen("_st_iterate_threads_helper"); ++ ++ /* Print the backtrace */ ++ for (i = 0; i < stack->num_frames; ++i) { ++ /* Skip frames we don't have location info for */ ++ if (!strncmp(stack->locations[i], "??", 2)) { ++ continue; ++ } ++ ++ /* Skip the frames that are used for printing the stack trace */ ++ if (skip_offset) { ++ printf("\t#%2d %s %p\n", i - skip_offset, stack->locations[i], ++ stack->addresses[i]); ++ } else if (!strncmp(stack->locations[i], "_st_iterate_threads_helper", ++ cmp_len)) { ++ skip_offset = i + 1; ++ } ++ } ++} ++ ++ ++static void add_current_thread_stack(void) ++{ ++ thread_stack_t *new_stack = malloc(sizeof(thread_stack_t)); ++ thread_stack_t *search; ++ ++ /* Call glibc function to get the backtrace */ ++
new_stack->num_frames = backtrace(new_stack->addresses, MAX_NUM_FRAMES); ++ ++ /* Check if we have another stack that is equivalent. If so, then coalesce ++ * two stacks into one, to minimize output to user. */ ++ search = stacks; ++ while (search) { ++ if (search->num_frames == new_stack->num_frames && ++ !memcmp(search->addresses, new_stack->addresses, ++ search->num_frames * sizeof(void*))) { ++ /* Found an existing stack that is the same as this thread's stack */ ++ ++search->num_matches; ++ free(new_stack); ++ return; ++ } else { ++ search = search->next; ++ } ++ } ++ ++ /* This is a new stack. Add it to the list of stacks. */ ++ new_stack->num_matches = 1; ++ new_stack->next = stacks; ++ stacks = new_stack; ++} ++ ++static void print_stack_frames(void) ++{ ++ while (stacks) { ++ printf("\n%u thread(s) with this backtrace:\n", stacks->num_matches); ++ print_stack(stacks); ++ stacks = stacks->next; ++ } ++ printf("\n"); ++} ++ ++static void free_stacks(void) ++{ ++ uint32_t i; ++ while (stacks) { ++ thread_stack_t *next = stacks->next; ++ for (i = 0; i < stacks->num_frames; ++i) { ++ free(stacks->locations[i]); ++ } ++ free(stacks); ++ stacks = next; ++ } ++ stacks = NULL; ++} ++ ++ ++static void st_print_thread_stack(_st_thread_t *thread, int start_flag, ++ int end_flag) ++{ ++ if (end_flag == 0) { ++ add_current_thread_stack(); ++ } else { ++ print_stack_frames(); ++ } ++} ++ ++ ++void _st_print_thread_stacks(int ignore) ++{ ++ _st_iterate_threads_flag = 1; ++ _st_iterate_threads_helper(st_print_thread_stack); ++ _st_iterate_threads_flag = 0; ++ ++ /* Deallocate memory */ ++ free_stacks(); ++} ++ ++#else /* defined(DEBUG) && defined(__GLIBC__) */ ++ ++void _st_print_thread_stacks(int ignore) ++{ ++ printf("%s: need DEBUG mode and glibc-specific functions to read stack.\n", ++ __FUNCTION__); ++} ++#endif /* defined(DEBUG) && defined(__GLIBC__) */ +diff -Nur common.h.1.5.2 common.h +--- common.h.1.5.2 Wed Sep 7 14:18:37 2005 ++++ common.h Wed Sep 7 14:35:36
2005 +@@ -371,8 +371,18 @@ + */ + + #ifdef DEBUG +-void _st_iterate_threads(void); +-#define ST_DEBUG_ITERATE_THREADS() _st_iterate_threads() ++typedef void(*_st_func_ptr_t)(_st_thread_t *thread, ++ int start_flag, ++ int end_flag); ++/* Pointer to function that will be called on thread switch */ ++extern _st_func_ptr_t _st_iterate_func_ptr; ++extern int _st_iterate_threads_flag; ++/* Thread iteration function that will call an arbitrary function */ ++extern void _st_iterate_threads_helper(_st_func_ptr_t func); ++#define ST_DEBUG_ITERATE_THREADS() \ ++ if (_st_iterate_func_ptr) { \ ++ _st_iterate_threads_helper(_st_iterate_func_ptr); \ ++ } + #else + #define ST_DEBUG_ITERATE_THREADS() + #endif +diff -Nur public.h.1.5.2 public.h +--- public.h.1.5.2 Wed Sep 7 11:46:58 2005 ++++ public.h Wed Sep 7 13:38:46 2005 +@@ -171,8 +171,10 @@ + extern st_netfd_t st_open(const char *path, int oflags, mode_t mode); + + #ifdef DEBUG +-extern void _st_show_thread_stack(st_thread_t thread, const char *messg); ++extern void _st_show_thread_stack(st_thread_t thread, int start_flag, ++ int end_flag); + extern void _st_iterate_threads(void); ++extern void _st_print_thread_stacks(int ignore); + #endif + + #ifdef __cplusplus +diff -Nur sched.c.1.5.2 sched.c +--- sched.c.1.5.2 Wed Sep 7 10:48:05 2005 ++++ sched.c Wed Sep 7 13:38:46 2005 +@@ -919,16 +919,13 @@ + + + #ifdef DEBUG +-/* ARGSUSED */ +-void _st_show_thread_stack(_st_thread_t *thread, const char *messg) +-{ +- +-} +- + /* To be set from debugger */ + int _st_iterate_threads_flag = 0; ++/* Thread iteration function that will call an arbitrary function */ ++_st_func_ptr_t _st_iterate_func_ptr = NULL; + +-void _st_iterate_threads(void) ++/* This function iterates over all threads, calling "func" for each thread. 
*/ ++void _st_iterate_threads_helper(_st_func_ptr_t func) + { + static _st_thread_t *thread = NULL; + static jmp_buf orig_jb, save_jb; +@@ -944,16 +941,20 @@ + + if (thread) { + memcpy(thread->context, save_jb, sizeof(jmp_buf)); +- _st_show_thread_stack(thread, NULL); ++ func(thread, 0, 0); + } else { + if (MD_SETJMP(orig_jb)) { + _st_iterate_threads_flag = 0; ++ _st_iterate_func_ptr = NULL; + thread = NULL; +- _st_show_thread_stack(thread, "Iteration completed"); ++ /* Last thread to iterate through */ ++ func(thread, 0, 1); + return; + } ++ /* First thread to iterate through */ + thread = _ST_CURRENT_THREAD(); +- _st_show_thread_stack(thread, "Iteration started"); ++ _st_iterate_func_ptr = func; ++ func(thread, 1, 0); + } + + q = thread->tlink.next; +@@ -966,5 +967,17 @@ + memcpy(save_jb, thread->context, sizeof(jmp_buf)); + MD_LONGJMP(thread->context, 1); + } ++ ++/* ARGSUSED */ ++void _st_show_thread_stack(_st_thread_t *thread, int start_flag, int end_flag) ++{ ++} ++ ++/* Iterate over threads inside debugger; see st/README */ ++void _st_iterate_threads(void) ++{ ++ _st_iterate_threads_helper(_st_show_thread_stack); ++} ++ + #endif /* DEBUG */ + diff --git a/trunk/research/st-1.9/extensions/stx.h b/trunk/research/st-1.9/extensions/stx.h new file mode 100644 index 000000000..8371e0d93 --- /dev/null +++ b/trunk/research/st-1.9/extensions/stx.h @@ -0,0 +1,91 @@ +#ifndef _STX_H_ +#define _STX_H_ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "st.h" + + +#ifdef __cplusplus +extern "C" { +#endif + + +/***************************************** + * Basic types definitions + */ + +typedef struct _stx_centry stx_cache_entry_t; +typedef struct _stx_cache stx_cache_t; + +/* This is public type */ +typedef struct _stx_cache_info { + size_t max_size; + size_t max_weight; + size_t hash_size; + size_t cur_size; + size_t cur_weight; + unsigned long hits; + unsigned long lookups; + unsigned long inserts; 
+ unsigned long deletes; +} stx_cache_info_t; + + +/***************************************** + * Cache and cache entry methods + */ + +stx_cache_t *stx_cache_create(size_t max_size, size_t max_weight, + size_t hash_size, + unsigned long (*key_hash_fn)(const void *key), + long (*key_cmp_fn)(const void *key1, + const void *key2), + void (*cleanup_fn)(void *key, void *data)); +void stx_cache_empty(stx_cache_t *cache); +void stx_cache_traverse(stx_cache_t *cache, + void (*callback)(void *key, void *data)); +void stx_cache_traverse_lru(stx_cache_t *, void (*)(void *, void *), + unsigned int); +void stx_cache_traverse_mru(stx_cache_t *, void (*)(void *, void *), + unsigned int); +void stx_cache_getinfo(stx_cache_t *cache, stx_cache_info_t *info); +size_t stx_cache_getsize(stx_cache_t *cache); +size_t stx_cache_getweight(stx_cache_t *cache); + + +stx_cache_entry_t *stx_cache_entry_create(void *key, void *data, + size_t weight); +void stx_cache_entry_delete(stx_cache_t *cache, stx_cache_entry_t *entry); +stx_cache_entry_t *stx_cache_entry_lookup(stx_cache_t *cache, const void *key); +void stx_cache_entry_release(stx_cache_t *, stx_cache_entry_t *); +int stx_cache_entry_insert(stx_cache_t *cache, stx_cache_entry_t *entry); +stx_cache_entry_t *stx_cache_entry_getlru(stx_cache_t *cache); +int stx_cache_entry_sizeof(void); +void *stx_cache_entry_getdata(stx_cache_entry_t *entry); +void *stx_cache_entry_getkey(stx_cache_entry_t *entry); +size_t stx_cache_entry_getweight(stx_cache_entry_t *entry); + + +int stx_dns_cache_init(size_t max_size, size_t max_bytes, size_t hash_size); +void stx_dns_cache_getinfo(stx_cache_info_t *info); +int stx_dns_getaddrlist(const char *hostname, struct in_addr *addrs, + int *num_addrs, st_utime_t timeout); +int stx_dns_getaddr(const char *hostname, struct in_addr *addr, + st_utime_t timeout); + +#ifdef __cplusplus +} +#endif + +#endif /* !_STX_H_ */ + diff --git a/trunk/research/st-1.9/extensions/stx_fileio.c 
b/trunk/research/st-1.9/extensions/stx_fileio.c new file mode 100644 index 000000000..cb24346e8 --- /dev/null +++ b/trunk/research/st-1.9/extensions/stx_fileio.c @@ -0,0 +1,197 @@ +/* + * File I/O extension to the State Threads Library. + */ + +/* + * The contents of this file are subject to the Mozilla Public + * License Version 1.1 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.mozilla.org/MPL/ + * + * Software distributed under the License is distributed on an "AS + * IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or + * implied. See the License for the specific language governing + * rights and limitations under the License. + * + * The Original Code is the file I/O extension to the State Threads Library. + * + * The Initial Developer of the Original Code is Jeff + * . Portions created by the Initial + * Developer are Copyright (C) 2002 the Initial Developer. All Rights + * Reserved. + * + * Contributor(s): (none) + * + * Alternatively, the contents of this file may be used under the + * terms of the GNU General Public License Version 2 or later (the + * "GPL"), in which case the provisions of the GPL are applicable + * instead of those above. If you wish to allow use of your + * version of this file only under the terms of the GPL and not to + * allow others to use your version of this file under the MPL, + * indicate your decision by deleting the provisions above and + * replace them with the notice and other provisions required by + * the GPL. If you do not delete the provisions above, a recipient + * may use your version of this file under either the MPL or the + * GPL. 
+ */
+
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#include "stx_fileio.h"
+
+#define STX_FILEIO_SIGNUM SIGUSR2
+
+/* Per-descriptor state: the helper process and the two pipes used to talk to it. */
+typedef struct {
+    st_netfd_t data_fd;     /* read end of the helper's output pipe */
+    st_netfd_t control_fd;  /* write end of the helper's command pipe */
+    pid_t pid;              /* helper process id */
+} fileio_data_t;
+
+/* Size of the helper's bounce buffer; one request never returns more than this. */
+#define FILEREADER_MAX_READ 1024
+
+/* Command record sent to the helper.  nbytes <= 0 asks the helper to exit. */
+typedef struct {
+    off_t offset;
+    ssize_t nbytes;
+} file_reader_cb_t;
+
+/**
+ * Write exactly len bytes to a blocking descriptor, retrying on EINTR
+ * and on short writes.
+ *
+ * @return 0 on success, -1 on error (errno set by write()).
+ */
+static int
+file_reader_write_all(int fd, const void *data, size_t len)
+{
+    const char *p = (const char *)data;
+
+    while (len > 0) {
+        ssize_t n = write(fd, p, len);
+        if (n < 0) {
+            if (errno == EINTR)
+                continue;
+            return -1;
+        }
+        p += n;
+        len -= (size_t)n;
+    }
+    return 0;
+}
+
+/**
+ * Fork a process to read a file and return its pid.  Receives
+ * offset/length commands from control stream and sends corresponding data
+ * to out stream.  A zero length on the control stream signals an end.
+ *
+ * Every reply is an ssize_t byte count followed by that many bytes.  A
+ * request for more than FILEREADER_MAX_READ bytes is truncated to
+ * FILEREADER_MAX_READ so that it always fits the helper's buffer.
+ *
+ * @param fd stream from which to read
+ * @param fd_control receives the file descriptor to which control commands can be sent
+ * @param fd_out receives the file descriptor from which the output of the command can be read.
+ * @return PID of the process created to execute the command, or (pid_t)-1
+ *         on failure (no descriptors are left open in that case)
+ */
+pid_t
+file_reader(int fd, int *fd_control, int *fd_out)
+{
+    pid_t pid;
+    int control_pipe[2], out_pipe[2];
+    int err;
+
+    if (pipe(control_pipe) < 0)
+        return (pid_t)-1;
+    if (pipe(out_pipe) < 0) {
+        /* Do not leak the first pipe when the second cannot be created */
+        err = errno;
+        close(control_pipe[0]);
+        close(control_pipe[1]);
+        errno = err;
+        return (pid_t)-1;
+    }
+
+    pid = fork();
+    if (pid == (pid_t)-1) {
+        err = errno;
+        close(control_pipe[0]);
+        close(control_pipe[1]);
+        close(out_pipe[0]);
+        close(out_pipe[1]);
+        errno = err;
+        return pid;
+    }
+
+    if (pid == (pid_t)0) {
+        /* child */
+        off_t pos = 0;
+        file_reader_cb_t cb;
+        char buf[FILEREADER_MAX_READ];
+        int status = EXIT_FAILURE;
+
+        /*
+         * Drop the parent's ends.  While the child holds the write end
+         * of its own control pipe, read() below can never report EOF.
+         */
+        close(control_pipe[1]);
+        close(out_pipe[0]);
+
+        if (fd == -1)
+            _exit(EXIT_FAILURE);
+
+        for ( ; ; ) {
+            ssize_t got, nb;
+            size_t want;
+
+            got = read(control_pipe[0], &cb, sizeof(cb));
+            if (got < 0 && errno == EINTR)
+                continue;
+            if (got == 0) {
+                /* Control pipe closed: nothing more to do */
+                status = EXIT_SUCCESS;
+                break;
+            }
+            if (got != (ssize_t)sizeof(cb)) {
+                perror("ERROR: file_reader: ");
+                break;
+            }
+            if (cb.nbytes <= 0) {
+                /* Explicit terminate command */
+                status = EXIT_SUCCESS;
+                break;
+            }
+
+            /* Never read more than the bounce buffer can hold */
+            want = (cb.nbytes > (ssize_t)sizeof(buf)) ? sizeof(buf)
+                                                      : (size_t)cb.nbytes;
+            if (pos != cb.offset) {
+                pos = lseek(fd, cb.offset, SEEK_SET);
+                if (pos == (off_t)-1) {
+                    perror("ERROR: file_reader: ");
+                    break;
+                }
+            }
+            nb = read(fd, buf, want);
+            if (nb == (ssize_t)-1) {
+                perror("ERROR: file_reader: ");
+                break;
+            }
+            pos += nb;
+            if (file_reader_write_all(out_pipe[1], &nb, sizeof(nb)) < 0 ||
+                (nb > 0 && file_reader_write_all(out_pipe[1], buf, (size_t)nb) < 0)) {
+                perror("ERROR: file_reader: ");
+                break;
+            }
+        }
+
+        close(control_pipe[0]);
+        close(out_pipe[1]);
+        _exit(status);
+    }
+
+    /* parent */
+    close(out_pipe[1]);
+    close(control_pipe[0]);
+    *fd_out = out_pipe[0];
+    *fd_control = control_pipe[1];
+    return pid;
+}
+
+/**
+ * Reap a helper process, retrying when waitpid() is interrupted.
+ */
+static void
+fileio_reap(pid_t pid)
+{
+    while (waitpid(pid, NULL, 0) < 0 && errno == EINTR)
+        ;
+}
+
+/**
+ * fileio_data_t destructor callback
+ *
+ * Asks the helper to exit, closes both pipes and only then waits for the
+ * helper, so that a helper blocked on either pipe is released before
+ * waitpid() is called.
+ */
+static void
+fileio_data_destructor(void *dat_in)
+{
+    fileio_data_t *dat = (fileio_data_t *)dat_in;
+    file_reader_cb_t cb;
+
+    if (!dat)
+        return;
+
+    cb.offset = 0;
+    cb.nbytes = 0;
+    /* Best effort: closing control_fd below also ends the helper's loop */
+    (void)st_write(dat->control_fd, (char *)&cb, sizeof(cb),
+                   ST_UTIME_NO_TIMEOUT);
+    st_netfd_close(dat->control_fd);
+    st_netfd_close(dat->data_fd);
+    fileio_reap(dat->pid);
+    free(dat);
+}
+
+/**
+ * Retrieve fileio_data_t struct from an st descriptor.  Create and store
+ * a new one if needed.
+ *
+ * @return the descriptor's helper state, or NULL when the helper process
+ *         or its bookkeeping could not be created.
+ */
+static fileio_data_t *get_fileio_data(st_netfd_t fd)
+{
+    fileio_data_t *dat = (fileio_data_t *)st_netfd_getspecific(fd);
+    int fd_control, fd_out;
+    pid_t pid;
+
+    if (dat)
+        return dat;
+
+    pid = file_reader(st_netfd_fileno(fd), &fd_control, &fd_out);
+    if (pid == (pid_t)-1)
+        return NULL;
+
+    dat = (fileio_data_t *)calloc(1, sizeof(*dat));
+    if (dat) {
+        dat->pid = pid;
+        dat->control_fd = st_netfd_open(fd_control);
+        if (dat->control_fd)
+            dat->data_fd = st_netfd_open(fd_out);
+    }
+    if (dat && dat->control_fd && dat->data_fd) {
+        st_netfd_setspecific(fd, dat, fileio_data_destructor);
+        return dat;
+    }
+
+    /*
+     * Setup failed.  data_fd is NULL on every path that gets here, so
+     * fd_out is always still a raw descriptor; fd_control is raw unless
+     * it was already wrapped.  Closing them makes the helper exit.
+     */
+    if (dat && dat->control_fd)
+        st_netfd_close(dat->control_fd);
+    else
+        close(fd_control);
+    close(fd_out);
+    free(dat);
+    fileio_reap(pid);
+    return NULL;
+}
+
+/**
+ * Read data from the specified section of a file.  Uses a forked
+ * file_reader process to do the actual reading so as to avoid causing all
+ * State Threads to block.
+ *
+ * Like read(2), this may return fewer bytes than requested: one call
+ * transfers at most FILEREADER_MAX_READ bytes.  A request for zero bytes
+ * returns 0 without contacting the helper.
+ *
+ * NOTE(review): if the call times out after the command has been sent,
+ * the helper's reply stays queued in the pipe and is not drained here.
+ *
+ * @param fd must refer to a seekable file.
+ * @param offset absolute offset within the file
+ * @param buf output buffer
+ * @param nbytes size of the output buffer
+ * @param timeout applied separately to each st_write()/st_read_fully() call
+ * @return number of bytes stored in buf, 0 at end of file, -1 on error
+ */
+ssize_t
+stx_file_read(st_netfd_t fd, off_t offset, void *buf, size_t nbytes, st_utime_t timeout)
+{
+    fileio_data_t *dat;
+    file_reader_cb_t cb;
+    ssize_t ret = (ssize_t)-1;
+
+    /* nbytes == 0 on the wire is the helper's terminate command */
+    if (nbytes == 0)
+        return 0;
+
+    dat = get_fileio_data(fd);
+    if (!dat)
+        return (ssize_t)-1;
+
+    cb.offset = offset;
+    cb.nbytes = (nbytes > FILEREADER_MAX_READ) ? (ssize_t)FILEREADER_MAX_READ
+                                               : (ssize_t)nbytes;
+    if (st_write(dat->control_fd, (char *)&cb, sizeof(cb), timeout) !=
+        (ssize_t)sizeof(cb))
+        return (ssize_t)-1;
+
+    if (st_read_fully(dat->data_fd, (char *)&ret, sizeof(ret), timeout) !=
+        (ssize_t)sizeof(ret))
+        return (ssize_t)-1;
+    if (ret <= 0)
+        return ret;
+    if ((size_t)ret > nbytes)
+        return (ssize_t)-1;     /* reply larger than requested: protocol error */
+
+    return st_read_fully(dat->data_fd, buf, (size_t)ret, timeout);
+}
diff --git a/trunk/research/st-1.9/extensions/stx_fileio.h b/trunk/research/st-1.9/extensions/stx_fileio.h
new file mode 100644
index 000000000..b6bec190b
--- /dev/null
+++ b/trunk/research/st-1.9/extensions/stx_fileio.h
@@ -0,0 +1,52 @@
+/*
+ * File I/O extension to the State Threads Library.
+ */
+
+/*
+ * The contents of this file are subject to the Mozilla Public
+ * License Version 1.1 (the "License"); you may not use this file
+ * except in compliance with the License. You may obtain a copy of
+ * the License at http://www.mozilla.org/MPL/
+ *
+ * Software distributed under the License is distributed on an "AS
+ * IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
+ * implied. See the License for the specific language governing
+ * rights and limitations under the License.
+ *
+ * The Original Code is the file I/O extension to the State Threads Library.
+ *
+ * The Initial Developer of the Original Code is Jeff
+ * . Portions created by the Initial
+ * Developer are Copyright (C) 2002 the Initial Developer. All Rights
+ * Reserved.
+ *
+ * Contributor(s): (none)
+ *
+ * Alternatively, the contents of this file may be used under the
+ * terms of the GNU General Public License Version 2 or later (the
+ * "GPL"), in which case the provisions of the GPL are applicable
+ * instead of those above. If you wish to allow use of your
+ * version of this file only under the terms of the GPL and not to
+ * allow others to use your version of this file under the MPL,
+ * indicate your decision by deleting the provisions above and
+ * replace them with the notice and other provisions required by
+ * the GPL. If you do not delete the provisions above, a recipient
+ * may use your version of this file under either the MPL or the
+ * GPL.
+ */
+
+#ifndef __STX_FILEIO_H__
+#define __STX_FILEIO_H__
+
+#include <st.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+extern ssize_t stx_file_read(st_netfd_t fd, off_t offset, void *buf, size_t nbytes, st_utime_t timeout);
+
+#ifdef __cplusplus
+}
+#endif
+#endif /* !__STX_FILEIO_H__ */
diff --git a/trunk/research/st-1.9/extensions/testdns.c b/trunk/research/st-1.9/extensions/testdns.c
new file mode 100644
index 000000000..aa896b25e
--- /dev/null
+++ b/trunk/research/st-1.9/extensions/testdns.c
@@ -0,0 +1,112 @@
+#include "stx.h"
+#include <stdio.h>
+#include <string.h>
+#include <netdb.h>
+#include <netinet/in.h>
+#include <arpa/inet.h>
+
+
+#define MAX_ADDRS 128
+#define TIMEOUT (4*1000000LL)
+
+/*
+ * Resolve one host name through the caching resolver and print every
+ * address returned; the host name is printed on the first row only.
+ * Failures are reported on stderr.
+ */
+static void do_resolve(const char *host)
+{
+    struct in_addr addrs[MAX_ADDRS];
+    int i, n = MAX_ADDRS;
+
+    if (stx_dns_getaddrlist(host, addrs, &n, TIMEOUT) < 0) {
+        fprintf(stderr, "stx_dns_getaddrlist: can't resolve %s: ", host);
+        if (h_errno == NETDB_INTERNAL)
+            perror("");
+        else
+            herror("");
+    } else {
+        if (n > 0)
+            printf("%-40s %s\n", (char *)host, inet_ntoa(addrs[0]));
+        for (i = 1; i < n; i++)
+            printf("%-40s %s\n", "", inet_ntoa(addrs[i]));
+    }
+}
+
+/*
+ * Print the DNS cache statistics.  The counters are size_t and
+ * unsigned long, so they are printed as unsigned long instead of being
+ * truncated through int.
+ */
+static void show_info(void)
+{
+    stx_cache_info_t info;
+
+    stx_dns_cache_getinfo(&info);
+    printf("DNS cache info:\n\n");
+    printf("max_size: %8lu\n", (unsigned long)info.max_size);
+    printf("capacity: %8lu bytes\n", (unsigned long)info.max_weight);
+    printf("hash_size: %8lu\n", (unsigned long)info.hash_size);
+    printf("cur_size: %8lu\n"
+           "cur_mem: %8lu bytes\n"
+           "hits: %8lu\n"
+           "lookups: %8lu\n"
+           "inserts: %8lu\n"
+           "deletes: %8lu\n",
+           (unsigned long)info.cur_size, (unsigned long)info.cur_weight,
+           info.hits, info.lookups, info.inserts, info.deletes);
+}
+
+extern stx_cache_t *_stx_dns_cache;
+
+/* Cache traversal callback: print the key as a host name; data is unused. */
+static void printhost(void *host, void *data)
+{
+    (void)data;
+    printf("%s\n", (char *)host);
+}
+
+/* Print up to 10 hosts via stx_cache_traverse_lru(). */
+static void show_lru(void)
+{
+    printf("LRU hosts:\n\n");
+    stx_cache_traverse_lru(_stx_dns_cache, printhost, 10);
+}
+
+/* Print up to 10 hosts via stx_cache_traverse_mru(). */
+static void show_mru(void)
+{
+    printf("MRU hosts:\n\n");
+    stx_cache_traverse_mru(_stx_dns_cache, printhost, 10);
+}
+
+/* Empty the DNS cache. */
+static void flush_cache(void)
+{
+    stx_cache_empty(_stx_dns_cache);
+    printf("DNS cache is empty\n");
+}
+
+
+/*
+ * Interactive driver: reads one word per line from stdin.  "info", "lru",
+ * "mru" and "flush" inspect or reset the cache, "exit"/"quit" leave, and
+ * any other word is resolved as a host name.
+ */
+int main()
+{
+    char line[256];
+    char str[sizeof(line)];
+
+    /* Nothing below can work if either initialisation step failed */
+    if (st_init() < 0) {
+        perror("st_init");
+        return 1;
+    }
+    if (stx_dns_cache_init(100, 10000, 101) < 0) {
+        fprintf(stderr, "stx_dns_cache_init failed\n");
+        return 1;
+    }
+
+    for ( ; ; ) {
+        fputs("> ", stdout);
+        fflush(stdout);
+        if (!fgets(line, sizeof(line), stdin))
+            break;
+        /* str is as large as line, so an unbounded %s cannot overflow it */
+        if (sscanf(line, "%s", str) != 1)
+            continue;
+        if (strcmp(str, "exit") == 0 || strcmp(str, "quit") == 0)
+            break;
+        if (strcmp(str, "info") == 0) {
+            show_info();
+            continue;
+        }
+        if (strcmp(str, "lru") == 0) {
+            show_lru();
+            continue;
+        }
+        if (strcmp(str, "mru") == 0) {
+            show_mru();
+            continue;
+        }
+        if (strcmp(str, "flush") == 0) {
+            flush_cache();
+            continue;
+        }
+
+        do_resolve(str);
+    }
+
+    return 0;
+}
+
diff --git a/trunk/research/st-1.9/io.c b/trunk/research/st-1.9/io.c
new file mode 100644
index 000000000..f95ff8c6f
--- /dev/null
+++ b/trunk/research/st-1.9/io.c
@@ -0,0 +1,778 @@
+/*
+ * The contents of this file are subject to the Mozilla Public
+ * License Version 1.1 (the "License"); you may not use this file
+ * except in compliance with the License.
You may obtain a copy of + * the License at http://www.mozilla.org/MPL/ + * + * Software distributed under the License is distributed on an "AS + * IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or + * implied. See the License for the specific language governing + * rights and limitations under the License. + * + * The Original Code is the Netscape Portable Runtime library. + * + * The Initial Developer of the Original Code is Netscape + * Communications Corporation. Portions created by Netscape are + * Copyright (C) 1994-2000 Netscape Communications Corporation. All + * Rights Reserved. + * + * Contributor(s): Silicon Graphics, Inc. + * + * Portions created by SGI are Copyright (C) 2000-2001 Silicon + * Graphics, Inc. All Rights Reserved. + * + * Alternatively, the contents of this file may be used under the + * terms of the GNU General Public License Version 2 or later (the + * "GPL"), in which case the provisions of the GPL are applicable + * instead of those above. If you wish to allow use of your + * version of this file only under the terms of the GPL and not to + * allow others to use your version of this file under the MPL, + * indicate your decision by deleting the provisions above and + * replace them with the notice and other provisions required by + * the GPL. If you do not delete the provisions above, a recipient + * may use your version of this file under either the MPL or the + * GPL. + */ + +/* + * This file is derived directly from Netscape Communications Corporation, + * and consists of extensive modifications made during the year(s) 1999-2000. 
+ */
+
+#include <stdlib.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <sys/ioctl.h>
+#include <sys/uio.h>
+#include <sys/time.h>
+#include <sys/resource.h>
+#include <fcntl.h>
+#include <signal.h>
+#include <errno.h>
+#include "common.h"
+
+
+#if EAGAIN != EWOULDBLOCK
+#define _IO_NOT_READY_ERROR ((errno == EAGAIN) || (errno == EWOULDBLOCK))
+#else
+#define _IO_NOT_READY_ERROR (errno == EAGAIN)
+#endif
+
+#define _LOCAL_MAXIOV 16
+
+/* File descriptor object free list */
+static _st_netfd_t *_st_netfd_freelist = NULL;
+/* Maximum number of file descriptors that the process can open */
+static int _st_osfd_limit = -1;
+
+static void _st_netfd_free_aux_data(_st_netfd_t *fd);
+
+/*
+ * One-time I/O setup: ignore SIGPIPE and raise the soft RLIMIT_NOFILE
+ * limit to the hard limit, capped by the event system's own limit when
+ * that limit is positive.  Returns 0 on success, -1 on failure.
+ */
+int _st_io_init(void)
+{
+  struct sigaction sigact;
+  struct rlimit rlim;
+  int fdlim;
+
+  /* Ignore SIGPIPE */
+  sigact.sa_handler = SIG_IGN;
+  sigemptyset(&sigact.sa_mask);
+  sigact.sa_flags = 0;
+  if (sigaction(SIGPIPE, &sigact, NULL) < 0)
+    return -1;
+
+  /* Set maximum number of open file descriptors */
+  if (getrlimit(RLIMIT_NOFILE, &rlim) < 0)
+    return -1;
+
+  fdlim = (*_st_eventsys->fd_getlimit)();
+  if (fdlim > 0 && rlim.rlim_max > (rlim_t) fdlim) {
+    rlim.rlim_max = fdlim;
+  }
+  rlim.rlim_cur = rlim.rlim_max;
+  if (setrlimit(RLIMIT_NOFILE, &rlim) < 0)
+    return -1;
+  _st_osfd_limit = (int) rlim.rlim_max;
+
+  return 0;
+}
+
+
+/* Return the descriptor limit recorded by _st_io_init() (-1 before it runs). */
+int st_getfdlimit(void)
+{
+  return _st_osfd_limit;
+}
+
+
+/*
+ * Release a descriptor object without closing the OS descriptor: run the
+ * aux-data cleanup and the private-data destructor, then push the object
+ * onto the free list.  A no-op for an object that is not in use.
+ */
+void st_netfd_free(_st_netfd_t *fd)
+{
+  if (!fd->inuse)
+    return;
+
+  fd->inuse = 0;
+  if (fd->aux_data)
+    _st_netfd_free_aux_data(fd);
+  if (fd->private_data && fd->destructor)
+    (*(fd->destructor))(fd->private_data);
+  fd->private_data = NULL;
+  fd->destructor = NULL;
+  fd->next = _st_netfd_freelist;
+  _st_netfd_freelist = fd;
+}
+
+
+/*
+ * Wrap an OS descriptor in a descriptor object, reusing one from the free
+ * list when possible.  When nonblock is set the descriptor is switched to
+ * non-blocking mode: FIONBIO is tried first for sockets, fcntl() is used
+ * otherwise.  Returns NULL on failure.
+ */
+static _st_netfd_t *_st_netfd_new(int osfd, int nonblock, int is_socket)
+{
+  _st_netfd_t *fd;
+  int flags = 1;
+
+  if ((*_st_eventsys->fd_new)(osfd) < 0)
+    return NULL;
+
+  if (_st_netfd_freelist) {
+    fd = _st_netfd_freelist;
+    _st_netfd_freelist = _st_netfd_freelist->next;
+  } else {
+    fd = calloc(1, sizeof(_st_netfd_t));
+    if (!fd)
+      return NULL;
+  }
+
+  fd->osfd = osfd;
+  fd->inuse = 1;
+  fd->next = NULL;
+
+  if (nonblock) {
+    /* Use just one system call */
+    if (is_socket && ioctl(osfd, FIONBIO, &flags) != -1)
+      return fd;
+    /* Do it the Posix way */
+    if ((flags = fcntl(osfd, F_GETFL, 0)) < 0 ||
+        fcntl(osfd, F_SETFL, flags | O_NONBLOCK) < 0) {
+      st_netfd_free(fd);
+      return NULL;
+    }
+  }
+
+  return fd;
+}
+
+
+/* Wrap a non-socket descriptor and make it non-blocking. */
+_st_netfd_t *st_netfd_open(int osfd)
+{
+  return _st_netfd_new(osfd, 1, 0);
+}
+
+
+/* Wrap a socket descriptor and make it non-blocking. */
+_st_netfd_t *st_netfd_open_socket(int osfd)
+{
+  return _st_netfd_new(osfd, 1, 1);
+}
+
+
+/*
+ * Release the descriptor object and close the OS descriptor.
+ * Returns the result of close(), or -1 when the event system refuses
+ * the close or the object has already been released (errno = EBADF).
+ */
+int st_netfd_close(_st_netfd_t *fd)
+{
+  int osfd;
+
+  /*
+   * An object that is not in use has already been released; its osfd
+   * number is stale and may belong to an unrelated descriptor by now.
+   */
+  if (!fd->inuse) {
+    errno = EBADF;
+    return -1;
+  }
+
+  /* Read the descriptor before the object goes back on the free list */
+  osfd = fd->osfd;
+  if ((*_st_eventsys->fd_close)(osfd) < 0)
+    return -1;
+
+  st_netfd_free(fd);
+  return close(osfd);
+}
+
+
+/* Return the underlying OS descriptor. */
+int st_netfd_fileno(_st_netfd_t *fd)
+{
+  return (fd->osfd);
+}
+
+
+/*
+ * Attach caller data and its destructor to a descriptor object.  A
+ * different, previously attached value is destroyed first.
+ */
+void st_netfd_setspecific(_st_netfd_t *fd, void *value,
+                          _st_destructor_t destructor)
+{
+  if (value != fd->private_data) {
+    /* Free up previously set non-NULL data value */
+    if (fd->private_data && fd->destructor)
+      (*(fd->destructor))(fd->private_data);
+  }
+  fd->private_data = value;
+  fd->destructor = destructor;
+}
+
+
+/* Return the caller data attached with st_netfd_setspecific(). */
+void *st_netfd_getspecific(_st_netfd_t *fd)
+{
+  return (fd->private_data);
+}
+
+
+/*
+ * Wait for I/O on a single descriptor.
+ */
+/*
+ * "how" is passed to st_poll() as the pollfd event mask.  Returns 0 when
+ * the descriptor is ready; -1 on st_poll() failure, on timeout
+ * (errno = ETIME) or when the descriptor is invalid (errno = EBADF).
+ */
+int st_netfd_poll(_st_netfd_t *fd, int how, st_utime_t timeout)
+{
+  struct pollfd req;
+  int ready;
+
+  req.fd = fd->osfd;
+  req.events = (short) how;
+  req.revents = 0;
+
+  ready = st_poll(&req, 1, timeout);
+  if (ready > 0) {
+    if (!(req.revents & POLLNVAL))
+      return 0;
+    errno = EBADF;
+  } else if (ready == 0) {
+    /* Timed out */
+    errno = ETIME;
+  }
+  /* ready < 0: st_poll() failed and errno is left as it set it */
+
+  return -1;
+}
+
+
+#ifdef MD_ALWAYS_UNSERIALIZED_ACCEPT
+/* Serialization is never needed in this configuration: nothing to set up */
+int st_netfd_serialize_accept(_st_netfd_t *fd)
+{
+  fd->aux_data = NULL;
+  return 0;
+}
+
+/* Nothing was allocated for aux_data: just clear the field */
+static void _st_netfd_free_aux_data(_st_netfd_t *fd)
+{
+  fd->aux_data = NULL;
+}
+
+/*
+ * Accept a connection, waiting for the listening socket to become
+ * readable when accept() reports that it would block.  Returns the
+ * wrapped new socket, or NULL with errno set.
+ */
+_st_netfd_t *st_accept(_st_netfd_t *fd, struct sockaddr *addr, int *addrlen,
+                       st_utime_t timeout)
+{
+  int osfd, err;
+  _st_netfd_t *newfd;
+
+  for ( ; ; ) {
+    osfd = accept(fd->osfd, addr, (socklen_t *)addrlen);
+    if (osfd >= 0)
+      break;
+    if (errno == EINTR)
+      continue;
+    if (!_IO_NOT_READY_ERROR)
+      return NULL;
+    /* Wait until the socket becomes readable */
+    if (st_netfd_poll(fd, POLLIN, timeout) < 0)
+      return NULL;
+  }
+
+  /* On some platforms the new socket created by accept() inherits */
+  /* the nonblocking attribute of the listening socket */
+#if defined (MD_ACCEPT_NB_INHERITED)
+  newfd = _st_netfd_new(osfd, 0, 1);
+#elif defined (MD_ACCEPT_NB_NOT_INHERITED)
+  newfd = _st_netfd_new(osfd, 1, 1);
+#else
+#error Unknown OS
+#endif
+
+  if (newfd)
+    return newfd;
+
+  /* Wrapping failed: close the socket but report the original error */
+  err = errno;
+  close(osfd);
+  errno = err;
+  return NULL;
+}
+
+#else /* MD_ALWAYS_UNSERIALIZED_ACCEPT */
+/*
+ * On some platforms accept() calls from different processes
+ * on the same listen socket must be serialized.
+ * The following code serializes accept()'s without process blocking.
+ * A pipe is used as an inter-process semaphore.
+ */
+/*
+ * Create the pipe semaphore for fd and store the two wrapped pipe ends
+ * in fd->aux_data.  One byte is written into the pipe so that the lock
+ * starts out available.  Fails with EINVAL when aux_data is already set.
+ */
+int st_netfd_serialize_accept(_st_netfd_t *fd)
+{
+  _st_netfd_t **p;
+  int osfd[2], err;
+
+  if (fd->aux_data) {
+    errno = EINVAL;
+    return -1;
+  }
+  if ((p = (_st_netfd_t **)calloc(2, sizeof(_st_netfd_t *))) == NULL)
+    return -1;
+  if (pipe(osfd) < 0) {
+    free(p);
+    return -1;
+  }
+  if ((p[0] = st_netfd_open(osfd[0])) != NULL &&
+      (p[1] = st_netfd_open(osfd[1])) != NULL &&
+      write(osfd[1], " ", 1) == 1) {
+    fd->aux_data = p;
+    return 0;
+  }
+  /* Error */
+  err = errno;
+  if (p[0])
+    st_netfd_free(p[0]);
+  if (p[1])
+    st_netfd_free(p[1]);
+  close(osfd[0]);
+  close(osfd[1]);
+  free(p);
+  errno = err;
+
+  return -1;
+}
+
+/* Close both ends of the semaphore pipe and release the pointer pair. */
+static void _st_netfd_free_aux_data(_st_netfd_t *fd)
+{
+  _st_netfd_t **p = (_st_netfd_t **) fd->aux_data;
+
+  st_netfd_close(p[0]);
+  st_netfd_close(p[1]);
+  free(p);
+  fd->aux_data = NULL;
+}
+
+/*
+ * Accept a connection.  When fd has a semaphore pipe (aux_data), the
+ * token byte is read from the pipe before calling accept() and written
+ * back afterwards; errno from accept() is preserved across the unlock.
+ * Returns the wrapped new socket, or NULL with errno set.
+ */
+_st_netfd_t *st_accept(_st_netfd_t *fd, struct sockaddr *addr, int *addrlen,
+                       st_utime_t timeout)
+{
+  int osfd, err;
+  _st_netfd_t *newfd;
+  _st_netfd_t **p = (_st_netfd_t **) fd->aux_data;
+  ssize_t n;
+  char c;
+
+  for ( ; ; ) {
+    if (p == NULL) {
+      osfd = accept(fd->osfd, addr, (socklen_t *)addrlen);
+    } else {
+      /* Get the lock */
+      n = st_read(p[0], &c, 1, timeout);
+      if (n < 0)
+        return NULL;
+      ST_ASSERT(n == 1);
+      /* Got the lock */
+      osfd = accept(fd->osfd, addr, (socklen_t *)addrlen);
+      /* Unlock */
+      err = errno;
+      n = st_write(p[1], &c, 1, timeout);
+      ST_ASSERT(n == 1);
+      errno = err;
+    }
+    if (osfd >= 0)
+      break;
+    if (errno == EINTR)
+      continue;
+    if (!_IO_NOT_READY_ERROR)
+      return NULL;
+    /* Wait until the socket becomes readable */
+    if (st_netfd_poll(fd, POLLIN, timeout) < 0)
+      return NULL;
+  }
+
+  /* On some platforms the new socket created by accept() inherits */
+  /* the nonblocking attribute of the listening socket */
+#if defined (MD_ACCEPT_NB_INHERITED)
+  newfd = _st_netfd_new(osfd, 0, 1);
+#elif defined (MD_ACCEPT_NB_NOT_INHERITED)
+  newfd = _st_netfd_new(osfd, 1, 1);
+#else
+#error Unknown OS
+#endif
+
+  if (!newfd) {
+    /* Close the socket but report the error from _st_netfd_new() */
+    err = errno;
+    close(osfd);
+    errno = err;
+  }
+
+  return newfd;
+}
+#endif /* MD_ALWAYS_UNSERIALIZED_ACCEPT */
+
+
+/*
+ * Connect fd to addr.  When connect() does not complete immediately the
+ * socket is polled for writability and the outcome is read back with
+ * SO_ERROR.  "err" doubles as the "connect() was interrupted" flag until
+ * getsockopt() overwrites it.  Returns 0 on success, -1 with errno set.
+ */
+int st_connect(_st_netfd_t *fd, const struct sockaddr *addr, int addrlen,
+               st_utime_t timeout)
+{
+  int n, err = 0;
+
+  while (connect(fd->osfd, addr, addrlen) < 0) {
+    if (errno != EINTR) {
+      /*
+       * On some platforms, if connect() is interrupted (errno == EINTR)
+       * after the kernel binds the socket, a subsequent connect()
+       * attempt will fail with errno == EADDRINUSE. Ignore EADDRINUSE
+       * iff connect() was previously interrupted. See Rich Stevens'
+       * "UNIX Network Programming," Vol. 1, 2nd edition, p. 413
+       * ("Interrupted connect").
+       */
+      if (errno != EINPROGRESS && (errno != EADDRINUSE || err == 0))
+        return -1;
+      /* Wait until the socket becomes writable */
+      if (st_netfd_poll(fd, POLLOUT, timeout) < 0)
+        return -1;
+      /* Try to find out whether the connection setup succeeded or failed */
+      n = sizeof(int);
+      if (getsockopt(fd->osfd, SOL_SOCKET, SO_ERROR, (char *)&err,
+                     (socklen_t *)&n) < 0)
+        return -1;
+      if (err) {
+        errno = err;
+        return -1;
+      }
+      break;
+    }
+    err = 1;
+  }
+
+  return 0;
+}
+
+
+/*
+ * Read up to nbyte bytes, polling for readability whenever read() would
+ * block.  Returns the result of the first read() that does not fail
+ * (possibly fewer than nbyte bytes, 0 at end of input), or -1 on error.
+ */
+ssize_t st_read(_st_netfd_t *fd, void *buf, size_t nbyte, st_utime_t timeout)
+{
+  ssize_t n;
+
+  while ((n = read(fd->osfd, buf, nbyte)) < 0) {
+    if (errno == EINTR)
+      continue;
+    if (!_IO_NOT_READY_ERROR)
+      return -1;
+    /* Wait until the socket becomes readable */
+    if (st_netfd_poll(fd, POLLIN, timeout) < 0)
+      return -1;
+  }
+
+  return n;
+}
+
+
+/*
+ * Single-buffer front end to st_readv_resid().  On return *resid holds
+ * the number of bytes that were NOT read.
+ */
+int st_read_resid(_st_netfd_t *fd, void *buf, size_t *resid,
+                  st_utime_t timeout)
+{
+  struct iovec iov, *riov;
+  int riov_size, rv;
+
+  iov.iov_base = buf;
+  iov.iov_len = *resid;
+  riov = &iov;
+  riov_size = 1;
+  rv = st_readv_resid(fd, &riov, &riov_size, timeout);
+  *resid = iov.iov_len;
+  return rv;
+}
+
+
+/*
+ * Scatter read: like st_read(), but through readv().  Returns the result
+ * of the first readv() that does not fail, or -1 on error.
+ */
+ssize_t st_readv(_st_netfd_t *fd, const struct iovec *iov, int iov_size,
+                 st_utime_t timeout)
+{
+  ssize_t n;
+
+  while ((n = readv(fd->osfd, iov, iov_size)) < 0) {
+    if (errno == EINTR)
+      continue;
+    if (!_IO_NOT_READY_ERROR)
+      return -1;
+    /* Wait until the socket becomes readable */
+    if (st_netfd_poll(fd, POLLIN, timeout) < 0)
+      return -1;
+  }
+
+  return n;
+}
+
+/*
+ * Read until every vector is filled or read() returns 0.  *iov and
+ * *iov_size are advanced in place, and the iovec entries themselves are
+ * modified, so that on return they describe the part that is still
+ * unfilled.  Returns 0 on completion or end of input, -1 on error.
+ */
+int st_readv_resid(_st_netfd_t *fd, struct iovec **iov, int *iov_size,
+                   st_utime_t timeout)
+{
+  ssize_t n;
+
+  while (*iov_size > 0) {
+    if (*iov_size == 1)
+      n = read(fd->osfd, (*iov)->iov_base, (*iov)->iov_len);
+    else
+      n = readv(fd->osfd, *iov, *iov_size);
+    if (n < 0) {
+      if (errno == EINTR)
+        continue;
+      if (!_IO_NOT_READY_ERROR)
+        return -1;
+    } else if (n == 0)
+      break;
+    else {
+      /* Consume every vector that the n bytes filled completely */
+      while ((size_t) n >= (*iov)->iov_len) {
+        n -= (*iov)->iov_len;
+        (*iov)->iov_base = (char *) (*iov)->iov_base + (*iov)->iov_len;
+        (*iov)->iov_len = 0;
+        (*iov)++;
+        (*iov_size)--;
+        if (n == 0)
+          break;
+      }
+      if (*iov_size == 0)
+        break;
+      /* Advance into the vector that was filled only partially */
+      (*iov)->iov_base = (char *) (*iov)->iov_base + n;
+      (*iov)->iov_len -= n;
+    }
+    /* Wait until the socket becomes readable */
+    if (st_netfd_poll(fd, POLLIN, timeout) < 0)
+      return -1;
+  }
+
+  return 0;
+}
+
+
+/*
+ * Read until nbyte bytes have arrived or read() returns 0.  Returns the
+ * number of bytes read (less than nbyte only at end of input), or -1 on
+ * error.
+ */
+ssize_t st_read_fully(_st_netfd_t *fd, void *buf, size_t nbyte,
+                      st_utime_t timeout)
+{
+  size_t resid = nbyte;
+  return st_read_resid(fd, buf, &resid, timeout) == 0 ?
+    (ssize_t) (nbyte - resid) : -1;
+}
+
+
+/*
+ * Single-buffer front end to st_writev_resid().  On return *resid holds
+ * the number of bytes that were NOT written.
+ */
+int st_write_resid(_st_netfd_t *fd, const void *buf, size_t *resid,
+                   st_utime_t timeout)
+{
+  struct iovec iov, *riov;
+  int riov_size, rv;
+
+  iov.iov_base = (void *) buf; /* we promise not to modify buf */
+  iov.iov_len = *resid;
+  riov = &iov;
+  riov_size = 1;
+  rv = st_writev_resid(fd, &riov, &riov_size, timeout);
+  *resid = iov.iov_len;
+  return rv;
+}
+
+
+/*
+ * Write all nbyte bytes.  Returns the number of bytes written when
+ * st_write_resid() succeeds, -1 otherwise.
+ */
+ssize_t st_write(_st_netfd_t *fd, const void *buf, size_t nbyte,
+                 st_utime_t timeout)
+{
+  size_t resid = nbyte;
+  return st_write_resid(fd, buf, &resid, timeout) == 0 ?
+    (ssize_t) (nbyte - resid) : -1;
+}
+
+
+/*
+ * Gather write of every byte described by iov, without modifying the
+ * caller's array.  After a partial writev() the unwritten tail is copied
+ * into a scratch array (local_iov when it fits in _LOCAL_MAXIOV entries,
+ * heap otherwise).  Returns the total byte count, or -1 on error.
+ */
+ssize_t st_writev(_st_netfd_t *fd, const struct iovec *iov, int iov_size,
+                  st_utime_t timeout)
+{
+  ssize_t n, rv;
+  size_t nleft, nbyte;
+  int index, iov_cnt;
+  struct iovec *tmp_iov;
+  struct iovec local_iov[_LOCAL_MAXIOV];
+
+  /* Calculate the total number of bytes to be sent */
+  nbyte = 0;
+  for (index = 0; index < iov_size; index++)
+    nbyte += iov[index].iov_len;
+
+  rv = (ssize_t)nbyte;
+  nleft = nbyte;
+  tmp_iov = (struct iovec *) iov; /* we promise not to modify iov */
+  iov_cnt = iov_size;
+
+  while (nleft > 0) {
+    if (iov_cnt == 1) {
+      /* A single vector is left: st_write() finishes the job */
+      if (st_write(fd, tmp_iov[0].iov_base, nleft, timeout) != (ssize_t) nleft)
+        rv = -1;
+      break;
+    }
+    if ((n = writev(fd->osfd, tmp_iov, iov_cnt)) < 0) {
+      if (errno == EINTR)
+        continue;
+      if (!_IO_NOT_READY_ERROR) {
+        rv = -1;
+        break;
+      }
+    } else {
+      if ((size_t) n == nleft)
+        break;
+      nleft -= n;
+      /* Find the next unwritten vector */
+      /* (n becomes the total written so far, then the offset into it) */
+      n = (ssize_t)(nbyte - nleft);
+      for (index = 0; (size_t) n >= iov[index].iov_len; index++)
+        n -= iov[index].iov_len;
+
+      if (tmp_iov == iov) {
+        /* Must copy iov's around */
+        if (iov_size - index <= _LOCAL_MAXIOV) {
+          tmp_iov = local_iov;
+        } else {
+          tmp_iov = calloc(1, (iov_size - index) * sizeof(struct iovec));
+          if (tmp_iov == NULL)
+            return -1;
+        }
+      }
+
+      /* Fill in the first partial write */
+      tmp_iov[0].iov_base = &(((char *)iov[index].iov_base)[n]);
+      tmp_iov[0].iov_len = iov[index].iov_len - n;
+      index++;
+      /* Copy the remaining vectors */
+      for (iov_cnt = 1; index < iov_size; iov_cnt++, index++) {
+        tmp_iov[iov_cnt].iov_base = iov[index].iov_base;
+        tmp_iov[iov_cnt].iov_len = iov[index].iov_len;
+      }
+    }
+    /* Wait until the socket becomes writable */
+    if (st_netfd_poll(fd, POLLOUT, timeout) < 0) {
+      rv = -1;
+      break;
+    }
+  }
+
+  /* Only a heap-allocated scratch array needs freeing */
+  if (tmp_iov != iov && tmp_iov != local_iov)
+    free(tmp_iov);
+
+  return rv;
+}
+
+
+/*
+ * Write until every vector has been sent.  *iov and *iov_size are
+ * advanced in place, and the iovec entries themselves are modified, so
+ * that on return they describe the part that is still unsent.
+ * Returns 0 on completion, -1 on error.
+ */
+int st_writev_resid(_st_netfd_t *fd, struct iovec **iov, int *iov_size,
+                    st_utime_t timeout)
+{
+  ssize_t n;
+
+  while (*iov_size > 0) {
+    if (*iov_size == 1)
+      n = write(fd->osfd, (*iov)->iov_base, (*iov)->iov_len);
+    else
+      n = writev(fd->osfd, *iov, *iov_size);
+    if (n < 0) {
+      if (errno == EINTR)
+        continue;
+      if (!_IO_NOT_READY_ERROR)
+        return -1;
+    } else {
+      /* Consume every vector that the n bytes covered completely */
+      while ((size_t) n >= (*iov)->iov_len) {
+        n -= (*iov)->iov_len;
+        (*iov)->iov_base = (char *) (*iov)->iov_base + (*iov)->iov_len;
+        (*iov)->iov_len = 0;
+        (*iov)++;
+        (*iov_size)--;
+        if (n == 0)
+          break;
+      }
+      if (*iov_size == 0)
+        break;
+      /* Advance into the vector that was sent only partially */
+      (*iov)->iov_base = (char *) (*iov)->iov_base + n;
+      (*iov)->iov_len -= n;
+    }
+    /* Wait until the socket becomes writable */
+    if (st_netfd_poll(fd, POLLOUT, timeout) < 0)
+      return -1;
+  }
+
+  return 0;
+}
+
+
+/*
+ * Simple I/O functions for UDP.
+ */
+/* recvfrom() that polls for readability instead of blocking. */
+int st_recvfrom(_st_netfd_t *fd, void *buf, int len, struct sockaddr *from,
+                int *fromlen, st_utime_t timeout)
+{
+  int n;
+
+  while ((n = recvfrom(fd->osfd, buf, len, 0, from, (socklen_t *)fromlen))
+         < 0) {
+    if (errno == EINTR)
+      continue;
+    if (!_IO_NOT_READY_ERROR)
+      return -1;
+    /* Wait until the socket becomes readable */
+    if (st_netfd_poll(fd, POLLIN, timeout) < 0)
+      return -1;
+  }
+
+  return n;
+}
+
+
+/* sendto() that polls for writability instead of blocking. */
+int st_sendto(_st_netfd_t *fd, const void *msg, int len,
+              const struct sockaddr *to, int tolen, st_utime_t timeout)
+{
+  int n;
+
+  while ((n = sendto(fd->osfd, msg, len, 0, to, tolen)) < 0) {
+    if (errno == EINTR)
+      continue;
+    if (!_IO_NOT_READY_ERROR)
+      return -1;
+    /* Wait until the socket becomes writable */
+    if (st_netfd_poll(fd, POLLOUT, timeout) < 0)
+      return -1;
+  }
+
+  return n;
+}
+
+
+/* recvmsg() that polls for readability instead of blocking. */
+int st_recvmsg(_st_netfd_t *fd, struct msghdr *msg, int flags,
+               st_utime_t timeout)
+{
+  int n;
+
+  while ((n = recvmsg(fd->osfd, msg, flags)) < 0) {
+    if (errno == EINTR)
+      continue;
+    if (!_IO_NOT_READY_ERROR)
+      return -1;
+    /* Wait until the socket becomes readable */
+    if (st_netfd_poll(fd, POLLIN, timeout) < 0)
+      return -1;
+  }
+
+  return n;
+}
+
+
+/* sendmsg() that polls for writability instead of blocking. */
+int st_sendmsg(_st_netfd_t *fd, const struct msghdr *msg, int flags,
+               st_utime_t timeout)
+{
+  int n;
+
+  while ((n = sendmsg(fd->osfd, msg, flags)) < 0) {
+    if (errno == EINTR)
+      continue;
+    if (!_IO_NOT_READY_ERROR)
+      return -1;
+    /* Wait until the socket becomes writable */
+    if (st_netfd_poll(fd, POLLOUT, timeout) < 0)
+      return -1;
+  }
+
+  return n;
+}
+
+
+
+/*
+ * To open FIFOs or other special files.
+ */
+/*
+ * O_NONBLOCK is added to oflags for open() itself, so _st_netfd_new() is
+ * asked not to set non-blocking mode again.  Returns NULL with errno set
+ * on failure.
+ */
+_st_netfd_t *st_open(const char *path, int oflags, mode_t mode)
+{
+  int osfd, err;
+  _st_netfd_t *newfd;
+
+  while ((osfd = open(path, oflags | O_NONBLOCK, mode)) < 0) {
+    if (errno != EINTR)
+      return NULL;
+  }
+
+  newfd = _st_netfd_new(osfd, 0, 0);
+  if (!newfd) {
+    err = errno;
+    close(osfd);
+    errno = err;
+  }
+
+  return newfd;
+}
+
diff --git a/trunk/research/st-1.9/key.c b/trunk/research/st-1.9/key.c
new file mode 100644
index 000000000..9708c355d
--- /dev/null
+++ b/trunk/research/st-1.9/key.c
@@ -0,0 +1,121 @@
+/*
+ * The contents of this file are subject to the Mozilla Public
+ * License Version 1.1 (the "License"); you may not use this file
+ * except in compliance with the License. You may obtain a copy of
+ * the License at http://www.mozilla.org/MPL/
+ *
+ * Software distributed under the License is distributed on an "AS
+ * IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
+ * implied. See the License for the specific language governing
+ * rights and limitations under the License.
+ *
+ * The Original Code is the Netscape Portable Runtime library.
+ *
+ * The Initial Developer of the Original Code is Netscape
+ * Communications Corporation. Portions created by Netscape are
+ * Copyright (C) 1994-2000 Netscape Communications Corporation. All
+ * Rights Reserved.
+ *
+ * Contributor(s): Silicon Graphics, Inc.
+ *
+ * Portions created by SGI are Copyright (C) 2000-2001 Silicon
+ * Graphics, Inc. All Rights Reserved.
+ *
+ * Alternatively, the contents of this file may be used under the
+ * terms of the GNU General Public License Version 2 or later (the
+ * "GPL"), in which case the provisions of the GPL are applicable
+ * instead of those above. If you wish to allow use of your
+ * version of this file only under the terms of the GPL and not to
+ * allow others to use your version of this file under the MPL,
+ * indicate your decision by deleting the provisions above and
+ * replace them with the notice and other provisions required by
+ * the GPL. If you do not delete the provisions above, a recipient
+ * may use your version of this file under either the MPL or the
+ * GPL.
+ */
+
+/*
+ * This file is derived directly from Netscape Communications Corporation,
+ * and consists of extensive modifications made during the year(s) 1999-2000.
+ */
+
+#include <stdlib.h>
+#include <errno.h>
+#include "common.h"
+
+
+/*
+ * Destructor table for per-thread private data
+ */
+static _st_destructor_t _st_destructors[ST_KEYS_MAX];
+static int key_max = 0;
+
+
+/*
+ * Return a key to be used for thread specific data
+ *
+ * Keys are handed out sequentially and never recycled.  Fails with
+ * EAGAIN once ST_KEYS_MAX keys exist.
+ */
+int st_key_create(int *keyp, _st_destructor_t destructor)
+{
+  if (key_max >= ST_KEYS_MAX) {
+    errno = EAGAIN;
+    return -1;
+  }
+
+  *keyp = key_max++;
+  _st_destructors[*keyp] = destructor;
+
+  return 0;
+}
+
+
+/* Return the maximum number of keys that can be created. */
+int st_key_getlimit(void)
+{
+  return ST_KEYS_MAX;
+}
+
+
+/*
+ * Store value under key for the current thread.  A different, non-NULL
+ * value already stored there is passed to the key's destructor first.
+ * Fails with EINVAL for a key that was never created.
+ */
+int st_thread_setspecific(int key, void *value)
+{
+  _st_thread_t *me = _ST_CURRENT_THREAD();
+
+  if (key < 0 || key >= key_max) {
+    errno = EINVAL;
+    return -1;
+  }
+
+  if (value != me->private_data[key]) {
+    /* free up previously set non-NULL data value */
+    if (me->private_data[key] && _st_destructors[key]) {
+      (*_st_destructors[key])(me->private_data[key]);
+    }
+    me->private_data[key] = value;
+  }
+
+  return 0;
+}
+
+
+/* Return the current thread's value for key, or NULL for an unknown key. */
+void *st_thread_getspecific(int key)
+{
+  if (key < 0 || key >= key_max)
+    return NULL;
+
+  return ((_ST_CURRENT_THREAD())->private_data[key]);
+}
+
+
+/*
+ * Free up all per-thread private data
+ *
+ * A slot is cleared only when its key has a destructor.
+ */
+void _st_thread_cleanup(_st_thread_t *thread)
+{
+  int key;
+
+  for (key = 0; key < key_max; key++) {
+    if (thread->private_data[key] && _st_destructors[key]) {
+      (*_st_destructors[key])(thread->private_data[key]);
+      thread->private_data[key] = NULL;
+    }
+  }
+}
+
diff --git a/trunk/research/st-1.9/libst.def b/trunk/research/st-1.9/libst.def
new file mode 100644
index 000000000..6eaf149a9
--- /dev/null
+++ b/trunk/research/st-1.9/libst.def
@@ -0,0 +1,51 @@
+EXPORTS
+    st_accept @62
+    st_cond_broadcast @63
+    st_cond_destroy @64
+    st_cond_new @65
+    st_cond_signal @66
+    st_cond_timedwait @67
+    st_cond_wait @68
+    st_connect @69
+    st_getfdlimit @70
+    st_init @71
+    st_key_create @72
+    st_key_getlimit @73
+    st_mutex_destroy @74
+    st_mutex_lock @75
+    st_mutex_new @76
+    st_mutex_trylock @77
+    st_mutex_unlock @78
+    st_netfd_close @79
+    st_netfd_fileno @80
+    st_netfd_free @81
+    st_netfd_getspecific @82
+    st_netfd_open @83
+    st_netfd_open_socket @84
+    st_netfd_poll @85
+    st_netfd_serialize_accept @86
+    st_netfd_setspecific @87
+    st_open @88
+    st_poll @89
+    st_randomize_stacks @90
+    st_read @91
+    st_read_fully @92
+    st_read_resid @93
+    st_recvfrom @94
+    st_sendto @95
+    st_sleep @96
+    st_thread_create @97
+    st_thread_exit @98
+    st_thread_getspecific @99
+    st_thread_interrupt @100
+    st_thread_join @101
+    st_thread_self @102
+    st_thread_setspecific @103
+    st_time @104
+    st_timecache_set @105
+    st_usleep @106
+    st_utime @107
+    st_utime_last_clock @108
+    st_write @109
+    st_write_resid @110
+    st_writev @111
diff --git a/trunk/research/st-1.9/md.S b/trunk/research/st-1.9/md.S
new file mode 100644
index 000000000..ab4f7b557
--- /dev/null
+++ b/trunk/research/st-1.9/md.S
@@ -0,0 +1,431 @@
+/*
+ * Portions created by SGI are Copyright (C) 2000 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ */
+
+#if defined(__ia64__)
+
+/*
+ * The internal __jmp_buf layout is different from one used
+ * by setjmp()/longjmp().
+ * + * Offset Description + * ------ ----------- + * 0x000 stack pointer (r12) + * 0x008 gp (r1) + * 0x010 caller's unat + * 0x018 fpsr + * 0x020 r4 + * 0x028 r5 + * 0x030 r6 + * 0x038 r7 + * 0x040 rp (b0) + * 0x048 b1 + * 0x050 b2 + * 0x058 b3 + * 0x060 b4 + * 0x068 b5 + * 0x070 ar.pfs + * 0x078 ar.lc + * 0x080 pr + * 0x088 ar.bsp + * 0x090 ar.unat + * 0x098 &__jmp_buf + * 0x0a0 ar.rsc + * 0x0a8 ar.rnat + * 0x0b0 f2 + * 0x0c0 f3 + * 0x0d0 f4 + * 0x0e0 f5 + * 0x0f0 f16 + * 0x100 f17 + * 0x110 f18 + * 0x120 f19 + * 0x130 f20 + * 0x140 f21 + * 0x150 f22 + * 0x160 f23 + * 0x170 f24 + * 0x180 f25 + * 0x190 f26 + * 0x1a0 f27 + * 0x1b0 f28 + * 0x1c0 f29 + * 0x1d0 f30 + * 0x1e0 f31 + * + * Note that the address of __jmp_buf is saved but not used: we assume + * that the jmp_buf data structure is never moved around in memory. + */ + +/* + * Implemented according to "IA-64 Software Conventions and Runtime + * Architecture Guide", Chapter 10: "Context Management". + */ + + .text + .psr abi64 + .psr lsb + .lsb + + /* _st_md_cxt_save(__jmp_buf env) */ + .align 32 + .global _st_md_cxt_save + .proc _st_md_cxt_save +_st_md_cxt_save: + alloc r14 = ar.pfs,1,0,0,0 + mov r16 = ar.unat + ;; + mov r17 = ar.fpsr + mov r2 = in0 + add r3 = 8,in0 + ;; + st8.spill.nta [r2] = sp,16 // r12 (sp) + ;; + st8.spill.nta [r3] = gp,16 // r1 (gp) + ;; + st8.nta [r2] = r16,16 // save caller's unat + st8.nta [r3] = r17,16 // save fpsr + add r8 = 0xb0,in0 + ;; + st8.spill.nta [r2] = r4,16 // r4 + ;; + st8.spill.nta [r3] = r5,16 // r5 + add r9 = 0xc0,in0 + ;; + stf.spill.nta [r8] = f2,32 + stf.spill.nta [r9] = f3,32 + mov r15 = rp + ;; + stf.spill.nta [r8] = f4,32 + stf.spill.nta [r9] = f5,32 + mov r17 = b1 + ;; + stf.spill.nta [r8] = f16,32 + stf.spill.nta [r9] = f17,32 + mov r18 = b2 + ;; + stf.spill.nta [r8] = f18,32 + stf.spill.nta [r9] = f19,32 + mov r19 = b3 + ;; + stf.spill.nta [r8] = f20,32 + stf.spill.nta [r9] = f21,32 + mov r20 = b4 + ;; + stf.spill.nta [r8] = f22,32 + stf.spill.nta [r9] = 
f23,32 + mov r21 = b5 + ;; + stf.spill.nta [r8] = f24,32 + stf.spill.nta [r9] = f25,32 + mov r22 = ar.lc + ;; + stf.spill.nta [r8] = f26,32 + stf.spill.nta [r9] = f27,32 + mov r24 = pr + ;; + stf.spill.nta [r8] = f28,32 + stf.spill.nta [r9] = f29,32 + ;; + stf.spill.nta [r8] = f30 + stf.spill.nta [r9] = f31 + + st8.spill.nta [r2] = r6,16 // r6 + ;; + st8.spill.nta [r3] = r7,16 // r7 + ;; + mov r23 = ar.bsp + mov r25 = ar.unat + + st8.nta [r2] = r15,16 // b0 + st8.nta [r3] = r17,16 // b1 + ;; + st8.nta [r2] = r18,16 // b2 + st8.nta [r3] = r19,16 // b3 + mov r26 = ar.rsc + ;; + st8.nta [r2] = r20,16 // b4 + st8.nta [r3] = r21,16 // b5 + ;; + st8.nta [r2] = r14,16 // ar.pfs + st8.nta [r3] = r22,16 // ar.lc + ;; + st8.nta [r2] = r24,16 // pr + st8.nta [r3] = r23,16 // ar.bsp + ;; + st8.nta [r2] = r25,16 // ar.unat + st8.nta [r3] = in0,16 // &__jmp_buf (just in case) + ;; + st8.nta [r2] = r26 // ar.rsc + ;; + flushrs // flush dirty regs to backing store + ;; + and r27 = ~0x3,r26 // clear ar.rsc.mode + ;; + mov ar.rsc = r27 // put RSE in enforced lazy mode + ;; + mov r28 = ar.rnat + ;; + st8.nta [r3] = r28 // ar.rnat + mov ar.rsc = r26 // restore ar.rsc + ;; + mov r8 = 0 + br.ret.sptk.few b0 + .endp _st_md_cxt_save + + +/****************************************************************/ + + /* _st_md_cxt_restore(__jmp_buf env, int val) */ + .global _st_md_cxt_restore + .proc _st_md_cxt_restore +_st_md_cxt_restore: + alloc r8 = ar.pfs,2,0,0,0 + add r2 = 0x88,in0 // r2 <- &jmpbuf.ar_bsp + mov r16 = ar.rsc + ;; + flushrs // flush dirty regs to backing store + ;; + and r17 = ~0x3,r16 // clear ar.rsc.mode + ;; + mov ar.rsc = r17 // put RSE in enforced lazy mode + ;; + invala // invalidate the ALAT + ;; + ld8 r23 = [r2],8 // r23 <- jmpbuf.ar_bsp + ;; + mov ar.bspstore = r23 // write BSPSTORE + ld8 r25 = [r2],24 // r25 <- jmpbuf.ar_unat + ;; + ld8 r26 = [r2],-8 // r26 <- jmpbuf.ar_rnat + ;; + mov ar.rnat = r26 // write RNAT + ld8 r27 = [r2] // r27 <- jmpbuf.ar_rsc + ;; + mov 
ar.rsc = r27 // write RSE control + mov r2 = in0 + ;; + mov ar.unat = r25 // write ar.unat + add r3 = 8,in0 + ;; + ld8.fill.nta sp = [r2],16 // r12 (sp) + ld8.fill.nta gp = [r3],16 // r1 (gp) + ;; + ld8.nta r16 = [r2],16 // caller's unat + ld8.nta r17 = [r3],16 // fpsr + ;; + ld8.fill.nta r4 = [r2],16 // r4 + ld8.fill.nta r5 = [r3],16 // r5 + ;; + ld8.fill.nta r6 = [r2],16 // r6 + ld8.fill.nta r7 = [r3],16 // r7 + ;; + mov ar.unat = r16 // restore caller's unat + mov ar.fpsr = r17 // restore fpsr + ;; + ld8.nta r16 = [r2],16 // b0 + ld8.nta r17 = [r3],16 // b1 + ;; + ld8.nta r18 = [r2],16 // b2 + ld8.nta r19 = [r3],16 // b3 + ;; + ld8.nta r20 = [r2],16 // b4 + ld8.nta r21 = [r3],16 // b5 + ;; + ld8.nta r11 = [r2],16 // ar.pfs + ld8.nta r22 = [r3],72 // ar.lc + ;; + ld8.nta r24 = [r2],48 // pr + mov b0 = r16 + ;; + ldf.fill.nta f2 = [r2],32 + ldf.fill.nta f3 = [r3],32 + mov b1 = r17 + ;; + ldf.fill.nta f4 = [r2],32 + ldf.fill.nta f5 = [r3],32 + mov b2 = r18 + ;; + ldf.fill.nta f16 = [r2],32 + ldf.fill.nta f17 = [r3],32 + mov b3 = r19 + ;; + ldf.fill.nta f18 = [r2],32 + ldf.fill.nta f19 = [r3],32 + mov b4 = r20 + ;; + ldf.fill.nta f20 = [r2],32 + ldf.fill.nta f21 = [r3],32 + mov b5 = r21 + ;; + ldf.fill.nta f22 = [r2],32 + ldf.fill.nta f23 = [r3],32 + mov ar.lc = r22 + ;; + ldf.fill.nta f24 = [r2],32 + ldf.fill.nta f25 = [r3],32 + cmp.eq p6,p7 = 0,in1 + ;; + ldf.fill.nta f26 = [r2],32 + ldf.fill.nta f27 = [r3],32 + mov ar.pfs = r11 + ;; + ldf.fill.nta f28 = [r2],32 + ldf.fill.nta f29 = [r3],32 + ;; + ldf.fill.nta f30 = [r2] + ldf.fill.nta f31 = [r3] +(p6) mov r8 = 1 +(p7) mov r8 = in1 + + mov pr = r24,-1 + br.ret.sptk.few b0 + .endp _st_md_cxt_restore + +/****************************************************************/ + +#elif defined(__i386__) + +/* + * Internal __jmp_buf layout + */ +#define JB_BX 0 +#define JB_SI 1 +#define JB_DI 2 +#define JB_BP 3 +#define JB_SP 4 +#define JB_PC 5 + + .file "md.S" + .text + + /* _st_md_cxt_save(__jmp_buf env) */ +.globl 
_st_md_cxt_save + .type _st_md_cxt_save, @function + .align 16 +_st_md_cxt_save: + movl 4(%esp), %eax + + /* + * Save registers. + */ + movl %ebx, (JB_BX*4)(%eax) + movl %esi, (JB_SI*4)(%eax) + movl %edi, (JB_DI*4)(%eax) + /* Save SP */ + leal 4(%esp), %ecx + movl %ecx, (JB_SP*4)(%eax) + /* Save PC we are returning to */ + movl 0(%esp), %ecx + movl %ecx, (JB_PC*4)(%eax) + /* Save caller frame pointer */ + movl %ebp, (JB_BP*4)(%eax) + xorl %eax, %eax + ret + .size _st_md_cxt_save, .-_st_md_cxt_save + + +/****************************************************************/ + + /* _st_md_cxt_restore(__jmp_buf env, int val) */ +.globl _st_md_cxt_restore + .type _st_md_cxt_restore, @function + .align 16 +_st_md_cxt_restore: + /* First argument is jmp_buf */ + movl 4(%esp), %ecx + /* Second argument is return value */ + movl 8(%esp), %eax + /* Set the return address */ + movl (JB_PC*4)(%ecx), %edx + /* + * Restore registers. + */ + movl (JB_BX*4)(%ecx), %ebx + movl (JB_SI*4)(%ecx), %esi + movl (JB_DI*4)(%ecx), %edi + movl (JB_BP*4)(%ecx), %ebp + movl (JB_SP*4)(%ecx), %esp + testl %eax, %eax + jnz 1f + incl %eax + /* Jump to saved PC */ +1: jmp *%edx + .size _st_md_cxt_restore, .-_st_md_cxt_restore + +/****************************************************************/ + +#elif defined(__amd64__) || defined(__x86_64__) + +/* + * Internal __jmp_buf layout + */ +#define JB_RBX 0 +#define JB_RBP 1 +#define JB_R12 2 +#define JB_R13 3 +#define JB_R14 4 +#define JB_R15 5 +#define JB_RSP 6 +#define JB_PC 7 + + .file "md.S" + .text + + /* _st_md_cxt_save(__jmp_buf env) */ +.globl _st_md_cxt_save + .type _st_md_cxt_save, @function + .align 16 +_st_md_cxt_save: + /* + * Save registers. 
+ */ + movq %rbx, (JB_RBX*8)(%rdi) + movq %rbp, (JB_RBP*8)(%rdi) + movq %r12, (JB_R12*8)(%rdi) + movq %r13, (JB_R13*8)(%rdi) + movq %r14, (JB_R14*8)(%rdi) + movq %r15, (JB_R15*8)(%rdi) + /* Save SP */ + leaq 8(%rsp), %rdx + movq %rdx, (JB_RSP*8)(%rdi) + /* Save PC we are returning to */ + movq (%rsp), %rax + movq %rax, (JB_PC*8)(%rdi) + xorq %rax, %rax + ret + .size _st_md_cxt_save, .-_st_md_cxt_save + + +/****************************************************************/ + + /* _st_md_cxt_restore(__jmp_buf env, int val) */ +.globl _st_md_cxt_restore + .type _st_md_cxt_restore, @function + .align 16 +_st_md_cxt_restore: + /* + * Restore registers. + */ + movq (JB_RBX*8)(%rdi), %rbx + movq (JB_RBP*8)(%rdi), %rbp + movq (JB_R12*8)(%rdi), %r12 + movq (JB_R13*8)(%rdi), %r13 + movq (JB_R14*8)(%rdi), %r14 + movq (JB_R15*8)(%rdi), %r15 + /* Set return value */ + test %esi, %esi + mov $01, %eax + cmove %eax, %esi + mov %esi, %eax + movq (JB_PC*8)(%rdi), %rdx + movq (JB_RSP*8)(%rdi), %rsp + /* Jump to saved PC */ + jmpq *%rdx + .size _st_md_cxt_restore, .-_st_md_cxt_restore + +/****************************************************************/ + +#endif + diff --git a/trunk/research/st-1.9/md.h b/trunk/research/st-1.9/md.h new file mode 100644 index 000000000..5bf795f24 --- /dev/null +++ b/trunk/research/st-1.9/md.h @@ -0,0 +1,627 @@ +/* + * The contents of this file are subject to the Mozilla Public + * License Version 1.1 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.mozilla.org/MPL/ + * + * Software distributed under the License is distributed on an "AS + * IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or + * implied. See the License for the specific language governing + * rights and limitations under the License. + * + * The Original Code is the Netscape Portable Runtime library. 
+ * + * The Initial Developer of the Original Code is Netscape + * Communications Corporation. Portions created by Netscape are + * Copyright (C) 1994-2000 Netscape Communications Corporation. All + * Rights Reserved. + * + * Contributor(s): Silicon Graphics, Inc. + * + * Portions created by SGI are Copyright (C) 2000-2001 Silicon + * Graphics, Inc. All Rights Reserved. + * + * Alternatively, the contents of this file may be used under the + * terms of the GNU General Public License Version 2 or later (the + * "GPL"), in which case the provisions of the GPL are applicable + * instead of those above. If you wish to allow use of your + * version of this file only under the terms of the GPL and not to + * allow others to use your version of this file under the MPL, + * indicate your decision by deleting the provisions above and + * replace them with the notice and other provisions required by + * the GPL. If you do not delete the provisions above, a recipient + * may use your version of this file under either the MPL or the + * GPL. + */ + +/* + * This file is derived directly from Netscape Communications Corporation, + * and consists of extensive modifications made during the year(s) 1999-2000. 
+ */ + +#ifndef __ST_MD_H__ +#define __ST_MD_H__ + +#if defined(ETIMEDOUT) && !defined(ETIME) +#define ETIME ETIMEDOUT +#endif + +#if defined(MAP_ANONYMOUS) && !defined(MAP_ANON) +#define MAP_ANON MAP_ANONYMOUS +#endif + +#ifndef MAP_FAILED +#define MAP_FAILED -1 +#endif + +/***************************************** + * Platform specifics + */ + +#if defined (AIX) + +#define MD_STACK_GROWS_DOWN +#define MD_USE_SYSV_ANON_MMAP +#define MD_ACCEPT_NB_INHERITED +#define MD_ALWAYS_UNSERIALIZED_ACCEPT + +#ifndef MD_HAVE_SOCKLEN_T +#define MD_HAVE_SOCKLEN_T +#define socklen_t unsigned long +#endif + +#define MD_SETJMP(env) _setjmp(env) +#define MD_LONGJMP(env, val) _longjmp(env, val) + +#define MD_INIT_CONTEXT(_thread, _sp, _main) \ + ST_BEGIN_MACRO \ + if (MD_SETJMP((_thread)->context)) \ + _main(); \ + (_thread)->context[3] = (long) (_sp); \ + ST_END_MACRO + +#define MD_GET_UTIME() \ + timebasestruct_t rt; \ + (void) read_real_time(&rt, TIMEBASE_SZ); \ + (void) time_base_to_time(&rt, TIMEBASE_SZ); \ + return (rt.tb_high * 1000000LL + rt.tb_low / 1000) + +#elif defined (CYGWIN) + +#define MD_STACK_GROWS_DOWN +#define MD_USE_BSD_ANON_MMAP +#define MD_ACCEPT_NB_NOT_INHERITED +#define MD_ALWAYS_UNSERIALIZED_ACCEPT + +#define MD_SETJMP(env) setjmp(env) +#define MD_LONGJMP(env, val) longjmp(env, val) + +#define MD_JB_SP 7 + +#define MD_GET_SP(_t) (_t)->context[MD_JB_SP] + +#define MD_INIT_CONTEXT(_thread, _sp, _main) \ + ST_BEGIN_MACRO \ + if (MD_SETJMP((_thread)->context)) \ + _main(); \ + MD_GET_SP(_thread) = (long) (_sp); \ + ST_END_MACRO + +#define MD_GET_UTIME() \ + struct timeval tv; \ + (void) gettimeofday(&tv, NULL); \ + return (tv.tv_sec * 1000000LL + tv.tv_usec) + +#elif defined (DARWIN) + +#define MD_STACK_GROWS_DOWN +#define MD_USE_BSD_ANON_MMAP +#define MD_ACCEPT_NB_INHERITED +#define MD_ALWAYS_UNSERIALIZED_ACCEPT +#define MD_HAVE_SOCKLEN_T + +#define MD_SETJMP(env) _setjmp(env) +#define MD_LONGJMP(env, val) _longjmp(env, val) + +#if defined(__ppc__) +#define 
MD_JB_SP 0 +#elif defined(__i386__) +#define MD_JB_SP 9 +#elif defined(__x86_64__) +#define MD_JB_SP 4 +#else +#error Unknown CPU architecture +#endif + +#define MD_INIT_CONTEXT(_thread, _sp, _main) \ + ST_BEGIN_MACRO \ + if (MD_SETJMP((_thread)->context)) \ + _main(); \ + *((long *)&((_thread)->context[MD_JB_SP])) = (long) (_sp); \ + ST_END_MACRO + +#define MD_GET_UTIME() \ + struct timeval tv; \ + (void) gettimeofday(&tv, NULL); \ + return (tv.tv_sec * 1000000LL + tv.tv_usec) + +#elif defined (FREEBSD) + +#define MD_STACK_GROWS_DOWN +#define MD_USE_BSD_ANON_MMAP +#define MD_ACCEPT_NB_INHERITED +#define MD_ALWAYS_UNSERIALIZED_ACCEPT + +#define MD_SETJMP(env) _setjmp(env) +#define MD_LONGJMP(env, val) _longjmp(env, val) + +#if defined(__i386__) +#define MD_JB_SP 2 +#elif defined(__alpha__) +#define MD_JB_SP 34 +#elif defined(__amd64__) +#define MD_JB_SP 2 +#else +#error Unknown CPU architecture +#endif + +#define MD_INIT_CONTEXT(_thread, _sp, _main) \ + ST_BEGIN_MACRO \ + if (MD_SETJMP((_thread)->context)) \ + _main(); \ + (_thread)->context[0]._jb[MD_JB_SP] = (long) (_sp); \ + ST_END_MACRO + +#define MD_GET_UTIME() \ + struct timeval tv; \ + (void) gettimeofday(&tv, NULL); \ + return (tv.tv_sec * 1000000LL + tv.tv_usec) + +#elif defined (HPUX) + +#define MD_STACK_GROWS_UP +#define MD_USE_BSD_ANON_MMAP +#define MD_ACCEPT_NB_INHERITED +#define MD_ALWAYS_UNSERIALIZED_ACCEPT + +#define MD_SETJMP(env) _setjmp(env) +#define MD_LONGJMP(env, val) _longjmp(env, val) + +#ifndef __LP64__ +/* 32-bit mode (ILP32 data model) */ +#define MD_INIT_CONTEXT(_thread, _sp, _main) \ + ST_BEGIN_MACRO \ + if (MD_SETJMP((_thread)->context)) \ + _main(); \ + ((long *)((_thread)->context))[1] = (long) (_sp); \ + ST_END_MACRO +#else +/* 64-bit mode (LP64 data model) */ +#define MD_STACK_PAD_SIZE 256 +/* Last stack frame must be preserved */ +#define MD_INIT_CONTEXT(_thread, _sp, _main) \ + ST_BEGIN_MACRO \ + if (MD_SETJMP((_thread)->context)) \ + _main(); \ + memcpy((char *)(_sp) - 
MD_STACK_PAD_SIZE, \ + ((char **)((_thread)->context))[1] - MD_STACK_PAD_SIZE, \ + MD_STACK_PAD_SIZE); \ + ((long *)((_thread)->context))[1] = (long) (_sp); \ + ST_END_MACRO +#endif /* !__LP64__ */ + +#define MD_GET_UTIME() \ + struct timeval tv; \ + (void) gettimeofday(&tv, NULL); \ + return (tv.tv_sec * 1000000LL + tv.tv_usec) + +#elif defined (IRIX) + +#include + +#define MD_STACK_GROWS_DOWN +#define MD_USE_SYSV_ANON_MMAP +#define MD_ACCEPT_NB_INHERITED +#define MD_ALWAYS_UNSERIALIZED_ACCEPT + +#define MD_SETJMP(env) setjmp(env) +#define MD_LONGJMP(env, val) longjmp(env, val) + +#define MD_INIT_CONTEXT(_thread, _sp, _main) \ + ST_BEGIN_MACRO \ + (void) MD_SETJMP((_thread)->context); \ + (_thread)->context[JB_SP] = (long) (_sp); \ + (_thread)->context[JB_PC] = (long) _main; \ + ST_END_MACRO + +#define MD_GET_UTIME() \ + static int inited = 0; \ + static clockid_t clock_id = CLOCK_SGI_CYCLE; \ + struct timespec ts; \ + if (!inited) { \ + if (syssgi(SGI_CYCLECNTR_SIZE) < 64) \ + clock_id = CLOCK_REALTIME; \ + inited = 1; \ + } \ + (void) clock_gettime(clock_id, &ts); \ + return (ts.tv_sec * 1000000LL + ts.tv_nsec / 1000) + +/* + * Cap the stack by zeroing out the saved return address register + * value. This allows libexc, used by SpeedShop, to know when to stop + * backtracing since it won't find main, start, or any other known + * stack root function in a state thread's stack. Without this libexc + * traces right off the stack and crashes. + * The function preamble stores ra at 8(sp), this stores zero there. + * N.B. This macro is compiler/ABI dependent. It must change if ANY more + * automatic variables are added to the _st_thread_main() routine, because + * the address where ra is stored will change. 
+ */ +#if !defined(__GNUC__) && defined(_MIPS_SIM) && _MIPS_SIM != _ABIO32 +#define MD_CAP_STACK(var_addr) \ + (((volatile __uint64_t *)(var_addr))[1] = 0) +#endif + +#elif defined (LINUX) + +/* + * These are properties of the linux kernel and are the same on every + * flavor and architecture. + */ +#define MD_USE_BSD_ANON_MMAP +#define MD_ACCEPT_NB_NOT_INHERITED +#define MD_ALWAYS_UNSERIALIZED_ACCEPT +/* + * Modern GNU/Linux is Posix.1g compliant. + */ +#define MD_HAVE_SOCKLEN_T + +/* + * All architectures and flavors of linux have the gettimeofday + * function but if you know of a faster way, use it. + */ +#define MD_GET_UTIME() \ + struct timeval tv; \ + (void) gettimeofday(&tv, NULL); \ + return (tv.tv_sec * 1000000LL + tv.tv_usec) + +#if defined(__ia64__) +#define MD_STACK_GROWS_DOWN + +/* + * IA-64 architecture. Besides traditional memory call stack, IA-64 + * uses general register stack. Thus each thread needs a backing store + * for register stack in addition to memory stack. Standard + * setjmp()/longjmp() cannot be used for thread context switching + * because their implementation implicitly assumes that only one + * register stack exists. 
+ */ +#ifdef USE_LIBC_SETJMP +#undef USE_LIBC_SETJMP +#endif +#define MD_USE_BUILTIN_SETJMP + +#define MD_STACK_PAD_SIZE 128 +/* Last register stack frame must be preserved */ +#define MD_INIT_CONTEXT(_thread, _sp, _bsp, _main) \ + ST_BEGIN_MACRO \ + if (MD_SETJMP((_thread)->context)) \ + _main(); \ + memcpy((char *)(_bsp) - MD_STACK_PAD_SIZE, \ + (char *)(_thread)->context[0].__jmpbuf[17] - MD_STACK_PAD_SIZE, \ + MD_STACK_PAD_SIZE); \ + (_thread)->context[0].__jmpbuf[0] = (long) (_sp); \ + (_thread)->context[0].__jmpbuf[17] = (long) (_bsp); \ + ST_END_MACRO + +#elif defined(__mips__) +#define MD_STACK_GROWS_DOWN + +#define MD_INIT_CONTEXT(_thread, _sp, _main) \ + ST_BEGIN_MACRO \ + MD_SETJMP((_thread)->context); \ + _thread->context[0].__jmpbuf[0].__pc = (__ptr_t) _main; \ + _thread->context[0].__jmpbuf[0].__sp = _sp; \ + ST_END_MACRO + +#else /* Not IA-64 or mips */ + +/* + * On linux, there are a few styles of jmpbuf format. These vary based + * on architecture/glibc combination. + * + * Most of the glibc based toggles were lifted from: + * mozilla/nsprpub/pr/include/md/_linux.h + */ + +/* + * Starting with glibc 2.4, JB_SP definitions are not public anymore. + * They, however, can still be found in glibc source tree in + * architecture-specific "jmpbuf-offsets.h" files. + * Most importantly, the content of jmp_buf is mangled by setjmp to make + * it completely opaque (the mangling can be disabled by setting the + * LD_POINTER_GUARD environment variable before application execution). + * Therefore we will use built-in _st_md_cxt_save/_st_md_cxt_restore + * functions as a setjmp/longjmp replacement wherever they are available + * unless USE_LIBC_SETJMP is defined. 
+ */ + +#if defined(__powerpc__) +#define MD_STACK_GROWS_DOWN + +#if (__GLIBC__ > 2) || (__GLIBC__ == 2 && __GLIBC_MINOR__ >= 1) +#ifndef JB_GPR1 +#define JB_GPR1 0 +#endif +#define MD_GET_SP(_t) (_t)->context[0].__jmpbuf[JB_GPR1] +#else +/* not an error but certainly cause for caution */ +#error "Untested use of old glibc on powerpc" +#define MD_GET_SP(_t) (_t)->context[0].__jmpbuf[0].__misc[0] +#endif /* glibc 2.1 or later */ + +#elif defined(__alpha) +#define MD_STACK_GROWS_DOWN + +#if defined(__GLIBC__) && __GLIBC__ >= 2 +#ifndef JB_SP +#define JB_SP 8 +#endif +#define MD_GET_SP(_t) (_t)->context[0].__jmpbuf[JB_SP] +#else +/* not an error but certainly cause for caution */ +#error "Untested use of old glibc on alpha" +#define MD_GET_SP(_t) (_t)->context[0].__jmpbuf[0].__sp +#endif + +#elif defined(__mc68000__) +#define MD_STACK_GROWS_DOWN + +/* m68k still uses old style sigjmp_buf */ +#define MD_GET_SP(_t) (_t)->context[0].__jmpbuf[0].__sp + +#elif defined(__sparc__) +#define MD_STACK_GROWS_DOWN + +#if defined(__GLIBC__) && __GLIBC__ >= 2 +#ifndef JB_SP +#define JB_SP 0 +#endif +#define MD_GET_SP(_t) (_t)->context[0].__jmpbuf[JB_SP] +#else +/* not an error but certainly cause for caution */ +#error "Untested use of old glic on sparc -- also using odd mozilla derived __fp" +#define MD_GET_SP(_t) (_t)->context[0].__jmpbuf[0].__fp +#endif + +#elif defined(__i386__) +#define MD_STACK_GROWS_DOWN +#define MD_USE_BUILTIN_SETJMP + +#if defined(__GLIBC__) && __GLIBC__ >= 2 +#ifndef JB_SP +#define JB_SP 4 +#endif +#define MD_GET_SP(_t) (_t)->context[0].__jmpbuf[JB_SP] +#else +/* not an error but certainly cause for caution */ +#error "Untested use of old glibc on i386" +#define MD_GET_SP(_t) (_t)->context[0].__jmpbuf[0].__sp +#endif + +#elif defined(__amd64__) || defined(__x86_64__) +#define MD_STACK_GROWS_DOWN +#define MD_USE_BUILTIN_SETJMP + +#ifndef JB_RSP +#define JB_RSP 6 +#endif +#define MD_GET_SP(_t) (_t)->context[0].__jmpbuf[JB_RSP] + +#elif defined(__arm__) 
+#define MD_STACK_GROWS_DOWN + +#if defined(__GLIBC__) && __GLIBC__ >= 2 +#define MD_GET_SP(_t) (_t)->context[0].__jmpbuf[20] +#else +#error "ARM/Linux pre-glibc2 not supported yet" +#endif /* defined(__GLIBC__) && __GLIBC__ >= 2 */ + +#elif defined(__s390__) +#define MD_STACK_GROWS_DOWN + +/* There is no JB_SP in glibc at this time. (glibc 2.2.5) + */ +#define MD_GET_SP(_t) (_t)->context[0].__jmpbuf[0].__gregs[9] + +#elif defined(__hppa__) +#define MD_STACK_GROWS_UP + +/* yes, this is gross, unfortunately at the moment (2002/08/01) there is + * a bug in hppa's glibc header definition for JB_SP, so we can't + * use that... + */ +#define MD_GET_SP(_t) (*(long *)(((char *)&(_t)->context[0].__jmpbuf[0]) + 76)) + +#else +#error "Unknown CPU architecture" +#endif /* Cases with common MD_INIT_CONTEXT and different SP locations */ + +#define MD_INIT_CONTEXT(_thread, _sp, _main) \ + ST_BEGIN_MACRO \ + if (MD_SETJMP((_thread)->context)) \ + _main(); \ + MD_GET_SP(_thread) = (long) (_sp); \ + ST_END_MACRO + +#endif /* Cases with different MD_INIT_CONTEXT */ + +#if defined(MD_USE_BUILTIN_SETJMP) && !defined(USE_LIBC_SETJMP) +#define MD_SETJMP(env) _st_md_cxt_save(env) +#define MD_LONGJMP(env, val) _st_md_cxt_restore(env, val) + +extern int _st_md_cxt_save(jmp_buf env); +extern void _st_md_cxt_restore(jmp_buf env, int val); +#else +#define MD_SETJMP(env) setjmp(env) +#define MD_LONGJMP(env, val) longjmp(env, val) +#endif + +#elif defined (NETBSD) + +#define MD_STACK_GROWS_DOWN +#define MD_USE_BSD_ANON_MMAP +#define MD_ACCEPT_NB_INHERITED +#define MD_ALWAYS_UNSERIALIZED_ACCEPT +#define MD_HAVE_SOCKLEN_T + +#define MD_SETJMP(env) _setjmp(env) +#define MD_LONGJMP(env, val) _longjmp(env, val) + +#if defined(__i386__) +#define MD_JB_SP 2 +#elif defined(__alpha__) +#define MD_JB_SP 34 +#elif defined(__sparc__) +#define MD_JB_SP 0 +#elif defined(__vax__) +#define MD_JB_SP 2 +#else +#error Unknown CPU architecture +#endif + +#define MD_INIT_CONTEXT(_thread, _sp, _main) \ + 
ST_BEGIN_MACRO \ + if (MD_SETJMP((_thread)->context)) \ + _main(); \ + (_thread)->context[MD_JB_SP] = (long) (_sp); \ + ST_END_MACRO + +#define MD_GET_UTIME() \ + struct timeval tv; \ + (void) gettimeofday(&tv, NULL); \ + return (tv.tv_sec * 1000000LL + tv.tv_usec) + +#elif defined (OPENBSD) + +#define MD_STACK_GROWS_DOWN +#define MD_USE_BSD_ANON_MMAP +#define MD_ACCEPT_NB_INHERITED +#define MD_ALWAYS_UNSERIALIZED_ACCEPT + +#define MD_SETJMP(env) _setjmp(env) +#define MD_LONGJMP(env, val) _longjmp(env, val) + +#if defined(__i386__) +#define MD_JB_SP 2 +#elif defined(__alpha__) +#define MD_JB_SP 34 +#elif defined(__sparc__) +#define MD_JB_SP 0 +#elif defined(__amd64__) +#define MD_JB_SP 6 +#else +#error Unknown CPU architecture +#endif + +#define MD_INIT_CONTEXT(_thread, _sp, _main) \ + ST_BEGIN_MACRO \ + if (MD_SETJMP((_thread)->context)) \ + _main(); \ + (_thread)->context[MD_JB_SP] = (long) (_sp); \ + ST_END_MACRO + +#define MD_GET_UTIME() \ + struct timeval tv; \ + (void) gettimeofday(&tv, NULL); \ + return (tv.tv_sec * 1000000LL + tv.tv_usec) + +#elif defined (OSF1) + +#include + +#define MD_STACK_GROWS_DOWN +#define MD_USE_SYSV_ANON_MMAP +#define MD_ACCEPT_NB_NOT_INHERITED +#define MD_ALWAYS_UNSERIALIZED_ACCEPT + +#define MD_SETJMP(env) _setjmp(env) +#define MD_LONGJMP(env, val) _longjmp(env, val) + +#define MD_INIT_CONTEXT(_thread, _sp, _main) \ + ST_BEGIN_MACRO \ + if (MD_SETJMP((_thread)->context)) \ + _main(); \ + ((struct sigcontext *)((_thread)->context))->sc_sp = (long) (_sp); \ + ST_END_MACRO + +#define MD_GET_UTIME() \ + struct timeval tv; \ + (void) gettimeofday(&tv, NULL); \ + return (tv.tv_sec * 1000000LL + tv.tv_usec) + +#elif defined (SOLARIS) + +#include +extern int getpagesize(void); + +#define MD_STACK_GROWS_DOWN +#define MD_USE_SYSV_ANON_MMAP +#define MD_ACCEPT_NB_NOT_INHERITED + +#define MD_SETJMP(env) setjmp(env) +#define MD_LONGJMP(env, val) longjmp(env, val) + +#if defined(sparc) || defined(__sparc) +#ifdef _LP64 +#define 
MD_STACK_PAD_SIZE 4095 +#endif +#define MD_INIT_CONTEXT(_thread, _sp, _main) \ + ST_BEGIN_MACRO \ + (void) MD_SETJMP((_thread)->context); \ + (_thread)->context[1] = (long) (_sp); \ + (_thread)->context[2] = (long) _main; \ + ST_END_MACRO +#elif defined(i386) || defined(__i386) +#define MD_INIT_CONTEXT(_thread, _sp, _main) \ + ST_BEGIN_MACRO \ + (void) MD_SETJMP((_thread)->context); \ + (_thread)->context[4] = (long) (_sp); \ + (_thread)->context[5] = (long) _main; \ + ST_END_MACRO +#elif defined(__amd64__) +#define MD_INIT_CONTEXT(_thread, _sp, _main) \ + ST_BEGIN_MACRO \ + if (MD_SETJMP((_thread)->context)) \ + _main(); \ + (_thread)->context[6] = (long) (_sp); \ + ST_END_MACRO +#else +#error Unknown CPU architecture +#endif + +#define MD_GET_UTIME() \ + return (gethrtime() / 1000) + +#else +#error Unknown OS +#endif /* OS */ + +#if !defined(MD_HAVE_POLL) && !defined(MD_DONT_HAVE_POLL) +#define MD_HAVE_POLL +#endif + +#ifndef MD_STACK_PAD_SIZE +#define MD_STACK_PAD_SIZE 128 +#endif + +#if !defined(MD_HAVE_SOCKLEN_T) && !defined(socklen_t) +#define socklen_t int +#endif + +#ifndef MD_CAP_STACK +#define MD_CAP_STACK(var_addr) +#endif + +#endif /* !__ST_MD_H__ */ + diff --git a/trunk/research/st-1.9/osguess.sh b/trunk/research/st-1.9/osguess.sh new file mode 100644 index 000000000..531681efe --- /dev/null +++ b/trunk/research/st-1.9/osguess.sh @@ -0,0 +1,45 @@ +# +# This script can be used to automatically guess target OS. +# It requires the config.guess utility which is a part of GNU Autoconf. +# GNU Autoconf can be downloaded from ftp://ftp.gnu.org/gnu/autoconf/ +# +# Use "default" as a make target for automatic builds. +# + + +# Specify path to the config.guess utility (unless set via environment) +#CONFIG_GUESS_PATH= + + +if [ x"$CONFIG_GUESS_PATH" = x ]; then + echo "Error: CONFIG_GUESS_PATH variable is not set" + exit 1 +fi + +if [ ! -f "$CONFIG_GUESS_PATH/config.guess" ]; then + echo "Can't find $CONFIG_GUESS_PATH/config.guess utility. Wrong path?" 
+ exit 1 +fi + +sys_info=`/bin/sh $CONFIG_GUESS_PATH/config.guess` + +echo "Building for $sys_info" + +case "$sys_info" in + *-ibm-aix4* ) OS=AIX ;; + *-freebsd* ) OS=FREEBSD ;; + hppa*-hp-hpux11*) OS=HPUX ;; + *-sgi-irix6* ) OS=IRIX ;; + *-linux* ) OS=LINUX ;; + *-netbsd* ) OS=NETBSD ;; + *-openbsd* ) OS=OPENBSD ;; + *-dec-osf* ) OS=OSF1 ;; + *-solaris2* ) OS=SOLARIS ;; + *-darwin* ) OS=DARWIN ;; + * ) OS= + echo "Sorry, unsupported OS" + exit 1 ;; +esac + +echo "Making with OS=$OS" + diff --git a/trunk/research/st-1.9/public.h b/trunk/research/st-1.9/public.h new file mode 100644 index 000000000..e0cc58dc4 --- /dev/null +++ b/trunk/research/st-1.9/public.h @@ -0,0 +1,184 @@ +/* + * The contents of this file are subject to the Mozilla Public + * License Version 1.1 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.mozilla.org/MPL/ + * + * Software distributed under the License is distributed on an "AS + * IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or + * implied. See the License for the specific language governing + * rights and limitations under the License. + * + * The Original Code is the Netscape Portable Runtime library. + * + * The Initial Developer of the Original Code is Netscape + * Communications Corporation. Portions created by Netscape are + * Copyright (C) 1994-2000 Netscape Communications Corporation. All + * Rights Reserved. + * + * Contributor(s): Silicon Graphics, Inc. + * + * Portions created by SGI are Copyright (C) 2000-2001 Silicon + * Graphics, Inc. All Rights Reserved. + * + * Alternatively, the contents of this file may be used under the + * terms of the GNU General Public License Version 2 or later (the + * "GPL"), in which case the provisions of the GPL are applicable + * instead of those above. 
#ifndef __ST_THREAD_H__
#define __ST_THREAD_H__

/*
 * Public interface of the State Threads library: version macros, timeout
 * constants, opaque handle types and the prototypes of the exported
 * st_* functions.
 */

/* System headers supplying the types named in the prototypes below */
#include <unistd.h>
#include <sys/types.h>      /* ssize_t, size_t, mode_t */
#include <sys/socket.h>     /* struct sockaddr, struct msghdr */
#include <sys/uio.h>        /* struct iovec */
#include <time.h>           /* time_t */
#include <errno.h>          /* ETIMEDOUT (and ETIME where available) */
#include <poll.h>           /* struct pollfd */

#define ST_VERSION          "1.9"
#define ST_VERSION_MAJOR    1
#define ST_VERSION_MINOR    9

/* Undefine this to remove the context switch callback feature. */
#define ST_SWITCH_CB

/* Platforms without ETIME fall back to ETIMEDOUT */
#ifndef ETIME
#define ETIME ETIMEDOUT
#endif

/* Timeout value meaning "no timeout": every bit of st_utime_t set */
#ifndef ST_UTIME_NO_TIMEOUT
#define ST_UTIME_NO_TIMEOUT ((st_utime_t) -1LL)
#endif

/* Timeout value of zero */
#ifndef ST_UTIME_NO_WAIT
#define ST_UTIME_NO_WAIT 0
#endif

/* Event system selectors accepted by st_set_eventsys() */
#define ST_EVENTSYS_DEFAULT 0
#define ST_EVENTSYS_SELECT  1
#define ST_EVENTSYS_POLL    2
#define ST_EVENTSYS_ALT     3

#ifdef __cplusplus
extern "C" {
#endif

/* Time value type and opaque handles (pointers to library-private structs) */
typedef unsigned long long  st_utime_t;
typedef struct _st_thread * st_thread_t;
typedef struct _st_cond *   st_cond_t;
typedef struct _st_mutex *  st_mutex_t;
typedef struct _st_netfd *  st_netfd_t;
#ifdef ST_SWITCH_CB
typedef void (*st_switch_cb_t)(void);
#endif

/* Library initialization and event system selection */
extern int st_init(void);
extern int st_getfdlimit(void);

extern int st_set_eventsys(int eventsys);
extern int st_get_eventsys(void);
extern const char *st_get_eventsys_name(void);

/* Context switch callbacks; each setter returns the previous callback */
#ifdef ST_SWITCH_CB
extern st_switch_cb_t st_set_switch_in_cb(st_switch_cb_t cb);
extern st_switch_cb_t st_set_switch_out_cb(st_switch_cb_t cb);
#endif

/* Thread management */
extern st_thread_t st_thread_self(void);
extern void st_thread_exit(void *retval);
extern int st_thread_join(st_thread_t thread, void **retvalp);
extern void st_thread_interrupt(st_thread_t thread);
extern st_thread_t st_thread_create(void *(*start)(void *arg), void *arg,
                                    int joinable, int stack_size);
extern int st_randomize_stacks(int on);
extern int st_set_utime_function(st_utime_t (*func)(void));

/* Time and sleeping */
extern st_utime_t st_utime(void);
extern st_utime_t st_utime_last_clock(void);
extern int st_timecache_set(int on);
extern time_t st_time(void);
extern int st_usleep(st_utime_t usecs);
extern int st_sleep(int secs);

/* Condition variables */
extern st_cond_t st_cond_new(void);
extern int st_cond_destroy(st_cond_t cvar);
extern int st_cond_timedwait(st_cond_t cvar, st_utime_t timeout);
extern int st_cond_wait(st_cond_t cvar);
extern int st_cond_signal(st_cond_t cvar);
extern int st_cond_broadcast(st_cond_t cvar);

/* Mutexes */
extern st_mutex_t st_mutex_new(void);
extern int st_mutex_destroy(st_mutex_t lock);
extern int st_mutex_lock(st_mutex_t lock);
extern int st_mutex_unlock(st_mutex_t lock);
extern int st_mutex_trylock(st_mutex_t lock);

/* Per-thread private data */
extern int st_key_create(int *keyp, void (*destructor)(void *));
extern int st_key_getlimit(void);
extern int st_thread_setspecific(int key, void *value);
extern void *st_thread_getspecific(int key);

/* Network file descriptor objects */
extern st_netfd_t st_netfd_open(int osfd);
extern st_netfd_t st_netfd_open_socket(int osfd);
extern void st_netfd_free(st_netfd_t fd);
extern int st_netfd_close(st_netfd_t fd);
extern int st_netfd_fileno(st_netfd_t fd);
extern void st_netfd_setspecific(st_netfd_t fd, void *value,
                                 void (*destructor)(void *));
extern void *st_netfd_getspecific(st_netfd_t fd);
extern int st_netfd_serialize_accept(st_netfd_t fd);
extern int st_netfd_poll(st_netfd_t fd, int how, st_utime_t timeout);

/* I/O calls; each takes a timeout expressed as st_utime_t */
extern int st_poll(struct pollfd *pds, int npds, st_utime_t timeout);
extern st_netfd_t st_accept(st_netfd_t fd, struct sockaddr *addr, int *addrlen,
                            st_utime_t timeout);
extern int st_connect(st_netfd_t fd, const struct sockaddr *addr, int addrlen,
                      st_utime_t timeout);
extern ssize_t st_read(st_netfd_t fd, void *buf, size_t nbyte,
                       st_utime_t timeout);
extern ssize_t st_read_fully(st_netfd_t fd, void *buf, size_t nbyte,
                             st_utime_t timeout);
extern int st_read_resid(st_netfd_t fd, void *buf, size_t *resid,
                         st_utime_t timeout);
extern ssize_t st_readv(st_netfd_t fd, const struct iovec *iov, int iov_size,
                        st_utime_t timeout);
extern int st_readv_resid(st_netfd_t fd, struct iovec **iov, int *iov_size,
                          st_utime_t timeout);
extern ssize_t st_write(st_netfd_t fd, const void *buf, size_t nbyte,
                        st_utime_t timeout);
extern int st_write_resid(st_netfd_t fd, const void *buf, size_t *resid,
                          st_utime_t timeout);
extern ssize_t st_writev(st_netfd_t fd, const struct iovec *iov, int iov_size,
                         st_utime_t timeout);
extern int st_writev_resid(st_netfd_t fd, struct iovec **iov, int *iov_size,
                           st_utime_t timeout);
extern int st_recvfrom(st_netfd_t fd, void *buf, int len,
                       struct sockaddr *from, int *fromlen,
                       st_utime_t timeout);
extern int st_sendto(st_netfd_t fd, const void *msg, int len,
                     const struct sockaddr *to, int tolen, st_utime_t timeout);
extern int st_recvmsg(st_netfd_t fd, struct msghdr *msg, int flags,
                      st_utime_t timeout);
extern int st_sendmsg(st_netfd_t fd, const struct msghdr *msg, int flags,
                      st_utime_t timeout);
extern st_netfd_t st_open(const char *path, int oflags, mode_t mode);

/* Debugging aids, only declared in DEBUG builds */
#ifdef DEBUG
extern void _st_show_thread_stack(st_thread_t thread, const char *messg);
extern void _st_iterate_threads(void);
#endif

#ifdef __cplusplus
}
#endif

#endif /* !__ST_THREAD_H__ */
You may obtain a copy of + * the License at http://www.mozilla.org/MPL/ + * + * Software distributed under the License is distributed on an "AS + * IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or + * implied. See the License for the specific language governing + * rights and limitations under the License. + * + * The Original Code is the Netscape Portable Runtime library. + * + * The Initial Developer of the Original Code is Netscape + * Communications Corporation. Portions created by Netscape are + * Copyright (C) 1994-2000 Netscape Communications Corporation. All + * Rights Reserved. + * + * Contributor(s): Silicon Graphics, Inc. + * + * Portions created by SGI are Copyright (C) 2000-2001 Silicon + * Graphics, Inc. All Rights Reserved. + * + * Alternatively, the contents of this file may be used under the + * terms of the GNU General Public License Version 2 or later (the + * "GPL"), in which case the provisions of the GPL are applicable + * instead of those above. If you wish to allow use of your + * version of this file only under the terms of the GPL and not to + * allow others to use your version of this file under the MPL, + * indicate your decision by deleting the provisions above and + * replace them with the notice and other provisions required by + * the GPL. If you do not delete the provisions above, a recipient + * may use your version of this file under either the MPL or the + * GPL. + */ + +/* + * This file is derived directly from Netscape Communications Corporation, + * and consists of extensive modifications made during the year(s) 1999-2000. 
+ */ + +#include +#include +#include +#include +#include +#include +#include "common.h" + + +/* Global data */ +_st_vp_t _st_this_vp; /* This VP */ +_st_thread_t *_st_this_thread; /* Current thread */ +int _st_active_count = 0; /* Active thread count */ + +time_t _st_curr_time = 0; /* Current time as returned by time(2) */ +st_utime_t _st_last_tset; /* Last time it was fetched */ + + +int st_poll(struct pollfd *pds, int npds, st_utime_t timeout) +{ + struct pollfd *pd; + struct pollfd *epd = pds + npds; + _st_pollq_t pq; + _st_thread_t *me = _ST_CURRENT_THREAD(); + int n; + + if (me->flags & _ST_FL_INTERRUPT) { + me->flags &= ~_ST_FL_INTERRUPT; + errno = EINTR; + return -1; + } + + if ((*_st_eventsys->pollset_add)(pds, npds) < 0) + return -1; + + pq.pds = pds; + pq.npds = npds; + pq.thread = me; + pq.on_ioq = 1; + _ST_ADD_IOQ(pq); + if (timeout != ST_UTIME_NO_TIMEOUT) + _ST_ADD_SLEEPQ(me, timeout); + me->state = _ST_ST_IO_WAIT; + + _ST_SWITCH_CONTEXT(me); + + n = 0; + if (pq.on_ioq) { + /* If we timed out, the pollq might still be on the ioq. 
Remove it */ + _ST_DEL_IOQ(pq); + (*_st_eventsys->pollset_del)(pds, npds); + } else { + /* Count the number of ready descriptors */ + for (pd = pds; pd < epd; pd++) { + if (pd->revents) + n++; + } + } + + if (me->flags & _ST_FL_INTERRUPT) { + me->flags &= ~_ST_FL_INTERRUPT; + errno = EINTR; + return -1; + } + + return n; +} + + +void _st_vp_schedule(void) +{ + _st_thread_t *thread; + + if (_ST_RUNQ.next != &_ST_RUNQ) { + /* Pull thread off of the run queue */ + thread = _ST_THREAD_PTR(_ST_RUNQ.next); + _ST_DEL_RUNQ(thread); + } else { + /* If there are no threads to run, switch to the idle thread */ + thread = _st_this_vp.idle_thread; + } + ST_ASSERT(thread->state == _ST_ST_RUNNABLE); + + /* Resume the thread */ + thread->state = _ST_ST_RUNNING; + _ST_RESTORE_CONTEXT(thread); +} + + +/* + * Initialize this Virtual Processor + */ +int st_init(void) +{ + _st_thread_t *thread; + + if (_st_active_count) { + /* Already initialized */ + return 0; + } + + /* We can ignore return value here */ + st_set_eventsys(ST_EVENTSYS_DEFAULT); + + if (_st_io_init() < 0) + return -1; + + memset(&_st_this_vp, 0, sizeof(_st_vp_t)); + + ST_INIT_CLIST(&_ST_RUNQ); + ST_INIT_CLIST(&_ST_IOQ); + ST_INIT_CLIST(&_ST_ZOMBIEQ); +#ifdef DEBUG + ST_INIT_CLIST(&_ST_THREADQ); +#endif + + if ((*_st_eventsys->init)() < 0) + return -1; + + _st_this_vp.pagesize = getpagesize(); + _st_this_vp.last_clock = st_utime(); + + /* + * Create idle thread + */ + _st_this_vp.idle_thread = st_thread_create(_st_idle_thread_start, + NULL, 0, 0); + if (!_st_this_vp.idle_thread) + return -1; + _st_this_vp.idle_thread->flags = _ST_FL_IDLE_THREAD; + _st_active_count--; + _ST_DEL_RUNQ(_st_this_vp.idle_thread); + + /* + * Initialize primordial thread + */ + thread = (_st_thread_t *) calloc(1, sizeof(_st_thread_t) + + (ST_KEYS_MAX * sizeof(void *))); + if (!thread) + return -1; + thread->private_data = (void **) (thread + 1); + thread->state = _ST_ST_RUNNING; + thread->flags = _ST_FL_PRIMORDIAL; + 
_ST_SET_CURRENT_THREAD(thread); + _st_active_count++; +#ifdef DEBUG + _ST_ADD_THREADQ(thread); +#endif + + return 0; +} + + +#ifdef ST_SWITCH_CB +st_switch_cb_t st_set_switch_in_cb(st_switch_cb_t cb) +{ + st_switch_cb_t ocb = _st_this_vp.switch_in_cb; + _st_this_vp.switch_in_cb = cb; + return ocb; +} + +st_switch_cb_t st_set_switch_out_cb(st_switch_cb_t cb) +{ + st_switch_cb_t ocb = _st_this_vp.switch_out_cb; + _st_this_vp.switch_out_cb = cb; + return ocb; +} +#endif + + +/* + * Start function for the idle thread + */ +/* ARGSUSED */ +void *_st_idle_thread_start(void *arg) +{ + _st_thread_t *me = _ST_CURRENT_THREAD(); + + while (_st_active_count > 0) { + /* Idle vp till I/O is ready or the smallest timeout expired */ + _ST_VP_IDLE(); + + /* Check sleep queue for expired threads */ + _st_vp_check_clock(); + + me->state = _ST_ST_RUNNABLE; + _ST_SWITCH_CONTEXT(me); + } + + /* No more threads */ + exit(0); + + /* NOTREACHED */ + return NULL; +} + + +void st_thread_exit(void *retval) +{ + _st_thread_t *thread = _ST_CURRENT_THREAD(); + + thread->retval = retval; + _st_thread_cleanup(thread); + _st_active_count--; + if (thread->term) { + /* Put thread on the zombie queue */ + thread->state = _ST_ST_ZOMBIE; + _ST_ADD_ZOMBIEQ(thread); + + /* Notify on our termination condition variable */ + st_cond_signal(thread->term); + + /* Switch context and come back later */ + _ST_SWITCH_CONTEXT(thread); + + /* Continue the cleanup */ + st_cond_destroy(thread->term); + thread->term = NULL; + } + +#ifdef DEBUG + _ST_DEL_THREADQ(thread); +#endif + + if (!(thread->flags & _ST_FL_PRIMORDIAL)) + _st_stack_free(thread->stack); + + /* Find another thread to run */ + _ST_SWITCH_CONTEXT(thread); + /* Not going to land here */ +} + + +int st_thread_join(_st_thread_t *thread, void **retvalp) +{ + _st_cond_t *term = thread->term; + + /* Can't join a non-joinable thread */ + if (term == NULL) { + errno = EINVAL; + return -1; + } + if (_ST_CURRENT_THREAD() == thread) { + errno = EDEADLK; + return 
-1; + } + + /* Multiple threads can't wait on the same joinable thread */ + if (term->wait_q.next != &term->wait_q) { + errno = EINVAL; + return -1; + } + + while (thread->state != _ST_ST_ZOMBIE) { + if (st_cond_timedwait(term, ST_UTIME_NO_TIMEOUT) != 0) + return -1; + } + + if (retvalp) + *retvalp = thread->retval; + + /* + * Remove target thread from the zombie queue and make it runnable. + * When it gets scheduled later, it will do the clean up. + */ + thread->state = _ST_ST_RUNNABLE; + _ST_DEL_ZOMBIEQ(thread); + _ST_ADD_RUNQ(thread); + + return 0; +} + + +void _st_thread_main(void) +{ + _st_thread_t *thread = _ST_CURRENT_THREAD(); + + /* + * Cap the stack by zeroing out the saved return address register + * value. This allows some debugging/profiling tools to know when + * to stop unwinding the stack. It's a no-op on most platforms. + */ + MD_CAP_STACK(&thread); + + /* Run thread main */ + thread->retval = (*thread->start)(thread->arg); + + /* All done, time to go away */ + st_thread_exit(thread->retval); +} + + +/* + * Insert "thread" into the timeout heap, in the position + * specified by thread->heap_index. See docs/timeout_heap.txt + * for details about the timeout heap. + */ +static _st_thread_t **heap_insert(_st_thread_t *thread) { + int target = thread->heap_index; + int s = target; + _st_thread_t **p = &_ST_SLEEPQ; + int bits = 0; + int bit; + int index = 1; + + while (s) { + s >>= 1; + bits++; + } + for (bit = bits - 2; bit >= 0; bit--) { + if (thread->due < (*p)->due) { + _st_thread_t *t = *p; + thread->left = t->left; + thread->right = t->right; + *p = thread; + thread->heap_index = index; + thread = t; + } + index <<= 1; + if (target & (1 << bit)) { + p = &((*p)->right); + index |= 1; + } else { + p = &((*p)->left); + } + } + thread->heap_index = index; + *p = thread; + thread->left = thread->right = NULL; + return p; +} + + +/* + * Delete "thread" from the timeout heap. 
+ */ +static void heap_delete(_st_thread_t *thread) { + _st_thread_t *t, **p; + int bits = 0; + int s, bit; + + /* First find and unlink the last heap element */ + p = &_ST_SLEEPQ; + s = _ST_SLEEPQ_SIZE; + while (s) { + s >>= 1; + bits++; + } + for (bit = bits - 2; bit >= 0; bit--) { + if (_ST_SLEEPQ_SIZE & (1 << bit)) { + p = &((*p)->right); + } else { + p = &((*p)->left); + } + } + t = *p; + *p = NULL; + --_ST_SLEEPQ_SIZE; + if (t != thread) { + /* + * Insert the unlinked last element in place of the element we are deleting + */ + t->heap_index = thread->heap_index; + p = heap_insert(t); + t = *p; + t->left = thread->left; + t->right = thread->right; + + /* + * Reestablish the heap invariant. + */ + for (;;) { + _st_thread_t *y; /* The younger child */ + int index_tmp; + if (t->left == NULL) + break; + else if (t->right == NULL) + y = t->left; + else if (t->left->due < t->right->due) + y = t->left; + else + y = t->right; + if (t->due > y->due) { + _st_thread_t *tl = y->left; + _st_thread_t *tr = y->right; + *p = y; + if (y == t->left) { + y->left = t; + y->right = t->right; + p = &y->left; + } else { + y->left = t->left; + y->right = t; + p = &y->right; + } + t->left = tl; + t->right = tr; + index_tmp = t->heap_index; + t->heap_index = y->heap_index; + y->heap_index = index_tmp; + } else { + break; + } + } + } + thread->left = thread->right = NULL; +} + + +void _st_add_sleep_q(_st_thread_t *thread, st_utime_t timeout) +{ + thread->due = _ST_LAST_CLOCK + timeout; + thread->flags |= _ST_FL_ON_SLEEPQ; + thread->heap_index = ++_ST_SLEEPQ_SIZE; + heap_insert(thread); +} + + +void _st_del_sleep_q(_st_thread_t *thread) +{ + heap_delete(thread); + thread->flags &= ~_ST_FL_ON_SLEEPQ; +} + + +void _st_vp_check_clock(void) +{ + _st_thread_t *thread; + st_utime_t elapsed, now; + + now = st_utime(); + elapsed = now - _ST_LAST_CLOCK; + _ST_LAST_CLOCK = now; + + if (_st_curr_time && now - _st_last_tset > 999000) { + _st_curr_time = time(NULL); + _st_last_tset = now; + } + + 
while (_ST_SLEEPQ != NULL) { + thread = _ST_SLEEPQ; + ST_ASSERT(thread->flags & _ST_FL_ON_SLEEPQ); + if (thread->due > now) + break; + _ST_DEL_SLEEPQ(thread); + + /* If thread is waiting on condition variable, set the time out flag */ + if (thread->state == _ST_ST_COND_WAIT) + thread->flags |= _ST_FL_TIMEDOUT; + + /* Make thread runnable */ + ST_ASSERT(!(thread->flags & _ST_FL_IDLE_THREAD)); + thread->state = _ST_ST_RUNNABLE; + _ST_ADD_RUNQ(thread); + } +} + + +void st_thread_interrupt(_st_thread_t *thread) +{ + /* If thread is already dead */ + if (thread->state == _ST_ST_ZOMBIE) + return; + + thread->flags |= _ST_FL_INTERRUPT; + + if (thread->state == _ST_ST_RUNNING || thread->state == _ST_ST_RUNNABLE) + return; + + if (thread->flags & _ST_FL_ON_SLEEPQ) + _ST_DEL_SLEEPQ(thread); + + /* Make thread runnable */ + thread->state = _ST_ST_RUNNABLE; + _ST_ADD_RUNQ(thread); +} + + +_st_thread_t *st_thread_create(void *(*start)(void *arg), void *arg, + int joinable, int stk_size) +{ + _st_thread_t *thread; + _st_stack_t *stack; + void **ptds; + char *sp; +#ifdef __ia64__ + char *bsp; +#endif + + /* Adjust stack size */ + if (stk_size == 0) + stk_size = ST_DEFAULT_STACK_SIZE; + stk_size = ((stk_size + _ST_PAGE_SIZE - 1) / _ST_PAGE_SIZE) * _ST_PAGE_SIZE; + stack = _st_stack_new(stk_size); + if (!stack) + return NULL; + + /* Allocate thread object and per-thread data off the stack */ +#if defined (MD_STACK_GROWS_DOWN) + sp = stack->stk_top; +#ifdef __ia64__ + /* + * The stack segment is split in the middle. The upper half is used + * as backing store for the register stack which grows upward. + * The lower half is used for the traditional memory stack which + * grows downward. Both stacks start in the middle and grow outward + * from each other. 
+ */ + sp -= (stk_size >> 1); + bsp = sp; + /* Make register stack 64-byte aligned */ + if ((unsigned long)bsp & 0x3f) + bsp = bsp + (0x40 - ((unsigned long)bsp & 0x3f)); + stack->bsp = bsp + _ST_STACK_PAD_SIZE; +#endif + sp = sp - (ST_KEYS_MAX * sizeof(void *)); + ptds = (void **) sp; + sp = sp - sizeof(_st_thread_t); + thread = (_st_thread_t *) sp; + + /* Make stack 64-byte aligned */ + if ((unsigned long)sp & 0x3f) + sp = sp - ((unsigned long)sp & 0x3f); + stack->sp = sp - _ST_STACK_PAD_SIZE; +#elif defined (MD_STACK_GROWS_UP) + sp = stack->stk_bottom; + thread = (_st_thread_t *) sp; + sp = sp + sizeof(_st_thread_t); + ptds = (void **) sp; + sp = sp + (ST_KEYS_MAX * sizeof(void *)); + + /* Make stack 64-byte aligned */ + if ((unsigned long)sp & 0x3f) + sp = sp + (0x40 - ((unsigned long)sp & 0x3f)); + stack->sp = sp + _ST_STACK_PAD_SIZE; +#else +#error Unknown OS +#endif + + memset(thread, 0, sizeof(_st_thread_t)); + memset(ptds, 0, ST_KEYS_MAX * sizeof(void *)); + + /* Initialize thread */ + thread->private_data = ptds; + thread->stack = stack; + thread->start = start; + thread->arg = arg; + +#ifndef __ia64__ + _ST_INIT_CONTEXT(thread, stack->sp, _st_thread_main); +#else + _ST_INIT_CONTEXT(thread, stack->sp, stack->bsp, _st_thread_main); +#endif + + /* If thread is joinable, allocate a termination condition variable */ + if (joinable) { + thread->term = st_cond_new(); + if (thread->term == NULL) { + _st_stack_free(thread->stack); + return NULL; + } + } + + /* Make thread runnable */ + thread->state = _ST_ST_RUNNABLE; + _st_active_count++; + _ST_ADD_RUNQ(thread); +#ifdef DEBUG + _ST_ADD_THREADQ(thread); +#endif + + return thread; +} + + +_st_thread_t *st_thread_self(void) +{ + return _ST_CURRENT_THREAD(); +} + + +#ifdef DEBUG +/* ARGSUSED */ +void _st_show_thread_stack(_st_thread_t *thread, const char *messg) +{ + +} + +/* To be set from debugger */ +int _st_iterate_threads_flag = 0; + +void _st_iterate_threads(void) +{ + static _st_thread_t *thread = NULL; + 
static jmp_buf orig_jb, save_jb; + _st_clist_t *q; + + if (!_st_iterate_threads_flag) { + if (thread) { + memcpy(thread->context, save_jb, sizeof(jmp_buf)); + MD_LONGJMP(orig_jb, 1); + } + return; + } + + if (thread) { + memcpy(thread->context, save_jb, sizeof(jmp_buf)); + _st_show_thread_stack(thread, NULL); + } else { + if (MD_SETJMP(orig_jb)) { + _st_iterate_threads_flag = 0; + thread = NULL; + _st_show_thread_stack(thread, "Iteration completed"); + return; + } + thread = _ST_CURRENT_THREAD(); + _st_show_thread_stack(thread, "Iteration started"); + } + + q = thread->tlink.next; + if (q == &_ST_THREADQ) + q = q->next; + ST_ASSERT(q != &_ST_THREADQ); + thread = _ST_THREAD_THREADQ_PTR(q); + if (thread == _ST_CURRENT_THREAD()) + MD_LONGJMP(orig_jb, 1); + memcpy(save_jb, thread->context, sizeof(jmp_buf)); + MD_LONGJMP(thread->context, 1); +} +#endif /* DEBUG */ + diff --git a/trunk/research/st-1.9/st.pc.in b/trunk/research/st-1.9/st.pc.in new file mode 100644 index 000000000..46c39ec52 --- /dev/null +++ b/trunk/research/st-1.9/st.pc.in @@ -0,0 +1,10 @@ +prefix=@prefix@ +exec_prefix=${prefix} +libdir=${exec_prefix}/lib +includedir=${prefix}/include + +Name: libst +Description: State Thread Library +Version: @VERSION@ +Libs: -L${libdir} -lst +Cflags: -I${includedir} diff --git a/trunk/research/st-1.9/st.spec b/trunk/research/st-1.9/st.spec new file mode 100644 index 000000000..4914aa196 --- /dev/null +++ b/trunk/research/st-1.9/st.spec @@ -0,0 +1,79 @@ +Summary: State Threads Library +Name: st +Version: 1.9 +Release: 1 +Copyright: MPL 1.2 or GPL 2+ +Packager: Wesley W. Terpstra +Source: http://prdownloads.sourceforge.net/state-threads/st-%{version}.tar.gz +Prefix: /usr +BuildRoot: /tmp/%{name}-%{version}-build +Group: Development/Libraries + +%description +The State Threads library has an interface similar to POSIX threads. + +However, the threads are actually all run in-process. This type of +threading allows for controlled schedualing points. 
It is highly useful +for designing robust and extremely scalable internet applications since +there is no resource contention and locking is generally unnecessary. + +It can be combined with traditional threading or multiple process +parallelism to take advantage of multiple processors. + +See: for further +information about how state threads improve performance. + +%package -n libst-devel +Summary: State Threads Library - Development Files +Group: Development/Libraries +Requires: libst1 + +%description -n libst-devel +Development headers and documentation for libst + +%package -n libst1 +Summary: State Threads Library - Shared Libs Major 1 +Group: System/Libraries + +%description -n libst1 +Shared libraries for running applications linked against api version 1. + +%prep +%setup -q + +%build +make CONFIG_GUESS_PATH=/usr/share/automake default-optimized + +%install +if [ -d ${RPM_BUILD_ROOT} ]; then rm -rf ${RPM_BUILD_ROOT}; fi + +mkdir -m 0755 -p ${RPM_BUILD_ROOT}/%{prefix}/lib/pkgconfig +mkdir -m 0755 -p ${RPM_BUILD_ROOT}/%{prefix}/include +mkdir -m 0755 -p ${RPM_BUILD_ROOT}/%{prefix}/share/doc/libst-devel +cp -a obj/libst.* ${RPM_BUILD_ROOT}/%{prefix}/lib +cp -a obj/st.h ${RPM_BUILD_ROOT}/%{prefix}/include +sed "s*@prefix@*%{prefix}*g" ${RPM_BUILD_ROOT}/%{prefix}/lib/pkgconfig/st.pc +cp -a docs/* ${RPM_BUILD_ROOT}/%{prefix}/share/doc/libst-devel/ +cp -a examples ${RPM_BUILD_ROOT}/%{prefix}/share/doc/libst-devel/ + +%post -n libst1 +/sbin/ldconfig %{prefix}/lib + +%files -n libst1 +%defattr(-,root,root) +%{prefix}/lib/lib*.so.* + +%files -n libst-devel +%defattr(-,root,root) +%{prefix}/include/* +%{prefix}/lib/lib*.a +%{prefix}/lib/lib*.so +%{prefix}/lib/pkgconfig/st.pc +%{prefix}/share/doc/libst-devel/* + +%clean +if [ -d ${RPM_BUILD_ROOT} ]; then rm -rf ${RPM_BUILD_ROOT}; fi + +%changelog +* Wed Dec 26 2001 Wesley W. 
Terpstra +- first rpms for libst-1.3.tar.gz diff --git a/trunk/research/st-1.9/stk.c b/trunk/research/st-1.9/stk.c new file mode 100644 index 000000000..344552eb6 --- /dev/null +++ b/trunk/research/st-1.9/stk.c @@ -0,0 +1,173 @@ +/* + * The contents of this file are subject to the Mozilla Public + * License Version 1.1 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.mozilla.org/MPL/ + * + * Software distributed under the License is distributed on an "AS + * IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or + * implied. See the License for the specific language governing + * rights and limitations under the License. + * + * The Original Code is the Netscape Portable Runtime library. + * + * The Initial Developer of the Original Code is Netscape + * Communications Corporation. Portions created by Netscape are + * Copyright (C) 1994-2000 Netscape Communications Corporation. All + * Rights Reserved. + * + * Contributor(s): Silicon Graphics, Inc. + * + * Portions created by SGI are Copyright (C) 2000-2001 Silicon + * Graphics, Inc. All Rights Reserved. + * + * Alternatively, the contents of this file may be used under the + * terms of the GNU General Public License Version 2 or later (the + * "GPL"), in which case the provisions of the GPL are applicable + * instead of those above. If you wish to allow use of your + * version of this file only under the terms of the GPL and not to + * allow others to use your version of this file under the MPL, + * indicate your decision by deleting the provisions above and + * replace them with the notice and other provisions required by + * the GPL. If you do not delete the provisions above, a recipient + * may use your version of this file under either the MPL or the + * GPL. 
+ */ + +/* + * This file is derived directly from Netscape Communications Corporation, + * and consists of extensive modifications made during the year(s) 1999-2000. + */ + +#include +#include +#include +#include +#include +#include "common.h" + + +/* How much space to leave between the stacks, at each end */ +#define REDZONE _ST_PAGE_SIZE + +_st_clist_t _st_free_stacks = ST_INIT_STATIC_CLIST(&_st_free_stacks); +int _st_num_free_stacks = 0; +int _st_randomize_stacks = 0; + +static char *_st_new_stk_segment(int size); + +_st_stack_t *_st_stack_new(int stack_size) +{ + _st_clist_t *qp; + _st_stack_t *ts; + int extra; + + for (qp = _st_free_stacks.next; qp != &_st_free_stacks; qp = qp->next) { + ts = _ST_THREAD_STACK_PTR(qp); + if (ts->stk_size >= stack_size) { + /* Found a stack that is big enough */ + ST_REMOVE_LINK(&ts->links); + _st_num_free_stacks--; + ts->links.next = NULL; + ts->links.prev = NULL; + return ts; + } + } + + /* Make a new thread stack object. */ + if ((ts = (_st_stack_t *)calloc(1, sizeof(_st_stack_t))) == NULL) + return NULL; + extra = _st_randomize_stacks ? 
_ST_PAGE_SIZE : 0; + ts->vaddr_size = stack_size + 2*REDZONE + extra; + ts->vaddr = _st_new_stk_segment(ts->vaddr_size); + if (!ts->vaddr) { + free(ts); + return NULL; + } + ts->stk_size = stack_size; + ts->stk_bottom = ts->vaddr + REDZONE; + ts->stk_top = ts->stk_bottom + stack_size; + +#ifdef DEBUG + mprotect(ts->vaddr, REDZONE, PROT_NONE); + mprotect(ts->stk_top + extra, REDZONE, PROT_NONE); +#endif + + if (extra) { + long offset = (random() % extra) & ~0xf; + + ts->stk_bottom += offset; + ts->stk_top += offset; + } + + return ts; +} + + +/* + * Free the stack for the current thread + */ +void _st_stack_free(_st_stack_t *ts) +{ + if (!ts) + return; + + /* Put the stack on the free list */ + ST_APPEND_LINK(&ts->links, _st_free_stacks.prev); + _st_num_free_stacks++; +} + + +static char *_st_new_stk_segment(int size) +{ +#ifdef MALLOC_STACK + void *vaddr = malloc(size); +#else + static int zero_fd = -1; + int mmap_flags = MAP_PRIVATE; + void *vaddr; + +#if defined (MD_USE_SYSV_ANON_MMAP) + if (zero_fd < 0) { + if ((zero_fd = open("/dev/zero", O_RDWR, 0)) < 0) + return NULL; + fcntl(zero_fd, F_SETFD, FD_CLOEXEC); + } +#elif defined (MD_USE_BSD_ANON_MMAP) + mmap_flags |= MAP_ANON; +#else +#error Unknown OS +#endif + + vaddr = mmap(NULL, size, PROT_READ | PROT_WRITE, mmap_flags, zero_fd, 0); + if (vaddr == (void *)MAP_FAILED) + return NULL; + +#endif /* MALLOC_STACK */ + + return (char *)vaddr; +} + + +/* Not used */ +#if 0 +void _st_delete_stk_segment(char *vaddr, int size) +{ +#ifdef MALLOC_STACK + free(vaddr); +#else + (void) munmap(vaddr, size); +#endif +} +#endif + +int st_randomize_stacks(int on) +{ + int wason = _st_randomize_stacks; + + _st_randomize_stacks = on; + if (on) + srandom((unsigned int) st_utime()); + + return wason; +} diff --git a/trunk/research/st-1.9/sync.c b/trunk/research/st-1.9/sync.c new file mode 100644 index 000000000..a71876c5f --- /dev/null +++ b/trunk/research/st-1.9/sync.c @@ -0,0 +1,369 @@ +/* + * The contents of this file are subject 
to the Mozilla Public + * License Version 1.1 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.mozilla.org/MPL/ + * + * Software distributed under the License is distributed on an "AS + * IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or + * implied. See the License for the specific language governing + * rights and limitations under the License. + * + * The Original Code is the Netscape Portable Runtime library. + * + * The Initial Developer of the Original Code is Netscape + * Communications Corporation. Portions created by Netscape are + * Copyright (C) 1994-2000 Netscape Communications Corporation. All + * Rights Reserved. + * + * Contributor(s): Silicon Graphics, Inc. + * + * Portions created by SGI are Copyright (C) 2000-2001 Silicon + * Graphics, Inc. All Rights Reserved. + * + * Alternatively, the contents of this file may be used under the + * terms of the GNU General Public License Version 2 or later (the + * "GPL"), in which case the provisions of the GPL are applicable + * instead of those above. If you wish to allow use of your + * version of this file only under the terms of the GPL and not to + * allow others to use your version of this file under the MPL, + * indicate your decision by deleting the provisions above and + * replace them with the notice and other provisions required by + * the GPL. If you do not delete the provisions above, a recipient + * may use your version of this file under either the MPL or the + * GPL. + */ + +/* + * This file is derived directly from Netscape Communications Corporation, + * and consists of extensive modifications made during the year(s) 1999-2000. 
+ */ + +#include +#include +#include +#include "common.h" + + +extern time_t _st_curr_time; +extern st_utime_t _st_last_tset; +extern int _st_active_count; + +static st_utime_t (*_st_utime)(void) = NULL; + + +/***************************************** + * Time functions + */ + +st_utime_t st_utime(void) +{ + if (_st_utime == NULL) { +#ifdef MD_GET_UTIME + MD_GET_UTIME(); +#else +#error Unknown OS +#endif + } + + return (*_st_utime)(); +} + + +int st_set_utime_function(st_utime_t (*func)(void)) +{ + if (_st_active_count) { + errno = EINVAL; + return -1; + } + + _st_utime = func; + + return 0; +} + + +st_utime_t st_utime_last_clock(void) +{ + return _ST_LAST_CLOCK; +} + + +int st_timecache_set(int on) +{ + int wason = (_st_curr_time) ? 1 : 0; + + if (on) { + _st_curr_time = time(NULL); + _st_last_tset = st_utime(); + } else + _st_curr_time = 0; + + return wason; +} + + +time_t st_time(void) +{ + if (_st_curr_time) + return _st_curr_time; + + return time(NULL); +} + + +int st_usleep(st_utime_t usecs) +{ + _st_thread_t *me = _ST_CURRENT_THREAD(); + + if (me->flags & _ST_FL_INTERRUPT) { + me->flags &= ~_ST_FL_INTERRUPT; + errno = EINTR; + return -1; + } + + if (usecs != ST_UTIME_NO_TIMEOUT) { + me->state = _ST_ST_SLEEPING; + _ST_ADD_SLEEPQ(me, usecs); + } else + me->state = _ST_ST_SUSPENDED; + + _ST_SWITCH_CONTEXT(me); + + if (me->flags & _ST_FL_INTERRUPT) { + me->flags &= ~_ST_FL_INTERRUPT; + errno = EINTR; + return -1; + } + + return 0; +} + + +int st_sleep(int secs) +{ + return st_usleep((secs >= 0) ? 
secs * (st_utime_t) 1000000LL : + ST_UTIME_NO_TIMEOUT); +} + + +/***************************************** + * Condition variable functions + */ + +_st_cond_t *st_cond_new(void) +{ + _st_cond_t *cvar; + + cvar = (_st_cond_t *) calloc(1, sizeof(_st_cond_t)); + if (cvar) { + ST_INIT_CLIST(&cvar->wait_q); + } + + return cvar; +} + + +int st_cond_destroy(_st_cond_t *cvar) +{ + if (cvar->wait_q.next != &cvar->wait_q) { + errno = EBUSY; + return -1; + } + + free(cvar); + + return 0; +} + + +int st_cond_timedwait(_st_cond_t *cvar, st_utime_t timeout) +{ + _st_thread_t *me = _ST_CURRENT_THREAD(); + int rv; + + if (me->flags & _ST_FL_INTERRUPT) { + me->flags &= ~_ST_FL_INTERRUPT; + errno = EINTR; + return -1; + } + + /* Put caller thread on the condition variable's wait queue */ + me->state = _ST_ST_COND_WAIT; + ST_APPEND_LINK(&me->wait_links, &cvar->wait_q); + + if (timeout != ST_UTIME_NO_TIMEOUT) + _ST_ADD_SLEEPQ(me, timeout); + + _ST_SWITCH_CONTEXT(me); + + ST_REMOVE_LINK(&me->wait_links); + rv = 0; + + if (me->flags & _ST_FL_TIMEDOUT) { + me->flags &= ~_ST_FL_TIMEDOUT; + errno = ETIME; + rv = -1; + } + if (me->flags & _ST_FL_INTERRUPT) { + me->flags &= ~_ST_FL_INTERRUPT; + errno = EINTR; + rv = -1; + } + + return rv; +} + + +int st_cond_wait(_st_cond_t *cvar) +{ + return st_cond_timedwait(cvar, ST_UTIME_NO_TIMEOUT); +} + + +static int _st_cond_signal(_st_cond_t *cvar, int broadcast) +{ + _st_thread_t *thread; + _st_clist_t *q; + + for (q = cvar->wait_q.next; q != &cvar->wait_q; q = q->next) { + thread = _ST_THREAD_WAITQ_PTR(q); + if (thread->state == _ST_ST_COND_WAIT) { + if (thread->flags & _ST_FL_ON_SLEEPQ) + _ST_DEL_SLEEPQ(thread); + + /* Make thread runnable */ + thread->state = _ST_ST_RUNNABLE; + _ST_ADD_RUNQ(thread); + if (!broadcast) + break; + } + } + + return 0; +} + + +int st_cond_signal(_st_cond_t *cvar) +{ + return _st_cond_signal(cvar, 0); +} + + +int st_cond_broadcast(_st_cond_t *cvar) +{ + return _st_cond_signal(cvar, 1); +} + + 
+/***************************************** + * Mutex functions + */ + +_st_mutex_t *st_mutex_new(void) +{ + _st_mutex_t *lock; + + lock = (_st_mutex_t *) calloc(1, sizeof(_st_mutex_t)); + if (lock) { + ST_INIT_CLIST(&lock->wait_q); + lock->owner = NULL; + } + + return lock; +} + + +int st_mutex_destroy(_st_mutex_t *lock) +{ + if (lock->owner != NULL || lock->wait_q.next != &lock->wait_q) { + errno = EBUSY; + return -1; + } + + free(lock); + + return 0; +} + + +int st_mutex_lock(_st_mutex_t *lock) +{ + _st_thread_t *me = _ST_CURRENT_THREAD(); + + if (me->flags & _ST_FL_INTERRUPT) { + me->flags &= ~_ST_FL_INTERRUPT; + errno = EINTR; + return -1; + } + + if (lock->owner == NULL) { + /* Got the mutex */ + lock->owner = me; + return 0; + } + + if (lock->owner == me) { + errno = EDEADLK; + return -1; + } + + /* Put caller thread on the mutex's wait queue */ + me->state = _ST_ST_LOCK_WAIT; + ST_APPEND_LINK(&me->wait_links, &lock->wait_q); + + _ST_SWITCH_CONTEXT(me); + + ST_REMOVE_LINK(&me->wait_links); + + if ((me->flags & _ST_FL_INTERRUPT) && lock->owner != me) { + me->flags &= ~_ST_FL_INTERRUPT; + errno = EINTR; + return -1; + } + + return 0; +} + + +int st_mutex_unlock(_st_mutex_t *lock) +{ + _st_thread_t *thread; + _st_clist_t *q; + + if (lock->owner != _ST_CURRENT_THREAD()) { + errno = EPERM; + return -1; + } + + for (q = lock->wait_q.next; q != &lock->wait_q; q = q->next) { + thread = _ST_THREAD_WAITQ_PTR(q); + if (thread->state == _ST_ST_LOCK_WAIT) { + lock->owner = thread; + /* Make thread runnable */ + thread->state = _ST_ST_RUNNABLE; + _ST_ADD_RUNQ(thread); + return 0; + } + } + + /* No threads waiting on this mutex */ + lock->owner = NULL; + + return 0; +} + + +int st_mutex_trylock(_st_mutex_t *lock) +{ + if (lock->owner != NULL) { + errno = EBUSY; + return -1; + } + + /* Got the mutex */ + lock->owner = _ST_CURRENT_THREAD(); + + return 0; +} +