#TARGET = linux22
#TARGET = solaris
+USE_POLL = 1
+
+# Targets whose kernel offers epoll(): enhanced Linux 2.4 (linux24e) and
+# standard Linux 2.6 (linux26). $(filter) compares TARGET word by word, so a
+# single test covers both targets and stray whitespace around the value does
+# not defeat the match.
+ifneq ($(filter linux24e linux26,$(TARGET)),)
+USE_EPOLL = 1
+endif
+
# pass CPU=<cpu_name> to make to optimize for a particular CPU
CPU = generic
#CPU = i586
TCPSPLICEDIR :=
# This is for standard Linux 2.6 with netfilter and epoll()
-COPTS.linux26 = -DNETFILTER -DENABLE_POLL -DENABLE_EPOLL
+COPTS.linux26 = -DNETFILTER
LIBS.linux26 =
# This is for enhanced Linux 2.4 with netfilter and epoll() patch.
# Warning! If kernel is 2.4 with epoll-lt <= 0.21, then you must add
# -DEPOLL_CTL_MOD_WORKAROUND to workaround a very rare bug.
-#COPTS.linux24e = -DNETFILTER -DENABLE_POLL -DENABLE_EPOLL -DUSE_MY_EPOLL -DEPOLL_CTL_MOD_WORKAROUND
-COPTS.linux24e = -DNETFILTER -DENABLE_POLL -DENABLE_EPOLL -DUSE_MY_EPOLL
+#COPTS.linux24e = -DNETFILTER -DUSE_MY_EPOLL -DEPOLL_CTL_MOD_WORKAROUND
+COPTS.linux24e = -DNETFILTER -DUSE_MY_EPOLL
LIBS.linux24e =
# This is for standard Linux 2.4 with netfilter but without epoll()
-COPTS.linux24 = -DNETFILTER -DENABLE_POLL
+COPTS.linux24 = -DNETFILTER
LIBS.linux24 =
# This is for Linux 2.2
-COPTS.linux22 = -DUSE_GETSOCKNAME -DENABLE_POLL
+COPTS.linux22 = -DUSE_GETSOCKNAME
LIBS.linux22 =
# This is for Solaris 8
-COPTS.solaris = -fomit-frame-pointer -DENABLE_POLL -DFD_SETSIZE=65536
+COPTS.solaris = -fomit-frame-pointer -DFD_SETSIZE=65536
LIBS.solaris = -lnsl -lsocket
# CPU dependent optimizations
ADDLIB =
# set some defines when needed.
-# Known ones are -DENABLE_POLL, -DENABLE_EPOLL, and -DUSE_MY_EPOLL
# - use -DTPROXY to compile with transparent proxy support.
DEFINE = -DTPROXY
ifneq ($(USE_POLL),)
OPTIONS += -DENABLE_POLL
+OPT_OBJS += src/ev_poll.o
endif
ifneq ($(USE_EPOLL),)
OPTIONS += -DENABLE_EPOLL
+OPT_OBJS += src/ev_epoll.o
endif
ifneq ($(USE_MY_EPOLL),)
src/time.o src/fd.o src/regex.o src/cfgparse.o src/server.o \
src/checks.o src/queue.o src/capture.o src/client.o src/proxy.o \
src/proto_http.o src/stream_sock.o src/appsession.o src/backend.o \
- src/session.o src/hdr_idx.o src/rbtree.o
+ src/session.o src/hdr_idx.o src/rbtree.o src/ev_select.o
haproxy: $(OBJS) $(OPT_OBJS)
$(LD) $(LDFLAGS) -o $@ $^ $(LIBS)
src/time.o src/fd.o src/regex.o src/cfgparse.o src/server.o \
src/checks.o src/queue.o src/capture.o src/client.o src/proxy.o \
src/proto_http.o src/stream_sock.o src/appsession.o src/backend.o \
- src/session.o src/hdr_idx.o src/rbtree.o
+ src/session.o src/hdr_idx.o src/rbtree.o src/ev_select.o src/ev_poll.o
all: haproxy
src/time.o src/fd.o src/regex.o src/cfgparse.o src/server.o \
src/checks.o src/queue.o src/capture.o src/client.o src/proxy.o \
src/proto_http.o src/stream_sock.o src/appsession.o src/backend.o \
- src/session.o src/hdr_idx.o src/rbtree.o
+ src/session.o src/hdr_idx.o src/rbtree.o src/ev_select.o src/ev_poll.o
all: haproxy
include/proto/fd.h
File descriptors states.
- Copyright (C) 2000-2006 Willy Tarreau - w@1wt.eu
+ Copyright (C) 2000-2007 Willy Tarreau - w@1wt.eu
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
*/
void fd_delete(int fd);
+/* registers all known pollers; takes no argument */
+void register_pollers(void);
+
+/* disable the specified poller, designated by its name <poller_name> */
+void disable_poller(const char *poller_name);
/*
- * Benchmarks performed on a Pentium-M notebook show that using functions
- * instead of the usual macros improve the FD_* performance by about 80%,
- * and that marking them regparm(2) adds another 20%.
+ * Initialize the pollers till the best one is found.
+ * If none works, returns 0, otherwise 1.
*/
-#if defined(CONFIG_HAP_INLINE_FD_SET)
-
-# define MY_FD_SET FD_SET
-# define MY_FD_CLR FD_CLR
-# define MY_FD_ISSET FD_ISSET
-
-#else
-
-# define MY_FD_SET my_fd_set
-# define MY_FD_CLR my_fd_clr
-# define MY_FD_ISSET my_fd_isset
+int init_pollers(void); /* returns 0 if no poller works, otherwise 1 */
-REGPRM2 void my_fd_set(const int fd, fd_set *ev);
-REGPRM2 void my_fd_clr(const int fd, fd_set *ev);
-REGPRM2 int my_fd_isset(const int fd, const fd_set *ev);
-
-#endif
+/*
+ * Runs the polling loop. Takes no argument and returns nothing.
+ */
+void run_poller(void);
+
+
+/* FIXME: dirty hack during code transition.
+ * The *_FD_* macros below paste their <ev> argument onto the "dir_" prefix,
+ * and the four defines that follow map every accepted spelling (the former
+ * fd_set names StaticReadEvent/StaticWriteEvent as well as DIR_RD/DIR_WR) to
+ * the DIR_RD or DIR_WR direction passed to the cur_poller callbacks.
+ */
+#define dir_StaticWriteEvent DIR_WR
+#define dir_StaticReadEvent DIR_RD
+#define dir_DIR_RD DIR_RD
+#define dir_DIR_WR DIR_WR
+
+#define MY_FD_SET(fd, ev) (cur_poller.set((fd), dir_##ev))
+#define MY_FD_CLR(fd, ev) (cur_poller.clr((fd), dir_##ev))
+#define MY_FD_ISSET(fd, ev) (cur_poller.isset((fd), dir_##ev))
+
+#define EV_FD_SET(fd, ev) (cur_poller.set((fd), dir_##ev))
+#define EV_FD_CLR(fd, ev) (cur_poller.clr((fd), dir_##ev))
+#define EV_FD_ISSET(fd, ev) (cur_poller.isset((fd), dir_##ev))
+#define EV_FD_COND_S(fd, ev) (cur_poller.cond_s((fd), dir_##ev))
+#define EV_FD_COND_C(fd, ev) (cur_poller.cond_c((fd), dir_##ev))
+#define EV_FD_REM(fd) (cur_poller.rem(fd))
+#define EV_FD_CLO(fd) (cur_poller.clo(fd))
/* recomputes the maxfd limit from the fd */
int state; /* the state of this fd */
};
+/*
+ * Poller descriptors.
+ * - <name> is initialized by the poller's register() function, and should not
+ * be allocated, just linked to.
+ * - <pref> is initialized by the poller's register() function. It is set to 0
+ * by default, meaning the poller is disabled. init() should set it to 0 in
+ * case of failure. term() must set it to 0. A generic unoptimized select()
+ * poller should set it to 100.
+ * - <private> is initialized by the poller's init() function, and cleaned by
+ * the term() function.
+ * - cond_s() checks if fd was not set then sets it and returns 1. Otherwise 0.
+ * - cond_c() checks if fd was set then clears it and returns 1. Otherwise 0.
+ * - clo() should be used to indicate to the poller that fd will be closed. It
+ * may be the same as rem() on some pollers.
+ * - poll() calls the poller, waiting at most wait_time ms.
+ */
+struct poller {
+ void *private; /* any private data for the poller */
+ REGPRM2 int (*isset)(const int fd, const int dir); /* check if <fd> is being polled for dir <dir> */
+ REGPRM2 void (*set)(const int fd, const int dir); /* set polling on <fd> for <dir> */
+ REGPRM2 void (*clr)(const int fd, const int dir); /* clear polling on <fd> for <dir> */
+ REGPRM2 int (*cond_s)(const int fd, const int dir); /* set polling on <fd> for <dir> if unset; returns 1 if it was set by this call, otherwise 0 */
+ REGPRM2 int (*cond_c)(const int fd, const int dir); /* clear polling on <fd> for <dir> if set; returns 1 if it was cleared by this call, otherwise 0 */
+ REGPRM1 void (*rem)(const int fd); /* remove any polling on <fd> */
+ REGPRM1 void (*clo)(const int fd); /* mark <fd> as closed */
+ REGPRM2 void (*poll)(struct poller *p, int wait_time); /* the poller itself, waiting at most <wait_time> ms */
+ REGPRM1 int (*init)(struct poller *p); /* poller initialization */
+ REGPRM1 void (*term)(struct poller *p); /* termination of this poller */
+ const char *name; /* poller name, linked to and not allocated */
+ int pref; /* try pollers with higher preference first; 0 means disabled */
+};
+
+extern struct poller cur_poller; /* the current poller */
+extern int nbpollers;
+#define MAX_POLLERS 10
+extern struct poller pollers[MAX_POLLERS]; /* all registered pollers */
+
extern struct fdtab *fdtab; /* array of all the file descriptors */
extern int maxfd; /* # of the highest fd + 1 */
extern int totalconn; /* total # of terminated sessions */
include/types/polling.h
File descriptors and polling definitions.
- Copyright (C) 2000-2006 Willy Tarreau - w@1wt.eu
+ Copyright (C) 2000-2007 Willy Tarreau - w@1wt.eu
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
#define POLL_USE_POLL (1<<1)
#define POLL_USE_EPOLL (1<<2)
-/* fd states */
-extern fd_set *StaticReadEvent, *StaticWriteEvent;
extern int cfg_polling_mechanism; /* POLL_USE_{SELECT|POLL|EPOLL} */
--- /dev/null
+/*
+ * FD polling functions for linux epoll()
+ *
+ * Copyright 2000-2007 Willy Tarreau <w@1wt.eu>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <unistd.h>
+#include <sys/time.h>
+#include <sys/types.h>
+
+#include <common/compat.h>
+#include <common/config.h>
+#include <common/time.h>
+
+#include <types/fd.h>
+#include <types/global.h>
+
+#include <proto/fd.h>
+#include <proto/polling.h>
+#include <proto/task.h>
+
+#if defined(USE_MY_EPOLL)
+#include <errno.h>
+#include <sys/syscall.h>
+_syscall1 (int, epoll_create, int, size);
+_syscall4 (int, epoll_ctl, int, epfd, int, op, int, fd, struct epoll_event *, event);
+_syscall4 (int, epoll_wait, int, epfd, struct epoll_event *, events, int, maxevents, int, timeout);
+#endif
+
+
+static fd_set *StaticReadEvent, *StaticWriteEvent;
+static fd_set *PrevReadEvent, *PrevWriteEvent;
+
+/* private data */
+static struct epoll_event *epoll_events;
+static int epoll_fd;
+
+
+/*
+ * Benchmarks performed on a Pentium-M notebook show that using functions
+ * instead of the usual macros improve the FD_* performance by about 80%,
+ * and that marking them regparm(2) adds another 20%.
+ */
+/* Tells whether <fd> is currently marked for polling in direction <dir>:
+ * DIR_RD looks into the read set, any other value into the write set.
+ * Returns the FD_ISSET() result.
+ */
+REGPRM2 static int __fd_isset(const int fd, const int dir)
+{
+	fd_set *target = (dir == DIR_RD) ? StaticReadEvent : StaticWriteEvent;
+
+	return FD_ISSET(fd, target);
+}
+
+/* Marks <fd> for polling in direction <dir>: DIR_RD uses the read set, any
+ * other value uses the write set.
+ */
+REGPRM2 static void __fd_set(const int fd, const int dir)
+{
+	fd_set *target = (dir == DIR_RD) ? StaticReadEvent : StaticWriteEvent;
+
+	FD_SET(fd, target);
+}
+
+/* Stops polling <fd> in direction <dir>: DIR_RD uses the read set, any other
+ * value uses the write set.
+ */
+REGPRM2 static void __fd_clr(const int fd, const int dir)
+{
+	fd_set *target = (dir == DIR_RD) ? StaticReadEvent : StaticWriteEvent;
+
+	FD_CLR(fd, target);
+}
+
+/* Marks <fd> for polling in direction <dir> only if it was not marked yet.
+ * Returns 1 if the bit had to be set, 0 if it was already set.
+ */
+REGPRM2 static int __fd_cond_s(const int fd, const int dir)
+{
+	fd_set *target = (dir == DIR_RD) ? StaticReadEvent : StaticWriteEvent;
+
+	if (FD_ISSET(fd, target))
+		return 0;
+
+	FD_SET(fd, target);
+	return 1;
+}
+
+/* Stops polling <fd> in direction <dir> only if it was being polled.
+ * Returns the FD_ISSET() result observed before clearing: non-zero if the
+ * bit was set (it is then cleared), 0 otherwise.
+ */
+REGPRM2 static int __fd_cond_c(const int fd, const int dir)
+{
+	fd_set *target = (dir == DIR_RD) ? StaticReadEvent : StaticWriteEvent;
+	const int was_set = FD_ISSET(fd, target);
+
+	if (!was_set)
+		return was_set;
+
+	FD_CLR(fd, target);
+	return was_set;
+}
+
+/* Removes <fd> from both the read and the write polling sets. */
+REGPRM1 static void __fd_rem(const int fd)
+{
+	FD_CLR(fd, StaticWriteEvent);
+	FD_CLR(fd, StaticReadEvent);
+}
+
+/* Forgets everything about <fd>: clears it from the previous-state sets as
+ * well as from the current read and write polling sets.
+ */
+REGPRM1 static void __fd_clo(const int fd)
+{
+	FD_CLR(fd, PrevWriteEvent);
+	FD_CLR(fd, PrevReadEvent);
+	FD_CLR(fd, StaticWriteEvent);
+	FD_CLR(fd, StaticReadEvent);
+}
+
+
+
+/*
+ * Initialization of the epoll() poller.
+ * Creates the epoll instance, then allocates the zero-filled event array and
+ * the four fd_set bitmaps (previous and current read/write state).
+ * Returns 0 in case of failure, non-zero in case of success. If it fails, it
+ * releases whatever was obtained so far, resets the released pointers to NULL
+ * so that a later epoll_term() cannot free them a second time, and disables
+ * the poller by setting its pref to 0.
+ */
+REGPRM1 static int epoll_init(struct poller *p)
+{
+	int fd_set_bytes;
+
+	p->private = NULL;
+	fd_set_bytes = sizeof(fd_set) * (global.maxsock + FD_SETSIZE - 1) / FD_SETSIZE;
+
+	epoll_fd = epoll_create(global.maxsock + 1);
+	if (epoll_fd < 0)
+		goto fail_fd;
+
+	/* let calloc() do the multiplication so that an overflow is detected */
+	epoll_events = (struct epoll_event *)
+		calloc(global.maxsock, sizeof(struct epoll_event));
+
+	if (epoll_events == NULL)
+		goto fail_ee;
+
+	if ((PrevReadEvent = (fd_set *)calloc(1, fd_set_bytes)) == NULL)
+		goto fail_prevt;
+
+	if ((PrevWriteEvent = (fd_set *)calloc(1, fd_set_bytes)) == NULL)
+		goto fail_pwevt;
+
+	if ((StaticReadEvent = (fd_set *)calloc(1, fd_set_bytes)) == NULL)
+		goto fail_srevt;
+
+	if ((StaticWriteEvent = (fd_set *)calloc(1, fd_set_bytes)) == NULL)
+		goto fail_swevt;
+
+	return 1;
+
+	/* unwinding: each label releases what was acquired just before the
+	 * step that failed, in reverse order of acquisition.
+	 */
+ fail_swevt:
+	free(StaticReadEvent);
+	StaticReadEvent = NULL;
+ fail_srevt:
+	free(PrevWriteEvent);
+	PrevWriteEvent = NULL;
+ fail_pwevt:
+	free(PrevReadEvent);
+	PrevReadEvent = NULL;
+ fail_prevt:
+	free(epoll_events);
+	epoll_events = NULL;
+ fail_ee:
+	close(epoll_fd);
+	epoll_fd = 0;
+ fail_fd:
+	p->pref = 0;
+	return 0;
+}
+
+/*
+ * Termination of the epoll() poller.
+ * Memory is released, the epoll file descriptor is closed and the poller is
+ * marked as unselectable (pref set to 0). Every released pointer is reset to
+ * NULL so that calling this function again does not free the same memory
+ * twice; free(NULL) is a no-op, so no prior test is needed.
+ */
+REGPRM1 static void epoll_term(struct poller *p)
+{
+	free(StaticWriteEvent);
+	StaticWriteEvent = NULL;
+
+	free(StaticReadEvent);
+	StaticReadEvent = NULL;
+
+	free(PrevWriteEvent);
+	PrevWriteEvent = NULL;
+
+	free(PrevReadEvent);
+	PrevReadEvent = NULL;
+
+	free(epoll_events);
+	epoll_events = NULL;
+
+	close(epoll_fd);
+	epoll_fd = 0;
+
+	p->private = NULL;
+	p->pref = 0;
+}
+
+/*
+ * epoll() poller
+ *
+ * First walks the fd_set bitmaps one int-sized word at a time, comparing the
+ * current Static{Read,Write}Event state with the Prev{Read,Write}Event copy,
+ * and issues epoll_ctl() ADD/MOD/DEL (or DEL then ADD when
+ * EPOLL_CTL_MOD_WORKAROUND is defined) for every fd whose polling state
+ * changed; the Prev* words are then updated. It then calls epoll_wait() for
+ * at most <wait_time> ms, refreshes <now>, and calls the read and/or write
+ * callbacks found in fdtab[] for each reported fd which is still set in the
+ * corresponding Static*Event set and is not in FD_STCLOSE state.
+ * <p> is not used by this function.
+ */
+REGPRM2 static void epoll_poll(struct poller *p, int wait_time)
+{
+ int status; /* return value of epoll_wait() */
+ int fd;
+
+ int fds, count; /* word index in the bitmaps, bit index in the word */
+ int pr, pw, sr, sw; /* previous read/write and current read/write bits of one fd */
+ unsigned rn, ro, wn, wo; /* read new, read old, write new, write old */
+ struct epoll_event ev;
+
+ for (fds = 0; (fds << INTBITS) < maxfd; fds++) {
+
+ rn = ((int*)StaticReadEvent)[fds]; ro = ((int*)PrevReadEvent)[fds];
+ wn = ((int*)StaticWriteEvent)[fds]; wo = ((int*)PrevWriteEvent)[fds];
+
+ if ((ro^rn) | (wo^wn)) { /* at least one fd of this word changed */
+ for (count = 0, fd = fds << INTBITS; count < (1<<INTBITS) && fd < maxfd; count++, fd++) {
+#define FDSETS_ARE_INT_ALIGNED
+#ifdef FDSETS_ARE_INT_ALIGNED
+
+#define WE_REALLY_NOW_THAT_FDSETS_ARE_INTS
+#ifdef WE_REALLY_NOW_THAT_FDSETS_ARE_INTS
+ pr = (ro >> count) & 1;
+ pw = (wo >> count) & 1;
+ sr = (rn >> count) & 1;
+ sw = (wn >> count) & 1;
+#else
+ pr = FD_ISSET(fd&((1<<INTBITS)-1), (typeof(fd_set*))&ro);
+ pw = FD_ISSET(fd&((1<<INTBITS)-1), (typeof(fd_set*))&wo);
+ sr = FD_ISSET(fd&((1<<INTBITS)-1), (typeof(fd_set*))&rn);
+ sw = FD_ISSET(fd&((1<<INTBITS)-1), (typeof(fd_set*))&wn);
+#endif
+#else
+ pr = FD_ISSET(fd, PrevReadEvent);
+ pw = FD_ISSET(fd, PrevWriteEvent);
+ sr = FD_ISSET(fd, StaticReadEvent);
+ sw = FD_ISSET(fd, StaticWriteEvent);
+#endif
+ if (!((sr^pr) | (sw^pw))) /* this particular fd did not change */
+ continue;
+
+ ev.events = (sr ? EPOLLIN : 0) | (sw ? EPOLLOUT : 0);
+ ev.data.fd = fd;
+
+#ifdef EPOLL_CTL_MOD_WORKAROUND
+ /* I encountered a rarely reproducible problem with
+ * EPOLL_CTL_MOD where a modified FD (systematically
+ * the one in epoll_events[0], fd#7) would sometimes
+ * be set EPOLL_OUT while asked for a read ! This is
+ * with the 2.4 epoll patch. The workaround is to
+ * delete then recreate in case of modification.
+ * This is in 2.4 up to epoll-lt-0.21 but not in 2.6
+ * nor RHEL kernels.
+ */
+
+ if ((pr | pw) && fdtab[fd].state != FD_STCLOSE)
+ epoll_ctl(epoll_fd, EPOLL_CTL_DEL, fd, &ev);
+
+ if ((sr | sw))
+ epoll_ctl(epoll_fd, EPOLL_CTL_ADD, fd, &ev);
+#else
+ if ((pr | pw)) {
+ /* the file-descriptor already exists... */
+ if ((sr | sw)) {
+ /* ...and it will still exist */
+ if (epoll_ctl(epoll_fd, EPOLL_CTL_MOD, fd, &ev) < 0) {
+ // perror("epoll_ctl(MOD)");
+ // exit(1);
+ }
+ } else {
+ /* ...and it will be removed */
+ if (fdtab[fd].state != FD_STCLOSE &&
+ epoll_ctl(epoll_fd, EPOLL_CTL_DEL, fd, &ev) < 0) {
+ // perror("epoll_ctl(DEL)");
+ // exit(1);
+ }
+ }
+ } else {
+ /* the file-descriptor did not exist, let's add it */
+ if (epoll_ctl(epoll_fd, EPOLL_CTL_ADD, fd, &ev) < 0) {
+ // perror("epoll_ctl(ADD)");
+ // exit(1);
+ }
+ }
+#endif // EPOLL_CTL_MOD_WORKAROUND
+ }
+ ((int*)PrevReadEvent)[fds] = rn; /* the current state becomes the reference for the next call */
+ ((int*)PrevWriteEvent)[fds] = wn;
+ }
+ }
+
+ /* now let's wait for events */
+ status = epoll_wait(epoll_fd, epoll_events, maxfd, wait_time);
+ tv_now(&now);
+
+ for (count = 0; count < status; count++) { /* not entered when epoll_wait() returned 0 or an error */
+ fd = epoll_events[count].data.fd;
+
+ if (FD_ISSET(fd, StaticReadEvent)) {
+ if (fdtab[fd].state == FD_STCLOSE) /* closed fd: skip this event entirely */
+ continue;
+ if (epoll_events[count].events & ( EPOLLIN | EPOLLERR | EPOLLHUP ))
+ fdtab[fd].cb[DIR_RD].f(fd);
+ }
+
+ if (FD_ISSET(fd, StaticWriteEvent)) {
+ if (fdtab[fd].state == FD_STCLOSE) /* closed fd: skip this event entirely */
+ continue;
+ if (epoll_events[count].events & ( EPOLLOUT | EPOLLERR | EPOLLHUP ))
+ fdtab[fd].cb[DIR_WR].f(fd);
+ }
+ }
+}
+
+/*
+ * The only exported function. Fills <p> with the name, the preference (300)
+ * and the callbacks of the epoll() poller. Returns 1.
+ */
+int epoll_register(struct poller *p)
+{
+	/* fd state manipulation */
+	p->isset = __fd_isset;
+	p->set = __fd_set;
+	p->clr = __fd_clr;
+	p->cond_s = __fd_cond_s;
+	p->cond_c = __fd_cond_c;
+	p->rem = __fd_rem;
+	p->clo = __fd_clo;
+
+	/* life cycle and main loop */
+	p->init = epoll_init;
+	p->term = epoll_term;
+	p->poll = epoll_poll;
+
+	/* identification and ranking */
+	p->private = NULL;
+	p->pref = 300;
+	p->name = "epoll";
+
+	return 1;
+}
+
+
+/*
+ * Local variables:
+ * c-indent-level: 8
+ * c-basic-offset: 8
+ * End:
+ */
--- /dev/null
+/*
+ * FD polling functions for generic poll()
+ *
+ * Copyright 2000-2007 Willy Tarreau <w@1wt.eu>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <unistd.h>
+#include <sys/time.h>
+#include <sys/types.h>
+
+#include <common/compat.h>
+#include <common/config.h>
+#include <common/time.h>
+
+#include <types/fd.h>
+#include <types/global.h>
+
+#include <proto/fd.h>
+#include <proto/polling.h>
+#include <proto/task.h>
+
+
+static fd_set *StaticReadEvent, *StaticWriteEvent;
+
+/* private data */
+static struct pollfd *poll_events = NULL;
+
+
+/*
+ * Benchmarks performed on a Pentium-M notebook show that using functions
+ * instead of the usual macros improve the FD_* performance by about 80%,
+ * and that marking them regparm(2) adds another 20%.
+ */
+/* Tells whether <fd> is currently marked for polling in direction <dir>:
+ * DIR_RD looks into the read set, any other value into the write set.
+ * Returns the FD_ISSET() result.
+ */
+REGPRM2 static int __fd_isset(const int fd, const int dir)
+{
+	fd_set *target = (dir == DIR_RD) ? StaticReadEvent : StaticWriteEvent;
+
+	return FD_ISSET(fd, target);
+}
+
+/* Marks <fd> for polling in direction <dir>: DIR_RD uses the read set, any
+ * other value uses the write set.
+ */
+REGPRM2 static void __fd_set(const int fd, const int dir)
+{
+	fd_set *target = (dir == DIR_RD) ? StaticReadEvent : StaticWriteEvent;
+
+	FD_SET(fd, target);
+}
+
+/* Stops polling <fd> in direction <dir>: DIR_RD uses the read set, any other
+ * value uses the write set.
+ */
+REGPRM2 static void __fd_clr(const int fd, const int dir)
+{
+	fd_set *target = (dir == DIR_RD) ? StaticReadEvent : StaticWriteEvent;
+
+	FD_CLR(fd, target);
+}
+
+/* Marks <fd> for polling in direction <dir> only if it was not marked yet.
+ * Returns 1 if the bit had to be set, 0 if it was already set.
+ */
+REGPRM2 static int __fd_cond_s(const int fd, const int dir)
+{
+	fd_set *target = (dir == DIR_RD) ? StaticReadEvent : StaticWriteEvent;
+
+	if (FD_ISSET(fd, target))
+		return 0;
+
+	FD_SET(fd, target);
+	return 1;
+}
+
+/* Stops polling <fd> in direction <dir> only if it was being polled.
+ * Returns the FD_ISSET() result observed before clearing: non-zero if the
+ * bit was set (it is then cleared), 0 otherwise.
+ */
+REGPRM2 static int __fd_cond_c(const int fd, const int dir)
+{
+	fd_set *target = (dir == DIR_RD) ? StaticReadEvent : StaticWriteEvent;
+	const int was_set = FD_ISSET(fd, target);
+
+	if (!was_set)
+		return was_set;
+
+	FD_CLR(fd, target);
+	return was_set;
+}
+
+/* Removes <fd> from both the read and the write polling sets. */
+REGPRM1 static void __fd_rem(const int fd)
+{
+	FD_CLR(fd, StaticWriteEvent);
+	FD_CLR(fd, StaticReadEvent);
+}
+
+
+
+/*
+ * Initialization of the poll() poller.
+ * Allocates the zero-filled pollfd array and the read and write fd_set
+ * bitmaps.
+ * Returns 0 in case of failure, non-zero in case of success. If it fails, it
+ * releases whatever was obtained so far, resets the released pointers to NULL
+ * so that a later poll_term() cannot free them a second time, and disables
+ * the poller by setting its pref to 0.
+ */
+REGPRM1 static int poll_init(struct poller *p)
+{
+	int fd_set_bytes;
+
+	p->private = NULL;
+	fd_set_bytes = sizeof(fd_set) * (global.maxsock + FD_SETSIZE - 1) / FD_SETSIZE;
+
+	/* let calloc() do the multiplication so that an overflow is detected */
+	poll_events = (struct pollfd *)
+		calloc(global.maxsock, sizeof(struct pollfd));
+
+	if (poll_events == NULL)
+		goto fail_pe;
+
+	if ((StaticReadEvent = (fd_set *)calloc(1, fd_set_bytes)) == NULL)
+		goto fail_srevt;
+
+	if ((StaticWriteEvent = (fd_set *)calloc(1, fd_set_bytes)) == NULL)
+		goto fail_swevt;
+
+	return 1;
+
+	/* unwinding, in reverse order of acquisition */
+ fail_swevt:
+	free(StaticReadEvent);
+	StaticReadEvent = NULL;
+ fail_srevt:
+	free(poll_events);
+	poll_events = NULL;
+ fail_pe:
+	p->pref = 0;
+	return 0;
+}
+
+/*
+ * Termination of the poll() poller.
+ * Memory is released and the poller is marked as unselectable (pref set to
+ * 0). Every released pointer is reset to NULL so that calling this function
+ * again does not free the same memory twice; free(NULL) is a no-op, so no
+ * prior test is needed.
+ */
+REGPRM1 static void poll_term(struct poller *p)
+{
+	free(StaticWriteEvent);
+	StaticWriteEvent = NULL;
+
+	free(StaticReadEvent);
+	StaticReadEvent = NULL;
+
+	free(poll_events);
+	poll_events = NULL;
+
+	p->private = NULL;
+	p->pref = 0;
+}
+
+/*
+ * Poll() poller
+ *
+ * Rebuilds the poll_events[] array on every call by walking the
+ * Static{Read,Write}Event bitmaps one int-sized word at a time and adding one
+ * entry per fd polled for reading and/or writing. It then calls poll() for at
+ * most <wait_time> ms, refreshes <now>, and scans the array, calling the read
+ * and/or write callbacks found in fdtab[] for each entry reporting an event,
+ * provided the fd is still set in the corresponding Static*Event set and is
+ * not in FD_STCLOSE state. The scan stops once <status> active entries have
+ * been seen. <p> is not used by this function.
+ */
+REGPRM2 static void poll_poll(struct poller *p, int wait_time)
+{
+ int status; /* return value of poll(), then count of active entries left to find */
+ int fd, nbfd; /* nbfd: number of entries filled in poll_events[] */
+
+ int fds, count; /* word index in the bitmaps, bit index in the word */
+ int sr, sw; /* current read and write bits of one fd */
+ unsigned rn, wn; /* read new, write new */
+
+ nbfd = 0;
+ for (fds = 0; (fds << INTBITS) < maxfd; fds++) {
+
+ rn = ((int*)StaticReadEvent)[fds];
+ wn = ((int*)StaticWriteEvent)[fds];
+
+ if ((rn|wn)) { /* at least one fd of this word is polled */
+ for (count = 0, fd = fds << INTBITS; count < (1<<INTBITS) && fd < maxfd; count++, fd++) {
+#define FDSETS_ARE_INT_ALIGNED
+#ifdef FDSETS_ARE_INT_ALIGNED
+
+#define WE_REALLY_NOW_THAT_FDSETS_ARE_INTS
+#ifdef WE_REALLY_NOW_THAT_FDSETS_ARE_INTS
+ sr = (rn >> count) & 1;
+ sw = (wn >> count) & 1;
+#else
+ sr = FD_ISSET(fd&((1<<INTBITS)-1), (typeof(fd_set*))&rn);
+ sw = FD_ISSET(fd&((1<<INTBITS)-1), (typeof(fd_set*))&wn);
+#endif
+#else
+ sr = FD_ISSET(fd, StaticReadEvent);
+ sw = FD_ISSET(fd, StaticWriteEvent);
+#endif
+ if ((sr|sw)) {
+ poll_events[nbfd].fd = fd;
+ poll_events[nbfd].events = (sr ? POLLIN : 0) | (sw ? POLLOUT : 0);
+ nbfd++;
+ }
+ }
+ }
+ }
+
+ /* now let's wait for events */
+ status = poll(poll_events, nbfd, wait_time);
+ tv_now(&now);
+
+ for (count = 0; status > 0 && count < nbfd; count++) {
+ fd = poll_events[count].fd;
+
+ if (!(poll_events[count].revents & ( POLLOUT | POLLIN | POLLERR | POLLHUP )))
+ continue;
+
+ /* ok, we found one active fd */
+ status--;
+
+ if (FD_ISSET(fd, StaticReadEvent)) {
+ if (fdtab[fd].state == FD_STCLOSE) /* closed fd: skip this entry entirely */
+ continue;
+ if (poll_events[count].revents & ( POLLIN | POLLERR | POLLHUP ))
+ fdtab[fd].cb[DIR_RD].f(fd);
+ }
+
+ if (FD_ISSET(fd, StaticWriteEvent)) {
+ if (fdtab[fd].state == FD_STCLOSE) /* closed fd: skip this entry entirely */
+ continue;
+ if (poll_events[count].revents & ( POLLOUT | POLLERR | POLLHUP ))
+ fdtab[fd].cb[DIR_WR].f(fd);
+ }
+ }
+
+}
+
+/*
+ * The only exported function. Fills <p> with the name, the preference (200)
+ * and the callbacks of the poll() poller; clo() and rem() share the same
+ * handler. Returns 1.
+ */
+int poll_register(struct poller *p)
+{
+	/* fd state manipulation */
+	p->isset = __fd_isset;
+	p->set = __fd_set;
+	p->clr = __fd_clr;
+	p->cond_s = __fd_cond_s;
+	p->cond_c = __fd_cond_c;
+	p->rem = __fd_rem;
+	p->clo = __fd_rem;
+
+	/* life cycle and main loop */
+	p->init = poll_init;
+	p->term = poll_term;
+	p->poll = poll_poll;
+
+	/* identification and ranking */
+	p->private = NULL;
+	p->pref = 200;
+	p->name = "poll";
+
+	return 1;
+}
+
+
+/*
+ * Local variables:
+ * c-indent-level: 8
+ * c-basic-offset: 8
+ * End:
+ */
--- /dev/null
+/*
+ * FD polling functions for generic select()
+ *
+ * Copyright 2000-2007 Willy Tarreau <w@1wt.eu>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <unistd.h>
+#include <sys/time.h>
+#include <sys/types.h>
+
+#include <common/compat.h>
+#include <common/config.h>
+#include <common/time.h>
+
+#include <types/fd.h>
+#include <types/global.h>
+
+#include <proto/fd.h>
+#include <proto/polling.h>
+#include <proto/task.h>
+
+
+static fd_set *ReadEvent, *WriteEvent;
+static fd_set *StaticReadEvent, *StaticWriteEvent;
+
+
+/*
+ * Benchmarks performed on a Pentium-M notebook show that using functions
+ * instead of the usual macros improve the FD_* performance by about 80%,
+ * and that marking them regparm(2) adds another 20%.
+ */
+/* Tells whether <fd> is currently marked for polling in direction <dir>:
+ * DIR_RD looks into the read set, any other value into the write set.
+ * Returns the FD_ISSET() result.
+ */
+REGPRM2 static int __fd_isset(const int fd, const int dir)
+{
+	fd_set *target = (dir == DIR_RD) ? StaticReadEvent : StaticWriteEvent;
+
+	return FD_ISSET(fd, target);
+}
+
+/* Marks <fd> for polling in direction <dir>: DIR_RD uses the read set, any
+ * other value uses the write set.
+ */
+REGPRM2 static void __fd_set(const int fd, const int dir)
+{
+	fd_set *target = (dir == DIR_RD) ? StaticReadEvent : StaticWriteEvent;
+
+	FD_SET(fd, target);
+}
+
+/* Stops polling <fd> in direction <dir>: DIR_RD uses the read set, any other
+ * value uses the write set.
+ */
+REGPRM2 static void __fd_clr(const int fd, const int dir)
+{
+	fd_set *target = (dir == DIR_RD) ? StaticReadEvent : StaticWriteEvent;
+
+	FD_CLR(fd, target);
+}
+
+/* Marks <fd> for polling in direction <dir> only if it was not marked yet.
+ * Returns 1 if the bit had to be set, 0 if it was already set.
+ */
+REGPRM2 static int __fd_cond_s(const int fd, const int dir)
+{
+	fd_set *target = (dir == DIR_RD) ? StaticReadEvent : StaticWriteEvent;
+
+	if (FD_ISSET(fd, target))
+		return 0;
+
+	FD_SET(fd, target);
+	return 1;
+}
+
+/* Stops polling <fd> in direction <dir> only if it was being polled.
+ * Returns the FD_ISSET() result observed before clearing: non-zero if the
+ * bit was set (it is then cleared), 0 otherwise.
+ */
+REGPRM2 static int __fd_cond_c(const int fd, const int dir)
+{
+	fd_set *target = (dir == DIR_RD) ? StaticReadEvent : StaticWriteEvent;
+	const int was_set = FD_ISSET(fd, target);
+
+	if (!was_set)
+		return was_set;
+
+	FD_CLR(fd, target);
+	return was_set;
+}
+
+/* Removes <fd> from both the read and the write polling sets. */
+REGPRM1 static void __fd_rem(const int fd)
+{
+	FD_CLR(fd, StaticWriteEvent);
+	FD_CLR(fd, StaticReadEvent);
+}
+
+
+/*
+ * Initialization of the select() poller.
+ * Allocates the four zero-filled fd_set bitmaps: the ReadEvent/WriteEvent
+ * working copies and the StaticReadEvent/StaticWriteEvent reference sets.
+ * Returns 0 in case of failure, non-zero in case of success. If it fails, it
+ * releases whatever was obtained so far, resets the released pointers to NULL
+ * so that a later select_term() cannot free them a second time, and disables
+ * the poller by setting its pref to 0.
+ */
+REGPRM1 static int select_init(struct poller *p)
+{
+	int fd_set_bytes;
+
+	p->private = NULL;
+	fd_set_bytes = sizeof(fd_set) * (global.maxsock + FD_SETSIZE - 1) / FD_SETSIZE;
+
+	if ((ReadEvent = (fd_set *)calloc(1, fd_set_bytes)) == NULL)
+		goto fail_revt;
+
+	if ((WriteEvent = (fd_set *)calloc(1, fd_set_bytes)) == NULL)
+		goto fail_wevt;
+
+	if ((StaticReadEvent = (fd_set *)calloc(1, fd_set_bytes)) == NULL)
+		goto fail_srevt;
+
+	if ((StaticWriteEvent = (fd_set *)calloc(1, fd_set_bytes)) == NULL)
+		goto fail_swevt;
+
+	return 1;
+
+	/* unwinding, in reverse order of acquisition */
+ fail_swevt:
+	free(StaticReadEvent);
+	StaticReadEvent = NULL;
+ fail_srevt:
+	free(WriteEvent);
+	WriteEvent = NULL;
+ fail_wevt:
+	free(ReadEvent);
+	ReadEvent = NULL;
+ fail_revt:
+	p->pref = 0;
+	return 0;
+}
+
+/*
+ * Termination of the select() poller.
+ * Memory is released and the poller is marked as unselectable (pref set to
+ * 0). Every released pointer is reset to NULL so that calling this function
+ * again does not free the same memory twice; free(NULL) is a no-op, so no
+ * prior test is needed.
+ */
+REGPRM1 static void select_term(struct poller *p)
+{
+	free(StaticWriteEvent);
+	StaticWriteEvent = NULL;
+
+	free(StaticReadEvent);
+	StaticReadEvent = NULL;
+
+	free(WriteEvent);
+	WriteEvent = NULL;
+
+	free(ReadEvent);
+	ReadEvent = NULL;
+
+	p->private = NULL;
+	p->pref = 0;
+}
+
+/*
+ * Select() poller
+ *
+ * Converts <wait_time> (ms) into a timeval: a positive value is increased by
+ * SCHEDULER_RESOLUTION, zero gives a null timeout, and a negative value makes
+ * select() receive a NULL timeout pointer. The Static{Read,Write}Event sets
+ * are then copied into the ReadEvent/WriteEvent working sets, which are the
+ * ones handed to select(); a set whose copied words are all zero is passed as
+ * NULL. After select() returns, <now> is refreshed. If at least one fd is
+ * reported, the working sets are scanned one int-sized word at a time and the
+ * read then write callbacks found in fdtab[] are called for each fd found
+ * set, skipping fds in FD_STCLOSE state. <p> is not used by this function.
+ */
+REGPRM2 static void select_poll(struct poller *p, int wait_time)
+{
+ int status; /* return value of select() */
+ int fd, i;
+ struct timeval delta;
+ int readnotnull, writenotnull; /* non-zero if the corresponding set has at least one bit set */
+ int fds;
+ char count; /* NOTE(review): plain char; assumes 1<<INTBITS fits in it - TODO confirm */
+
+ /* allow select to return immediately when needed */
+ delta.tv_sec = delta.tv_usec = 0;
+ if (wait_time > 0) { /* FIXME */
+ /* Convert to timeval */
+ /* to avoid eventual select loops due to timer precision */
+ wait_time += SCHEDULER_RESOLUTION;
+ delta.tv_sec = wait_time / 1000;
+ delta.tv_usec = (wait_time % 1000) * 1000;
+ }
+
+ /* let's restore fdset state */
+
+ readnotnull = 0; writenotnull = 0;
+ for (i = 0; i < (maxfd + FD_SETSIZE - 1)/(8*sizeof(int)); i++) { /* NOTE(review): rounds with FD_SETSIZE rather than the word width, so more words than maxfd needs are copied - confirm the sets are always large enough */
+ readnotnull |= (*(((int*)ReadEvent)+i) = *(((int*)StaticReadEvent)+i)) != 0;
+ writenotnull |= (*(((int*)WriteEvent)+i) = *(((int*)StaticWriteEvent)+i)) != 0;
+ }
+
+ // /* just a verification code, needs to be removed for performance */
+ // for (i=0; i<maxfd; i++) {
+ // if (FD_ISSET(i, ReadEvent) != FD_ISSET(i, StaticReadEvent))
+ // abort();
+ // if (FD_ISSET(i, WriteEvent) != FD_ISSET(i, StaticWriteEvent))
+ // abort();
+ //
+ // }
+
+ status = select(maxfd,
+ readnotnull ? ReadEvent : NULL,
+ writenotnull ? WriteEvent : NULL,
+ NULL,
+ (wait_time >= 0) ? &delta : NULL);
+
+ tv_now(&now);
+
+ if (status <= 0) /* timeout or error: nothing to dispatch */
+ return;
+
+ for (fds = 0; (fds << INTBITS) < maxfd; fds++) {
+ if ((((int *)(ReadEvent))[fds] | ((int *)(WriteEvent))[fds]) == 0) /* no fd set in this word */
+ continue;
+
+ for (count = 1<<INTBITS, fd = fds << INTBITS; count && fd < maxfd; count--, fd++) {
+ /* if we specify read first, the accepts and zero reads will be
+ * seen first. Moreover, system buffers will be flushed faster.
+ */
+ if (FD_ISSET(fd, ReadEvent)) {
+ if (fdtab[fd].state == FD_STCLOSE) /* closed fd: skip it entirely */
+ continue;
+ fdtab[fd].cb[DIR_RD].f(fd);
+ }
+
+ if (FD_ISSET(fd, WriteEvent)) {
+ if (fdtab[fd].state == FD_STCLOSE) /* closed fd: skip it entirely */
+ continue;
+ fdtab[fd].cb[DIR_WR].f(fd);
+ }
+ }
+ }
+}
+
+/*
+ * The only exported function. Fills <p> with the name, the preference (150)
+ * and the callbacks of the select() poller; clo() and rem() share the same
+ * handler. Returns 1.
+ */
+int select_register(struct poller *p)
+{
+	/* fd state manipulation */
+	p->isset = __fd_isset;
+	p->set = __fd_set;
+	p->clr = __fd_clr;
+	p->cond_s = __fd_cond_s;
+	p->cond_c = __fd_cond_c;
+	p->rem = __fd_rem;
+	p->clo = __fd_rem;
+
+	/* life cycle and main loop */
+	p->init = select_init;
+	p->term = select_term;
+	p->poll = select_poll;
+
+	/* identification and ranking */
+	p->private = NULL;
+	p->pref = 150;
+	p->name = "select";
+
+	return 1;
+}
+
+
+/*
+ * Local variables:
+ * c-indent-level: 8
+ * c-basic-offset: 8
+ * End:
+ */
/*
* File descriptors management functions.
*
- * Copyright 2000-2006 Willy Tarreau <w@1wt.eu>
+ * Copyright 2000-2007 Willy Tarreau <w@1wt.eu>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
*
*/
-/*
- * FIXME:
- * - we still use 'listeners' to check whether we want to stop or not.
- * - the various pollers should be moved to other external files, possibly
- * dynamic libs.
- */
-
+#include <string.h>
#include <unistd.h>
-#include <sys/time.h>
#include <sys/types.h>
#include <common/compat.h>
#include <common/config.h>
-#include <common/time.h>
#include <types/fd.h>
#include <types/global.h>
#include <proto/fd.h>
-#include <proto/polling.h>
-#include <proto/task.h>
struct fdtab *fdtab = NULL; /* array of all the file descriptors */
int maxfd; /* # of the highest fd + 1 */
int totalconn; /* total # of terminated sessions */
int actconn; /* # of active sessions */
-fd_set *StaticReadEvent, *StaticWriteEvent;
int cfg_polling_mechanism = 0; /* POLL_USE_{SELECT|POLL|EPOLL} */
+struct poller pollers[MAX_POLLERS];
+struct poller cur_poller;
+int nbpollers = 0;
-/******************************
- * pollers
- ******************************/
-
-
-#if !defined(CONFIG_HAP_INLINE_FD_SET)
-/*
- * Benchmarks performed on a Pentium-M notebook show that using functions
- * instead of the usual macros improve the FD_* performance by about 80%,
- * and that marking them regparm(2) adds another 20%.
- */
-REGPRM2 void my_fd_set(const int fd, fd_set *ev)
-{
- FD_SET(fd, ev);
-}
-REGPRM2 void my_fd_clr(const int fd, fd_set *ev)
-{
- FD_CLR(fd, ev);
-}
+/*********************
+ * generic functions
+ *********************/
-REGPRM2 int my_fd_isset(const int fd, const fd_set *ev)
-{
- return FD_ISSET(fd, ev);
-}
+extern int select_register(struct poller *p);
+#if defined(ENABLE_POLL)
+extern int poll_register(struct poller *p);
#endif
-
-
-/*
- * FIXME: this is dirty, but at the moment, there's no other solution to remove
- * the old FDs from outside the loop. Perhaps we should export a global 'poll'
- * structure with pointers to functions such as init_fd() and close_fd(), plus
- * a private structure with several pointers to places such as below.
- */
-
#if defined(ENABLE_EPOLL)
-fd_set *PrevReadEvent = NULL, *PrevWriteEvent = NULL;
-
-#if defined(USE_MY_EPOLL)
-#include <errno.h>
-#include <sys/syscall.h>
-_syscall1 (int, epoll_create, int, size);
-_syscall4 (int, epoll_ctl, int, epfd, int, op, int, fd, struct epoll_event *, event);
-_syscall4 (int, epoll_wait, int, epfd, struct epoll_event *, events, int, maxevents, int, timeout);
+extern int epoll_register(struct poller *p);
#endif
-/*
- * Main epoll() loop.
- * does 3 actions :
- * 0 (POLL_LOOP_ACTION_INIT) : initializes necessary private structures
- * 1 (POLL_LOOP_ACTION_RUN) : runs the loop
- * 2 (POLL_LOOP_ACTION_CLEAN) : cleans up
- *
- * returns 0 if initialization failed, !0 otherwise.
- */
-int epoll_loop(int action)
+/* Deletes an FD: calls EV_FD_CLO() on it, closes the file descriptor, marks
+ * its fdtab entry as FD_STCLOSE, then lowers maxfd past any trailing closed
+ * entries.
+ */
+void fd_delete(int fd)
{
- int next_time;
- int status;
- int fd;
-
- int fds, count;
- int pr, pw, sr, sw;
- unsigned rn, ro, wn, wo; /* read new, read old, write new, write old */
- struct epoll_event ev;
-
- /* private data */
- static struct epoll_event *epoll_events = NULL;
- static int epoll_fd;
-
- if (action == POLL_LOOP_ACTION_INIT) {
- epoll_fd = epoll_create(global.maxsock + 1);
- if (epoll_fd < 0)
- return 0;
- else {
- epoll_events = (struct epoll_event*)
- calloc(1, sizeof(struct epoll_event) * global.maxsock);
- PrevReadEvent = (fd_set *)
- calloc(1, sizeof(fd_set) * (global.maxsock + FD_SETSIZE - 1) / FD_SETSIZE);
- PrevWriteEvent = (fd_set *)
- calloc(1, sizeof(fd_set) * (global.maxsock + FD_SETSIZE - 1) / FD_SETSIZE);
- }
- return 1;
- }
- else if (action == POLL_LOOP_ACTION_CLEAN) {
- if (PrevWriteEvent) free(PrevWriteEvent);
- if (PrevReadEvent) free(PrevReadEvent);
- if (epoll_events) free(epoll_events);
- close(epoll_fd);
- epoll_fd = 0;
- return 1;
- }
-
- /* OK, it's POLL_LOOP_ACTION_RUN */
-
- tv_now(&now);
-
- while (1) {
- next_time = process_runnable_tasks();
-
- /* stop when there's no connection left and we don't allow them anymore */
- if (!actconn && listeners == 0)
- break;
-
- for (fds = 0; (fds << INTBITS) < maxfd; fds++) {
-
- rn = ((int*)StaticReadEvent)[fds]; ro = ((int*)PrevReadEvent)[fds];
- wn = ((int*)StaticWriteEvent)[fds]; wo = ((int*)PrevWriteEvent)[fds];
-
- if ((ro^rn) | (wo^wn)) {
- for (count = 0, fd = fds << INTBITS; count < (1<<INTBITS) && fd < maxfd; count++, fd++) {
-#define FDSETS_ARE_INT_ALIGNED
-#ifdef FDSETS_ARE_INT_ALIGNED
-
-#define WE_REALLY_NOW_THAT_FDSETS_ARE_INTS
-#ifdef WE_REALLY_NOW_THAT_FDSETS_ARE_INTS
- pr = (ro >> count) & 1;
- pw = (wo >> count) & 1;
- sr = (rn >> count) & 1;
- sw = (wn >> count) & 1;
-#else
- pr = FD_ISSET(fd&((1<<INTBITS)-1), (typeof(fd_set*))&ro);
- pw = FD_ISSET(fd&((1<<INTBITS)-1), (typeof(fd_set*))&wo);
- sr = FD_ISSET(fd&((1<<INTBITS)-1), (typeof(fd_set*))&rn);
- sw = FD_ISSET(fd&((1<<INTBITS)-1), (typeof(fd_set*))&wn);
-#endif
-#else
- pr = FD_ISSET(fd, PrevReadEvent);
- pw = FD_ISSET(fd, PrevWriteEvent);
- sr = FD_ISSET(fd, StaticReadEvent);
- sw = FD_ISSET(fd, StaticWriteEvent);
-#endif
- if (!((sr^pr) | (sw^pw)))
- continue;
-
- ev.events = (sr ? EPOLLIN : 0) | (sw ? EPOLLOUT : 0);
- ev.data.fd = fd;
-
-#ifdef EPOLL_CTL_MOD_WORKAROUND
- /* I encountered a rarely reproducible problem with
- * EPOLL_CTL_MOD where a modified FD (systematically
- * the one in epoll_events[0], fd#7) would sometimes
- * be set EPOLL_OUT while asked for a read ! This is
- * with the 2.4 epoll patch. The workaround is to
- * delete then recreate in case of modification.
- * This is in 2.4 up to epoll-lt-0.21 but not in 2.6
- * nor RHEL kernels.
- */
-
- if ((pr | pw) && fdtab[fd].state != FD_STCLOSE)
- epoll_ctl(epoll_fd, EPOLL_CTL_DEL, fd, &ev);
-
- if ((sr | sw))
- epoll_ctl(epoll_fd, EPOLL_CTL_ADD, fd, &ev);
-#else
- if ((pr | pw)) {
- /* the file-descriptor already exists... */
- if ((sr | sw)) {
- /* ...and it will still exist */
- if (epoll_ctl(epoll_fd, EPOLL_CTL_MOD, fd, &ev) < 0) {
- // perror("epoll_ctl(MOD)");
- // exit(1);
- }
- } else {
- /* ...and it will be removed */
- if (fdtab[fd].state != FD_STCLOSE &&
- epoll_ctl(epoll_fd, EPOLL_CTL_DEL, fd, &ev) < 0) {
- // perror("epoll_ctl(DEL)");
- // exit(1);
- }
- }
- } else {
- /* the file-descriptor did not exist, let's add it */
- if (epoll_ctl(epoll_fd, EPOLL_CTL_ADD, fd, &ev) < 0) {
- // perror("epoll_ctl(ADD)");
- // exit(1);
- }
- }
-#endif // EPOLL_CTL_MOD_WORKAROUND
- }
- ((int*)PrevReadEvent)[fds] = rn;
- ((int*)PrevWriteEvent)[fds] = wn;
- }
- }
-
- /* now let's wait for events */
- status = epoll_wait(epoll_fd, epoll_events, maxfd, next_time);
- tv_now(&now);
-
- for (count = 0; count < status; count++) {
- fd = epoll_events[count].data.fd;
-
- if (FD_ISSET(fd, StaticReadEvent)) {
- if (fdtab[fd].state == FD_STCLOSE)
- continue;
- if (epoll_events[count].events & ( EPOLLIN | EPOLLERR | EPOLLHUP ))
- fdtab[fd].cb[DIR_RD].f(fd);
- }
+ EV_FD_CLO(fd);
+ close(fd);
+ fdtab[fd].state = FD_STCLOSE;
- if (FD_ISSET(fd, StaticWriteEvent)) {
- if (fdtab[fd].state == FD_STCLOSE)
- continue;
- if (epoll_events[count].events & ( EPOLLOUT | EPOLLERR | EPOLLHUP ))
- fdtab[fd].cb[DIR_WR].f(fd);
- }
- }
- }
- return 1;
+ while ((maxfd-1 >= 0) && (fdtab[maxfd-1].state == FD_STCLOSE))
+ maxfd--;
}
-#endif
-
-#if defined(ENABLE_POLL)
-/*
- * Main poll() loop.
- * does 3 actions :
- * 0 (POLL_LOOP_ACTION_INIT) : initializes necessary private structures
- * 1 (POLL_LOOP_ACTION_RUN) : runs the loop
- * 2 (POLL_LOOP_ACTION_CLEAN) : cleans up
- *
- * returns 0 if initialization failed, !0 otherwise.
- */
-
-int poll_loop(int action)
+/* registers all known pollers */
+void register_pollers()
{
- int next_time;
- int status;
- int fd, nbfd;
-
- int fds, count;
- int sr, sw;
- unsigned rn, wn; /* read new, write new */
-
- /* private data */
- static struct pollfd *poll_events = NULL;
-
- if (action == POLL_LOOP_ACTION_INIT) {
- poll_events = (struct pollfd*)
- calloc(1, sizeof(struct pollfd) * global.maxsock);
- return 1;
- }
- else if (action == POLL_LOOP_ACTION_CLEAN) {
- if (poll_events)
- free(poll_events);
- return 1;
- }
-
- /* OK, it's POLL_LOOP_ACTION_RUN */
-
- tv_now(&now);
-
- while (1) {
- next_time = process_runnable_tasks();
-
- /* stop when there's no connection left and we don't allow them anymore */
- if (!actconn && listeners == 0)
- break;
-
- nbfd = 0;
- for (fds = 0; (fds << INTBITS) < maxfd; fds++) {
-
- rn = ((int*)StaticReadEvent)[fds];
- wn = ((int*)StaticWriteEvent)[fds];
-
- if ((rn|wn)) {
- for (count = 0, fd = fds << INTBITS; count < (1<<INTBITS) && fd < maxfd; count++, fd++) {
-#define FDSETS_ARE_INT_ALIGNED
-#ifdef FDSETS_ARE_INT_ALIGNED
-
-#define WE_REALLY_NOW_THAT_FDSETS_ARE_INTS
-#ifdef WE_REALLY_NOW_THAT_FDSETS_ARE_INTS
- sr = (rn >> count) & 1;
- sw = (wn >> count) & 1;
-#else
- sr = FD_ISSET(fd&((1<<INTBITS)-1), (typeof(fd_set*))&rn);
- sw = FD_ISSET(fd&((1<<INTBITS)-1), (typeof(fd_set*))&wn);
-#endif
-#else
- sr = FD_ISSET(fd, StaticReadEvent);
- sw = FD_ISSET(fd, StaticWriteEvent);
+ if (select_register(&pollers[nbpollers]))
+ nbpollers++;
+#if defined(ENABLE_POLL)
+	/* only count the poller when it registered, as done for select() */
+	if (poll_register(&pollers[nbpollers]))
+		nbpollers++;
#endif
- if ((sr|sw)) {
- poll_events[nbfd].fd = fd;
- poll_events[nbfd].events = (sr ? POLLIN : 0) | (sw ? POLLOUT : 0);
- nbfd++;
- }
- }
- }
- }
-
- /* now let's wait for events */
- status = poll(poll_events, nbfd, next_time);
- tv_now(&now);
-
- for (count = 0; status > 0 && count < nbfd; count++) {
- fd = poll_events[count].fd;
-
- if (!(poll_events[count].revents & ( POLLOUT | POLLIN | POLLERR | POLLHUP )))
- continue;
- /* ok, we found one active fd */
- status--;
-
- if (FD_ISSET(fd, StaticReadEvent)) {
- if (fdtab[fd].state == FD_STCLOSE)
- continue;
- if (poll_events[count].revents & ( POLLIN | POLLERR | POLLHUP ))
- fdtab[fd].cb[DIR_RD].f(fd);
- }
-
- if (FD_ISSET(fd, StaticWriteEvent)) {
- if (fdtab[fd].state == FD_STCLOSE)
- continue;
- if (poll_events[count].revents & ( POLLOUT | POLLERR | POLLHUP ))
- fdtab[fd].cb[DIR_WR].f(fd);
- }
- }
- }
- return 1;
-}
+#if defined(ENABLE_EPOLL)
+	/* only count the poller when it registered, as done for select() */
+	if (epoll_register(&pollers[nbpollers]))
+		nbpollers++;
#endif
+}
+/* Disables every registered poller named <poller_name> by resetting its
+ * preference to zero.
+ */
+void disable_poller(const char *poller_name)
+{
+	struct poller *cur;
+	struct poller *end = pollers + nbpollers;
+
+	for (cur = pollers; cur < end; cur++) {
+		if (strcmp(cur->name, poller_name) != 0)
+			continue;
+		cur->pref = 0;
+	}
+}
/*
- * Main select() loop.
- * does 3 actions :
- * 0 (POLL_LOOP_ACTION_INIT) : initializes necessary private structures
- * 1 (POLL_LOOP_ACTION_RUN) : runs the loop
- * 2 (POLL_LOOP_ACTION_CLEAN) : cleans up
- *
- * returns 0 if initialization failed, !0 otherwise.
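+ * Tries to initialize the enabled poller (pref != 0) with the highest
+ * preference, and copies it into cur_poller on success. It keeps going with
+ * the next best poller as long as the failed one ends up with a zero
+ * preference. Returns 1 if a poller was initialized, otherwise 0.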
*/
-
-
-int select_loop(int action)
+int init_pollers()
{
- int next_time;
- int status;
- int fd,i;
- struct timeval delta;
- int readnotnull, writenotnull;
- static fd_set *ReadEvent = NULL, *WriteEvent = NULL;
+ int p;
+ struct poller *bp;
- if (action == POLL_LOOP_ACTION_INIT) {
- ReadEvent = (fd_set *)
- calloc(1, sizeof(fd_set) * (global.maxsock + FD_SETSIZE - 1) / FD_SETSIZE);
- WriteEvent = (fd_set *)
- calloc(1, sizeof(fd_set) * (global.maxsock + FD_SETSIZE - 1) / FD_SETSIZE);
- return 1;
- }
- else if (action == POLL_LOOP_ACTION_CLEAN) {
- if (WriteEvent) free(WriteEvent);
- if (ReadEvent) free(ReadEvent);
- return 1;
- }
- /* OK, it's POLL_LOOP_ACTION_RUN */
+ do {
+ bp = NULL;
+ for (p = 0; p < nbpollers; p++)
+ if (!bp || (pollers[p].pref > bp->pref))
+ bp = &pollers[p];
- tv_now(&now);
-
- while (1) {
- next_time = process_runnable_tasks();
-
- /* stop when there's no connection left and we don't allow them anymore */
- if (!actconn && listeners == 0)
+ if (!bp || bp->pref == 0)
break;
- if (next_time > 0) { /* FIXME */
- /* Convert to timeval */
- /* to avoid eventual select loops due to timer precision */
- next_time += SCHEDULER_RESOLUTION;
- delta.tv_sec = next_time / 1000;
- delta.tv_usec = (next_time % 1000) * 1000;
- }
- else if (next_time == 0) { /* allow select to return immediately when needed */
- delta.tv_sec = delta.tv_usec = 0;
- }
-
-
- /* let's restore fdset state */
-
- readnotnull = 0; writenotnull = 0;
- for (i = 0; i < (maxfd + FD_SETSIZE - 1)/(8*sizeof(int)); i++) {
- readnotnull |= (*(((int*)ReadEvent)+i) = *(((int*)StaticReadEvent)+i)) != 0;
- writenotnull |= (*(((int*)WriteEvent)+i) = *(((int*)StaticWriteEvent)+i)) != 0;
+ if (bp->init(bp)) {
+ memcpy(&cur_poller, bp, sizeof(*bp));
+ return 1;
}
-
- // /* just a verification code, needs to be removed for performance */
- // for (i=0; i<maxfd; i++) {
- // if (FD_ISSET(i, ReadEvent) != FD_ISSET(i, StaticReadEvent))
- // abort();
- // if (FD_ISSET(i, WriteEvent) != FD_ISSET(i, StaticWriteEvent))
- // abort();
- //
- // }
-
- status = select(maxfd,
- readnotnull ? ReadEvent : NULL,
- writenotnull ? WriteEvent : NULL,
- NULL,
- (next_time >= 0) ? &delta : NULL);
-
- /* this is an experiment on the separation of the select work */
- // status = (readnotnull ? select(maxfd, ReadEvent, NULL, NULL, (next_time >= 0) ? &delta : NULL) : 0);
- // status |= (writenotnull ? select(maxfd, NULL, WriteEvent, NULL, (next_time >= 0) ? &delta : NULL) : 0);
-
- tv_now(&now);
-
- if (status > 0) { /* must proceed with events */
-
- int fds;
- char count;
-
- for (fds = 0; (fds << INTBITS) < maxfd; fds++)
- if ((((int *)(ReadEvent))[fds] | ((int *)(WriteEvent))[fds]) != 0)
- for (count = 1<<INTBITS, fd = fds << INTBITS; count && fd < maxfd; count--, fd++) {
-
- /* if we specify read first, the accepts and zero reads will be
- * seen first. Moreover, system buffers will be flushed faster.
- */
- if (FD_ISSET(fd, ReadEvent)) {
- if (fdtab[fd].state == FD_STCLOSE)
- continue;
- fdtab[fd].cb[DIR_RD].f(fd);
- }
-
- if (FD_ISSET(fd, WriteEvent)) {
- if (fdtab[fd].state == FD_STCLOSE)
- continue;
- fdtab[fd].cb[DIR_WR].f(fd);
- }
- }
- }
- else {
- // fprintf(stderr,"select returned %d, maxfd=%d\n", status, maxfd);
- }
- }
- return 1;
-}
-
-
-
-/*********************
- * generic functions
- *********************/
-
-
-/* Deletes an FD from the fdsets, and recomputes the maxfd limit.
- * The file descriptor is also closed.
- */
-void fd_delete(int fd)
-{
- MY_FD_CLR(fd, StaticReadEvent);
- MY_FD_CLR(fd, StaticWriteEvent);
-#if defined(ENABLE_EPOLL)
- if (PrevReadEvent) {
- MY_FD_CLR(fd, PrevReadEvent);
- MY_FD_CLR(fd, PrevWriteEvent);
- }
-#endif
-
- close(fd);
- fdtab[fd].state = FD_STCLOSE;
-
- while ((maxfd-1 >= 0) && (fdtab[maxfd-1].state == FD_STCLOSE))
- maxfd--;
+		/* init() failed for this poller: clear its preference so that
+		 * the next pass picks the next best one instead of giving up
+		 * when init() left <pref> untouched.
+		 */
+		bp->pref = 0;
+	} while (1);
+ return 0;
}
-
/*
* Local variables:
* c-indent-level: 8
if (global.nbproc < 1)
global.nbproc = 1;
- StaticReadEvent = (fd_set *)calloc(1,
- sizeof(fd_set) *
- (global.maxsock + FD_SETSIZE - 1) / FD_SETSIZE);
- StaticWriteEvent = (fd_set *)calloc(1,
- sizeof(fd_set) *
- (global.maxsock + FD_SETSIZE - 1) / FD_SETSIZE);
-
fdtab = (struct fdtab *)calloc(1,
sizeof(struct fdtab) * (global.maxsock));
for (i = 0; i < global.maxsock; i++) {
fdtab[i].state = FD_STCLOSE;
}
+
+ register_pollers();
+ /* Note: we could register external pollers here */
+
+ if (!(cfg_polling_mechanism & POLL_USE_EPOLL))
+ disable_poller("epoll");
+
+ if (!(cfg_polling_mechanism & POLL_USE_POLL))
+ disable_poller("poll");
+
+ if (!(cfg_polling_mechanism & POLL_USE_SELECT))
+ disable_poller("select");
+
+ /* Note: we could disable any poller by name here */
+
+ if (!init_pollers()) {
+ Alert("No polling mechanism available\n");
+ exit(1);
+ }
+ if (global.mode & MODE_DEBUG) {
+ printf("Note: using %s() as the polling mechanism.\n", cur_poller.name);
+ }
+
}
void deinit(void)
if (global.chroot) free(global.chroot);
if (global.pidfile) free(global.pidfile);
- if (StaticReadEvent) free(StaticReadEvent);
- if (StaticWriteEvent) free(StaticWriteEvent);
if (fdtab) free(fdtab);
pool_destroy(pool_session);
kill(oldpids[p], sig);
}
+/*
+ * Central polling loop: alternates between running the runnable tasks and
+ * calling the current poller, until the stop condition below is met.
+ *
+ * FIXME:
+ *   - the 'listeners' counter is still what tells us whether to stop.
+ *
+ */
+void run_poll_loop()
+{
+	int wait_time;
+
+	tv_now(&now);
+
+	for (;;) {
+		wait_time = process_runnable_tasks();
+
+		/* leave once no connection is left and no new one is allowed */
+		if (actconn == 0 && !listeners)
+			break;
+
+		cur_poller.poll(&cur_poller, wait_time);
+	}
+}
+
+
int main(int argc, char **argv)
{
int err, retry;
setsid();
}
-#if defined(ENABLE_EPOLL)
- if (cfg_polling_mechanism & POLL_USE_EPOLL) {
- if (epoll_loop(POLL_LOOP_ACTION_INIT)) {
- epoll_loop(POLL_LOOP_ACTION_RUN);
- epoll_loop(POLL_LOOP_ACTION_CLEAN);
- cfg_polling_mechanism &= POLL_USE_EPOLL;
- }
- else {
- Warning("epoll() is not available. Using poll()/select() instead.\n");
- cfg_polling_mechanism &= ~POLL_USE_EPOLL;
- }
- }
-#endif
-
-#if defined(ENABLE_POLL)
- if (cfg_polling_mechanism & POLL_USE_POLL) {
- if (poll_loop(POLL_LOOP_ACTION_INIT)) {
- poll_loop(POLL_LOOP_ACTION_RUN);
- poll_loop(POLL_LOOP_ACTION_CLEAN);
- cfg_polling_mechanism &= POLL_USE_POLL;
- }
- else {
- Warning("poll() is not available. Using select() instead.\n");
- cfg_polling_mechanism &= ~POLL_USE_POLL;
- }
- }
-#endif
- if (cfg_polling_mechanism & POLL_USE_SELECT) {
- if (select_loop(POLL_LOOP_ACTION_INIT)) {
- select_loop(POLL_LOOP_ACTION_RUN);
- select_loop(POLL_LOOP_ACTION_CLEAN);
- cfg_polling_mechanism &= POLL_USE_SELECT;
- }
- }
-
+ /*
+ * That's it : the central polling loop. Run until we stop.
+ */
+ run_poll_loop();
/* Free all Hash Keys and all Hash elements */
appsession_cleanup();