]> git.ipfire.org Git - thirdparty/bind9.git/commitdiff
ISC_SOCKET_FDSETSIZE enables use of larger fdsets with select() on
authorEvan Hunt <each@isc.org>
Tue, 29 Jul 2008 04:47:31 +0000 (04:47 +0000)
committerEvan Hunt <each@isc.org>
Tue, 29 Jul 2008 04:47:31 +0000 (04:47 +0000)
systems with small FD_SETSIZE values [rt18328]

CHANGES
lib/isc/unix/app.c
lib/isc/unix/socket.c
lib/isc/unix/socket_p.h

diff --git a/CHANGES b/CHANGES
index 627dcaaabbbe27c62f19631dd0915e3742cfb6d7..bfedeebf56b6a4d8d7fc88eb5d5148c4a44510c8 100644 (file)
--- a/CHANGES
+++ b/CHANGES
@@ -1,3 +1,13 @@
+2406.   [bug]           Some operating systems have FD_SETSIZE set to a
+                       low value by default, which can cause resource
+                       exhaustion when many simultaneous connections are
+                       open.  Linux in particular makes it difficult to
+                       increase this value.  To use more sockets with
+                       select(), set ISC_SOCKET_FDSETSIZE.  Example:
+                       STD_CDEFINES="-DISC_SOCKET_FDSETSIZE=4096" ./configure
+                       (This should not be necessary in most cases, and
+                       never for an authoritative-only server.) [RT #18328]
+
 2404.  [port]          hpux: files unlimited support.
 
 2403.  [bug]           TSIG context leak. [RT #18341]
@@ -62,7 +72,7 @@
                        [RT #17113]
 
 2249.   [bug]           Only set Authentic Data bit if client requested
-                        DNSSEC, per RFC 3655 [RT #17175]
+                       DNSSEC, per RFC 3655 [RT #17175]
 
 2248.   [cleanup]       Fix several errors reported by Coverity. [RT #17160]
 
index 59b1f6cc9724aa76ae3c2af81f4f9f3cc92d911a..b71d766eb5510e3bfd5ec19cac9cdd2831408f0d 100644 (file)
@@ -15,7 +15,7 @@
  * PERFORMANCE OF THIS SOFTWARE.
  */
 
-/* $Id: app.c,v 1.50.18.2 2005/04/29 00:17:06 marka Exp $ */
+/* $Id: app.c,v 1.50.18.2.50.1 2008/07/29 04:47:31 each Exp $ */
 
 /*! \file */
 
@@ -303,7 +303,7 @@ evloop() {
                int n;
                isc_time_t when, now;
                struct timeval tv, *tvp;
-               fd_set readfds, writefds;
+               fd_set *readfds, *writefds;
                int maxfd;
                isc_boolean_t readytasks;
                isc_boolean_t call_timer_dispatch = ISC_FALSE;
@@ -332,7 +332,7 @@ evloop() {
                }
 
                isc__socketmgr_getfdsets(&readfds, &writefds, &maxfd);
-               n = select(maxfd, &readfds, &writefds, NULL, tvp);
+               n = select(maxfd, readfds, writefds, NULL, tvp);
 
                if (n == 0 || call_timer_dispatch) {
                        /*
@@ -352,7 +352,7 @@ evloop() {
                        isc__timermgr_dispatch();
                }
                if (n > 0)
-                       (void)isc__socketmgr_dispatch(&readfds, &writefds,
+                       (void)isc__socketmgr_dispatch(readfds, writefds,
                                                      maxfd);
                (void)isc__taskmgr_dispatch();
 
index 4e3ff3f2fa0ace7f68baae3b9dffb22ead4961ad..1b4da78c2436bc083286b227ea4bd163972a44bc 100644 (file)
@@ -15,7 +15,7 @@
  * PERFORMANCE OF THIS SOFTWARE.
  */
 
-/* $Id: socket.c,v 1.237.18.29.10.5 2008/07/24 10:29:28 fdupont Exp $ */
+/* $Id: socket.c,v 1.237.18.29.10.6 2008/07/29 04:47:31 each Exp $ */
 
 /*! \file */
 
 #include "socket_p.h"
 #endif /* ISC_PLATFORM_USETHREADS */
 
+/*%
+ * Max number of open sockets.  In the vast majority of cases the default size
+ * of FD_SETSIZE should be fine, and this constant should be increased only
+ * when absolutely necessary and possible, i.e., the server is exhausting all
+ * available file descriptors (up to FD_SETSIZE) and the select() function
+ * and FD_xxx macros support larger values than FD_SETSIZE (which may not
+ * always be true, but we keep using some of them to ensure as much
+ * portability as possible).  Note also that overall server performance
+ * may be rather worsened with a larger value of this constant due to
+ * inherent scalability problems of select().
+ *
+ * As a special note, this value shouldn't have to be touched if
+ * this is a build for an authoritative only DNS server.
+ */
+
+#ifndef ISC_SOCKET_FDSETSIZE
+#define ISC_SOCKET_FDSETSIZE FD_SETSIZE
+#endif
+
+/*%
+ * Mac OS X needs a special definition to support larger values in select()
+ */
+#if ISC_SOCKET_FDSETSIZE > FD_SETSIZE
+#ifdef __APPLE__
+#define _DARWIN_UNLIMITED_SELECT
+#endif /* __APPLE__ */
+#endif
+
 /*%
  * Some systems define the socket length argument as an int, some as size_t,
  * some as socklen_t.  This is here so it can be easily changed if needed.
@@ -202,12 +230,16 @@ struct isc_socketmgr {
        unsigned int            magic;
        isc_mem_t              *mctx;
        isc_mutex_t             lock;
+       int                     fd_bufsize;
+       int                     fdsize;
        /* Locked by manager lock. */
        ISC_LIST(isc_socket_t)  socklist;
-       fd_set                  read_fds;
-       fd_set                  write_fds;
-       isc_socket_t           *fds[FD_SETSIZE];
-       int                     fdstate[FD_SETSIZE];
+       fd_set                  *read_fds;
+       fd_set                  *read_fds_copy;
+       fd_set                  *write_fds;
+       fd_set                  *write_fds_copy;
+       isc_socket_t           **fds;
+       int                     *fdstate;
        int                     maxfd;
        int                     reserved;       /* unlocked */
 #ifdef ISC_PLATFORM_USETHREADS
@@ -252,6 +284,8 @@ static void build_msghdr_send(isc_socket_t *, isc_socketevent_t *,
                              struct msghdr *, struct iovec *, size_t *);
 static void build_msghdr_recv(isc_socket_t *, isc_socketevent_t *,
                              struct msghdr *, struct iovec *, size_t *);
+static void cleanup_fdsets(isc_socketmgr_t *, isc_mem_t *);
+static isc_result_t create_fdsets(isc_socketmgr_t *, isc_mem_t *);
 
 #define SELECT_POKE_SHUTDOWN           (-1)
 #define SELECT_POKE_NOTHING            (-2)
@@ -330,12 +364,12 @@ wakeup_socket(isc_socketmgr_t *manager, int fd, int msg) {
         * or writes.
         */
 
-       INSIST(fd >= 0 && fd < (int)FD_SETSIZE);
+       INSIST(fd >= 0 && fd < manager->fdsize);
 
        if (manager->fdstate[fd] == CLOSE_PENDING) {
                manager->fdstate[fd] = CLOSED;
-               FD_CLR(fd, &manager->read_fds);
-               FD_CLR(fd, &manager->write_fds);
+               FD_CLR(fd, manager->read_fds);
+               FD_CLR(fd, manager->write_fds);
                (void)close(fd);
                return;
        }
@@ -348,9 +382,9 @@ wakeup_socket(isc_socketmgr_t *manager, int fd, int msg) {
         * Set requested bit.
         */
        if (msg == SELECT_POKE_READ)
-               FD_SET(sock->fd, &manager->read_fds);
+               FD_SET(sock->fd, manager->read_fds);
        if (msg == SELECT_POKE_WRITE)
-               FD_SET(sock->fd, &manager->write_fds);
+               FD_SET(sock->fd, manager->write_fds);
 }
 
 #ifdef ISC_PLATFORM_USETHREADS
@@ -1222,7 +1256,7 @@ destroy(isc_socket_t **sockp) {
        INSIST(ISC_LIST_EMPTY(sock->recv_list));
        INSIST(ISC_LIST_EMPTY(sock->send_list));
        INSIST(sock->connect_ev == NULL);
-       REQUIRE(sock->fd >= 0 && sock->fd < (int)FD_SETSIZE);
+       REQUIRE(sock->fd >= 0 && sock->fd < (int)manager->fdsize);
 
        LOCK(&manager->lock);
 
@@ -1501,7 +1535,7 @@ isc_socket_create(isc_socketmgr_t *manager, int pf, isc_sockettype_t type,
        }
 #endif
 
-       if (sock->fd >= (int)FD_SETSIZE) {
+       if (sock->fd >= (int)manager->fdsize) {
                (void)close(sock->fd);
                isc_log_iwrite(isc_lctx, ISC_LOGCATEGORY_GENERAL,
                               ISC_LOGMODULE_SOCKET, ISC_LOG_ERROR,
@@ -2042,7 +2076,7 @@ internal_accept(isc_task_t *me, isc_event_t *ev) {
                                         sock->pf);
                        (void)close(fd);
                        goto soft_error;
-               } else if (fd >= (int)FD_SETSIZE) {
+               } else if (fd >= (int)manager->fdsize) {
                        isc_log_iwrite(isc_lctx, ISC_LOGCATEGORY_GENERAL,
                                       ISC_LOGMODULE_SOCKET, ISC_LOG_ERROR,
                                       isc_msgcat, ISC_MSGSET_SOCKET,
@@ -2254,7 +2288,7 @@ process_fds(isc_socketmgr_t *manager, int maxfd,
        isc_socket_t *sock;
        isc_boolean_t unlock_sock;
 
-       REQUIRE(maxfd <= (int)FD_SETSIZE);
+       REQUIRE(maxfd <= (int)manager->fdsize);
 
        /*
         * Process read/writes on other fds here.  Avoid locking
@@ -2268,8 +2302,8 @@ process_fds(isc_socketmgr_t *manager, int maxfd,
 
                if (manager->fdstate[i] == CLOSE_PENDING) {
                        manager->fdstate[i] = CLOSED;
-                       FD_CLR(i, &manager->read_fds);
-                       FD_CLR(i, &manager->write_fds);
+                       FD_CLR(i, manager->read_fds);
+                       FD_CLR(i, manager->write_fds);
 
                        (void)close(i);
 
@@ -2280,7 +2314,7 @@ process_fds(isc_socketmgr_t *manager, int maxfd,
                unlock_sock = ISC_FALSE;
                if (FD_ISSET(i, readfds)) {
                        if (sock == NULL) {
-                               FD_CLR(i, &manager->read_fds);
+                               FD_CLR(i, manager->read_fds);
                                goto check_write;
                        }
                        unlock_sock = ISC_TRUE;
@@ -2291,12 +2325,12 @@ process_fds(isc_socketmgr_t *manager, int maxfd,
                                else
                                        dispatch_recv(sock);
                        }
-                       FD_CLR(i, &manager->read_fds);
+                       FD_CLR(i, manager->read_fds);
                }
        check_write:
                if (FD_ISSET(i, writefds)) {
                        if (sock == NULL) {
-                               FD_CLR(i, &manager->write_fds);
+                               FD_CLR(i, manager->write_fds);
                                continue;
                        }
                        if (!unlock_sock) {
@@ -2309,7 +2343,7 @@ process_fds(isc_socketmgr_t *manager, int maxfd,
                                else
                                        dispatch_send(sock);
                        }
-                       FD_CLR(i, &manager->write_fds);
+                       FD_CLR(i, manager->write_fds);
                }
                if (unlock_sock)
                        UNLOCK(&sock->lock);
@@ -2330,8 +2364,6 @@ watcher(void *uap) {
        isc_boolean_t done;
        int ctlfd;
        int cc;
-       fd_set readfds;
-       fd_set writefds;
        int msg, fd;
        int maxfd;
        char strbuf[ISC_STRERRORSIZE];
@@ -2345,13 +2377,16 @@ watcher(void *uap) {
        done = ISC_FALSE;
        while (!done) {
                do {
-                       readfds = manager->read_fds;
-                       writefds = manager->write_fds;
+                       memcpy(manager->read_fds_copy, manager->read_fds,
+                              manager->fd_bufsize);
+                       memcpy(manager->write_fds_copy, manager->write_fds,
+                              manager->fd_bufsize);
                        maxfd = manager->maxfd + 1;
 
                        UNLOCK(&manager->lock);
 
-                       cc = select(maxfd, &readfds, &writefds, NULL, NULL);
+                       cc = select(maxfd, manager->read_fds_copy,
+                                   manager->write_fds_copy, NULL, NULL);
                        if (cc < 0) {
                                if (!SOFT_ERROR(errno)) {
                                        isc__strerror(errno, strbuf,
@@ -2373,7 +2408,7 @@ watcher(void *uap) {
                /*
                 * Process reads on internal, control fd.
                 */
-               if (FD_ISSET(ctlfd, &readfds)) {
+               if (FD_ISSET(ctlfd, manager->read_fds_copy)) {
                        for (;;) {
                                select_readmsg(manager, &fd, &msg);
 
@@ -2412,7 +2447,8 @@ watcher(void *uap) {
                        }
                }
 
-               process_fds(manager, maxfd, &readfds, &writefds);
+               process_fds(manager, maxfd, manager->read_fds_copy,
+                           manager->write_fds_copy);
        }
 
        manager_log(manager, TRACE,
@@ -2432,6 +2468,80 @@ isc__socketmgr_setreserved(isc_socketmgr_t *manager, isc_uint32_t reserved) {
        manager->reserved = reserved;
 }
 
+/*
+ * Initialize fdsets in socketmgr structure.
+ */
+static isc_result_t
+create_fdsets(isc_socketmgr_t *manager, isc_mem_t *mctx) {
+#if ISC_SOCKET_FDSETSIZE > FD_SETSIZE
+       manager->fdsize = ISC_SOCKET_FDSETSIZE;
+       manager->fd_bufsize = howmany(ISC_SOCKET_FDSETSIZE, NFDBITS) *
+               sizeof(fd_mask);
+#else
+       manager->fdsize = FD_SETSIZE;
+       manager->fd_bufsize = sizeof(fd_set);
+#endif
+
+       manager->fds = NULL;
+       manager->fdstate = NULL;
+       manager->read_fds = NULL;
+       manager->read_fds_copy = NULL;
+       manager->write_fds = NULL;
+       manager->write_fds_copy = NULL;
+
+       manager->fds = isc_mem_get(mctx,
+                                  manager->fdsize * sizeof(manager->fds[0]));
+       if (manager->fds == NULL)
+               goto fail;
+
+       manager->fdstate = isc_mem_get(mctx, manager->fdsize *
+                                      sizeof(manager->fdstate[0]));
+       if (manager->fdstate == NULL)
+               goto fail;
+
+       manager->read_fds = isc_mem_get(mctx, manager->fd_bufsize);
+       if (manager->read_fds == NULL)
+               goto fail;
+       manager->read_fds_copy = isc_mem_get(mctx, manager->fd_bufsize);
+       if (manager->read_fds_copy == NULL)
+               goto fail;
+       manager->write_fds = isc_mem_get(mctx, manager->fd_bufsize);
+       if (manager->write_fds == NULL)
+               goto fail;
+       manager->write_fds_copy = isc_mem_get(mctx, manager->fd_bufsize);
+       if (manager->write_fds_copy == NULL)
+               goto fail;
+
+       return (ISC_R_SUCCESS);
+
+  fail:
+       cleanup_fdsets(manager, mctx);
+       return (ISC_R_NOMEMORY);
+}
+
+/*
+ * Clean up fdsets in socketmgr structure.
+ */
+static void
+cleanup_fdsets(isc_socketmgr_t *manager, isc_mem_t *mctx) {
+       if (manager->fds != NULL) {
+               isc_mem_put(mctx, manager->fds,
+                           manager->fdsize * sizeof(manager->fds[0]));
+       }
+       if (manager->fdstate != NULL) {
+               isc_mem_put(mctx, manager->fdstate,
+                           manager->fdsize * sizeof(manager->fdstate[0]));
+       }
+       if (manager->read_fds != NULL)
+               isc_mem_put(mctx, manager->read_fds, manager->fd_bufsize);
+       if (manager->read_fds_copy != NULL)
+               isc_mem_put(mctx, manager->read_fds_copy, manager->fd_bufsize);
+       if (manager->write_fds != NULL)
+               isc_mem_put(mctx, manager->write_fds, manager->fd_bufsize);
+       if (manager->write_fds_copy != NULL)
+               isc_mem_put(mctx, manager->write_fds_copy, manager->fd_bufsize);
+}
+
 /*
  * Create a new socket manager.
  */
@@ -2457,17 +2567,26 @@ isc_socketmgr_create(isc_mem_t *mctx, isc_socketmgr_t **managerp) {
        if (manager == NULL)
                return (ISC_R_NOMEMORY);
 
+       result = create_fdsets(manager, mctx);
+       if (result != ISC_R_SUCCESS) {
+               cleanup_fdsets(manager, mctx);
+               isc_mem_put(mctx, manager, sizeof(*manager));
+               return (result);
+       }
+
        manager->magic = SOCKET_MANAGER_MAGIC;
        manager->mctx = NULL;
-       memset(manager->fds, 0, sizeof(manager->fds));
+       memset(manager->fds, 0, sizeof(manager->fds[0]) * manager->fdsize);
        ISC_LIST_INIT(manager->socklist);
        result = isc_mutex_init(&manager->lock);
        if (result != ISC_R_SUCCESS) {
+               cleanup_fdsets(manager, mctx);
                isc_mem_put(mctx, manager, sizeof(*manager));
                return (result);
        }
 #ifdef ISC_PLATFORM_USETHREADS
        if (isc_condition_init(&manager->shutdown_ok) != ISC_R_SUCCESS) {
+               cleanup_fdsets(manager, mctx);
                DESTROYLOCK(&manager->lock);
                isc_mem_put(mctx, manager, sizeof(*manager));
                UNEXPECTED_ERROR(__FILE__, __LINE__,
@@ -2482,6 +2601,7 @@ isc_socketmgr_create(isc_mem_t *mctx, isc_socketmgr_t **managerp) {
         * select/poll loop when something internal needs to be done.
         */
        if (pipe(manager->pipe_fds) != 0) {
+               cleanup_fdsets(manager, mctx);
                DESTROYLOCK(&manager->lock);
                isc_mem_put(mctx, manager, sizeof(*manager));
                isc__strerror(errno, strbuf, sizeof(strbuf));
@@ -2505,16 +2625,17 @@ isc_socketmgr_create(isc_mem_t *mctx, isc_socketmgr_t **managerp) {
        /*
         * Set up initial state for the select loop
         */
-       FD_ZERO(&manager->read_fds);
-       FD_ZERO(&manager->write_fds);
+       memset(manager->read_fds, 0, manager->fd_bufsize);
+       memset(manager->write_fds, 0, manager->fd_bufsize);
 #ifdef ISC_PLATFORM_USETHREADS
-       FD_SET(manager->pipe_fds[0], &manager->read_fds);
+       FD_SET(manager->pipe_fds[0], manager->read_fds);
        manager->maxfd = manager->pipe_fds[0];
 #else /* ISC_PLATFORM_USETHREADS */
        manager->maxfd = 0;
 #endif /* ISC_PLATFORM_USETHREADS */
        manager->reserved = 0;
-       memset(manager->fdstate, 0, sizeof(manager->fdstate));
+       memset(manager->fdstate, 0,
+              manager->fdsize * sizeof(manager->fdstate[0]));
 
 #ifdef ISC_PLATFORM_USETHREADS
        /*
@@ -2620,11 +2741,12 @@ isc_socketmgr_destroy(isc_socketmgr_t **managerp) {
        (void)isc_condition_destroy(&manager->shutdown_ok);
 #endif /* ISC_PLATFORM_USETHREADS */
 
-       for (i = 0; i < (int)FD_SETSIZE; i++)
+       for (i = 0; i < (int)manager->fdsize; i++)
                if (manager->fdstate[i] == CLOSE_PENDING)
                        (void)close(i);
 
        DESTROYLOCK(&manager->lock);
+       cleanup_fdsets(manager, manager->mctx);
        manager->magic = 0;
        mctx= manager->mctx;
        isc_mem_put(mctx, manager, sizeof(*manager));
@@ -3843,12 +3965,17 @@ isc_socket_ipv6only(isc_socket_t *sock, isc_boolean_t yes) {
 
 #ifndef ISC_PLATFORM_USETHREADS
 void
-isc__socketmgr_getfdsets(fd_set *readset, fd_set *writeset, int *maxfd) {
+isc__socketmgr_getfdsets(fd_set **readset, fd_set **writeset, int *maxfd) {
        if (socketmgr == NULL)
                *maxfd = 0;
        else {
-               *readset = socketmgr->read_fds;
-               *writeset = socketmgr->write_fds;
+               /* Prepare duplicates of fd_sets, as select() will modify */
+               memcpy(socketmgr->read_fds_copy, socketmgr->read_fds,
+                      socketmgr->fd_bufsize);
+               memcpy(socketmgr->write_fds_copy, socketmgr->write_fds,
+                      socketmgr->fd_bufsize);
+               *readset = socketmgr->read_fds_copy;
+               *writeset = socketmgr->write_fds_copy;
                *maxfd = socketmgr->maxfd + 1;
        }
 }
index c260bbc9128fde2ac42d260f56f51a7b13ba37b0..4f9cf27bf1f2b53adc8551dcc3c40cb31548efb0 100644 (file)
@@ -15,7 +15,7 @@
  * PERFORMANCE OF THIS SOFTWARE.
  */
 
-/* $Id: socket_p.h,v 1.7.18.2 2005/04/29 00:17:08 marka Exp $ */
+/* $Id: socket_p.h,v 1.7.18.2.52.1 2008/07/29 04:47:31 each Exp $ */
 
 #ifndef ISC_SOCKET_P_H
 #define ISC_SOCKET_P_H
@@ -27,7 +27,7 @@
 #endif
 
 void
-isc__socketmgr_getfdsets(fd_set *readset, fd_set *writeset, int *maxfd);
+isc__socketmgr_getfdsets(fd_set **readset, fd_set **writeset, int *maxfd);
 
 isc_result_t
 isc__socketmgr_dispatch(fd_set *readset, fd_set *writeset, int maxfd);