]> git.ipfire.org Git - thirdparty/bind9.git/commitdiff
ISC_SOCKET_FDSETSIZE enables use of larger fdsets with select() on
authorEvan Hunt <each@isc.org>
Tue, 29 Jul 2008 04:43:57 +0000 (04:43 +0000)
committerEvan Hunt <each@isc.org>
Tue, 29 Jul 2008 04:43:57 +0000 (04:43 +0000)
systems with small FD_SETSIZE values [rt18328]

CHANGES
lib/isc/unix/app.c
lib/isc/unix/socket.c
lib/isc/unix/socket_p.h

diff --git a/CHANGES b/CHANGES
index ae2d723b0bb60346c0b5629b5f79d4d260efb272..b3622e0b245145dd6c57e59200653c621be30092 100644 (file)
--- a/CHANGES
+++ b/CHANGES
@@ -1,3 +1,13 @@
+2406.   [bug]           Some operating systems have FD_SETSIZE set to a
+                       low value by default, which can cause resource
+                       exhaustion when many simultaneous connections are
+                       open.  Linux in particular makes it difficult to
+                       increase this value.  To use more sockets with
+                       select(), set ISC_SOCKET_FDSETSIZE.  Example:
+                       STD_CDEFINES="-DISC_SOCKET_FDSETSIZE=4096" ./configure
+                       (This should not be necessary in most cases, and
+                       never for an authoritative-only server.) [RT #18328]
+
 2404.  [port]          hpux: files unlimited support.
 
 2403.  [bug]           TSIG context leak. [RT #18341]
@@ -24,7 +34,6 @@
                        open files to 'unlimited' as described in the
                        documentation. [RT #18331]
 
-
        --- 9.3.5-P1 released ---
 
 2375.   [security]      Fully randomize UDP query ports to improve
                        [RT #17113]
 
 2249.   [bug]           Only set Authentic Data bit if client requested
-                        DNSSEC, per RFC 3655 [RT #17175]
+                       DNSSEC, per RFC 3655 [RT #17175]
 
 2248.   [cleanup]       Fix several errors reported by Coverity. [RT #17160]
 
                        query order sensitive. [RT #14933]
 
 1905.  [bug]           Strings returned from cfg_obj_asstring() should be
-                        treated as read-only.  [RT #15256]
+                       treated as read-only.  [RT #15256]
 
 1901.  [cleanup]       Don't add DNSKEY records to the additional section.
 
index 8e1f0fafd306bab89043eb83bba133a1a1a87feb..382e445ead5c92ed71cdf9069d4829032c983007 100644 (file)
@@ -15,7 +15,7 @@
  * PERFORMANCE OF THIS SOFTWARE.
  */
 
-/* $Id: app.c,v 1.43.2.3.8.8 2008/01/17 23:45:28 tbox Exp $ */
+/* $Id: app.c,v 1.43.2.3.8.8.4.1 2008/07/29 04:43:57 each Exp $ */
 
 #include <config.h>
 
@@ -301,7 +301,7 @@ evloop() {
                int n;
                isc_time_t when, now;
                struct timeval tv, *tvp;
-               fd_set readfds, writefds;
+               fd_set *readfds, *writefds;
                int maxfd;
                isc_boolean_t readytasks;
                isc_boolean_t call_timer_dispatch = ISC_FALSE;
@@ -330,7 +330,7 @@ evloop() {
                }
 
                isc__socketmgr_getfdsets(&readfds, &writefds, &maxfd);
-               n = select(maxfd, &readfds, &writefds, NULL, tvp);
+               n = select(maxfd, readfds, writefds, NULL, tvp);
 
                if (n == 0 || call_timer_dispatch) {
                        /*
@@ -350,7 +350,7 @@ evloop() {
                        isc__timermgr_dispatch();
                }
                if (n > 0)
-                       (void)isc__socketmgr_dispatch(&readfds, &writefds,
+                       (void)isc__socketmgr_dispatch(readfds, writefds,
                                                      maxfd);
                (void)isc__taskmgr_dispatch();
 
index cb750141b1253d559cc4165822af333013583eaf..3ee796c24bbaa0c15c05563ed9f380c0b821d045 100644 (file)
@@ -15,7 +15,7 @@
  * PERFORMANCE OF THIS SOFTWARE.
  */
 
-/* $Id: socket.c,v 1.207.2.19.2.35.4.5 2008/07/24 10:35:48 fdupont Exp $ */
+/* $Id: socket.c,v 1.207.2.19.2.35.4.6 2008/07/29 04:43:57 each Exp $ */
 
 #include <config.h>
 
 #include <sys/utsname.h>
 #endif
 
-/*
+/*%
+ * Max number of open sockets.  In the vast majority of cases the default size  
+ * of FD_SETSIZE should be fine, and this constant should be increased only
+ * when absolutely necessary and possible, i.e., the server is exhausting all   
+ * available file descriptors (up to FD_SETSIZE) and the select() function
+ * and FD_xxx macros support larger values than FD_SETSIZE (which may not
+ * always by true, but we keep using some of them to ensure as much
+ * portability as possible).  Note also that overall server performance
+ * may be rather worsened with a larger value of this constant due to
+ * inherent scalability problems of select().
+ *
+ * As a special note, this value shouldn't have to be touched if
+ * this is a build for an authoritative only DNS server.
+ */
+
+#ifndef ISC_SOCKET_FDSETSIZE
+#define ISC_SOCKET_FDSETSIZE FD_SETSIZE
+#endif
+
+/*%
+ * Mac OS X needs a special definition to support larger values in select()
+ */
+#if ISC_SOCKET_FDSETSIZE > FD_SETSIZE
+#ifdef __APPLE__
+#define _DARWIN_UNLIMITED_SELECT
+#endif /* __APPLE__ */
+#endif
+
+/*%
  * Some systems define the socket length argument as an int, some as size_t,
  * some as socklen_t.  This is here so it can be easily changed if needed.
  */
@@ -190,12 +218,16 @@ struct isc_socketmgr {
        unsigned int            magic;
        isc_mem_t              *mctx;
        isc_mutex_t             lock;
+       int                     fd_bufsize;
+       int                     fdsize;
        /* Locked by manager lock. */
        ISC_LIST(isc_socket_t)  socklist;
-       fd_set                  read_fds;
-       fd_set                  write_fds;
-       isc_socket_t           *fds[FD_SETSIZE];
-       int                     fdstate[FD_SETSIZE];
+       fd_set                  *read_fds;
+       fd_set                  *read_fds_copy;
+       fd_set                  *write_fds;
+       fd_set                  *write_fds_copy;
+       isc_socket_t           **fds;
+       int                     *fdstate;
        int                     maxfd;
        int                     reserved;       /* unlocked */
 #ifdef ISC_PLATFORM_USETHREADS
@@ -240,6 +272,8 @@ static void build_msghdr_send(isc_socket_t *, isc_socketevent_t *,
                              struct msghdr *, struct iovec *, size_t *);
 static void build_msghdr_recv(isc_socket_t *, isc_socketevent_t *,
                              struct msghdr *, struct iovec *, size_t *);
+static void cleanup_fdsets(isc_socketmgr_t *, isc_mem_t *);
+static isc_result_t create_fdsets(isc_socketmgr_t *, isc_mem_t *);
 
 #define SELECT_POKE_SHUTDOWN           (-1)
 #define SELECT_POKE_NOTHING            (-2)
@@ -318,12 +352,12 @@ wakeup_socket(isc_socketmgr_t *manager, int fd, int msg) {
         * or writes.
         */
 
-       INSIST(fd >= 0 && fd < (int)FD_SETSIZE);
+       INSIST(fd >= 0 && fd < manager->fdsize);
 
        if (manager->fdstate[fd] == CLOSE_PENDING) {
                manager->fdstate[fd] = CLOSED;
-               FD_CLR(fd, &manager->read_fds);
-               FD_CLR(fd, &manager->write_fds);
+               FD_CLR(fd, manager->read_fds);
+               FD_CLR(fd, manager->write_fds);
                (void)close(fd);
                return;
        }
@@ -336,9 +370,9 @@ wakeup_socket(isc_socketmgr_t *manager, int fd, int msg) {
         * Set requested bit.
         */
        if (msg == SELECT_POKE_READ)
-               FD_SET(sock->fd, &manager->read_fds);
+               FD_SET(sock->fd, manager->read_fds);
        if (msg == SELECT_POKE_WRITE)
-               FD_SET(sock->fd, &manager->write_fds);
+               FD_SET(sock->fd, manager->write_fds);
 }
 
 #ifdef ISC_PLATFORM_USETHREADS
@@ -1199,7 +1233,7 @@ destroy(isc_socket_t **sockp) {
        INSIST(ISC_LIST_EMPTY(sock->recv_list));
        INSIST(ISC_LIST_EMPTY(sock->send_list));
        INSIST(sock->connect_ev == NULL);
-       REQUIRE(sock->fd >= 0 && sock->fd < (int)FD_SETSIZE);
+       REQUIRE(sock->fd >= 0 && sock->fd < (int)manager->fdsize);
 
        LOCK(&manager->lock);
 
@@ -1475,7 +1509,7 @@ isc_socket_create(isc_socketmgr_t *manager, int pf, isc_sockettype_t type,
        }
 #endif
 
-       if (sock->fd >= (int)FD_SETSIZE) {
+       if (sock->fd >= (int)manager->fdsize) {
                (void)close(sock->fd);
                isc_log_iwrite(isc_lctx, ISC_LOGCATEGORY_GENERAL,
                               ISC_LOGMODULE_SOCKET, ISC_LOG_ERROR,
@@ -2007,7 +2041,7 @@ internal_accept(isc_task_t *me, isc_event_t *ev) {
                                         sock->pf);
                        (void)close(fd);
                        goto soft_error;
-               } else if (fd >= (int)FD_SETSIZE) {
+               } else if (fd >= (int)manager->fdsize) {
                        isc_log_iwrite(isc_lctx, ISC_LOGCATEGORY_GENERAL,
                                       ISC_LOGMODULE_SOCKET, ISC_LOG_ERROR,
                                       isc_msgcat, ISC_MSGSET_SOCKET,
@@ -2219,7 +2253,7 @@ process_fds(isc_socketmgr_t *manager, int maxfd,
        isc_socket_t *sock;
        isc_boolean_t unlock_sock;
 
-       REQUIRE(maxfd <= (int)FD_SETSIZE);
+       REQUIRE(maxfd <= (int)manager->fdsize);
 
        /*
         * Process read/writes on other fds here.  Avoid locking
@@ -2233,8 +2267,8 @@ process_fds(isc_socketmgr_t *manager, int maxfd,
 
                if (manager->fdstate[i] == CLOSE_PENDING) {
                        manager->fdstate[i] = CLOSED;
-                       FD_CLR(i, &manager->read_fds);
-                       FD_CLR(i, &manager->write_fds);
+                       FD_CLR(i, manager->read_fds);
+                       FD_CLR(i, manager->write_fds);
 
                        (void)close(i);
 
@@ -2245,7 +2279,7 @@ process_fds(isc_socketmgr_t *manager, int maxfd,
                unlock_sock = ISC_FALSE;
                if (FD_ISSET(i, readfds)) {
                        if (sock == NULL) {
-                               FD_CLR(i, &manager->read_fds);
+                               FD_CLR(i, manager->read_fds);
                                goto check_write;
                        }
                        unlock_sock = ISC_TRUE;
@@ -2256,12 +2290,12 @@ process_fds(isc_socketmgr_t *manager, int maxfd,
                                else
                                        dispatch_recv(sock);
                        }
-                       FD_CLR(i, &manager->read_fds);
+                       FD_CLR(i, manager->read_fds);
                }
        check_write:
                if (FD_ISSET(i, writefds)) {
                        if (sock == NULL) {
-                               FD_CLR(i, &manager->write_fds);
+                               FD_CLR(i, manager->write_fds);
                                continue;
                        }
                        if (!unlock_sock) {
@@ -2274,7 +2308,7 @@ process_fds(isc_socketmgr_t *manager, int maxfd,
                                else
                                        dispatch_send(sock);
                        }
-                       FD_CLR(i, &manager->write_fds);
+                       FD_CLR(i, manager->write_fds);
                }
                if (unlock_sock)
                        UNLOCK(&sock->lock);
@@ -2295,8 +2329,6 @@ watcher(void *uap) {
        isc_boolean_t done;
        int ctlfd;
        int cc;
-       fd_set readfds;
-       fd_set writefds;
        int msg, fd;
        int maxfd;
        char strbuf[ISC_STRERRORSIZE];
@@ -2310,13 +2342,16 @@ watcher(void *uap) {
        done = ISC_FALSE;
        while (!done) {
                do {
-                       readfds = manager->read_fds;
-                       writefds = manager->write_fds;
+                       memcpy(manager->read_fds_copy, manager->read_fds,
+                              manager->fd_bufsize);
+                       memcpy(manager->write_fds_copy, manager->write_fds,
+                              manager->fd_bufsize);
                        maxfd = manager->maxfd + 1;
 
                        UNLOCK(&manager->lock);
 
-                       cc = select(maxfd, &readfds, &writefds, NULL, NULL);
+                       cc = select(maxfd, manager->read_fds_copy,
+                                   manager->write_fds_copy, NULL, NULL);
                        if (cc < 0) {
                                if (!SOFT_ERROR(errno)) {
                                        isc__strerror(errno, strbuf,
@@ -2338,7 +2373,7 @@ watcher(void *uap) {
                /*
                 * Process reads on internal, control fd.
                 */
-               if (FD_ISSET(ctlfd, &readfds)) {
+               if (FD_ISSET(ctlfd, manager->read_fds_copy)) {
                        for (;;) {
                                select_readmsg(manager, &fd, &msg);
 
@@ -2377,7 +2412,8 @@ watcher(void *uap) {
                        }
                }
 
-               process_fds(manager, maxfd, &readfds, &writefds);
+               process_fds(manager, maxfd, manager->read_fds_copy,
+                           manager->write_fds_copy);
        }
 
        manager_log(manager, TRACE,
@@ -2397,6 +2433,80 @@ isc__socketmgr_setreserved(isc_socketmgr_t *manager, isc_uint32_t reserved) {
        manager->reserved = reserved;
 }
 
+/*
+ * Initialize fdsets in socketmgr structure.
+ */
+static isc_result_t
+create_fdsets(isc_socketmgr_t *manager, isc_mem_t *mctx) {
+#if ISC_SOCKET_FDSETSIZE > FD_SETSIZE
+       manager->fdsize = ISC_SOCKET_FDSETSIZE;
+       manager->fd_bufsize = howmany(ISC_SOCKET_FDSETSIZE, NFDBITS) *
+               sizeof(fd_mask);
+#else
+       manager->fdsize = FD_SETSIZE;
+       manager->fd_bufsize = sizeof(fd_set);
+#endif
+
+       manager->fds = NULL;
+       manager->fdstate = NULL;
+       manager->read_fds = NULL;
+       manager->read_fds_copy = NULL;
+       manager->write_fds = NULL;
+       manager->write_fds_copy = NULL;
+
+       manager->fds = isc_mem_get(mctx,
+                                  manager->fdsize * sizeof(manager->fds[0]));
+       if (manager->fds == NULL)
+               goto fail;
+
+       manager->fdstate = isc_mem_get(mctx, manager->fdsize *
+                                      sizeof(manager->fdstate[0]));
+       if (manager->fdstate == NULL)
+               goto fail;
+
+       manager->read_fds = isc_mem_get(mctx, manager->fd_bufsize);
+       if (manager->read_fds == NULL)
+               goto fail;
+       manager->read_fds_copy = isc_mem_get(mctx, manager->fd_bufsize);
+       if (manager->read_fds_copy == NULL)
+               goto fail;
+       manager->write_fds = isc_mem_get(mctx, manager->fd_bufsize);
+       if (manager->write_fds == NULL)
+               goto fail;
+       manager->write_fds_copy = isc_mem_get(mctx, manager->fd_bufsize);
+       if (manager->write_fds_copy == NULL)
+               goto fail;
+
+       return (ISC_R_SUCCESS);
+
+  fail:
+       cleanup_fdsets(manager, mctx);
+       return (ISC_R_NOMEMORY);
+}
+
+/*
+ * Clean up fdsets in socketmgr structure.
+ */
+static void
+cleanup_fdsets(isc_socketmgr_t *manager, isc_mem_t *mctx) {
+       if (manager->fds != NULL) {
+               isc_mem_put(mctx, manager->fds,
+                           manager->fdsize * sizeof(manager->fds[0]));
+       }
+       if (manager->fdstate != NULL) {
+               isc_mem_put(mctx, manager->fdstate,
+                           manager->fdsize * sizeof(manager->fdstate[0]));
+       }
+       if (manager->read_fds != NULL)
+               isc_mem_put(mctx, manager->read_fds, manager->fd_bufsize);
+       if (manager->read_fds_copy != NULL)
+               isc_mem_put(mctx, manager->read_fds_copy, manager->fd_bufsize);
+       if (manager->write_fds != NULL)
+               isc_mem_put(mctx, manager->write_fds, manager->fd_bufsize);
+       if (manager->write_fds_copy != NULL)
+               isc_mem_put(mctx, manager->write_fds_copy, manager->fd_bufsize);
+}
+
 /*
  * Create a new socket manager.
  */
@@ -2406,6 +2516,7 @@ isc_socketmgr_create(isc_mem_t *mctx, isc_socketmgr_t **managerp) {
 #ifdef ISC_PLATFORM_USETHREADS
        char strbuf[ISC_STRERRORSIZE];
 #endif
+       isc_result_t result;
 
        REQUIRE(managerp != NULL && *managerp == NULL);
 
@@ -2421,11 +2532,19 @@ isc_socketmgr_create(isc_mem_t *mctx, isc_socketmgr_t **managerp) {
        if (manager == NULL)
                return (ISC_R_NOMEMORY);
 
+       result = create_fdsets(manager, mctx);
+       if (result != ISC_R_SUCCESS) {
+               cleanup_fdsets(manager, mctx);
+               isc_mem_put(mctx, manager, sizeof(*manager));
+               return (result);
+       }
+
        manager->magic = SOCKET_MANAGER_MAGIC;
        manager->mctx = NULL;
-       memset(manager->fds, 0, sizeof(manager->fds));
        ISC_LIST_INIT(manager->socklist);
-       if (isc_mutex_init(&manager->lock) != ISC_R_SUCCESS) {
+       result = isc_mutex_init(&manager->lock);
+       if (result != ISC_R_SUCCESS) {
+               cleanup_fdsets(manager, mctx);
                isc_mem_put(mctx, manager, sizeof(*manager));
                UNEXPECTED_ERROR(__FILE__, __LINE__,
                                 "isc_mutex_init() %s",
@@ -2435,6 +2554,7 @@ isc_socketmgr_create(isc_mem_t *mctx, isc_socketmgr_t **managerp) {
        }
 #ifdef ISC_PLATFORM_USETHREADS
        if (isc_condition_init(&manager->shutdown_ok) != ISC_R_SUCCESS) {
+               cleanup_fdsets(manager, mctx);
                DESTROYLOCK(&manager->lock);
                isc_mem_put(mctx, manager, sizeof(*manager));
                UNEXPECTED_ERROR(__FILE__, __LINE__,
@@ -2449,6 +2569,7 @@ isc_socketmgr_create(isc_mem_t *mctx, isc_socketmgr_t **managerp) {
         * select/poll loop when something internal needs to be done.
         */
        if (pipe(manager->pipe_fds) != 0) {
+               cleanup_fdsets(manager, mctx);
                DESTROYLOCK(&manager->lock);
                isc_mem_put(mctx, manager, sizeof(*manager));
                isc__strerror(errno, strbuf, sizeof(strbuf));
@@ -2472,16 +2593,17 @@ isc_socketmgr_create(isc_mem_t *mctx, isc_socketmgr_t **managerp) {
        /*
         * Set up initial state for the select loop
         */
-       FD_ZERO(&manager->read_fds);
-       FD_ZERO(&manager->write_fds);
+       memset(manager->read_fds, 0, manager->fd_bufsize);
+       memset(manager->write_fds, 0, manager->fd_bufsize);
 #ifdef ISC_PLATFORM_USETHREADS
-       FD_SET(manager->pipe_fds[0], &manager->read_fds);
+       FD_SET(manager->pipe_fds[0], manager->read_fds);
        manager->maxfd = manager->pipe_fds[0];
 #else /* ISC_PLATFORM_USETHREADS */
        manager->maxfd = 0;
 #endif /* ISC_PLATFORM_USETHREADS */
        manager->reserved = 0;
-       memset(manager->fdstate, 0, sizeof(manager->fdstate));
+       memset(manager->fdstate, 0,
+              manager->fdsize * sizeof(manager->fdstate[0]));
 
 #ifdef ISC_PLATFORM_USETHREADS
        /*
@@ -2587,11 +2709,12 @@ isc_socketmgr_destroy(isc_socketmgr_t **managerp) {
        (void)isc_condition_destroy(&manager->shutdown_ok);
 #endif /* ISC_PLATFORM_USETHREADS */
 
-       for (i = 0; i < (int)FD_SETSIZE; i++)
+       for (i = 0; i < (int)manager->fdsize; i++)
                if (manager->fdstate[i] == CLOSE_PENDING)
                        (void)close(i);
 
        DESTROYLOCK(&manager->lock);
+       cleanup_fdsets(manager, manager->mctx);
        manager->magic = 0;
        mctx= manager->mctx;
        isc_mem_put(mctx, manager, sizeof(*manager));
@@ -3618,12 +3741,17 @@ isc_socket_ipv6only(isc_socket_t *sock, isc_boolean_t yes) {
 
 #ifndef ISC_PLATFORM_USETHREADS
 void
-isc__socketmgr_getfdsets(fd_set *readset, fd_set *writeset, int *maxfd) {
+isc__socketmgr_getfdsets(fd_set **readset, fd_set **writeset, int *maxfd) {
        if (socketmgr == NULL)
                *maxfd = 0;
        else {
-               *readset = socketmgr->read_fds;
-               *writeset = socketmgr->write_fds;
+               /* Prepare duplicates of fd_sets, as select() will modify */
+               memcpy(socketmgr->read_fds_copy, socketmgr->read_fds,
+                      socketmgr->fd_bufsize);
+               memcpy(socketmgr->write_fds_copy, socketmgr->write_fds,
+                      socketmgr->fd_bufsize);
+               *readset = socketmgr->read_fds_copy;
+               *writeset = socketmgr->write_fds_copy;
                *maxfd = socketmgr->maxfd + 1;
        }
 }
index f430bf22e1c611b6b0d4667baf709f7aacdaca2a..07720eef656c77c0fb1e44fc4cb9b2a71d9bef19 100644 (file)
@@ -15,7 +15,7 @@
  * PERFORMANCE OF THIS SOFTWARE.
  */
 
-/* $Id: socket_p.h,v 1.6.206.1 2004/03/06 08:15:02 marka Exp $ */
+/* $Id: socket_p.h,v 1.6.206.1.34.1 2008/07/29 04:43:57 each Exp $ */
 
 #ifndef ISC_SOCKET_P_H
 #define ISC_SOCKET_P_H
@@ -25,7 +25,7 @@
 #endif
 
 void
-isc__socketmgr_getfdsets(fd_set *readset, fd_set *writeset, int *maxfd);
+isc__socketmgr_getfdsets(fd_set **readset, fd_set **writeset, int *maxfd);
 
 isc_result_t
 isc__socketmgr_dispatch(fd_set *readset, fd_set *writeset, int maxfd);