* PERFORMANCE OF THIS SOFTWARE.
*/
-/* $Id: socket.c,v 1.207.2.19.2.35.4.5 2008/07/24 10:35:48 fdupont Exp $ */
+/* $Id: socket.c,v 1.207.2.19.2.35.4.6 2008/07/29 04:43:57 each Exp $ */
#include <config.h>
#include <sys/utsname.h>
#endif
-/*
+/*%
+ * Max number of open sockets. In the vast majority of cases the default size
+ * of FD_SETSIZE should be fine, and this constant should be increased only
+ * when absolutely necessary and possible, i.e., the server is exhausting all
+ * available file descriptors (up to FD_SETSIZE) and the select() function
+ * and FD_xxx macros support larger values than FD_SETSIZE (which may not
+ * always by true, but we keep using some of them to ensure as much
+ * portability as possible). Note also that overall server performance
+ * may be rather worsened with a larger value of this constant due to
+ * inherent scalability problems of select().
+ *
+ * As a special note, this value shouldn't have to be touched if
+ * this is a build for an authoritative only DNS server.
+ */
+
+#ifndef ISC_SOCKET_FDSETSIZE
+#define ISC_SOCKET_FDSETSIZE FD_SETSIZE
+#endif
+
+/*%
+ * Mac OS X needs a special definition to support larger values in select()
+ */
+#if ISC_SOCKET_FDSETSIZE > FD_SETSIZE
+#ifdef __APPLE__
+#define _DARWIN_UNLIMITED_SELECT
+#endif /* __APPLE__ */
+#endif
+
+/*%
* Some systems define the socket length argument as an int, some as size_t,
* some as socklen_t. This is here so it can be easily changed if needed.
*/
unsigned int magic;
isc_mem_t *mctx;
isc_mutex_t lock;
+ int fd_bufsize;
+ int fdsize;
/* Locked by manager lock. */
ISC_LIST(isc_socket_t) socklist;
- fd_set read_fds;
- fd_set write_fds;
- isc_socket_t *fds[FD_SETSIZE];
- int fdstate[FD_SETSIZE];
+ fd_set *read_fds;
+ fd_set *read_fds_copy;
+ fd_set *write_fds;
+ fd_set *write_fds_copy;
+ isc_socket_t **fds;
+ int *fdstate;
int maxfd;
int reserved; /* unlocked */
#ifdef ISC_PLATFORM_USETHREADS
struct msghdr *, struct iovec *, size_t *);
static void build_msghdr_recv(isc_socket_t *, isc_socketevent_t *,
struct msghdr *, struct iovec *, size_t *);
+static void cleanup_fdsets(isc_socketmgr_t *, isc_mem_t *);
+static isc_result_t create_fdsets(isc_socketmgr_t *, isc_mem_t *);
#define SELECT_POKE_SHUTDOWN (-1)
#define SELECT_POKE_NOTHING (-2)
* or writes.
*/
- INSIST(fd >= 0 && fd < (int)FD_SETSIZE);
+ INSIST(fd >= 0 && fd < manager->fdsize);
if (manager->fdstate[fd] == CLOSE_PENDING) {
manager->fdstate[fd] = CLOSED;
- FD_CLR(fd, &manager->read_fds);
- FD_CLR(fd, &manager->write_fds);
+ FD_CLR(fd, manager->read_fds);
+ FD_CLR(fd, manager->write_fds);
(void)close(fd);
return;
}
* Set requested bit.
*/
if (msg == SELECT_POKE_READ)
- FD_SET(sock->fd, &manager->read_fds);
+ FD_SET(sock->fd, manager->read_fds);
if (msg == SELECT_POKE_WRITE)
- FD_SET(sock->fd, &manager->write_fds);
+ FD_SET(sock->fd, manager->write_fds);
}
#ifdef ISC_PLATFORM_USETHREADS
INSIST(ISC_LIST_EMPTY(sock->recv_list));
INSIST(ISC_LIST_EMPTY(sock->send_list));
INSIST(sock->connect_ev == NULL);
- REQUIRE(sock->fd >= 0 && sock->fd < (int)FD_SETSIZE);
+ REQUIRE(sock->fd >= 0 && sock->fd < (int)manager->fdsize);
LOCK(&manager->lock);
}
#endif
- if (sock->fd >= (int)FD_SETSIZE) {
+ if (sock->fd >= (int)manager->fdsize) {
(void)close(sock->fd);
isc_log_iwrite(isc_lctx, ISC_LOGCATEGORY_GENERAL,
ISC_LOGMODULE_SOCKET, ISC_LOG_ERROR,
sock->pf);
(void)close(fd);
goto soft_error;
- } else if (fd >= (int)FD_SETSIZE) {
+ } else if (fd >= (int)manager->fdsize) {
isc_log_iwrite(isc_lctx, ISC_LOGCATEGORY_GENERAL,
ISC_LOGMODULE_SOCKET, ISC_LOG_ERROR,
isc_msgcat, ISC_MSGSET_SOCKET,
isc_socket_t *sock;
isc_boolean_t unlock_sock;
- REQUIRE(maxfd <= (int)FD_SETSIZE);
+ REQUIRE(maxfd <= (int)manager->fdsize);
/*
* Process read/writes on other fds here. Avoid locking
if (manager->fdstate[i] == CLOSE_PENDING) {
manager->fdstate[i] = CLOSED;
- FD_CLR(i, &manager->read_fds);
- FD_CLR(i, &manager->write_fds);
+ FD_CLR(i, manager->read_fds);
+ FD_CLR(i, manager->write_fds);
(void)close(i);
unlock_sock = ISC_FALSE;
if (FD_ISSET(i, readfds)) {
if (sock == NULL) {
- FD_CLR(i, &manager->read_fds);
+ FD_CLR(i, manager->read_fds);
goto check_write;
}
unlock_sock = ISC_TRUE;
else
dispatch_recv(sock);
}
- FD_CLR(i, &manager->read_fds);
+ FD_CLR(i, manager->read_fds);
}
check_write:
if (FD_ISSET(i, writefds)) {
if (sock == NULL) {
- FD_CLR(i, &manager->write_fds);
+ FD_CLR(i, manager->write_fds);
continue;
}
if (!unlock_sock) {
else
dispatch_send(sock);
}
- FD_CLR(i, &manager->write_fds);
+ FD_CLR(i, manager->write_fds);
}
if (unlock_sock)
UNLOCK(&sock->lock);
isc_boolean_t done;
int ctlfd;
int cc;
- fd_set readfds;
- fd_set writefds;
int msg, fd;
int maxfd;
char strbuf[ISC_STRERRORSIZE];
done = ISC_FALSE;
while (!done) {
do {
- readfds = manager->read_fds;
- writefds = manager->write_fds;
+ memcpy(manager->read_fds_copy, manager->read_fds,
+ manager->fd_bufsize);
+ memcpy(manager->write_fds_copy, manager->write_fds,
+ manager->fd_bufsize);
maxfd = manager->maxfd + 1;
UNLOCK(&manager->lock);
- cc = select(maxfd, &readfds, &writefds, NULL, NULL);
+ cc = select(maxfd, manager->read_fds_copy,
+ manager->write_fds_copy, NULL, NULL);
if (cc < 0) {
if (!SOFT_ERROR(errno)) {
isc__strerror(errno, strbuf,
/*
* Process reads on internal, control fd.
*/
- if (FD_ISSET(ctlfd, &readfds)) {
+ if (FD_ISSET(ctlfd, manager->read_fds_copy)) {
for (;;) {
select_readmsg(manager, &fd, &msg);
}
}
- process_fds(manager, maxfd, &readfds, &writefds);
+ process_fds(manager, maxfd, manager->read_fds_copy,
+ manager->write_fds_copy);
}
manager_log(manager, TRACE,
manager->reserved = reserved;
}
+/*
+ * Initialize fdsets in socketmgr structure.
+ */
+static isc_result_t
+create_fdsets(isc_socketmgr_t *manager, isc_mem_t *mctx) {
+#if ISC_SOCKET_FDSETSIZE > FD_SETSIZE
+ manager->fdsize = ISC_SOCKET_FDSETSIZE;
+ manager->fd_bufsize = howmany(ISC_SOCKET_FDSETSIZE, NFDBITS) *
+ sizeof(fd_mask);
+#else
+ manager->fdsize = FD_SETSIZE;
+ manager->fd_bufsize = sizeof(fd_set);
+#endif
+
+ manager->fds = NULL;
+ manager->fdstate = NULL;
+ manager->read_fds = NULL;
+ manager->read_fds_copy = NULL;
+ manager->write_fds = NULL;
+ manager->write_fds_copy = NULL;
+
+ manager->fds = isc_mem_get(mctx,
+ manager->fdsize * sizeof(manager->fds[0]));
+ if (manager->fds == NULL)
+ goto fail;
+
+ manager->fdstate = isc_mem_get(mctx, manager->fdsize *
+ sizeof(manager->fdstate[0]));
+ if (manager->fdstate == NULL)
+ goto fail;
+
+ manager->read_fds = isc_mem_get(mctx, manager->fd_bufsize);
+ if (manager->read_fds == NULL)
+ goto fail;
+ manager->read_fds_copy = isc_mem_get(mctx, manager->fd_bufsize);
+ if (manager->read_fds_copy == NULL)
+ goto fail;
+ manager->write_fds = isc_mem_get(mctx, manager->fd_bufsize);
+ if (manager->write_fds == NULL)
+ goto fail;
+ manager->write_fds_copy = isc_mem_get(mctx, manager->fd_bufsize);
+ if (manager->write_fds_copy == NULL)
+ goto fail;
+
+ return (ISC_R_SUCCESS);
+
+ fail:
+ cleanup_fdsets(manager, mctx);
+ return (ISC_R_NOMEMORY);
+}
+
+/*
+ * Clean up fdsets in socketmgr structure.
+ */
+static void
+cleanup_fdsets(isc_socketmgr_t *manager, isc_mem_t *mctx) {
+ if (manager->fds != NULL) {
+ isc_mem_put(mctx, manager->fds,
+ manager->fdsize * sizeof(manager->fds[0]));
+ }
+ if (manager->fdstate != NULL) {
+ isc_mem_put(mctx, manager->fdstate,
+ manager->fdsize * sizeof(manager->fdstate[0]));
+ }
+ if (manager->read_fds != NULL)
+ isc_mem_put(mctx, manager->read_fds, manager->fd_bufsize);
+ if (manager->read_fds_copy != NULL)
+ isc_mem_put(mctx, manager->read_fds_copy, manager->fd_bufsize);
+ if (manager->write_fds != NULL)
+ isc_mem_put(mctx, manager->write_fds, manager->fd_bufsize);
+ if (manager->write_fds_copy != NULL)
+ isc_mem_put(mctx, manager->write_fds_copy, manager->fd_bufsize);
+}
+
/*
* Create a new socket manager.
*/
#ifdef ISC_PLATFORM_USETHREADS
char strbuf[ISC_STRERRORSIZE];
#endif
+ isc_result_t result;
REQUIRE(managerp != NULL && *managerp == NULL);
if (manager == NULL)
return (ISC_R_NOMEMORY);
+ result = create_fdsets(manager, mctx);
+ if (result != ISC_R_SUCCESS) {
+ cleanup_fdsets(manager, mctx);
+ isc_mem_put(mctx, manager, sizeof(*manager));
+ return (result);
+ }
+
manager->magic = SOCKET_MANAGER_MAGIC;
manager->mctx = NULL;
- memset(manager->fds, 0, sizeof(manager->fds));
ISC_LIST_INIT(manager->socklist);
- if (isc_mutex_init(&manager->lock) != ISC_R_SUCCESS) {
+ result = isc_mutex_init(&manager->lock);
+ if (result != ISC_R_SUCCESS) {
+ cleanup_fdsets(manager, mctx);
isc_mem_put(mctx, manager, sizeof(*manager));
UNEXPECTED_ERROR(__FILE__, __LINE__,
"isc_mutex_init() %s",
}
#ifdef ISC_PLATFORM_USETHREADS
if (isc_condition_init(&manager->shutdown_ok) != ISC_R_SUCCESS) {
+ cleanup_fdsets(manager, mctx);
DESTROYLOCK(&manager->lock);
isc_mem_put(mctx, manager, sizeof(*manager));
UNEXPECTED_ERROR(__FILE__, __LINE__,
* select/poll loop when something internal needs to be done.
*/
if (pipe(manager->pipe_fds) != 0) {
+ cleanup_fdsets(manager, mctx);
DESTROYLOCK(&manager->lock);
isc_mem_put(mctx, manager, sizeof(*manager));
isc__strerror(errno, strbuf, sizeof(strbuf));
/*
* Set up initial state for the select loop
*/
- FD_ZERO(&manager->read_fds);
- FD_ZERO(&manager->write_fds);
+ memset(manager->read_fds, 0, manager->fd_bufsize);
+ memset(manager->write_fds, 0, manager->fd_bufsize);
#ifdef ISC_PLATFORM_USETHREADS
- FD_SET(manager->pipe_fds[0], &manager->read_fds);
+ FD_SET(manager->pipe_fds[0], manager->read_fds);
manager->maxfd = manager->pipe_fds[0];
#else /* ISC_PLATFORM_USETHREADS */
manager->maxfd = 0;
#endif /* ISC_PLATFORM_USETHREADS */
manager->reserved = 0;
- memset(manager->fdstate, 0, sizeof(manager->fdstate));
+ memset(manager->fdstate, 0,
+ manager->fdsize * sizeof(manager->fdstate[0]));
#ifdef ISC_PLATFORM_USETHREADS
/*
(void)isc_condition_destroy(&manager->shutdown_ok);
#endif /* ISC_PLATFORM_USETHREADS */
- for (i = 0; i < (int)FD_SETSIZE; i++)
+ for (i = 0; i < (int)manager->fdsize; i++)
if (manager->fdstate[i] == CLOSE_PENDING)
(void)close(i);
DESTROYLOCK(&manager->lock);
+ cleanup_fdsets(manager, manager->mctx);
manager->magic = 0;
mctx= manager->mctx;
isc_mem_put(mctx, manager, sizeof(*manager));
#ifndef ISC_PLATFORM_USETHREADS
void
-isc__socketmgr_getfdsets(fd_set *readset, fd_set *writeset, int *maxfd) {
+isc__socketmgr_getfdsets(fd_set **readset, fd_set **writeset, int *maxfd) {
if (socketmgr == NULL)
*maxfd = 0;
else {
- *readset = socketmgr->read_fds;
- *writeset = socketmgr->write_fds;
+ /* Prepare duplicates of fd_sets, as select() will modify */
+ memcpy(socketmgr->read_fds_copy, socketmgr->read_fds,
+ socketmgr->fd_bufsize);
+ memcpy(socketmgr->write_fds_copy, socketmgr->write_fds,
+ socketmgr->fd_bufsize);
+ *readset = socketmgr->read_fds_copy;
+ *writeset = socketmgr->write_fds_copy;
*maxfd = socketmgr->maxfd + 1;
}
}