knot/common/evsched.h \
knot/common/fdset.c \
knot/common/fdset.h \
- knot/common/epoll_set.c \
- knot/common/epoll_set.h \
- knot/common/aioset.c \
- knot/common/aioset.h \
+ knot/common/kqueueset.c \
+ knot/common/kqueueset.h \
knot/common/log.c \
knot/common/log.h \
knot/common/process.c \
+++ /dev/null
-/* Copyright (C) 2011 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz>
-
- This program is free software: you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation, either version 3 of the License, or
- (at your option) any later version.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see <https://www.gnu.org/licenses/>.
- */
-
-#include <stdlib.h>
-#include <string.h>
-#include <unistd.h>
-#include <time.h>
-#include <assert.h>
-#include <linux/aio_abi.h>
-#include <sys/syscall.h>
-#include "knot/common/aioset.h"
-#include "contrib/time.h"
-#include "libknot/errcode.h"
-
-inline static int io_setup(unsigned nr, aio_context_t *ctxp)
-{
- return syscall(__NR_io_setup, nr, ctxp);
-}
-
-inline static int io_destroy(aio_context_t ctx)
-{
- return syscall(__NR_io_destroy, ctx);
-}
-
-inline static int io_submit(aio_context_t ctx, long nr, struct iocb **iocbpp)
-{
- return syscall(__NR_io_submit, ctx, nr, iocbpp);
-}
-
-#define AIO_RING_MAGIC 0xa10a10a1
-struct aio_ring {
- unsigned id; /** kernel internal index number */
- unsigned nr; /** number of io_events */
- unsigned head;
- unsigned tail;
-
- unsigned magic;
- unsigned compat_features;
- unsigned incompat_features;
- unsigned header_length; /** size of aio_ring */
-
- struct io_event events[0];
-};
-
-/* Stolen from kernel arch/x86_64.h */
-#ifdef __x86_64__
-#define read_barrier() __asm__ __volatile__("lfence" ::: "memory")
-#else
-#ifdef __i386__
-#define read_barrier() __asm__ __volatile__("" : : : "memory")
-#else
-#define read_barrier() __sync_synchronize()
-#endif
-#endif
-
-/* Code based on axboe/fio:
- * https://github.com/axboe/fio/blob/702906e9e3e03e9836421d5e5b5eaae3cd99d398/engines/libaio.c#L149-L172
- */
-inline static int io_getevents(aio_context_t ctx, long min_nr, long max_nr,
- struct io_event *events,
- struct timespec *timeout)
-{
- int i = 0;
-
- struct aio_ring *ring = (struct aio_ring *)ctx;
- if (ring == NULL || ring->magic != AIO_RING_MAGIC) {
- goto do_syscall;
- }
-
- while (i < max_nr) {
- unsigned head = ring->head;
- if (head == ring->tail) {
- /* There are no more completions */
- break;
- } else {
- /* There is another completion to reap */
- events[i] = ring->events[head];
- read_barrier();
- ring->head = (head + 1) % ring->nr;
- i++;
- }
- }
-
- if (i == 0 && timeout != NULL && timeout->tv_sec == 0 &&
- timeout->tv_nsec == 0) {
- /* Requested non blocking operation. */
- return 0;
- }
-
- if (i && i >= min_nr) {
- return i;
- }
-
-do_syscall:
- return syscall(__NR_io_getevents, ctx, min_nr - i, max_nr - i,
- &events[i], timeout);
-}
-
-#ifndef IOCB_CMD_POLL
-#define IOCB_CMD_POLL 5 /* from 4.18 */
-#endif
-
-/* Realloc memory or return error (part of fdset_resize). */
-#define MEM_RESIZE(tmp, p, n) \
- if ((tmp = realloc((p), (n) * sizeof(*p))) == NULL) \
- return KNOT_ENOMEM; \
- (p) = tmp;
-
-static int epoll_set_resize(aioset_t *set, unsigned size)
-{
- void *tmp = NULL;
- MEM_RESIZE(tmp, set->usrctx, size);
- MEM_RESIZE(tmp, set->timeout, size);
- MEM_RESIZE(tmp, set->ev, size);
- set->size = size;
- return KNOT_EOK;
-}
-
-int aioset_init(aioset_t *set, unsigned size)
-{
- if (set == NULL) {
- return KNOT_EINVAL;
- }
-
- memset(set, 0, sizeof(aioset_t));
-
- int ret = io_setup(128, &set->ctx);
- if (ret < 0) {
- return KNOT_ENOMEM;
- }
-
- return epoll_set_resize(set, size);
-}
-
-int aioset_clear(aioset_t* set)
-{
- if (set == NULL) {
- return KNOT_EINVAL;
- }
-
- free(set->usrctx);
- free(set->timeout);
- free(set->ev);
- memset(set, 0, sizeof(aioset_t));
- return KNOT_EOK;
-}
-
-void aioset_close(aioset_t* set)
-{
- io_destroy(set->ctx);
-}
-
-int aioset_add(aioset_t *set, int fd, unsigned events, void *ctx)
-{
- if (set == NULL || fd < 0) {
- return KNOT_EINVAL;
- }
-
- /* Realloc needed. */
- if (set->n == set->size && epoll_set_resize(set, set->size + FDSET_INIT_SIZE))
- return KNOT_ENOMEM;
-
- /* Initialize. */
- int i = set->n++;
- set->ev[i].aio_fildes = fd;
- set->ev[i].aio_lio_opcode = IOCB_CMD_POLL;
- set->ev[i].aio_buf = events;
- set->usrctx[i] = ctx;
- set->timeout[i] = 0;
- return i;
-}
-
-int aioset_wait(aioset_t *set, struct io_event *ev, size_t ev_size, struct timespec *timeout)
-{
- struct iocb *list_of_iocb[set->n];
- for (int i = 0; i < set->n; ++i) {
- list_of_iocb[i] = &(set->ev[i]);
- }
-
- int ret = io_submit(set->ctx, set->n, list_of_iocb);
- if (ret < 0) {
- return ret;
- }
- ret = io_getevents(set->ctx, 1, set->n, ev, timeout);
-
- return ret;
-}
-
-int aioset_remove(aioset_t *set, unsigned i)
-{
- if (set == NULL || i >= set->n) {
- return KNOT_EINVAL;
- }
-
- /* Decrement number of elms. */
- --set->n;
-
- /* Nothing else if it is the last one.
- * Move last -> i if some remain. */
- unsigned last = set->n; /* Already decremented */
- if (i < last) {
- set->ev[i] = set->ev[last];
- set->timeout[i] = set->timeout[last];
- set->usrctx[i] = set->usrctx[last];
- }
-
- return KNOT_EOK;
-}
-
-int aioset_set_watchdog(aioset_t* set, int i, int interval)
-{
- if (set == NULL || i >= set->n) {
- return KNOT_EINVAL;
- }
-
- /* Lift watchdog if interval is negative. */
- if (interval < 0) {
- set->timeout[i] = 0;
- return KNOT_EOK;
- }
-
- /* Update clock. */
- struct timespec now = time_now();
-
- set->timeout[i] = now.tv_sec + interval; /* Only seconds precision. */
- return KNOT_EOK;
-}
-
-int aioset_sweep(aioset_t* set, epoll_set_sweep_cb_t cb, void *data)
-{
- if (set == NULL || cb == NULL) {
- return KNOT_EINVAL;
- }
-
- /* Get time threshold. */
- struct timespec now = time_now();
-
- unsigned i = 0;
- while (i < set->n) {
-
- /* Check sweep state, remove if requested. */
- if (set->timeout[i] > 0 && set->timeout[i] <= now.tv_sec) {
- if (cb(set, i, data) == EPOLL_SET_SWEEP) {
- if (aioset_remove(set, i) == KNOT_EOK)
- continue; /* Stay on the index. */
- }
- }
-
- /* Next descriptor. */
- ++i;
- }
-
- return KNOT_EOK;
-}
--- /dev/null
+/* Copyright (C) 2011 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz>
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <time.h>
+#include <assert.h>
+#include <poll.h>
+#include <sys/types.h>
+#include <sys/event.h>
+#include "knot/common/kqueueset.h"
+#include "contrib/time.h"
+#include "libknot/errcode.h"
+
+/* Realloc memory or return error (part of fdset_resize). */
+#define MEM_RESIZE(tmp, p, n) \
+ if ((tmp = realloc((p), (n) * sizeof(*p))) == NULL) \
+ return KNOT_ENOMEM; \
+ (p) = tmp;
+
+static int epoll_set_resize(kqueueset_t *set, unsigned size)
+{
+ void *tmp = NULL;
+ MEM_RESIZE(tmp, set->usrctx, size);
+ MEM_RESIZE(tmp, set->timeout, size);
+ MEM_RESIZE(tmp, set->ev, size);
+ set->size = size;
+ return KNOT_EOK;
+}
+
+int aioset_init(kqueueset_t *set, unsigned size)
+{
+ if (set == NULL) {
+ return KNOT_EINVAL;
+ }
+
+ memset(set, 0, sizeof(kqueueset_t));
+
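+	/* Create the kqueue instance; its descriptor is kept as the set context. */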
+ set->ctx = kqueue();
+ if (set->ctx < 0) {
+ return KNOT_ENOMEM;
+ }
+
+ return epoll_set_resize(set, size);
+}
+
+int aioset_clear(kqueueset_t* set)
+{
+ if (set == NULL) {
+ return KNOT_EINVAL;
+ }
+
+ free(set->usrctx);
+ free(set->timeout);
+ free(set->ev);
+ memset(set, 0, sizeof(kqueueset_t));
+ return KNOT_EOK;
+}
+
+void aioset_close(kqueueset_t* set)
+{
+	close(set->ctx);
+}
+
+int aioset_add(kqueueset_t *set, int fd, unsigned events, void *ctx)
+{
+ if (set == NULL || fd < 0) {
+ return KNOT_EINVAL;
+ }
+
+ if (events != POLLIN) {
+ return KNOT_ENOTSUP;
+ }
+
+ /* Realloc needed. */
+ if (set->n == set->size && epoll_set_resize(set, set->size + FDSET_INIT_SIZE))
+ return KNOT_ENOMEM;
+
+ /* Initialize. */
+ int i = set->n++;
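+	/* Prepare a read-filter kevent; the slot index is stored in udata so a
+	 * received event maps back to this set entry. */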
+ EV_SET(&set->ev[i], fd, EVFILT_READ, EV_ADD|EV_ENABLE, 0, 0, (void*)(intptr_t)i);
+ set->usrctx[i] = ctx;
+ set->timeout[i] = 0;
+	/* Register only the new entry with the kqueue. */
+	if (kevent(set->ctx, &set->ev[i], 1, NULL, 0, NULL) < 0) {
+		--set->n;
+		return KNOT_ERROR;
+	}
+ return i;
+}
+
+int aioset_wait(kqueueset_t *set, struct kevent *ev, size_t ev_size, int timeout)
+{
+ struct timespec to = {
+ .tv_sec = timeout,
+ .tv_nsec = 0
+ };
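+	/* A zero timeout passes a NULL timespec, i.e. blocks until an event arrives. */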
+ return kevent(set->ctx, NULL, 0, ev, ev_size, (timeout > 0) ? &to : NULL);
+}
+
+int aioset_remove(kqueueset_t *set, unsigned i)
+{
+ if (set == NULL || i >= set->n) {
+ return KNOT_EINVAL;
+ }
+
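+	/* The removed descriptor is not EV_DELETEd here: the caller is expected
+	 * to close() it, which drops its kqueue registration automatically. */
+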
+ /* Decrement number of elms. */
+ --set->n;
+
+ /* Nothing else if it is the last one.
+ * Move last -> i if some remain. */
+ unsigned last = set->n; /* Already decremented */
+ if (i < last) {
+		set->timeout[i] = set->timeout[last];
+		set->usrctx[i] = set->usrctx[last];
+		/* Re-register the moved descriptor so its udata points at the new slot. */
+		EV_SET(&set->ev[i], set->ev[last].ident, EVFILT_READ, EV_ADD|EV_ENABLE, 0, 0, (void*)(intptr_t)i);
+		kevent(set->ctx, &set->ev[i], 1, NULL, 0, NULL);
+	}
+
+ return KNOT_EOK;
+}
+
+int aioset_set_watchdog(kqueueset_t* set, int i, int interval)
+{
+ if (set == NULL || i >= set->n) {
+ return KNOT_EINVAL;
+ }
+
+ /* Lift watchdog if interval is negative. */
+ if (interval < 0) {
+ set->timeout[i] = 0;
+ return KNOT_EOK;
+ }
+
+ /* Update clock. */
+ struct timespec now = time_now();
+
+ set->timeout[i] = now.tv_sec + interval; /* Only seconds precision. */
+ return KNOT_EOK;
+}
+
+int aioset_sweep(kqueueset_t* set, epoll_set_sweep_cb_t cb, void *data)
+{
+ if (set == NULL || cb == NULL) {
+ return KNOT_EINVAL;
+ }
+
+ /* Get time threshold. */
+ struct timespec now = time_now();
+
+ unsigned i = 0;
+ while (i < set->n) {
+
+ /* Check sweep state, remove if requested. */
+ if (set->timeout[i] > 0 && set->timeout[i] <= now.tv_sec) {
+ if (cb(set, i, data) == EPOLL_SET_SWEEP) {
+ if (aioset_remove(set, i) == KNOT_EOK)
+ continue; /* Stay on the index. */
+ }
+ }
+
+ /* Next descriptor. */
+ ++i;
+ }
+
+ return KNOT_EOK;
+}
#include <stddef.h>
#include <signal.h>
-#include <sys/epoll.h>
#include <sys/time.h>
-#include <linux/aio_abi.h>
+#include <sys/types.h>
+#include <sys/event.h>
#define FDSET_INIT_SIZE 256 /* Resize step. */
/*! \brief Set of filedescriptors with associated context and timeouts. */
-typedef struct aioset {
- aio_context_t ctx;
+typedef struct kqueueset {
+	int ctx;	/*!< Kqueue descriptor. */
unsigned n; /*!< Active fds. */
unsigned size; /*!< Array size (allocated). */
- struct iocb *ev; /*!< Epoll event storage for each fd */
+	struct kevent *ev;	/*!< Kevent storage for each fd. */
void* *usrctx; /*!< Context for each fd. */
time_t *timeout; /*!< Timeout for each fd (seconds precision). */
-} aioset_t;
+} kqueueset_t;
/*! \brief Mark-and-sweep state. */
enum epoll_set_sweep_state {
};
/*! \brief Sweep callback (set, index, data) */
-typedef enum epoll_set_sweep_state (*epoll_set_sweep_cb_t)(aioset_t*, int, void*);
+typedef enum epoll_set_sweep_state (*epoll_set_sweep_cb_t)(kqueueset_t*, int, void*);
/*!
* \brief Initialize fdset to given size.
*/
-int aioset_init(aioset_t *set, unsigned size);
+int aioset_init(kqueueset_t *set, unsigned size);
/*!
* \brief Destroy FDSET.
* \retval 0 if successful.
* \retval -1 on error.
*/
-int aioset_clear(aioset_t* set);
+int aioset_clear(kqueueset_t* set);
-void aioset_close(aioset_t* set);
+void aioset_close(kqueueset_t* set);
/*!
* \brief Add file descriptor to watched set.
* \retval index of the added fd if successful.
* \retval -1 on errors.
*/
-int aioset_add(aioset_t *set, int fd, unsigned events, void *ctx);
+int aioset_add(kqueueset_t *set, int fd, unsigned events, void *ctx);
/*!
* \brief Remove file descriptor from watched set.
* \retval 0 if successful.
* \retval -1 on errors.
*/
-int aioset_remove(aioset_t *set, unsigned i);
+int aioset_remove(kqueueset_t *set, unsigned i);
-int aioset_wait(aioset_t *set, struct io_event *ev, size_t ev_size, struct timespec *timeout);
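+/*!
+ * \brief Wait for events on watched descriptors.
+ *
+ * \param timeout Timeout in seconds (0 blocks until an event arrives).
+ *
+ * \retval number of received events if successful.
+ * \retval -1 on errors.
+ */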
+int aioset_wait(kqueueset_t *set, struct kevent *ev, size_t ev_size, int timeout);
/*!
* \brief Set file descriptor watchdog interval.
* \retval 0 if successful.
* \retval -1 on errors.
*/
-int aioset_set_watchdog(aioset_t* set, int i, int interval);
+int aioset_set_watchdog(kqueueset_t* set, int i, int interval);
/*!
* \brief Sweep file descriptors with exceeding inactivity period.
* \retval number of sweeped descriptors.
* \retval -1 on errors.
*/
-int aioset_sweep(aioset_t* set, epoll_set_sweep_cb_t cb, void *data);
+int aioset_sweep(kqueueset_t* set, epoll_set_sweep_cb_t cb, void *data);
#include "knot/server/server.h"
#include "knot/server/tcp-handler.h"
#include "knot/common/log.h"
-#include "knot/common/aioset.h"
+#include "knot/common/kqueueset.h"
#include "knot/nameserver/process_query.h"
#include "knot/query/layer.h"
#include "contrib/macros.h"
unsigned client_threshold; /*!< Index of first TCP client. */
struct timespec last_poll_time; /*!< Time of the last socket poll. */
bool is_throttled; /*!< TCP connections throttling switch. */
- aioset_t set; /*!< Set of server/client sockets. */
+ kqueueset_t set; /*!< Set of server/client sockets. */
unsigned thread_id; /*!< Thread identifier. */
unsigned max_worker_fds; /*!< Max TCP clients per worker configuration + no. of ifaces. */
int idle_timeout; /*!< [s] TCP idle timeout configuration. */
}
/*! \brief Sweep TCP connection. */
-static enum epoll_set_sweep_state tcp_sweep(aioset_t *set, int i, void *data)
+static enum epoll_set_sweep_state tcp_sweep(kqueueset_t *set, int i, void *data)
{
UNUSED(data);
assert(set && i < set->n && i >= 0);
- int fd = set->ev[i].aio_fildes;
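+	/* kevent.ident carries the watched file descriptor. */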
+ int fd = (int)(intptr_t)set->ev[i].ident;
/* Best-effort, name and shame. */
struct sockaddr_storage ss;
}
static unsigned tcp_set_ifaces(const iface_t *ifaces, size_t n_ifaces,
- aioset_t *fds, int thread_id)
+ kqueueset_t *fds, int thread_id)
{
if (n_ifaces == 0) {
return 0;
static void tcp_event_accept(tcp_context_t *tcp, unsigned i)
{
/* Accept client. */
- int fd = tcp->set.ev[i].aio_fildes;
+ int fd = (int)(intptr_t)tcp->set.ev[i].ident;
int client = net_accept(fd, NULL);
if (client >= 0) {
/* Assign to fdset. */
static int tcp_event_serve(tcp_context_t *tcp, unsigned i)
{
- int fd = tcp->set.ev[i].aio_fildes;
+ int fd = (int)(intptr_t)tcp->set.ev[i].ident;
int ret = tcp_handle(tcp, fd, &tcp->iov[0], &tcp->iov[1]);
if (ret == KNOT_EOK) {
/* Update socket activity timer. */
static void tcp_wait_for_events(tcp_context_t *tcp)
{
- aioset_t *set = &tcp->set;
+ kqueueset_t *set = &tcp->set;
/* Check if throttled with many open TCP connections. */
assert(set->n <= tcp->max_worker_fds);
tcp->is_throttled = set->n == tcp->max_worker_fds;
/* Wait for events. */
- struct io_event events[set->n];
- struct timespec timeout = {
- .tv_nsec = 0,
- .tv_sec = TCP_SWEEP_INTERVAL
- };
- int nfds = aioset_wait(set, events, set->n, &timeout);
+ struct kevent events[set->n];
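+	/* TCP_SWEEP_INTERVAL is the wait timeout in seconds. */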
+ int nfds = aioset_wait(set, events, set->n, TCP_SWEEP_INTERVAL);
/* Mark the time of last poll call. */
tcp->last_poll_time = time_now();
/* Process events. */
- for (struct io_event *it = events; nfds > 0; ++it) {
- struct iocb *dit = (struct iocb *)it->obj;
+ for (struct kevent *it = events; nfds > 0; ++it) {
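+		/* udata carries the descriptor's index within the kqueueset arrays. */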
bool should_close = false;
- if (dit->aio_buf & (POLLERR|POLLHUP|POLLNVAL)) {
- should_close = (dit - set->ev >= tcp->client_threshold);
+		if (it->flags & (EV_ERROR | EV_EOF)) {
+ should_close = ((int)(intptr_t)it->udata >= tcp->client_threshold);
--nfds;
- } else if (dit->aio_buf & (POLLIN)) {
+		} else if (it->filter == EVFILT_READ) {
/* Master sockets - new connection to accept. */
- if (dit - set->ev < tcp->client_threshold) {
+ if ((int)(intptr_t)it->udata < tcp->client_threshold) {
/* Don't accept more clients than configured. */
if (set->n < tcp->max_worker_fds) {
- tcp_event_accept(tcp, dit - set->ev);
+ tcp_event_accept(tcp, (int)(intptr_t)it->udata);
}
/* Client sockets - already accepted connection or
closed connection :-( */
- } else if (tcp_event_serve(tcp, dit - set->ev) != KNOT_EOK) {
+ } else if (tcp_event_serve(tcp, (int)(intptr_t)it->udata) != KNOT_EOK) {
should_close = true;
}
--nfds;
/* Evaluate. */
if (should_close) {
- close(dit->aio_fildes);
- aioset_remove(set, dit - set->ev);
+			close((int)it->ident);
+ aioset_remove(set, (int)(intptr_t)it->udata);
}
}
}
#include "knot/query/layer.h"
#include "knot/server/server.h"
#include "knot/server/udp-handler.h"
-#include "knot/common/aioset.h"
+#include "knot/common/kqueueset.h"
/* Buffer identifiers. */
enum {
}
}
-static unsigned udp_set_ifaces(const iface_t *ifaces, size_t n_ifaces, aioset_t *fds,
+static unsigned udp_set_ifaces(const iface_t *ifaces, size_t n_ifaces, kqueueset_t *fds,
int thread_id, void **xdp_socket)
{
if (n_ifaces == 0) {
/* Allocate descriptors for the configured interfaces. */
void *xdp_socket = NULL;
size_t nifs = handler->server->n_ifaces;
- aioset_t epollset;
+ kqueueset_t epollset;
aioset_init(&epollset, nifs);
unsigned fds = udp_set_ifaces(handler->server->ifaces, nifs, &epollset,
thread_id, &xdp_socket);
}
/* Wait for events. */
- struct io_event events[epollset.n];
- int nfds = aioset_wait(&epollset, events, epollset.n, NULL);
+ struct kevent events[epollset.n];
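+	/* A zero timeout blocks until at least one datagram is ready. */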
+ int nfds = aioset_wait(&epollset, events, epollset.n, 0);
if (nfds < 0) {
if (errno == EINTR || errno == EAGAIN) {
continue;
}
/* Process the events. */
- for (struct io_event *it = events; nfds > 0; ++it) {
- struct iocb *dit = (struct iocb *)it->obj;
- if (api->udp_recv(dit->aio_fildes, rq, xdp_socket) > 0) {
+ for (struct kevent *it = events; nfds > 0; ++it) {
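+		/* kevent.ident holds the ready socket descriptor. */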
+ if (api->udp_recv((int)(intptr_t)it->ident, rq, xdp_socket) > 0) {
api->udp_handle(&udp, rq, xdp_socket);
api->udp_send(rq, xdp_socket);
}