From: Daniel Salzman
Date: Mon, 6 Oct 2025 13:18:12 +0000 (+0200)
Subject: init
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=1a380ecf899c26b384ba9e11c6068354acbf8028;p=thirdparty%2Fknot-dns.git

init
---

diff --git a/configure.ac b/configure.ac
index afe3e276dc..be6e1162a9 100644
--- a/configure.ac
+++ b/configure.ac
@@ -386,12 +386,15 @@ static_modules_declars=""
 static_modules_init=""
 doc_modules=""
 
+KNOT_MODULE([azuredb], "yes")
 KNOT_MODULE([cookies], "yes")
 KNOT_MODULE([dnsproxy], "yes", "non-shareable")
 KNOT_MODULE([dnstap], "no")
+KNOT_MODULE([delay], "no")
 KNOT_MODULE([geoip], "yes")
 KNOT_MODULE([noudp], "yes")
 KNOT_MODULE([onlinesign], "yes", "non-shareable")
+KNOT_MODULE([azurednssec], "yes")
 KNOT_MODULE([probe], "yes")
 KNOT_MODULE([queryacl], "yes")
 KNOT_MODULE([rrl], "yes")
@@ -699,12 +702,30 @@ AC_LINK_IFELSE([AC_LANG_PROGRAM([[#include <sched.h>]], [[cpuset_t* set = cpuset
 	[AC_DEFINE(HAVE_CPUSET_NETBSD, 1, [Define if cpuset_t and cpuset(3) exists.])])
 
 # Check for '__atomic' compiler builtin atomic functions.
+AC_SEARCH_LIBS([__atomic_compare_exchange_8], [atomic], [], [])
 AC_LINK_IFELSE(
     [AC_LANG_PROGRAM([[#include <stdint.h>]],
-                     [[uint64_t val = 0; __atomic_add_fetch(&val, 1, __ATOMIC_RELAXED);]])],
-    [AC_DEFINE(HAVE_ATOMIC, 1, [Define to 1 if you have '__atomic' functions.])]
+                     [[uint64_t val = 0;
+                       __atomic_store_n(&val, 0UL, __ATOMIC_SEQ_CST);
+                       uint64_t expect = __atomic_add_fetch(&val, 1, __ATOMIC_RELAXED);
+                       __atomic_compare_exchange(&val, &expect, &expect, 0, __ATOMIC_ACQ_REL, __ATOMIC_CONSUME);
+                     ]])],
+    [
+        AC_DEFINE(HAVE_ATOMIC, 1, [Define to 1 if you have '__atomic' functions.])
+        have_atomic=yes
+        have_atomic8_primitives=yes
+    ]
 )
 
+AC_CHECK_HEADER([stdalign.h], [AC_DEFINE(HAVE_STDALIGN, 1, [Define to 1 if you have stdalign.h])])
+AC_CHECK_HEADER([stdatomic.h], [
+    AC_DEFINE(HAVE_STDATOMIC, 1, [Define to 1 if you have stdatomic.h])
+    have_stdatomic=yes
+    have_atomic8_primitives=yes
+])
+
+AM_CONDITIONAL([HAVE_ATOMIC_PRIMITIVES], [test "x$have_atomic8_primitives" = "xyes"])
+
 # Check for '__sync' compiler builtin atomic functions.
 AC_LINK_IFELSE(
     [AC_LANG_PROGRAM([[#include <stdint.h>]],
@@ -745,6 +766,141 @@ AM_CONDITIONAL([HAVE_MAKEINFO], test "$MAKEINFO" != "false")
 filtered_cflags=$(echo -n "$CFLAGS" | sed 's/\W-f\S*-prefix-map=\S*\W/ /g')
 filtered_cppflags=$(echo -n "$CPPFLAGS" | sed 's/\W-f\S*-prefix-map=\S*\W/ /g')
 
+AC_ARG_WITH([memcheck],
+    AC_HELP_STRING([--with-memcheck=auto|yes|no|memcheck_include_folder], [Build with the memcheck API. If valgrind/memcheck.h is not in the include path, define the include path in CFLAGS or pass the valgrind/memcheck.h include folder as this parameter. [default=no]]),
+    [with_memcheck=$withval], [with_memcheck=no])
+
+AS_CASE([$with_memcheck],
+    [no], [],
+    [auto], [
+        AC_CHECK_HEADER([valgrind/memcheck.h], [with_memcheck=yes], [with_memcheck=no])
+    ],
+    [yes|*], [
+        AS_IF([test "x$with_memcheck" != "xyes"], [AC_SUBST(CPPFLAGS, "$CPPFLAGS -I$with_memcheck")])
+        AC_CHECK_HEADER([valgrind/memcheck.h], [], [AC_MSG_ERROR([--with-memcheck requires the valgrind/memcheck.h header file. If valgrind/memcheck.h is not in the include path, define the include path in CFLAGS or pass the valgrind/memcheck.h include folder to the --with-memcheck parameter.])])
+        AC_DEFINE_UNQUOTED([KNOT_ENABLE_MEMCHECK],[1], [Memcheck enabled for this build (1)])
+        with_memcheck=yes
+    ])
+
+AC_ARG_WITH([numa],
+    AC_HELP_STRING([--with-numa=yes|no|numa_include_folder], [Build with the NUMA API.
If numa.h is not in the include path, define the include path in CFLAGS or pass the numa.h include folder as this parameter. [default=no]]),
+    [with_numa=$withval], [with_numa=no])
+
+AS_CASE([$with_numa],
+    [no], [],
+    [yes|*], [
+        AC_SEARCH_LIBS([numa_node_of_cpu], [numa],
+            [
+                AC_DEFINE_UNQUOTED([KNOT_ENABLE_NUMA],[1], [NUMA enabled for this build (1)])
+                with_numa=yes
+            ],
+            [AC_MSG_ERROR([--with-numa requires the numa.h header file. If numa.h is not in the include path, define the include path in CFLAGS or pass the numa.h include folder to the --with-numa parameter.])],
+            [])
+    ])
+
+AC_ARG_WITH([udp-max-size],
+    AC_HELP_STRING([--with-udp-max-size=size], [Maximum supported UDP request/response size. [default=65535]]),
+    [udp_max_size=$withval], [udp_max_size=65535])
+AC_DEFINE_UNQUOTED([KNOT_MAX_UDP_REQRESP_SIZE_BYTES],[$udp_max_size], [Value of the UDP max message size])
+
+AC_ARG_ENABLE([throttle-dnstap-logs],
+    AS_HELP_STRING([--enable-throttle-dnstap-logs=auto|yes|no], [enable throttling support for dnstap logs [default=auto]]),
+    [enable_throttle_dnstap_logs="$enableval"], [enable_throttle_dnstap_logs=auto])
+
+AS_CASE([$enable_throttle_dnstap_logs],
+    [auto],[
+        AS_IF([test "x$have_atomic8_primitives" = xyes], [
+            AC_DEFINE([ENABLE_THROTTLE_DNSTAP_LOGS], [1], [Define to 1 to enable support for throttling dnstap logs.])
+            enable_throttle_dnstap_logs=yes
+        ],
+        [enable_throttle_dnstap_logs=no])
+    ],
+    [yes],[
+        AS_IF([test "x$have_atomic8_primitives" = xyes], [
+            AC_DEFINE([ENABLE_THROTTLE_DNSTAP_LOGS], [1], [Define to 1 to enable support for throttling dnstap logs.])
+        ],
+        [AC_MSG_ERROR([Throttling support needs __atomic or stdatomic support])])
+    ],
+    [no],[],
+    [*], [AC_MSG_ERROR([Invalid value of --enable-throttle-dnstap-logs.]
+    )])
+
+AC_ARG_ENABLE([async-query],
+    AS_HELP_STRING([--enable-async-query=yes|no], [Enable asynchronous query handling [default=no]]), [], [enable_async_query=no])
+
+AS_CASE([$enable_async_query],
+    [yes],[
+        AC_DEFINE([ENABLE_ASYNC_QUERY_HANDLING], [], [Enable support for handling queries asynchronously])
+        AS_IF([test "x$have_atomic8_primitives" != "xyes"],
+            [AC_MSG_ERROR([Async query support needs __atomic or stdatomic support])])
+    ],
+    [no],[],
+    [*], [AC_MSG_ERROR([Invalid value of --enable-async-query.]
+    )])
+
+AS_IF([test "x$STATIC_MODULE_delay" = "xyes" -o "x$SHARED_MODULE_delay" = "xyes"],[
+    AS_IF([test "x$enable_async_query" != "xyes"], [
+        AC_MSG_ERROR([The delay module requires async query support in knot. Enable it with --enable-async-query.])
+    ])
+])
+
+AC_ARG_ENABLE([trailing-bytes],
+    AS_HELP_STRING([--enable-trailing-bytes=yes|no], [enable trailing bytes in DNS queries [default=no]]),
+    [enable_trailing_bytes="$enableval"], [enable_trailing_bytes=no])
+
+AS_IF([test "$enable_trailing_bytes" = yes], [AC_DEFINE([ENABLE_TRAILING_BYTES], [1], [Define to 1 to enable support for queries with additional trailing 0 bytes.])])
+
+AC_ARG_WITH([unbound],
+    AC_HELP_STRING([--with-unbound=yes|no|unbound_folder], [Build with the libunbound API. If unbound.h is not in the include path, define the include path in CFLAGS or pass the unbound.h include folder as this parameter. [default=no]]),
+    [with_unbound=$withval], [with_unbound=no])
+
+AS_CASE([$with_unbound],
+    [no], [],
+    [yes|*], [
+        AS_IF([test "x$with_unbound" != "xyes"],
+            [
+                AC_SUBST(CPPFLAGS, "$CPPFLAGS -I$with_unbound/include")
+                AC_SUBST(LDFLAGS, "$LDFLAGS -L$with_unbound/lib")
+            ])
+        AC_CHECK_HEADER([unbound.h], [], [AC_MSG_ERROR([--with-unbound requires the unbound.h header file.
If unbound.h is not in the include path, define the include path in CFLAGS or pass the unbound.h include folder to the --with-unbound parameter.])])
+        AC_SEARCH_LIBS([ub_resolve_async], [unbound], [],
+            [
+                AC_MSG_ERROR([--with-unbound requires the unbound.h header and library files. If unbound.h is not in the include path, define the include path in CFLAGS or pass the unbound.h include folder to the --with-unbound parameter.])
+            ])
+        AC_DEFINE_UNQUOTED([KNOT_ENABLE_UNBOUND],[1], [unbound enabled for this build (1)])
+        with_unbound=yes
+    ])
+
+with_aliasrr=no
+AS_IF([test "x$enable_async_query" = "xyes"] && [test "x$SUPPORTS_MODULE_azuredb" = "xyes"] && [test "x$with_unbound" = "xyes"],
+    [
+        AM_CONDITIONAL([HAVE_ALIASRR], [true])
+        AC_DEFINE_UNQUOTED([KNOT_ENABLE_ALIASRR],[1], [AliasRR enabled for this build (1)])
+        with_aliasrr=yes
+    ],
+    AM_CONDITIONAL([HAVE_ALIASRR], [false]))
+
+# TrafficManager support
+AC_ARG_ENABLE([trafficmanager],
+    AS_HELP_STRING([--enable-trafficmanager=yes|no], [enable the TrafficManager module [default=no]]),
+    [enable_trafficmanager="$enableval"], [enable_trafficmanager=no])
+
+libnethost_CFLAGS=
+libnethost_LIBS=
+trafficmanager_CFLAGS=
+AS_IF([test "$enable_daemon" = "no"],[enable_trafficmanager=no])
+AS_IF([test "$enable_trafficmanager" = yes], [
+    NETCOREAPP_VERSION=$(ls /usr/share/dotnet/packs/Microsoft.NETCore.App.Host.linux-x64 | tail -n 1)
+    libnethost_CFLAGS="-I/usr/share/dotnet/packs/Microsoft.NETCore.App.Host.linux-x64/$NETCOREAPP_VERSION/runtimes/linux-x64/native"
+    libnethost_LIBS=""
+    trafficmanager_CFLAGS="-I../microsoft/trafficmanager/PolicyProvider.Rescue.Native/include"
+    AC_SUBST([libnethost_CFLAGS])
+    AC_SUBST([libnethost_LIBS])
+    AC_SUBST([trafficmanager_CFLAGS])
+])
+AS_IF([test "$enable_trafficmanager" = yes], [AC_DEFINE([USE_TRAFFICMANAGER], [1], [Define to 1 to enable the TrafficManager module.])])
+AM_CONDITIONAL([USE_TRAFFICMANAGER], [test "$enable_trafficmanager" = yes])
+
 result_msg_base="  Knot DNS $VERSION
 
     Target:   $host_os $host_cpu $endianity
@@ -762,6 +918,8 @@ result_msg_base="  Knot DNS $VERSION
     Storage dir:           ${storage_dir}
     Config dir:            ${config_dir}
     Module dir:            ${module_dir}
+    With valgrind memcheck: ${with_memcheck}
+    UDP max size:          ${udp_max_size}
 
     Static modules: ${static_modules}
     Shared modules: ${shared_modules}
@@ -774,6 +932,11 @@ result_msg_base="  Knot DNS $VERSION
     Use recvmmsg:          ${enable_recvmmsg}
     Use SO_REUSEPORT(_LB): ${enable_reuseport}
     XDP support:           ${enable_xdp}
+    Async query handling:  ${enable_async_query}
+    AzureDB module:        ${SUPPORTS_MODULE_azuredb}
+    AliasRR support:       ${with_aliasrr}
+    Unbound library support: ${with_unbound}
+    NUMA support:          ${with_numa}
     Socket polling:        ${socket_polling}
     Memory allocator:      ${with_memory_allocator}
     Fast zone parser:      ${enable_fastparser}
@@ -787,6 +950,9 @@ result_msg_base="  Knot DNS $VERSION
     Ed25519 support:       ${enable_ed25519}
     Ed448 support:         ${enable_ed448}
     Reproducible signing:  ${enable_repro_signing}
+    Throttle dnstap logs:  ${enable_throttle_dnstap_logs}
+    Trailing query bytes:  ${enable_trailing_bytes}
+    Traffic Manager:       ${enable_trafficmanager}
     Code coverage:         ${enable_code_coverage}
     Sanitizer:             ${with_sanitizer}
     LibFuzzer:             ${with_fuzzer}
diff --git a/src/contrib/Makefile.inc b/src/contrib/Makefile.inc
index ee987071d8..f71d136c23 100644
--- a/src/contrib/Makefile.inc
+++ b/src/contrib/Makefile.inc
@@ -1,7 +1,7 @@
 noinst_LTLIBRARIES += libcontrib.la
 
-libcontrib_la_CPPFLAGS = $(AM_CPPFLAGS) $(CFLAG_VISIBILITY)
-libcontrib_la_LDFLAGS = $(AM_LDFLAGS) $(LDFLAG_EXCLUDE_LIBS)
+libcontrib_la_CPPFLAGS = $(AM_CPPFLAGS) $(CFLAG_VISIBILITY)
${fuzzer_CFLAGS}
+libcontrib_la_LDFLAGS = $(AM_LDFLAGS) $(LDFLAG_EXCLUDE_LIBS) ${fuzzer_CFLAGS}
 libcontrib_la_LIBADD = $(pthread_LIBS)
 libcontrib_LIBS = libcontrib.la
 if USE_GNUTLS_MEMSET
@@ -56,6 +56,7 @@ libcontrib_la_SOURCES = \
 	contrib/tolower.h \
 	contrib/trim.h \
 	contrib/wire_ctx.h \
+	contrib/memcheck.h \
 	contrib/openbsd/siphash.c \
 	contrib/openbsd/siphash.h \
 	contrib/openbsd/strlcat.c \
@@ -133,7 +134,7 @@ libdnstap_la_CPPFLAGS = $(AM_CPPFLAGS) $(CFLAG_VISIBILITY) $(DNSTAP_CFLAGS)
 libdnstap_la_LDFLAGS = $(AM_LDFLAGS) $(LDFLAG_EXCLUDE_LIBS)
 libdnstap_LIBS = libdnstap.la $(DNSTAP_LIBS)
 
-SUFFIXES = .proto .pb-c.c .pb-c.h
+SUFFIXES = .proto .pb-c.c .pb-c.h .c .pp
 
 .proto.pb-c.c:
 	$(AM_V_GEN)@PROTOC_C@ --c_out=. -I$(srcdir) $<
@@ -141,6 +142,10 @@ SUFFIXES = .proto .pb-c.c .pb-c.h .c .pp
 .proto.pb-c.h:
 	$(AM_V_GEN)@PROTOC_C@ --c_out=. -I$(srcdir) $<
 
+# Allows the creation of a preprocessed file using "make file.pp"
+.c.pp:
+	$(COMPILE) -o $@ -E $<
+
 libdnstap_la_SOURCES = \
 	contrib/dnstap/convert.c \
 	contrib/dnstap/convert.h \
diff --git a/src/contrib/dnstap/message.c b/src/contrib/dnstap/message.c
index a5f798e2b7..a9057609b5 100644
--- a/src/contrib/dnstap/message.c
+++ b/src/contrib/dnstap/message.c
@@ -73,7 +73,10 @@ int dt_message_fill(Dnstap__Message *m,
                     const int protocol,
                     const void *wire,
                     const size_t len_wire,
-                    const struct timespec *mtime)
+                    const struct timespec *mtime,
+                    const void *wire2,
+                    const size_t len_wire2,
+                    const struct timespec *mtime2)
 {
 	if (m == NULL) {
 		return KNOT_EINVAL;
@@ -124,6 +127,20 @@ int dt_message_fill(Dnstap__Message *m,
 			m->has_response_time_sec = 1;
 			m->has_response_time_nsec = 1;
 		}
+
+		if (len_wire2 > 0) {
+			// Message.query_message
+			m->query_message.len = len_wire2;
+			m->query_message.data = (uint8_t *)wire2;
+			m->has_query_message = 1;
+			// Message.query_time_sec, Message.query_time_nsec
+			if (mtime2 != NULL) {
+				m->query_time_sec = mtime2->tv_sec;
+				m->query_time_nsec = mtime2->tv_nsec;
+				m->has_query_time_sec = 1;
+				m->has_query_time_nsec = 1;
+			}
+		}
 	}
 
 	return KNOT_EOK;
diff --git a/src/contrib/dnstap/message.h b/src/contrib/dnstap/message.h
index b9e3aff66b..425e01b8f2 100644
--- a/src/contrib/dnstap/message.h
+++ b/src/contrib/dnstap/message.h
@@ -49,6 +49,12 @@
  *	Length in bytes of 'wire'.
  * \param mtime
  *	Message time. May be NULL.
+ * \param wire2
+ *	Wire-format request message (used only when logging the query and the response in the same message).
+ * \param len_wire2
+ *	Length in bytes of 'wire2'.
+ * \param mtime2
+ *	Request message time. May be NULL.
  *
  * \retval KNOT_EOK
  * \retval KNOT_EINVAL
@@ -60,4 +66,7 @@ int dt_message_fill(Dnstap__Message *m,
                     const int protocol,
                     const void *wire,
                     const size_t len_wire,
-                    const struct timespec *mtime);
+                    const struct timespec *mtime,
+                    const void *wire2,
+                    const size_t len_wire2,
+                    const struct timespec *mtime2);
diff --git a/src/contrib/memcheck.h b/src/contrib/memcheck.h
new file mode 100644
index 0000000000..96d326a012
--- /dev/null
+++ b/src/contrib/memcheck.h
@@ -0,0 +1,10 @@
+#pragma once
+#ifdef KNOT_ENABLE_MEMCHECK
+#include <valgrind/valgrind.h>
+#include <valgrind/memcheck.h>
+#else
+#define RUNNING_ON_VALGRIND 0
+#define VALGRIND_MAKE_MEM_NOACCESS(...)
+#define VALGRIND_MAKE_MEM_UNDEFINED(...)
+#define VALGRIND_MAKE_MEM_DEFINED(...)
+#endif diff --git a/src/contrib/time.h b/src/contrib/time.h index 019b8c3fba..22c1a13f41 100644 --- a/src/contrib/time.h +++ b/src/contrib/time.h @@ -20,6 +20,10 @@ #include #include +#ifndef CLOCK_REALTIME_COARSE +#define CLOCK_REALTIME_COARSE CLOCK_REALTIME +#endif + #ifdef __APPLE__ #define st_mtim st_mtimespec #endif diff --git a/src/contrib/ucw/lists.h b/src/contrib/ucw/lists.h index ee06e7d15e..03149e5aac 100644 --- a/src/contrib/ucw/lists.h +++ b/src/contrib/ucw/lists.h @@ -25,6 +25,8 @@ typedef struct list { #define TAIL(list) ((void *)((list).tail.prev)) #define WALK_LIST(n,list) for(n=HEAD(list);(NODE (n))->next; \ n=(void *)((NODE (n))->next)) +#define WALK_LIST_RESUME(n,list) for((n)= (void *)((NODE (n)) ? (NODE(n))->next : HEAD(list)); ((NODE (n)) && (NODE (n))->next) || ((n) = NULL); \ + (n)=(void *)((NODE (n))->next)) #define WALK_LIST_DELSAFE(n,nxt,list) \ for(n=HEAD(list); (nxt=(void *)((NODE (n))->next)); n=(void *) nxt) /* WALK_LIST_FIRST supposes that called code removes each processed node */ diff --git a/src/contrib/ucw/mempool.c b/src/contrib/ucw/mempool.c index 8e835c117f..d93eca056a 100644 --- a/src/contrib/ucw/mempool.c +++ b/src/contrib/ucw/mempool.c @@ -18,6 +18,7 @@ #include #include "contrib/asan.h" #include "contrib/macros.h" +#include "contrib/memcheck.h" #include "contrib/ucw/mempool.h" /** \todo This shouldn't be precalculated, but computed on load. */ @@ -198,6 +199,8 @@ mp_flush(struct mempool *pool) if ((uint8_t *)chunk - chunk->size == (uint8_t *)pool) { break; } + /* Inform valgrind all data in this chunk are no longer initialized to ensure next allocator initializes before reading */ + VALGRIND_MAKE_MEM_UNDEFINED((uint8_t *)chunk - chunk->size, chunk->size); next = chunk->next; chunk->next = pool->unused; ASAN_POISON_MEMORY_REGION(chunk, sizeof(struct mempool_chunk)); @@ -207,6 +210,8 @@ mp_flush(struct mempool *pool) pool->state.last[0] = chunk; if (chunk) { pool->state.free[0] = chunk->size - sizeof(*pool); + /* Inform valgrind all data in this pool are no longer initialized to ensure next allocator initializes before reading */ + VALGRIND_MAKE_MEM_UNDEFINED((uint8_t *)chunk - chunk->size + sizeof(*pool), chunk->size - sizeof(*pool)); ASAN_POISON_MEMORY_REGION(chunk, sizeof(struct mempool_chunk)); } else { pool->state.free[0] = 0; @@ -297,6 +302,7 @@ mp_alloc(struct mempool *pool, unsigned size) ptr = mp_alloc_internal(pool, size); } ASAN_UNPOISON_MEMORY_REGION(ptr, size); + VALGRIND_MAKE_MEM_UNDEFINED(ptr, size); return ptr; } @@ -311,6 +317,7 @@ mp_alloc_noalign(struct mempool *pool, unsigned size) ptr = mp_alloc_internal(pool, size); } ASAN_UNPOISON_MEMORY_REGION(ptr, size); + VALGRIND_MAKE_MEM_UNDEFINED(ptr, size); return ptr; } diff --git a/src/knot/Makefile.inc b/src/knot/Makefile.inc index 335acacc89..39a75e90a9 100644 --- a/src/knot/Makefile.inc +++ b/src/knot/Makefile.inc @@ -1,7 +1,7 @@ libknotd_la_CPPFLAGS = $(AM_CPPFLAGS) $(CFLAG_VISIBILITY) $(libkqueue_CFLAGS) \ $(liburcu_CFLAGS) $(lmdb_CFLAGS) $(systemd_CFLAGS) \ - -DKNOTD_MOD_STATIC -libknotd_la_LDFLAGS = $(AM_LDFLAGS) -export-symbols-regex '^knotd_' + -DKNOTD_MOD_STATIC ${fuzzer_CFLAGS} +libknotd_la_LDFLAGS = $(AM_LDFLAGS) -export-symbols-regex '^knotd_' ${fuzzer_CFLAGS} libknotd_la_LIBADD = $(dlopen_LIBS) $(libkqueue_LIBS) $(pthread_LIBS) libknotd_LIBS = libknotd.la libknot.la libdnssec.la libzscanner.la \ $(libcontrib_LIBS) $(liburcu_LIBS) $(lmdb_LIBS) \ @@ -103,6 +103,7 @@ libknotd_la_SOURCES = \ knot/nameserver/notify.h \ knot/nameserver/nsec_proofs.c \ 
knot/nameserver/nsec_proofs.h \
+	knot/nameserver/query_state.h \
 	knot/nameserver/process_query.c \
 	knot/nameserver/process_query.h \
 	knot/nameserver/query_module.c \
@@ -148,6 +149,10 @@ libknotd_la_SOURCES = \
 	knot/journal/serialization.h \
 	knot/server/server.c \
 	knot/server/server.h \
+	knot/server/dns-handler.c \
+	knot/server/dns-handler.h \
+	knot/server/network_req_manager.c \
+	knot/server/network_req_manager.h \
 	knot/server/tcp-handler.c \
 	knot/server/tcp-handler.h \
 	knot/server/udp-handler.c \
@@ -207,6 +212,16 @@ libknotd_la_SOURCES = \
 	knot/zone/zonefile.c \
 	knot/zone/zonefile.h
 
+if HAVE_ATOMIC_PRIMITIVES
+libknotd_la_SOURCES += \
+	knot/common/qps_limiter.c \
+	knot/common/qps_limiter.h \
+	knot/nameserver/lstack.c \
+	knot/include/lstack.h \
+	knot/nameserver/lqueue.c \
+	knot/include/lqueue.h
+endif HAVE_ATOMIC_PRIMITIVES
+
 if HAVE_DAEMON
 noinst_LTLIBRARIES += libknotd.la
 pkgconfig_DATA += knotd.pc
@@ -218,6 +233,7 @@ KNOTD_MOD_LDFLAGS = $(AM_LDFLAGS) -module -shared -avoid-version
 pkglibdir = $(module_instdir)
 pkglib_LTLIBRARIES =
 
+# include $(srcdir)/knot/modules/azuredb/Makefile.inc
 include $(srcdir)/knot/modules/cookies/Makefile.inc
 include $(srcdir)/knot/modules/dnsproxy/Makefile.inc
 include $(srcdir)/knot/modules/dnstap/Makefile.inc
@@ -230,3 +246,5 @@ include $(srcdir)/knot/modules/rrl/Makefile.inc
 include $(srcdir)/knot/modules/stats/Makefile.inc
 include $(srcdir)/knot/modules/synthrecord/Makefile.inc
 include $(srcdir)/knot/modules/whoami/Makefile.inc
+include $(srcdir)/knot/modules/delay/Makefile.inc
+# include $(srcdir)/knot/modules/azurednssec/Makefile.inc
diff --git a/src/knot/common/fdset.c b/src/knot/common/fdset.c
index 3514edc524..475a4aa47c 100644
--- a/src/knot/common/fdset.c
+++ b/src/knot/common/fdset.c
@@ -128,6 +128,37 @@ int fdset_add(fdset_t *set, const int fd, const fdset_event_t events, void *ctx)
 	return idx;
 }
 
+int fdset_set_ctx(fdset_t *set, unsigned i, void *ctx)
+{
+	if (i < set->n) {
+		set->ctx[i] = ctx;
+		return i;
+	} else {
+		return -1;
+	}
+}
+
+void *fdset_get_ctx(fdset_t *set, unsigned i)
+{
+	if (i < set->n) {
+		return set->ctx[i];
+	} else {
+		return NULL;
+	}
+}
+
+int fdset_set_ctx_on_fd(fdset_t *set, int fd, void *ctx)
+{
+	for (unsigned i = 0; i < set->n; i++) {
+		if (fdset_get_fd(set, i) == fd) {
+			set->ctx[i] = ctx;
+			return i;
+		}
+	}
+
+	return -1;
+}
+
 int fdset_remove(fdset_t *set, const unsigned idx)
 {
 	if (set == NULL || idx >= set->n) {
@@ -326,7 +357,7 @@ void fdset_sweep(fdset_t *set, const fdset_sweep_cb_t cb, void *data)
 		/* Check sweep state, remove if requested. */
 		if (set->timeout[idx] > 0 && set->timeout[idx] <= now.tv_sec) {
 			const int fd = fdset_get_fd(set, idx);
-			if (cb(set, fd, data) == FDSET_SWEEP) {
+			if (cb(set, fd, set->ctx[idx], data) == FDSET_SWEEP) {
 				(void)fdset_remove(set, idx);
 				continue;
 			}
diff --git a/src/knot/common/fdset.h b/src/knot/common/fdset.h
index 95a5c61e49..2a3b662a23 100644
--- a/src/knot/common/fdset.h
+++ b/src/knot/common/fdset.h
@@ -96,7 +96,7 @@ typedef enum {
 } fdset_sweep_state_t;
 
 /*! \brief Sweep callback (set, fd, ctx, data) */
-typedef fdset_sweep_state_t (*fdset_sweep_cb_t)(fdset_t *, int, void *);
+typedef fdset_sweep_state_t (*fdset_sweep_cb_t)(fdset_t *, int, void *ctx, void *arg);
 
 /*!
  * \brief Initialize fdset to given size.
@@ -128,6 +128,39 @@ void fdset_clear(fdset_t *set);
  */
 int fdset_add(fdset_t *set, const int fd, const fdset_event_t events, void *ctx);
 
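The new ctx accessors pair with the extended sweep callback. A minimal sketch of how they fit together (illustrative only, not part of the patch; `FDSET_POLLIN` is assumed from fdset.h, and `conn_state` and its ownership are hypothetical):

#include <stdlib.h>
#include "knot/common/fdset.h"

/* The sweep callback now receives the per-fd context directly,
 * without an extra index lookup. */
static fdset_sweep_state_t sweep_cb(fdset_t *set, int fd, void *ctx, void *data)
{
	free(ctx);           /* hypothetical per-fd state owned by the caller */
	return FDSET_SWEEP;  /* ask fdset_sweep() to remove this fd */
}

static void register_conn(fdset_t *set, int fd, void *conn_state)
{
	int idx = fdset_add(set, fd, FDSET_POLLIN, NULL);
	if (idx >= 0) {
		fdset_set_ctx(set, idx, conn_state); /* attach state after registration */
	}
}
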
+/*!
+ * \brief Set the context on the descriptor at index i.
+ *
+ * \param set Target set.
+ * \param i Index to set the ctx at.
+ * \param ctx Context to set.
+ *
+ * \retval Index of the fd if successful.
+ * \retval -1 on errors.
+ */
+int fdset_set_ctx(fdset_t *set, unsigned i, void *ctx);
+
+/*!
+ * \brief Get the context on the descriptor at index i.
+ *
+ * \param set Target set.
+ * \param i Index to get the ctx at.
+ *
+ * \retval Context value, or NULL on error.
+ */
+void *fdset_get_ctx(fdset_t *set, unsigned i);
+
+/*!
+ * \brief Set (or clear) the context for the given fd.
+ *
+ * \param set Target set.
+ * \param fd Handle whose context needs to be set.
+ * \param ctx Context (NULL to clear).
+ *
+ * \retval Index of the fd if successful.
+ * \retval -1 on errors.
+ */
+int fdset_set_ctx_on_fd(fdset_t *set, int fd, void *ctx);
+
 /*!
  * \brief Remove and close file descriptor from watched set.
  *
@@ -201,6 +234,27 @@ inline static int fdset_get_fd(const fdset_t *set, const unsigned idx)
 #endif
 }
 
+/*!
+ * \brief Returns the index for a file descriptor.
+ *
+ * \param set Target set.
+ * \param fd File descriptor to look up.
+ *
+ * \retval Index (>= 0) of the file descriptor if found.
+ * \retval -1 if not found.
+ */
+inline static int fdset_get_index_for_fd(const fdset_t *set, int fd)
+{
+	assert(set);
+	for (unsigned i = 0; i < set->n; i++) {
+		if (fdset_get_fd(set, i) == fd) {
+			return i;
+		}
+	}
+
+	return -1;
+}
+
 /*!
  * \brief Returns number of file descriptors stored in set.
  *
@@ -277,6 +331,20 @@ inline static void fdset_it_next(fdset_it_t *it)
 #endif
 }
 
+/*!
+ * \brief Get context from the iterator.
+ *
+ * \param it Target iterator.
+ *
+ * \return Context at the iterator.
+ */
+inline static void *fdset_it_get_ctx(fdset_it_t *it)
+{
+	assert(it);
+	unsigned idx = fdset_it_get_idx(it);
+	return fdset_get_ctx(it->set, idx);
+}
+
 /*!
  * \brief Remove file descriptor referenced by iterator from watched set.
  *
@@ -304,9 +372,9 @@ inline static void fdset_it_remove(fdset_it_t *it)
 		/* EVFILT_WRITE (1) -> -2 */
 		/* If not marked for delete then mark for delete. */
 #if defined(__NetBSD__)
-		if ((signed short)it->set->ev[idx].filter >= 0)
+		if ((signed short)it->set->ev[idx].filter >= 0)
 #else
-		if (it->set->ev[idx].filter < 0)
+		if (it->set->ev[idx].filter < 0)
 #endif
 		{
 			it->set->ev[idx].filter = ~it->set->ev[idx].filter;
diff --git a/src/knot/common/qps_limiter.c b/src/knot/common/qps_limiter.c
new file mode 100644
index 0000000000..b84ff5b696
--- /dev/null
+++ b/src/knot/common/qps_limiter.c
@@ -0,0 +1,83 @@
+#include "qps_limiter.h"
+#include "contrib/time.h"
+
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Watomic-alignment"
+
+int qps_limiter_init(qps_limiter_t *limiter)
+{
+	if (posix_memalign((void **)&limiter->qps_limit, 16, sizeof(qps_limit_t)) != 0) {
+		return ENOMEM;
+	}
+
+	memset((void *)limiter->qps_limit, 0, sizeof(qps_limit_t));
+
+	struct timespec tv;
+	if (clock_gettime(CLOCK_REALTIME_COARSE, &tv)) {
+		return errno;
+	}
+
+	limiter->start_time = tv.tv_sec;
+
+	qps_limit_t limit = {0};
+
+	KNOT_ATOMIC_INIT(limiter->qps_limit[0], limit);
+
+	return 0;
+}
+
+void qps_limiter_cleanup(qps_limiter_t *limiter)
+{
+	free((void *)limiter->qps_limit);
+	limiter->qps_limit = NULL;
+}
+
+bool qps_limiter_is_allowed(qps_limiter_t *limiter, time_t time, bool is_err)
+{
+	qps_limit_t expect;
+	qps_limit_t new;
+
+	KNOT_ATOMIC_GET(limiter->qps_limit, expect);
+
+	/* time_t may be 64-bit, and 64-bit time here would force 128-bit atomic operations, which is not optimal.
+	   Keeping time relative to program start fits it into 32 bits (68 years of run time without restart),
+	   while the atomic operations stay 64-bit.
+	 */
+	uint32_t rel_time = time - limiter->start_time;
+
+	do
+	{
+		int diff = rel_time - expect.time;
+		if (diff <= 0) {
+			/* time is old, need throttling */
+			new.time = expect.time;
+			if ( (expect.query_cnt < limiter->log_qps)
+			    || (is_err && expect.query_cnt < limiter->log_err_qps) ) {
+				new.query_cnt = expect.query_cnt + 1;
+			} else {
+				return false;
+			}
+		} else {
+			/* time is new, reset limit */
+			new.time = rel_time;
+			long time_adj = (long)diff * limiter->log_qps;
+			if (expect.query_cnt < time_adj) {
+				/* enough time passed to reset the time */
+				new.query_cnt = 1;
+			} else {
+				/* already used more time previously */
+				int spill = expect.query_cnt - time_adj;
+				if ( (spill < limiter->log_qps)
+				    || (is_err && spill < limiter->log_err_qps) ) {
+					new.query_cnt = spill + 1;
+				} else {
+					return false;
+				}
+			}
+		}
+	}
+	while (!KNOT_ATOMIC_COMPARE_EXCHANGE_WEAK(limiter->qps_limit, expect, new));
+
+	return true;
+}
+
+#pragma GCC diagnostic pop
\ No newline at end of file
diff --git a/src/knot/common/qps_limiter.h b/src/knot/common/qps_limiter.h
new file mode 100644
index 0000000000..4af7ac3499
--- /dev/null
+++ b/src/knot/common/qps_limiter.h
@@ -0,0 +1,42 @@
+#pragma once
+#include "knot/include/atomic.h"
+
+#pragma pack(push, 1)
+typedef struct qps_limit {
+	KNOT_ALIGN(8) uint32_t time;
+	uint32_t query_cnt;
+} qps_limit_t;
+#pragma pack(pop)
+
+typedef struct qps_limiter {
+	time_t start_time;
+	int log_qps;
+	int log_err_qps;
+	KNOT_ATOMIC qps_limit_t *qps_limit;
+} qps_limiter_t;
+
+/*!
+ * \brief Initialize QPS Limiter object.
+ *
+ * \param limiter QPS Limiter object.
+ *
+ * \retval 0 if successfully initialized.
+ * \retval Other values to indicate error.
+ */
+int qps_limiter_init(qps_limiter_t *limiter);
+
+/*!
+ * \brief Cleans up QPS Limiter object.
+ *
+ * \param limiter QPS Limiter object.
+ */
+void qps_limiter_cleanup(qps_limiter_t *limiter);
+
+/*!
+ * \brief Checks whether the query is allowed within the QPS limits.
+ *
+ * \param limiter QPS Limiter object.
+ * \param time Time in seconds from the real-time clock.
+ * \param is_err Whether the log is for an error case (error budget applies).
+ *
+ * \retval true if the query is allowed, false if it should be throttled.
+ */
+bool qps_limiter_is_allowed(qps_limiter_t *limiter, time_t time, bool is_err);
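An illustrative call site (not part of the patch; `log_error()` is a hypothetical logging helper): the caller sets the per-second budgets, initializes the limiter once, then gates each log line on the current wall-clock second.

#include <time.h>
#include "knot/common/qps_limiter.h"

static qps_limiter_t limiter = {
	.log_qps = 10,       /* ordinary log budget per second */
	.log_err_qps = 100,  /* error logs get a higher budget */
};

void limiter_setup(void)
{
	qps_limiter_init(&limiter); /* records start_time, allocates the atomic state */
}

void log_throttled(const char *msg, bool is_err)
{
	struct timespec now;
	if (clock_gettime(CLOCK_REALTIME_COARSE, &now) == 0 &&
	    qps_limiter_is_allowed(&limiter, now.tv_sec, is_err)) {
		log_error("%s", msg); /* hypothetical logging helper */
	}
}
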
diff --git a/src/knot/common/stats.c b/src/knot/common/stats.c
index 48c51e2c9a..b9c925594c 100644
--- a/src/knot/common/stats.c
+++ b/src/knot/common/stats.c
@@ -25,6 +25,8 @@
 #include "knot/common/log.h"
 #include "knot/nameserver/query_module.h"
 
+#define ARRAY_SIZE(array) (sizeof(array) / sizeof((array)[0]))
+
 struct {
 	bool active_dumper;
 	pthread_t dumper;
@@ -51,6 +53,26 @@ uint64_t server_zone_count(server_t *server)
 	return knot_zonedb_size(server->zone_db);
 }
 
+const char *server_stat_names[] = {
+	"udp_received",
+	"udp_async_done",
+	"udp_no_req_obj",
+	"udp_req_batch_limited",
+	"tcp_accept",
+	"tcp_received",
+	"tcp_async_done",
+	"tcp_no_req_obj",
+	"tcp_multiple_req",
+};
+
+static uint64_t server_stat_counters[ARRAY_SIZE(server_stat_names)];
+
+void server_stats_increment_counter(server_stats_counter_t counter, uint64_t value)
+{
+	assert(counter < server_stats_max);
+	ATOMIC_ADD(server_stat_counters[counter], value);
+}
+
 const stats_item_t server_stats[] = {
 	{ "zone-count", server_zone_count },
 	{ 0 }
@@ -181,6 +203,10 @@ static void dump_to_file(FILE *fd, server_t *server)
 		DUMP_CTR(fd, 1, "%s", item->name, item->val(server));
 	}
 
+	for (size_t i = 0; i < ARRAY_SIZE(server_stat_names); i++) {
+		DUMP_CTR(fd, 1, "%s", server_stat_names[i], ATOMIC_GET(server_stat_counters[i]));
+	}
+
 	dump_ctx_t ctx = {
 		.fd = fd,
 		.query_modules = conf()->query_modules,
@@ -267,6 +293,7 @@ static void *dumper(void *data)
 void stats_reconfigure(conf_t *conf, server_t *server)
 {
+	assert(server_stats_max == ARRAY_SIZE(server_stat_names)); // Ensure the enum and names are set up consistently.
 	if (conf == NULL || server == NULL) {
 		return;
 	}
diff --git a/src/knot/common/stats.h b/src/knot/common/stats.h
index bd6df6d226..f10eab2c31 100644
--- a/src/knot/common/stats.h
+++ b/src/knot/common/stats.h
@@ -37,6 +37,27 @@ typedef struct {
  */
 extern const stats_item_t server_stats[];
 
+/*!
+ * \brief Server statistics counters.
+ */
+typedef enum {
+	server_stats_udp_received,
+	server_stats_udp_async_done,
+	server_stats_udp_no_req_obj,
+	server_stats_udp_req_batch_limited,
+	server_stats_tcp_accept,
+	server_stats_tcp_received,
+	server_stats_tcp_async_done,
+	server_stats_tcp_no_req_obj,
+	server_stats_tcp_multiple_req,
+	server_stats_max,
+} server_stats_counter_t;
+
+/*!
+ * \brief Increment the given server statistics counter.
+ */
+void server_stats_increment_counter(server_stats_counter_t counter, uint64_t value);
+
 /*!
  * \brief Read out value of single counter summed across threads.
*/ diff --git a/src/knot/conf/base.c b/src/knot/conf/base.c index 967317c892..26a6218869 100644 --- a/src/knot/conf/base.c +++ b/src/knot/conf/base.c @@ -131,6 +131,12 @@ static void init_cache( static size_t running_tcp_threads; static size_t running_xdp_threads; static size_t running_bg_threads; +#ifdef ENABLE_ASYNC_QUERY_HANDLING + static bool numa_enabled = false; + static size_t udp_async_reqs; + static size_t tcp_async_reqs; + static size_t xdp_async_reqs; +#endif if (first_init || reinit_cache) { running_tcp_reuseport = conf_get_bool(conf, C_SRV, C_TCP_REUSEPORT); @@ -142,6 +148,12 @@ static void init_cache( running_xdp_threads = conf_xdp_threads(conf); running_bg_threads = conf_bg_threads(conf); +#ifdef ENABLE_ASYNC_QUERY_HANDLING + numa_enabled = conf_is_numa_enabled(conf); + udp_async_reqs = conf_udp_async_req(conf); + tcp_async_reqs = conf_tcp_async_req(conf); + xdp_async_reqs = conf_xdp_async_req(conf); +#endif first_init = false; } @@ -181,6 +193,13 @@ static void init_cache( conf->cache.srv_bg_threads = running_bg_threads; +#ifdef ENABLE_ASYNC_QUERY_HANDLING + conf->cache.numa_enabled = numa_enabled; + conf->cache.udp_srv_async_reqs = udp_async_reqs; + conf->cache.tcp_srv_async_reqs = tcp_async_reqs; + conf->cache.xdp_srv_async_reqs = xdp_async_reqs; +#endif + conf->cache.srv_tcp_max_clients = conf_tcp_max_clients(conf); val = conf_get(conf, C_XDP, C_TCP_MAX_CLIENTS); @@ -210,6 +229,9 @@ static void init_cache( val = conf_get(conf, C_SRV, C_ANS_ROTATION); conf->cache.srv_ans_rotate = conf_bool(&val); + + val = conf_get(conf, C_SRV, C_DISABLE_ANY); + conf->cache.srv_disable_any = conf_bool(&val); } int conf_new( diff --git a/src/knot/conf/base.h b/src/knot/conf/base.h index 62b6ee22b8..81b3a497d2 100644 --- a/src/knot/conf/base.h +++ b/src/knot/conf/base.h @@ -137,10 +137,17 @@ typedef struct { uint32_t xdp_tcp_idle_reset; bool xdp_tcp; bool xdp_route_check; +#ifdef ENABLE_ASYNC_QUERY_HANDLING + bool numa_enabled; + size_t udp_srv_async_reqs; + size_t tcp_srv_async_reqs; + size_t xdp_srv_async_reqs; +#endif int ctl_timeout; conf_val_t srv_nsid; bool srv_ecs; bool srv_ans_rotate; + bool srv_disable_any; } cache; /*! List of dynamically loaded modules. 
 */
diff --git a/src/knot/conf/conf.c b/src/knot/conf/conf.c
index 5c0f6c63aa..2c5560af9b 100644
--- a/src/knot/conf/conf.c
+++ b/src/knot/conf/conf.c
@@ -1345,6 +1345,65 @@ conf_remote_t conf_remote_txn(
 	return out;
 }
 
+#ifdef ENABLE_ASYNC_QUERY_HANDLING
+#define UDP_ASYNC_DEFAULT_REQ 1024
+#define TCP_ASYNC_DEFAULT_REQ 128
+#define XDP_ASYNC_DEFAULT_REQ 0
+
+bool conf_is_numa_enabled_txn(
+	conf_t *conf,
+	knot_db_txn_t *txn)
+{
+	conf_val_t val = conf_get_txn(conf, txn, C_SRV, C_ENABLE_NUMA);
+	return conf_bool(&val);
+}
+
+size_t conf_udp_async_req_txn(
+	conf_t *conf,
+	knot_db_txn_t *txn)
+{
+	conf_val_t val = conf_get_txn(conf, txn, C_SRV, C_UDP_ASYNC_REQS);
+	int64_t reqs = conf_int(&val);
+	if (reqs == YP_NIL) {
+		return UDP_ASYNC_DEFAULT_REQ;
+	}
+
+	return reqs;
+}
+
+size_t conf_tcp_async_req_txn(
+	conf_t *conf,
+	knot_db_txn_t *txn)
+{
+	conf_val_t val = conf_get_txn(conf, txn, C_SRV, C_TCP_ASYNC_REQS);
+	int64_t reqs = conf_int(&val);
+	if (reqs == YP_NIL) {
+		return TCP_ASYNC_DEFAULT_REQ;
+	}
+
+	return reqs;
+}
+
+size_t conf_xdp_async_req_txn(
+	conf_t *conf,
+	knot_db_txn_t *txn)
+{
+	conf_val_t lisxdp_val = conf_get(conf, C_SRV, C_LISTEN_XDP);
+	if (lisxdp_val.code == KNOT_EOK) {
+		conf_val_t val = conf_get_txn(conf, txn, C_SRV, C_XDP_ASYNC_REQS);
+		int64_t reqs = conf_int(&val);
+		if (reqs == YP_NIL) {
+			return XDP_ASYNC_DEFAULT_REQ;
+		}
+
+		return reqs;
+	} else {
+		return 0;
+	}
+}
+#endif
+
+
 int conf_xdp_iface(
 	struct sockaddr_storage *addr,
 	conf_xdp_iface_t *iface)
diff --git a/src/knot/conf/conf.h b/src/knot/conf/conf.h
index 68463a10c6..471ab6328f 100644
--- a/src/knot/conf/conf.h
+++ b/src/knot/conf/conf.h
@@ -829,6 +829,82 @@ conf_remote_t conf_remote_txn(
 	conf_val_t *id,
 	size_t index
 );
+
+#ifdef ENABLE_ASYNC_QUERY_HANDLING
+/*!
+ * Gets whether NUMA is enabled.
+ *
+ * \param[in] conf Configuration.
+ * \param[in] txn Configuration DB transaction.
+ *
+ * \return true if NUMA is enabled.
+ */
+bool conf_is_numa_enabled_txn(
+	conf_t *conf,
+	knot_db_txn_t *txn
+);
+
+static inline bool conf_is_numa_enabled(
+	conf_t *conf)
+{
+	return conf_is_numa_enabled_txn(conf, &conf->read_txn);
+}
+
+/*!
+ * Gets the configured number of UDP async requests.
+ *
+ * \param[in] conf Configuration.
+ * \param[in] txn Configuration DB transaction.
+ *
+ * \return Number of async requests.
+ */
+size_t conf_udp_async_req_txn(
+	conf_t *conf,
+	knot_db_txn_t *txn
+);
+static inline size_t conf_udp_async_req(
+	conf_t *conf)
+{
+	return conf_udp_async_req_txn(conf, &conf->read_txn);
+}
+
+/*!
+ * Gets the configured number of TCP async requests.
+ *
+ * \param[in] conf Configuration.
+ * \param[in] txn Configuration DB transaction.
+ *
+ * \return Number of async requests.
+ */
+size_t conf_tcp_async_req_txn(
+	conf_t *conf,
+	knot_db_txn_t *txn
+);
+static inline size_t conf_tcp_async_req(
+	conf_t *conf)
+{
+	return conf_tcp_async_req_txn(conf, &conf->read_txn);
+}
+
+/*!
+ * Gets the configured number of XDP async requests.
+ *
+ * \param[in] conf Configuration.
+ * \param[in] txn Configuration DB transaction.
+ *
+ * \return Number of async requests.
+ */
+size_t conf_xdp_async_req_txn(
+	conf_t *conf,
+	knot_db_txn_t *txn
+);
+static inline size_t conf_xdp_async_req(
+	conf_t *conf)
+{
+	return conf_xdp_async_req_txn(conf, &conf->read_txn);
+}
+#endif
+
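For context (an illustrative snippet, not from the patch): these getters read server options that the schema changes below register, so an async-enabled build would be tuned in knot.conf roughly like this (values are examples):

server:
    enable-numa: on
    udp-async-req: 4096
    tcp-async-req: 1024
    xdp-async-req: 2048
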
 static inline conf_remote_t conf_remote(
 	conf_t *conf,
 	conf_val_t *id,
diff --git a/src/knot/conf/module.c b/src/knot/conf/module.c
index f1c1f248f9..56446ab054 100644
--- a/src/knot/conf/module.c
+++ b/src/knot/conf/module.c
@@ -343,12 +343,13 @@ void conf_mod_unload_shared(
 	else \
 		log_##level(LOG_ARGS(mod_id, msg), ##__VA_ARGS__);
 
-void conf_activate_modules(
+void conf_activate_given_module_conf(
 	conf_t *conf,
 	struct server *server,
 	const knot_dname_t *zone_name,
 	list_t *query_modules,
-	struct query_plan **query_plan)
+	struct query_plan **query_plan,
+	conf_val_t val)
 {
 	int ret = KNOT_EOK;
 
@@ -357,15 +358,6 @@ void conf_activate_modules(
 		goto activate_error;
 	}
 
-	conf_val_t val;
-
-	// Get list of associated modules.
-	if (zone_name != NULL) {
-		val = conf_zone_get(conf, C_MODULE, zone_name);
-	} else {
-		val = conf_default_get(conf, C_GLOBAL_MODULE);
-	}
-
 	switch (val.code) {
 	case KNOT_EOK:
 		break;
@@ -439,11 +431,42 @@ activate_error:
 	CONF_LOG(LOG_ERR, "failed to activate modules (%s)", knot_strerror(ret));
 }
 
+void conf_activate_modules(
+	conf_t *conf,
+	struct server *server,
+	const knot_dname_t *zone_name,
+	list_t *query_modules,
+	struct query_plan **query_plan)
+{
+	int ret = KNOT_EOK;
+
+	if (conf == NULL || query_modules == NULL || query_plan == NULL) {
+		ret = KNOT_EINVAL;
+		goto activate_error;
+	}
+
+	conf_val_t val;
+
+	// Get list of associated modules.
+	if (zone_name != NULL) {
+		val = conf_zone_get(conf, C_MODULE, zone_name);
+	} else {
+		val = conf_default_get(conf, C_GLOBAL_MODULE);
+	}
+
+	conf_activate_given_module_conf(conf, server, zone_name, query_modules, query_plan, val);
+
+	return;
+activate_error:
+	CONF_LOG(LOG_ERR, "failed to activate modules (%s)", knot_strerror(ret));
+}
+
 void conf_deactivate_modules(
 	list_t *query_modules,
 	struct query_plan **query_plan)
 {
-	if (query_modules == NULL || query_plan == NULL) {
+	if (query_modules == NULL || query_plan == NULL || *query_plan == NULL) {
 		return;
 	}
diff --git a/src/knot/conf/module.h b/src/knot/conf/module.h
index 9a6accbb59..4c62be8c84 100644
--- a/src/knot/conf/module.h
+++ b/src/knot/conf/module.h
@@ -101,6 +101,24 @@ void conf_activate_modules(
 	struct query_plan **query_plan
 );
 
+/*!
+ * Activates the given query module configuration for the specified zone or for all zones.
+ *
+ * \param[in] conf Configuration.
+ * \param[in] server Server instance.
+ * \param[in] zone_name Zone name, NULL for all zones.
+ * \param[in] query_modules Destination query modules list.
+ * \param[in] query_plan Destination query plan.
+ * \param[in] val Configuration value (module list) to activate.
+ */
+void conf_activate_given_module_conf(
+	conf_t *conf,
+	struct server *server,
+	const knot_dname_t *zone_name,
+	list_t *query_modules,
+	struct query_plan **query_plan,
+	conf_val_t val
+);
+
 /*!
  * Deactivates query modules list.
* diff --git a/src/knot/conf/schema.c b/src/knot/conf/schema.c index e2c3ddba1a..b230567798 100644 --- a/src/knot/conf/schema.c +++ b/src/knot/conf/schema.c @@ -222,6 +222,13 @@ static const yp_item_t desc_server[] = { { C_MAX_IPV6_UDP_PAYLOAD, YP_TINT, YP_VINT = { KNOT_EDNS_MIN_DNSSEC_PAYLOAD, KNOT_EDNS_MAX_UDP_PAYLOAD, 1232, YP_SSIZE } }, +#ifdef ENABLE_ASYNC_QUERY_HANDLING + { C_ENABLE_NUMA, YP_TBOOL, YP_VBOOL = {false}}, + { C_UDP_ASYNC_REQS, YP_TINT, YP_VINT = { 0, INT32_MAX, 4 * 1024 } }, + { C_TCP_ASYNC_REQS, YP_TINT, YP_VINT = { 0, INT32_MAX, 1 * 1024 } }, + { C_XDP_ASYNC_REQS, YP_TINT, YP_VINT = { 0, INT32_MAX, 2 * 1024 } }, +#endif + { C_DISABLE_ANY, YP_TBOOL, YP_VNONE }, { NULL } }; @@ -429,6 +436,8 @@ static const yp_item_t desc_template[] = { { C_ID, YP_TSTR, YP_VNONE, CONF_IO_FREF }, { C_GLOBAL_MODULE, YP_TDATA, YP_VDATA = { 0, NULL, mod_id_to_bin, mod_id_to_txt }, YP_FMULTI | CONF_IO_FRLD_MOD, { check_modref } }, + { C_AZURE_MODULE, YP_TDATA, YP_VDATA = { 0, NULL, mod_id_to_bin, mod_id_to_txt }, + YP_FMULTI | CONF_IO_FRLD_MOD, { check_modref } }, ZONE_ITEMS(CONF_IO_FRLD_ZONES) // Legacy items. { C_TIMER_DB, YP_TSTR, YP_VSTR = { "timers" }, CONF_IO_FRLD_SRV }, diff --git a/src/knot/conf/schema.h b/src/knot/conf/schema.h index ac590655a9..cd2e40ea0a 100644 --- a/src/knot/conf/schema.h +++ b/src/knot/conf/schema.h @@ -55,6 +55,7 @@ #define C_ECS "\x12""edns-client-subnet" #define C_FILE "\x04""file" #define C_GLOBAL_MODULE "\x0D""global-module" +#define C_AZURE_MODULE "\x0C""azure-module" #define C_ID "\x02""id" #define C_IDENT "\x08""identity" #define C_INCL "\x07""include" @@ -169,6 +170,13 @@ #define C_MAX_JOURNAL_DEPTH "\x11""max-journal-depth" #define C_MAX_JOURNAL_USAGE "\x11""max-journal-usage" +#ifdef ENABLE_ASYNC_QUERY_HANDLING +#define C_ENABLE_NUMA "\x0b""enable-numa" +#define C_UDP_ASYNC_REQS "\x0d""udp-async-req" +#define C_TCP_ASYNC_REQS "\x0d""tcp-async-req" +#define C_XDP_ASYNC_REQS "\x0d""xdp-async-req" +#endif + enum { KEYSTORE_BACKEND_PEM = 1, KEYSTORE_BACKEND_PKCS11 = 2, diff --git a/src/knot/dnssec/rrset-sign.c b/src/knot/dnssec/rrset-sign.c index ccaa23c37a..0301c64094 100644 --- a/src/knot/dnssec/rrset-sign.c +++ b/src/knot/dnssec/rrset-sign.c @@ -207,7 +207,7 @@ int knot_sign_ctx_add_data(dnssec_sign_ctx_t *ctx, * * \return Error code, KNOT_EOK if successful. */ -static int rrsigs_create_rdata(knot_rrset_t *rrsigs, dnssec_sign_ctx_t *ctx, +int rrsigs_create_rdata(knot_rrset_t *rrsigs, dnssec_sign_ctx_t *ctx, const knot_rrset_t *covered, const dnssec_key_t *key, uint32_t sig_incepted, uint32_t sig_expires, diff --git a/src/knot/dnssec/rrset-sign.h b/src/knot/dnssec/rrset-sign.h index 7114cb184c..9d8c850b15 100644 --- a/src/knot/dnssec/rrset-sign.h +++ b/src/knot/dnssec/rrset-sign.h @@ -58,6 +58,27 @@ int knot_sign_rrset2(knot_rrset_t *rrsigs, zone_sign_ctx_t *sign_ctx, knot_mm_t *mm); +/*! + * \brief Create RRSIG RDATA. + * + * \param[in] rrsigs RR set with RRSIGS. + * \param[in] ctx DNSSEC signing context. + * \param[in] covered RR covered by the signature. + * \param[in] key Key used for signing. + * \param[in] sig_incepted Timestamp of signature inception. + * \param[in] sig_expires Timestamp of signature expiration. + * \param[in] sign_flags Signing flags. + * \param[in] mm Memory context. + * + * \return Error code, KNOT_EOK if successful. 
+ */
+int rrsigs_create_rdata(knot_rrset_t *rrsigs, dnssec_sign_ctx_t *ctx,
+                        const knot_rrset_t *covered,
+                        const dnssec_key_t *key,
+                        uint32_t sig_incepted, uint32_t sig_expires,
+                        dnssec_sign_flags_t sign_flags,
+                        knot_mm_t *mm);
+
 /*!
  * \brief Add all data covered by signature into signing context.
  *
diff --git a/src/knot/dnssec/zone-keys.h b/src/knot/dnssec/zone-keys.h
index 80b200a6ab..15bc25e91f 100644
--- a/src/knot/dnssec/zone-keys.h
+++ b/src/knot/dnssec/zone-keys.h
@@ -44,6 +44,9 @@ typedef struct {
 	bool is_ksk_active_plus;
 	bool is_pub_only;
 	bool is_revoked;
+
+	int signature_validity_offset;
+	int signature_validity_period;
 } zone_key_t;
 
 knot_dynarray_declare(keyptr, zone_key_t *, DYNARRAY_VISIBILITY_NORMAL, 1)
@@ -61,6 +64,7 @@ typedef struct {
 	zone_key_t *keys;                // keys in keyset
 	dnssec_sign_ctx_t **sign_ctxs;   // signing buffers for keys in keyset
 	const kdnssec_ctx_t *dnssec_ctx; // dnssec context
+	zone_node_t *rrsig;              // RRSIG node
 } zone_sign_ctx_t;
 
 /*!
diff --git a/src/knot/include/atomic.h b/src/knot/include/atomic.h
new file mode 100644
index 0000000000..d42c26cc07
--- /dev/null
+++ b/src/knot/include/atomic.h
@@ -0,0 +1,40 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <stddef.h>
+#ifdef HAVE_STDATOMIC
+#include <stdatomic.h>
+#endif
+#ifdef HAVE_STDALIGN
+#include <stdalign.h>
+#define KNOT_ALIGN(align) alignas(align)
+#else
+#define KNOT_ALIGN(align)
+#endif
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+
+#ifdef HAVE_STDATOMIC
+#define KNOT_ATOMIC _Atomic
+#define KNOT_ATOMIC_INIT(dst, src) atomic_init(&(dst), src)
+#define KNOT_ATOMIC_GET(src, dst) (dst) = atomic_load(src)
+#define KNOT_ATOMIC_GET_RELAXED(src, dst) (dst) = atomic_load_explicit(src, memory_order_relaxed)
+#define KNOT_ATOMIC_COMPARE_EXCHANGE_WEAK(src, cmp, val) atomic_compare_exchange_weak(src, &(cmp), val)
+#define KNOT_ATOMIC_COMPARE_EXCHANGE_STRONG(src, cmp, val) atomic_compare_exchange_strong(src, &(cmp), val)
+#define KNOT_ATOMIC_GET_SUB(src, val) atomic_fetch_sub(src, val)
+#define KNOT_ATOMIC_GET_ADD(src, val) atomic_fetch_add(src, val)
+#else
+#ifdef HAVE_ATOMIC
+#define KNOT_ATOMIC volatile
+#define KNOT_ATOMIC_INIT(dst, src) __atomic_store(&(dst), &(src), __ATOMIC_SEQ_CST)
+#define KNOT_ATOMIC_GET(src, dst) __atomic_load(src, &(dst), __ATOMIC_CONSUME)
+#define KNOT_ATOMIC_GET_RELAXED(src, dst) __atomic_load(src, &(dst), __ATOMIC_RELAXED)
+#define KNOT_ATOMIC_COMPARE_EXCHANGE_WEAK(src, cmp, val) __atomic_compare_exchange(src, &(cmp), &(val), true, __ATOMIC_ACQ_REL, __ATOMIC_CONSUME)
+#define KNOT_ATOMIC_COMPARE_EXCHANGE_STRONG KNOT_ATOMIC_COMPARE_EXCHANGE_WEAK
+#define KNOT_ATOMIC_GET_SUB(src, val) __atomic_fetch_sub(src, val, __ATOMIC_ACQ_REL)
+#define KNOT_ATOMIC_GET_ADD(src, val) __atomic_fetch_add(src, val, __ATOMIC_ACQ_REL)
+#else
+#error Need atomic or stdatomic support
+#endif
+#endif
diff --git a/src/knot/include/lqueue.h b/src/knot/include/lqueue.h
new file mode 100644
index 0000000000..afff170162
--- /dev/null
+++ b/src/knot/include/lqueue.h
@@ -0,0 +1,101 @@
+#pragma once
+#include "knot/include/atomic.h"
+
+#pragma pack(push, 1)
+/*!
+ * \brief To maintain performance, the count type is uint16_t so that the whole queue state fits into an 8-byte atomic operation.
+ * This restricts the queue size to 2^16 - 2. If a larger size is needed, the type needs to be changed.
+ */
+#define KNOTD_LOCKLESS_QUEUE_COUNT_TYPE uint16_t
+
+/*!
+ * \brief Queue state.
+ */
+typedef struct knotd_lockless_queue_state {
+	KNOT_ALIGN(sizeof(KNOTD_LOCKLESS_QUEUE_COUNT_TYPE) * 4)
+	KNOTD_LOCKLESS_QUEUE_COUNT_TYPE head;          /*!< Head where insertion can be performed. */
+	KNOTD_LOCKLESS_QUEUE_COUNT_TYPE tail;          /*!< Tail where removal can be performed. */
+	KNOTD_LOCKLESS_QUEUE_COUNT_TYPE head_reserved; /*!< Head reservation to insert. */
+	KNOTD_LOCKLESS_QUEUE_COUNT_TYPE unused;        /*!< Pads the queue state size to a power of 2 for atomic operations. */
+} knotd_lockless_queue_state_t;
+
+/*!
+ * \brief The lockless queue structure. Allocate the queue by calling knotd_lockless_queue_create.
+ *
+ * The queue is implemented as a circular queue. The only variation is that insertion cannot be performed atomically, for the following reason.
+ * Inserting an item at head+1 and then updating head to head+1 are two operations on two different memory locations:
+ * the object is written to items[head+1], but the head update happens in the state.
+ * These two memory locations are too far apart to be covered by a single atomic memory operation.
+ * Performing the two operations independently would cause a race condition.
+ * If we write to the array before incrementing head, two threads can write to items[head+1], say thread 1 followed by thread 2, while head is incremented by thread 1.
+ * What is left in items[head+1] is thread 2's object, so thread 1's object is lost, and thread 2's object may be duplicated when thread 2 retries and inserts again.
+ * If we write to the array after incrementing head, a pop operation might see head != tail, assume items[head] holds valid data, and consume it before it is initialized.
+ *
+ * To eliminate the race condition, a reservation is created first by incrementing head_reserved.
+ * This guarantees that no thread other than the one that made the reservation will use that insertion position. Pop also never considers reserved areas, only committed ones (i.e. up to head).
+ * After reserving the slot and writing to it, head needs to be moved, but the current thread cannot simply move head to its own reserved position, for the following reason.
+ *
+ * Let's say the initial state is (head=1, tail=1, head_reserved=1).
+ * Thread 1 reserves position 2: (head=1, tail=1, head_reserved=2).
+ * Thread 2 reserves position 3: (head=1, tail=1, head_reserved=3).
+ * Suppose thread 1 has not yet finished assigning the value at index 2 when thread 2 completes assigning the value at index 3.
+ * If head were set to 3 at this stage, a pop operation would assume the memory at index 2 is valid, although it is still unassigned.
+ * To overcome this, a thread is allowed to move head to its reserved position only when head is exactly one below that position.
+ * With this logic, thread 2 spins until head reaches 2, and the only thread that can move head to 2 is thread 1, which does so only after setting the array object at index 2.
+ * This ensures thread safety at the cost of one extra atomic operation and a potential spin while waiting for another thread.
+ */
+typedef struct {
+	KNOT_ATOMIC knotd_lockless_queue_state_t state;
+	KNOTD_LOCKLESS_QUEUE_COUNT_TYPE size;
+	void* items[];
+} knotd_lockless_queue_t;
+#pragma pack(pop)
+
+/*!
+ * \brief Create a lockless queue structure.
+ *
+ * \param queue Queue to be initialized.
+ * \param size Maximum number of objects the queue must support. This is limited to 2^16 - 2.
+ *
+ * \retval 0 if successful.
+ */
+int knotd_lockless_queue_create(knotd_lockless_queue_t **queue, KNOTD_LOCKLESS_QUEUE_COUNT_TYPE size);
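A short usage sketch (illustrative only, not part of the patch; `wake_consumer()` is a hypothetical notification hook): a producer enqueues work items and uses `first` to decide whether a sleeping consumer must be woken.

#include "knot/include/lqueue.h"

void producer_example(void *item)
{
	knotd_lockless_queue_t *q = NULL;
	if (knotd_lockless_queue_create(&q, 1024) != 0) { /* capacity up to 2^16 - 2 */
		return;
	}

	bool first = false;
	if (knotd_lockless_queue_enqueue(q, item, &first) == 0 && first) {
		wake_consumer(); /* hypothetical: the queue just went non-empty */
	}

	void *work = knotd_lockless_queue_dequeue(q); /* NULL when the queue is empty */
	(void)work;

	knotd_lockless_queue_delete(q);
}
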
+
+/*!
+ * \brief Frees a lockless queue structure.
+ *
+ * \param queue Queue previously created by a call to knotd_lockless_queue_create.
+ */
+void knotd_lockless_queue_delete(knotd_lockless_queue_t *queue);
+
+/*!
+ * \brief Enqueue an object into a queue.
+ *
+ * \param queue Queue previously created by a call to knotd_lockless_queue_create.
+ * \param item Item to be inserted.
+ * \param first On return, if true, the inserted object is currently the first item in the queue.
+ *
+ * \retval 0 if successful.
+ */
+int knotd_lockless_queue_enqueue(knotd_lockless_queue_t *queue, void *item, bool *first);
+
+/*!
+ * \brief Dequeues an object from the queue.
+ *
+ * \param queue Queue previously created by a call to knotd_lockless_queue_create.
+ *
+ * \retval Item retrieved from the queue, NULL if no object is found.
+ */
+void* knotd_lockless_queue_dequeue(knotd_lockless_queue_t *queue);
+
+/*!
+ * \brief Get the number of objects in the queue.
+ *
+ * \param queue Queue previously created by a call to knotd_lockless_queue_create.
+ *
+ * \retval Number of objects in the queue.
+ */
+KNOTD_LOCKLESS_QUEUE_COUNT_TYPE knotd_lockless_queue_count(knotd_lockless_queue_t *queue);
diff --git a/src/knot/include/lstack.h b/src/knot/include/lstack.h
new file mode 100644
index 0000000000..a85f766adf
--- /dev/null
+++ b/src/knot/include/lstack.h
@@ -0,0 +1,74 @@
+#pragma once
+#include "knot/include/atomic.h"
+
+#pragma pack(push, 1)
+/*!
+ * \brief A node object that can be inserted into the stack.
+ */
+typedef struct knotd_lockless_stack_node {
+	struct knotd_lockless_stack_node *next; /*!< Pointer to the next node in the stack. */
+} knotd_lockless_stack_node_t;
+
+/*!
+ * \brief The head of the stack linked list.
+ */
+typedef struct {
+	KNOT_ALIGN(16)
+	knotd_lockless_stack_node_t *next; /*!< Pointer to the top/first node in the stack. */
+	uint32_t count;      /*!< Keeps track of the number of elements in the stack. */
+	uint32_t aba_cookie; /*!< Value used to determine whether the stack was updated, to ensure atomicity. */
+	                     /*!< Detects pop(item1), pop(item2), push(item1) causing issues with a pop(item1) executing in parallel. */
+} knotd_lockless_stack_head_t;
+
+/*!
+ * \brief Lockless stack structure. Call knotd_lockless_stack_init to initialize before using this structure.
+ * The stack is implemented as a linked list of nodes. Each node is preallocated as part of the item it links,
+ * so there is no size limit on the number of objects that can be added to the stack.
+ * Push and pop operations also won't fail, as the memory required for the list is already pre-allocated as part of the object being pushed.
+ */
+typedef struct {
+	KNOT_ATOMIC knotd_lockless_stack_head_t *head;
+} knotd_lockless_stack_t;
+#pragma pack(pop)
+
+/*!
+ * \brief Initialize the lockless stack structure.
+ *
+ * \param stack Stack to be initialized.
+ *
+ * \retval 0 if successful.
+ */
+int knotd_lockless_stack_init(knotd_lockless_stack_t *stack);
+
+/*!
+ * \brief Clean up the lockless stack structure. The members in the stack are not altered.
+ *
+ * \param stack Stack initialized using knotd_lockless_stack_init.
+ */
+void knotd_lockless_stack_cleanup(knotd_lockless_stack_t *stack);
+
+/*!
+ * \brief Push a node onto the lockless stack.
+ *
+ * \param stack Stack initialized using knotd_lockless_stack_init.
+ * \param node Node to be inserted into the stack.
+ */
+void knotd_lockless_stack_push(knotd_lockless_stack_t *stack, knotd_lockless_stack_node_t *node);
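An illustrative push/pop round trip (not part of the patch; `request_t` is a hypothetical object type): the stack is intrusive, so the node is embedded in the object and the owner is recovered from the popped pointer.

#include <stddef.h>
#include "knot/include/lstack.h"

typedef struct {
	knotd_lockless_stack_node_t node; /* intrusive link, embedded in the object */
	int id;
} request_t;

void stack_example(void)
{
	knotd_lockless_stack_t stack;
	if (knotd_lockless_stack_init(&stack) != 0) {
		return;
	}

	request_t req = { .id = 1 };
	knotd_lockless_stack_push(&stack, &req.node);

	knotd_lockless_stack_node_t *n = knotd_lockless_stack_pop(&stack);
	if (n != NULL) {
		/* Recover the containing object from the embedded node. */
		request_t *r = (request_t *)((char *)n - offsetof(request_t, node));
		(void)r;
	}

	knotd_lockless_stack_cleanup(&stack);
}
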
+
+/*!
+ * \brief Pop a node from the stack.
+ *
+ * \param stack Stack initialized using knotd_lockless_stack_init.
+ *
+ * \retval Node popped from the stack, NULL if no nodes are present.
+ */
+knotd_lockless_stack_node_t *knotd_lockless_stack_pop(knotd_lockless_stack_t *stack);
+
+/*!
+ * \brief Get the number of elements in the stack.
+ *
+ * \param stack Stack initialized using knotd_lockless_stack_init.
+ *
+ * \retval Count of objects in the stack.
+ */
+uint32_t knotd_lockless_stack_count(knotd_lockless_stack_t *stack);
diff --git a/src/knot/include/modcounter.h b/src/knot/include/modcounter.h
new file mode 100644
index 0000000000..1d51db2276
--- /dev/null
+++ b/src/knot/include/modcounter.h
@@ -0,0 +1,50 @@
+//
+// Copyright (c) Microsoft Corporation. All rights reserved.
+//
+
+#pragma once
+#include <assert.h>
+#include <string.h>
+#include "knot/include/module.h"
+
+//# Start: DO NOT MAKE CHANGES HERE TO ADD COUNTERS
+#define COMBINE_NAME(p1, p2) p1##_##p2
+#define COMMA_SEPARATED_P2(p1, p2) p2,
+#define STRING_P2(p1, p2) #p2,
+#define ARRAY_SIZE_P2_COMMA(p1, p2) ARRAY_SIZE(p2),
+#define NO_CHANGE_P2(p1, p2) p2
+
+#define CREATE_ENUM(ename, foreach) \
+typedef enum { \
+    foreach(COMBINE_NAME, ename, COMMA_SEPARATED_P2, _) \
+} ename##_enum_t;
+
+#define CREATE_STR_ARR(ename, foreach) \
+static const char *str_map_##ename[] = { \
+    foreach(STRING_P2, _, NO_CHANGE_P2, _) \
+};
+
+#define CREATE_SUB_ENUM(ename, foreach) CREATE_ENUM(ename, foreach)
+
+#define ARRAY_SIZE(array) (sizeof(array) / sizeof((array)[0]))
+
+#ifdef CREATE_COUNTER_DEFINITIONS
+#define CREATE_COUNTERS(ename, foreach) \
+    CREATE_ENUM(ename, foreach) \
+    CREATE_STR_ARR(ename, foreach)
+#define CREATE_DIMENSIONS(ename, foreach) \
+    CREATE_ENUM(ename, foreach) \
+    CREATE_STR_ARR(ename, foreach) \
+static char *to_str_function_##ename(uint32_t idx, uint32_t count) { assert(idx < ARRAY_SIZE(str_map_##ename)); return strdup(str_map_##ename[idx]); }
+#define CREATE_NAME_MAP(name, foreach) \
+static const knotd_mod_idx_to_str_f name##_map_to_str[] = { \
+    foreach(COMBINE_NAME, to_str_function, COMMA_SEPARATED_P2, _) \
+}; \
+static const int name##_dim_size[] = { \
+    foreach(COMBINE_NAME, str_map, ARRAY_SIZE_P2_COMMA, _) \
+};
+#else
+#define CREATE_COUNTERS(ename, foreach) CREATE_ENUM(ename, foreach)
+#define CREATE_DIMENSIONS(ename, foreach) CREATE_ENUM(ename, foreach)
+#define CREATE_NAME_MAP(name, foreach)
+#endif
+//# End of DO NOT MAKE CHANGES HERE TO ADD COUNTERS
\ No newline at end of file
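For a sense of how these macros compose, here is one plausible shape of a module-defined counter list (illustrative only, not part of the patch; the counter names are hypothetical, and the per-item expansion M2(P2, M1(P1, item)) is inferred from the expansions above):

#include "knot/include/modcounter.h"

/* Each list item expands as M2(P2, M1(P1, item)). */
#define EXAMPLE_COUNTERS(M1, P1, M2, P2) \
	M2(P2, M1(P1, queries)) \
	M2(P2, M1(P1, errors))

CREATE_COUNTERS(example, EXAMPLE_COUNTERS)
/* Without CREATE_COUNTER_DEFINITIONS this yields only the enum:
 *   typedef enum { example_queries, example_errors, } example_enum_t;
 * With it defined, the matching string table is also emitted:
 *   static const char *str_map_example[] = { "queries", "errors", };
 */
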
diff --git a/src/knot/include/module.h b/src/knot/include/module.h
index ed5a06cd62..af9ba821d9 100644
--- a/src/knot/include/module.h
+++ b/src/knot/include/module.h
@@ -396,18 +396,29 @@ typedef enum {
 	KNOTD_QUERY_FLAG_COOKIE = 1 << 3, /*!< Valid DNS Cookie indication. */
 } knotd_query_flag_t;
 
+typedef struct knotd_qdata_params knotd_qdata_params_t;
+typedef int (*async_operation_completion_callback)(knotd_qdata_params_t *params);
+
 /*! Query processing data context parameters. */
-typedef struct {
+struct knotd_qdata_params {
 	knotd_query_flag_t flags;              /*!< Current query flags. */
 	const struct sockaddr_storage *remote; /*!< Current remote address. */
+	const struct sockaddr_storage *local;  /*!< Current local address. */
 	int socket;                            /*!< Current network socket. */
 	unsigned thread_id;                    /*!< Current thread id. */
 	void *server;                          /*!< Server object private item. */
 	const struct knot_xdp_msg *xdp_msg;    /*!< Possible XDP message context. */
-} knotd_qdata_params_t;
+	void *dns_req;                         /*!< Request this param belongs to. */
+#ifdef ENABLE_ASYNC_QUERY_HANDLING
+	async_operation_completion_callback async_completed_callback; /*!< Handler for async operation completion at the layer. */
+#endif
+};
+
+typedef struct knotd_qdata knotd_qdata_t;
+typedef int (*module_async_operation_completed)(knotd_qdata_t *query, int state);
 
 /*! Query processing data context. */
-typedef struct {
+struct knotd_qdata {
 	knot_pkt_t *query;           /*!< Query to be solved. */
 	knotd_query_type_t type;     /*!< Query packet type. */
 	const knot_dname_t *name;    /*!< Currently processed name. */
@@ -424,7 +435,14 @@ typedef struct {
 	knotd_qdata_params_t *params;    /*!< Low-level processing parameters. */
 	struct knotd_qdata_extra *extra; /*!< Private items (process_query.h). */
-} knotd_qdata_t;
+
+	struct timespec query_time; /*!< Time when the query was received. */
+	void *state;                /*!< State of the query processor. */
+#ifdef ENABLE_ASYNC_QUERY_HANDLING
+	module_async_operation_completed async_completed;    /*!< Handler for completing the query asynchronously in a module. */
+	module_async_operation_completed async_in_completed; /*!< Handler for completing the in-state query asynchronously in a module. */
+#endif
+};
 
 /*!
  * Gets the local (destination) address of the query.
@@ -434,8 +452,7 @@
  *
  * \return Local address or NULL if error.
  */
-const struct sockaddr_storage *knotd_qdata_local_addr(knotd_qdata_t *qdata,
-                                                      struct sockaddr_storage *buff);
+const struct sockaddr_storage *knotd_qdata_local_addr(knotd_qdata_t *qdata);
 
 /*!
  * Gets the remote (source) address of the query.
@@ -471,11 +488,16 @@ typedef enum {
 	KNOTD_STATE_DONE  = 4, /*!< Finished. */
 	KNOTD_STATE_FAIL  = 5, /*!< Error. */
 	KNOTD_STATE_FINAL = 6, /*!< Finished and finalized (QNAME, EDNS, TSIG). */
+	KNOTD_STATE_ZONE_LOOKUPDONE = 7, /*!< Positive result for zone fetch. */
+#ifdef ENABLE_ASYNC_QUERY_HANDLING
+	KNOT_STATE_ASYNC = 100, //!< The request needs to be handled asynchronously. Value should match KNOT_LAYER_STATE_ASYNC.
+#endif
 } knotd_state_t;
 
 /*! brief Internet query processing states. */
 typedef enum {
 	KNOTD_IN_STATE_BEGIN,      /*!< Begin name resolution. */
+	KNOTD_IN_STATE_LOOKUPDONE, /*!< Name lookup completed for the qname. */
 	KNOTD_IN_STATE_NODATA,     /*!< Positive result with NO data. */
 	KNOTD_IN_STATE_HIT,        /*!< Positive result. */
 	KNOTD_IN_STATE_MISS,       /*!< Negative result. */
 	KNOTD_IN_STATE_FOLLOW,     /*!< Resolution not complete (CNAME/DNAME chain). */
 	KNOTD_IN_STATE_TRUNC,      /*!< Finished, packet size limit encountered. */
 	KNOTD_IN_STATE_ERROR,      /*!< Resolution failed. */
+#ifdef ENABLE_ASYNC_QUERY_HANDLING
+	KNOTD_IN_STATE_ASYNC = 100, /*!< The request needs to be handled asynchronously. */
+#endif
 } knotd_in_state_t;
 
 /*! Query module processing stages. */
 typedef enum {
 	KNOTD_STAGE_BEGIN = 0,  /*!< Before query processing. */
+	KNOTD_STAGE_ZONE_LOOKUP,/*!< Before zone lookup is done. */
 	KNOTD_STAGE_PREANSWER,  /*!< Before section processing. */
+	KNOTD_STAGE_NAME_LOOKUP,/*!< Before name lookup is done. */
 	KNOTD_STAGE_ANSWER,     /*!< Answer section processing. */
 	KNOTD_STAGE_AUTHORITY,  /*!< Authority section processing. */
 	KNOTD_STAGE_ADDITIONAL, /*!< Additional section processing. */
*/ diff --git a/src/knot/modules/delay/Makefile.inc b/src/knot/modules/delay/Makefile.inc new file mode 100644 index 0000000000..6f1ff2e83a --- /dev/null +++ b/src/knot/modules/delay/Makefile.inc @@ -0,0 +1,13 @@ +knot_modules_delay_la_SOURCES = knot/modules/delay/delay.c +EXTRA_DIST += knot/modules/delay/delay.rst + +if STATIC_MODULE_delay +libknotd_la_SOURCES += $(knot_modules_delay_la_SOURCES) +endif + +if SHARED_MODULE_delay +knot_modules_delay_la_LDFLAGS = $(KNOTD_MOD_LDFLAGS) +knot_modules_delay_la_CPPFLAGS = $(KNOTD_MOD_CPPFLAGS) $(DELAY_CFLAGS) +knot_modules_delay_la_LIBADD = $(DELAY_LIBS) +pkglib_LTLIBRARIES += knot/modules/delay.la +endif \ No newline at end of file diff --git a/src/knot/modules/delay/delay.c b/src/knot/modules/delay/delay.c new file mode 100644 index 0000000000..29a41f8c61 --- /dev/null +++ b/src/knot/modules/delay/delay.c @@ -0,0 +1,240 @@ +#include +#include "contrib/time.h" +#include "knot/include/module.h" +#include "contrib/mempattern.h" +#include "knot/query/layer.h" +#include +#include "knot/include/lqueue.h" +#include + +/* This module is for demonstation of async mode */ + +#define MOD_DELAY "\x05""delay" +#define MOD_THREADS "\x07""threads" +#define MOD_ALL "\x03""all" +#define MOD_ID "\x08""identity" + +const yp_item_t delay_conf[] = { + { MOD_DELAY, YP_TINT, YP_VINT = { 1, INT32_MAX, 10} }, + { MOD_THREADS, YP_TINT, YP_VINT = { 1, INT8_MAX, 4} }, + { MOD_ALL, YP_TBOOL, YP_VBOOL = { false } }, + { MOD_ID, YP_TSTR, YP_VNONE }, + { NULL } +}; + +typedef struct queue_node { + struct timespec wake_time; + knotd_qdata_t *qdata; + bool is_in_state; + int return_state; +} queue_node_t; + +typedef struct { + char id[64]; + int delay_ms; + bool exit; + int thread_count; + knotd_lockless_queue_t *queue; + pthread_t dispatch_thread[]; +} delay_ctx_t; + +int delay_conf_check(knotd_conf_check_args_t *args) +{ + return KNOT_EOK; +} + +static int delay_query(knotd_qdata_t *qdata, int return_state, bool is_in_state, knotd_mod_t *mod) +{ + struct timespec curr_time; + int ret = KNOT_EOK; + if (clock_gettime(CLOCK_MONOTONIC, &curr_time) == 0) { + delay_ctx_t *ctx = knotd_mod_ctx(mod); + queue_node_t *node = mm_alloc(qdata->mm, sizeof(*node)); + if (node == NULL) { + ret = KNOT_ENOMEM; + } + else { + long val = curr_time.tv_nsec + ctx->delay_ms * 1000 * 1000; + node->wake_time.tv_nsec = val % (1000 * 1000 * 1000); + node->wake_time.tv_sec = curr_time.tv_sec + val / (1000 * 1000 * 1000); + node->qdata = qdata; + node->is_in_state = is_in_state; + node->return_state = return_state; + + bool first; + if ((ret = knotd_lockless_queue_enqueue(ctx->queue, node, &first)) != KNOT_EOK) { + mm_free(qdata->mm, node); + ret = KNOT_ESYSTEM; + } + } + } else { + ret = KNOT_ESYSTEM; + } + + return ret; +} + +static knotd_state_t delay_message(knotd_state_t state, knot_pkt_t *pkt, + knotd_qdata_t *qdata, knotd_mod_t *mod) +{ + return delay_query(qdata, state, false, mod) == KNOT_EOK ? KNOT_STATE_ASYNC : state; +} + +static knotd_in_state_t delay_message_in(knotd_in_state_t state, knot_pkt_t *pkt, + knotd_qdata_t *qdata, knotd_mod_t *mod) +{ + return delay_query(qdata, state, true, mod) == KNOT_EOK ? 
KNOTD_IN_STATE_ASYNC : state; +} + +static bool check_time(queue_node_t *node, struct timespec *delay_time) { + delay_time->tv_sec = 0; + delay_time->tv_nsec = 1000 * 1000; + struct timespec curr_time; + if (clock_gettime(CLOCK_MONOTONIC, &curr_time) == 0) { + if (curr_time.tv_sec > node->wake_time.tv_sec + || (curr_time.tv_sec == node->wake_time.tv_sec + && curr_time.tv_nsec >= node->wake_time.tv_nsec)) { + return true; + } + + delay_time->tv_nsec = node->wake_time.tv_nsec - curr_time.tv_nsec; + delay_time->tv_sec = node->wake_time.tv_sec - curr_time.tv_sec; + if (node->wake_time.tv_nsec < curr_time.tv_nsec) { + delay_time->tv_nsec += 1000 * 1000 * 1000; + delay_time->tv_sec -= 1; + } + } + + return false; +} + +static struct timespec dispatch_queue(delay_ctx_t *ctx, bool all) +{ + queue_node_t *node; + struct timespec sleep_time = {0, 1000 * 1000}; + while (true) { + node = knotd_lockless_queue_dequeue(ctx->queue); + + if (!node) { + break; + } + + /* Should be the last call on the object or memory. + * Further access to node, qdata or mm will result in race condition as network thread can processs and cleanup */ + if (node->is_in_state) { + node->qdata->async_in_completed(node->qdata, node->return_state); + } else { + node->qdata->async_completed(node->qdata, node->return_state); + } + } + + return sleep_time; +} + +static void *dispatch_thread(void *d) +{ + delay_ctx_t *ctx = d; + while (!ctx->exit) { + queue_node_t *node = knotd_lockless_queue_dequeue(ctx->queue); + + if (node == NULL) { + struct timespec tenth_ms = { 0, 100000}; + nanosleep(&tenth_ms, &tenth_ms); + continue; + } else { + struct timespec delay_time; + if (!check_time(node, &delay_time)) { + nanosleep(&delay_time, &delay_time); + } + } + + /* Should be the last call on the object or memory. + * Further access to node, qdata or mm will result in race condition as network thread can processs and cleanup */ + if (node->is_in_state) { + node->qdata->async_in_completed(node->qdata, node->return_state); + } else { + node->qdata->async_completed(node->qdata, node->return_state); + } + } + + return d; +} + +int delay_load(knotd_mod_t *mod) +{ + knotd_conf_t threads = knotd_conf_mod(mod, MOD_THREADS); + /* Create delay context. */ + delay_ctx_t *ctx = calloc(1, sizeof(*ctx) + sizeof(pthread_t) * threads.single.integer); + if (ctx == NULL) { + return KNOT_ENOMEM; + } + + int rc; + if ((rc = knotd_lockless_queue_create(&ctx->queue, 16 * 1024))) { + free(ctx); + return rc; + } + + for (int i = 0; i < threads.single.integer; i++) { + if(pthread_create(&ctx->dispatch_thread[i], NULL, dispatch_thread, ctx) != 0) + { + ctx->exit = 1; + for (int j = 0; j < i; j++) { + void *retval; + pthread_join(ctx->dispatch_thread[j], &retval); + } + knotd_lockless_queue_delete(ctx->queue); + free(ctx); + return KNOT_ESYSTEM; + } + ctx->thread_count++; + } + + /* Set delay. */ + knotd_conf_t conf = knotd_conf_mod(mod, MOD_DELAY); + ctx->delay_ms = conf.single.integer; + + /* Set id. */ + conf = knotd_conf_mod(mod, MOD_ID); + if (conf.single.string) { + strncpy(ctx->id, conf.single.string, sizeof(ctx->id)); + ctx->id[sizeof(ctx->id) - 1] = '\0'; + } + + /* Set scope. 
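The wake-time bookkeeping above (in delay_query and check_time) normalizes tv_nsec into [0, 1e9) by hand. The same borrow/carry logic, condensed into standalone helpers for clarity; this is a sketch, not part of the patch, and it widens the intermediate to 64 bits to sidestep overflow for large delays:

#include <stdint.h>
#include <time.h>

/* Deadline = now + delay_ms, with tv_nsec kept in [0, 1e9). */
static struct timespec deadline_after_ms(struct timespec now, long delay_ms)
{
	int64_t ns = (int64_t)now.tv_nsec + (int64_t)delay_ms * 1000000;
	struct timespec t = {
		.tv_sec  = now.tv_sec + (time_t)(ns / 1000000000),
		.tv_nsec = (long)(ns % 1000000000),
	};
	return t;
}

/* Remaining = deadline - now; borrows one second when nanoseconds underflow. */
static struct timespec remaining(struct timespec deadline, struct timespec now)
{
	struct timespec d = { deadline.tv_sec - now.tv_sec,
	                      deadline.tv_nsec - now.tv_nsec };
	if (d.tv_nsec < 0) {
		d.tv_nsec += 1000000000;
		d.tv_sec -= 1;
	}
	return d;
}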
*/
+	conf = knotd_conf_mod(mod, MOD_ALL);
+	bool all = conf.single.boolean;
+
+	knotd_mod_ctx_set(mod, ctx);
+
+	knotd_mod_hook(mod, KNOTD_STAGE_END, delay_message);
+	if (all) {
+		knotd_mod_hook(mod, KNOTD_STAGE_BEGIN, delay_message);
+		knotd_mod_in_hook(mod, KNOTD_STAGE_NAME_LOOKUP, delay_message_in);
+		knotd_mod_in_hook(mod, KNOTD_STAGE_PREANSWER, delay_message_in);
+		knotd_mod_in_hook(mod, KNOTD_STAGE_ANSWER, delay_message_in);
+		knotd_mod_in_hook(mod, KNOTD_STAGE_AUTHORITY, delay_message_in);
+		knotd_mod_in_hook(mod, KNOTD_STAGE_ADDITIONAL, delay_message_in);
+	}
+
+	return KNOT_EOK;
+}
+
+void delay_unload(knotd_mod_t *mod)
+{
+	delay_ctx_t *ctx = knotd_mod_ctx(mod);
+	if (ctx) {
+		ctx->exit = true;
+		void *retval;
+		for (int i = 0; i < ctx->thread_count; i++) {
+			pthread_join(ctx->dispatch_thread[i], &retval);
+		}
+		knotd_mod_ctx_set(mod, NULL);
+		dispatch_queue(ctx, true);
+		knotd_lockless_queue_delete(ctx->queue);
+		free(ctx);
+	}
+}
+
+KNOTD_MOD_API(delay, KNOTD_MOD_FLAG_SCOPE_ANY,
+              delay_load, delay_unload, delay_conf, delay_conf_check);
diff --git a/src/knot/modules/delay/delay.rst b/src/knot/modules/delay/delay.rst
new file mode 100644
index 0000000000..39954b5043
--- /dev/null
+++ b/src/knot/modules/delay/delay.rst
@@ -0,0 +1,50 @@
+.. _mod-delay:
+
+``delay`` – delay the query response
+====================================
+
+A module for delaying the response to a query, intended to exercise the
+asynchronous query path.
+
+Example
+-------
+
+::
+
+ mod-delay:
+   - id: delay_10ms
+     delay: 10
+
+ template:
+   - id: default
+     global-module: mod-delay/delay_10ms
+
+The above configuration delays each response by 10 milliseconds.
+
+Module reference
+----------------
+
+For delaying query responses, use this module.
+
+::
+
+ mod-delay:
+   - id: STR
+     delay: INT
+     all: BOOL
+
+id
+..
+
+A module identifier.
+
+delay
+.....
+
+Number of milliseconds to delay each hooked module call.
+*Required*
+
+all
+...
+
+Whether every module hook is delayed, or only the final response.
+When all hooks are delayed, the total delay of a query is the number of hooked
+stages multiplied by the configured delay; see the configuration sketch below.
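A fuller configuration sketch that delays every hook and sizes the dispatch thread pool explicitly (values are illustrative):

::

 mod-delay:
   - id: delay_all
     delay: 5
     threads: 2
     all: on

 template:
   - id: default
     global-module: mod-delay/delay_all

With all seven hooked stages in play (begin, end, and the five in-hooks registered in delay_load), this adds roughly 7 * 5 = 35 ms per query.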
\ No newline at end of file diff --git a/src/knot/modules/dnstap/Makefile.inc b/src/knot/modules/dnstap/Makefile.inc index e69b56c7b9..d1536ae1e8 100644 --- a/src/knot/modules/dnstap/Makefile.inc +++ b/src/knot/modules/dnstap/Makefile.inc @@ -1,4 +1,7 @@ -knot_modules_dnstap_la_SOURCES = knot/modules/dnstap/dnstap.c +knot_modules_dnstap_la_SOURCES = knot/modules/dnstap/dnstap.c \ + knot/modules/dnstap/dnstapcounter.c \ + knot/modules/dnstap/dnstapcounter.h + EXTRA_DIST += knot/modules/dnstap/dnstap.rst if STATIC_MODULE_dnstap diff --git a/src/knot/modules/dnstap/dnstap.c b/src/knot/modules/dnstap/dnstap.c index b74cc5274a..2bf62664dc 100644 --- a/src/knot/modules/dnstap/dnstap.c +++ b/src/knot/modules/dnstap/dnstap.c @@ -17,18 +17,27 @@ #include #include +#ifdef ENABLE_THROTTLE_DNSTAP_LOGS +#include "knot/common/qps_limiter.h" +#endif #include "contrib/dnstap/dnstap.h" #include "contrib/dnstap/dnstap.pb-c.h" #include "contrib/dnstap/message.h" #include "contrib/dnstap/writer.h" #include "contrib/time.h" #include "knot/include/module.h" +#include "dnstapcounter.h" #define MOD_SINK "\x04""sink" #define MOD_IDENTITY "\x08""identity" #define MOD_VERSION "\x07""version" #define MOD_QUERIES "\x0B""log-queries" #define MOD_RESPONSES "\x0D""log-responses" +#define MOD_COMBINED "\x0F""query-with-resp" +#ifdef ENABLE_THROTTLE_DNSTAP_LOGS +#define MOD_QPS_LIMIT "\x09""qps-limit" +#define MOD_ERR_LIMIT "\x09""err-limit" +#endif const yp_item_t dnstap_conf[] = { { MOD_SINK, YP_TSTR, YP_VNONE }, @@ -36,6 +45,11 @@ const yp_item_t dnstap_conf[] = { { MOD_VERSION, YP_TSTR, YP_VNONE }, { MOD_QUERIES, YP_TBOOL, YP_VBOOL = { true } }, { MOD_RESPONSES, YP_TBOOL, YP_VBOOL = { true } }, + { MOD_COMBINED, YP_TBOOL, YP_VBOOL = { false } }, +#ifdef ENABLE_THROTTLE_DNSTAP_LOGS + { MOD_QPS_LIMIT, YP_TINT, YP_VINT = { 0, INT32_MAX, 0 } }, + { MOD_ERR_LIMIT, YP_TINT, YP_VINT = { 0, INT32_MAX, 0 } }, +#endif { NULL } }; @@ -56,10 +70,14 @@ typedef struct { size_t identity_len; char *version; size_t version_len; + bool log_query_with_resp; +#ifdef ENABLE_THROTTLE_DNSTAP_LOGS + qps_limiter_t qps_limiter; +#endif } dnstap_ctx_t; static knotd_state_t log_message(knotd_state_t state, const knot_pkt_t *pkt, - knotd_qdata_t *qdata, knotd_mod_t *mod) + knotd_qdata_t *qdata, knotd_mod_t *mod, struct timespec *tv) { assert(pkt && qdata && mod); @@ -73,14 +91,17 @@ static knotd_state_t log_message(knotd_state_t state, const knot_pkt_t *pkt, struct fstrm_iothr_queue *ioq = fstrm_iothr_get_input_queue_idx(ctx->iothread, qdata->params->thread_id); - /* Unless we want to measure the time it takes to process each query, - * we can treat Q/R times the same. */ - struct timespec tv = { .tv_sec = time(NULL) }; - + void *wire2 = NULL; + size_t len_wire2 = 0; /* Determine query / response. */ Dnstap__Message__Type msgtype = DNSTAP__MESSAGE__TYPE__AUTH_QUERY; if (knot_wire_get_qr(pkt->wire)) { msgtype = DNSTAP__MESSAGE__TYPE__AUTH_RESPONSE; + + if (ctx->log_query_with_resp) { + wire2 = qdata->query->wire; + len_wire2 = qdata->query->size; + } } /* Determine whether we run on UDP/TCP. */ @@ -90,12 +111,11 @@ static knotd_state_t log_message(knotd_state_t state, const knot_pkt_t *pkt, } /* Create a dnstap message. 
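The implementation continues below; before that, the options registered in dnstap_conf above surface in the server configuration roughly as follows (sink path and values illustrative; qps-limit and err-limit require a build with throttling support):

::

 mod-dnstap:
   - id: limited
     sink: /tmp/capture.tap
     qps-limit: 10
     err-limit: 100
     query-with-resp: on

 template:
   - id: default
     global-module: mod-dnstap/limited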
*/ - struct sockaddr_storage buff; Dnstap__Message msg; int ret = dt_message_fill(&msg, msgtype, (const struct sockaddr *)knotd_qdata_remote_addr(qdata), - (const struct sockaddr *)knotd_qdata_local_addr(qdata, &buff), - protocol, pkt->wire, pkt->size, &tv); + (const struct sockaddr *)knotd_qdata_local_addr(qdata), + protocol, pkt->wire, pkt->size, tv, wire2, len_wire2, &qdata->query_time); if (ret != KNOT_EOK) { return state; } @@ -140,16 +160,98 @@ static knotd_state_t dnstap_message_log_query(knotd_state_t state, knot_pkt_t *p knotd_qdata_t *qdata, knotd_mod_t *mod) { assert(qdata); + struct timespec tv; + clock_gettime(CLOCK_REALTIME_COARSE, &tv); + + knotd_mod_stats_incr( + mod, + qdata->params->thread_id, + dnstap_counter_log_emitted, + log_emitted_QUERY, + 1); - return log_message(state, qdata->query, qdata, mod); + return log_message(state, qdata->query, qdata, mod, &tv); } /*! \brief Submit message - response. */ static knotd_state_t dnstap_message_log_response(knotd_state_t state, knot_pkt_t *pkt, knotd_qdata_t *qdata, knotd_mod_t *mod) { - return log_message(state, pkt, qdata, mod); + struct timespec tv; + clock_gettime(CLOCK_REALTIME_COARSE, &tv); + + knotd_mod_stats_incr( + mod, + qdata->params->thread_id, + dnstap_counter_log_emitted, + log_emitted_RESPONSE, + 1); + + return log_message(state, pkt, qdata, mod, &tv); +} + + +#ifdef ENABLE_THROTTLE_DNSTAP_LOGS +/*! \brief Submit message - query. */ +static knotd_state_t dnstap_message_log_query_limit(knotd_state_t state, knot_pkt_t *pkt, + knotd_qdata_t *qdata, knotd_mod_t *mod) +{ + struct timespec tv; + clock_gettime(CLOCK_REALTIME_COARSE, &tv); + + dnstap_ctx_t *ctx = knotd_mod_ctx(mod); + if (qps_limiter_is_allowed(&ctx->qps_limiter, tv.tv_sec, false)) { + knotd_mod_stats_incr( + mod, + qdata->params->thread_id, + dnstap_counter_log_emitted, + log_emitted_QUERY, + 1); + + return log_message(state, qdata->query, qdata, mod, &tv); + } else { + knotd_mod_stats_incr( + mod, + qdata->params->thread_id, + dnstap_counter_log_dropped, + log_dropped_QUERY, + 1); + + return state; + } +} + +/*! \brief Submit message - response. */ +static knotd_state_t dnstap_message_log_response_limit(knotd_state_t state, knot_pkt_t *pkt, + knotd_qdata_t *qdata, knotd_mod_t *mod) +{ + struct timespec tv; + clock_gettime(CLOCK_REALTIME_COARSE, &tv); + + bool err = KNOT_RCODE_SERVFAIL == knot_wire_get_rcode(pkt->wire); + + dnstap_ctx_t *ctx = knotd_mod_ctx(mod); + if (qps_limiter_is_allowed(&ctx->qps_limiter, tv.tv_sec, err)) { + knotd_mod_stats_incr( + mod, + qdata->params->thread_id, + dnstap_counter_log_emitted, + log_emitted_RESPONSE, + 1); + + return log_message(state, pkt, qdata, mod, &tv); + } else { + knotd_mod_stats_incr( + mod, + qdata->params->thread_id, + dnstap_counter_log_dropped, + log_dropped_RESPONSE, + 1); + + return state; + } } +#endif /*! \brief Create a UNIX socket sink. */ static struct fstrm_writer* dnstap_unix_writer(const char *path) @@ -261,6 +363,28 @@ int dnstap_load(knotd_mod_t *mod) conf = knotd_conf_mod(mod, MOD_RESPONSES); const bool log_responses = conf.single.boolean; +#ifdef ENABLE_THROTTLE_DNSTAP_LOGS + /* Get QPS_limit. */ + conf = knotd_conf_mod(mod, MOD_QPS_LIMIT); + ctx->qps_limiter.log_qps = conf.single.integer; + + /* Get Err QPS_limit. */ + conf = knotd_conf_mod(mod, MOD_ERR_LIMIT); + ctx->qps_limiter.log_err_qps = conf.single.integer; + + /* Get Log query with resp. 
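The qps_limiter_t implementation (knot/common/qps_limiter.h) is not part of this hunk. A minimal sketch of the overdraft token bucket that the limiting hooks above and the documentation below describe, keyed off the two limits visible here (log_qps, log_err_qps); the internals are an assumption for illustration, not the project's actual code:

#include <stdbool.h>
#include <stdint.h>
#include <time.h>

typedef struct {
	int32_t log_qps;      /* per-second budget for normal logs */
	int32_t log_err_qps;  /* total per-second ceiling errors may reach */
	int64_t tokens;       /* goes negative when errors overdraw */
	time_t  last_sec;
} qps_bucket_t;

/* Refill once per second; unused tokens expire (capped at one budget). */
static bool bucket_allow(qps_bucket_t *b, time_t now, bool is_err)
{
	if (now != b->last_sec) {
		b->tokens += (int64_t)(now - b->last_sec) * b->log_qps;
		if (b->tokens > b->log_qps) {
			b->tokens = b->log_qps;
		}
		b->last_sec = now;
	}
	if (b->tokens > 0) {
		b->tokens--;          /* normal and error logs share the budget */
		return true;
	}
	/* Errors may borrow from future seconds, down to qps - err_limit. */
	if (is_err && b->tokens > (int64_t)b->log_qps - b->log_err_qps) {
		b->tokens--;
		return true;
	}
	return false;
}

Traced against the documentation below: with qps-limit 10 and err-limit 100, ten successes drain the bucket to 0, errors may push it to -90 (90 extra logs), and the next second refills to -80, so only errors can log until the debt is repaid.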
*/ + conf = knotd_conf_mod(mod, MOD_COMBINED); + ctx->log_query_with_resp = conf.single.boolean; + + bool limit_by_qps = ctx->qps_limiter.log_qps || ctx->qps_limiter.log_err_qps; + + if (limit_by_qps) { + if (qps_limiter_init(&ctx->qps_limiter) != KNOT_EOK) { + goto fail; + } + } +#endif + /* Initialize the writer and the options. */ struct fstrm_writer *writer = dnstap_writer(sink); if (writer == NULL) { @@ -288,16 +412,31 @@ int dnstap_load(knotd_mod_t *mod) /* Hook to the query plan. */ if (log_queries) { - knotd_mod_hook(mod, KNOTD_STAGE_BEGIN, dnstap_message_log_query); + knotd_mod_hook(mod, KNOTD_STAGE_BEGIN, +#ifdef ENABLE_THROTTLE_DNSTAP_LOGS + limit_by_qps ? dnstap_message_log_query_limit : +#endif + dnstap_message_log_query); } if (log_responses) { - knotd_mod_hook(mod, KNOTD_STAGE_END, dnstap_message_log_response); + knotd_mod_hook(mod, KNOTD_STAGE_END, +#ifdef ENABLE_THROTTLE_DNSTAP_LOGS + limit_by_qps ? dnstap_message_log_response_limit : +#endif + dnstap_message_log_response); } + if (KNOT_EOK != dnstap_create_counters(mod)) { + goto fail; + } + return KNOT_EOK; fail: knotd_mod_log(mod, LOG_ERR, "failed to init sink '%s'", sink); +#ifdef ENABLE_THROTTLE_DNSTAP_LOGS + qps_limiter_cleanup(&ctx->qps_limiter); +#endif free(ctx->identity); free(ctx->version); free(ctx); @@ -308,8 +447,12 @@ fail: void dnstap_unload(knotd_mod_t *mod) { dnstap_ctx_t *ctx = knotd_mod_ctx(mod); + dnstap_delete_counters(mod); fstrm_iothr_destroy(&ctx->iothread); +#ifdef ENABLE_THROTTLE_DNSTAP_LOGS + qps_limiter_cleanup(&ctx->qps_limiter); +#endif free(ctx->identity); free(ctx->version); free(ctx); diff --git a/src/knot/modules/dnstap/dnstap.rst b/src/knot/modules/dnstap/dnstap.rst index 02071dc156..5a2668cea4 100644 --- a/src/knot/modules/dnstap/dnstap.rst +++ b/src/knot/modules/dnstap/dnstap.rst @@ -102,3 +102,32 @@ log-responses If enabled, response messages will be logged. *Default:* on + +qps-limit +......... +If set to non-zero, the server will log maximum of qps-limit queries per second and drop other logs. +This is a token bucket approach of how many QPS will be logged every second. Any unused tokens will expire at the end of the second. + +*Default:* 0 + +err-limit +......... +If set to non-zero, the server will log errors upto the err-limit. qps-limit applies to the error logs as well. +But, error queries are allowed to consume additional tokens from future seconds upto err-limit. +If more error queries are logged, that reduces the number of tokens available for regular queries in future without changing logging qps. +In the worst case, if there are too many errors, every second releases qps-limit tokens and only consumed by error queries. + +Ex, if qps-limit is 10 and err-limit is 100, after first 10 successful queries are logged, success queries on that second are ignored. +During that second, if failures happen, 90 more failure queries are logged. At the beginning of next second, the available token to normal queries becomes -90 + 10 = -80. +In this case, error has consumed the qps token for next 9 seconds and no success query logs will be added for next 9 seconds. +But, if there were errors during those seconds, it still has 10 tokens per second to consume on error side. So upto 10 errors can be logged during those seconds. +If nothing is logged for next 9 seconds, at the end of 9 seconds, the system resets to default limit of regular queries with 10 tokens, and errors with 100 tokens. +In the long run, errors and success can consume only qps-limit. 
But errors are prioritized and allowed to consume more tokens at the expense of success. + +*Default:* 0 + +query-with-resp +............... +If set to on, logs query packet along with response packet to reduce round trip and also to make analysis easier. + +*Default:* off \ No newline at end of file diff --git a/src/knot/modules/dnstap/dnstapcounter.c b/src/knot/modules/dnstap/dnstapcounter.c new file mode 100644 index 0000000000..3d6743ddd3 --- /dev/null +++ b/src/knot/modules/dnstap/dnstapcounter.c @@ -0,0 +1,20 @@ + +#define CREATE_COUNTER_DEFINITIONS +#include "dnstapcounter.h" + +int dnstap_create_counters(knotd_mod_t *mod) { + int rc = 0; + for(int i = 0; i < dnstap_counter_max; i++) { + rc = knotd_mod_stats_add(mod, str_map_dnstap_counter[i], dnstap_counter_dim_size[i], dnstap_counter_map_to_str[i]); + if (rc) { + break; + } + } + + return rc; +} + +void dnstap_delete_counters(knotd_mod_t *mod) { + // This API is not exposed in knot. So nothing to free at this stage. + // knotd_mod_stats_free(mod); +} \ No newline at end of file diff --git a/src/knot/modules/dnstap/dnstapcounter.h b/src/knot/modules/dnstap/dnstapcounter.h new file mode 100644 index 0000000000..44807f2f1c --- /dev/null +++ b/src/knot/modules/dnstap/dnstapcounter.h @@ -0,0 +1,42 @@ +// +// Copyright (c) Microsoft Corporation. All rights reserved. +// + +#pragma once +#include "knot/include/modcounter.h" + +#define FOREACH_DNSTAP_COUNTER(OPS1, param1, OPS2, param2) \ + OPS2(param2, OPS1(param1, log_emitted)) \ + OPS2(param2, OPS1(param1, log_dropped)) \ + OPS2(param2, OPS1(param1, max)) + +#define FOREACH_LOG_EMITTED(OPS1, param1, OPS2, param2) \ + OPS2(param2, OPS1(param1, QUERY)) \ + OPS2(param2, OPS1(param1, RESPONSE)) + +#define FOREACH_LOG_DROPPED(OPS1, param1, OPS2, param2) \ + OPS2(param2, OPS1(param1, QUERY)) \ + OPS2(param2, OPS1(param1, RESPONSE)) + +#define FOREACH_MAX(OPS1, param1, OPS2, param2) OPS2(param2, OPS1(param1, max)) + +CREATE_COUNTERS(dnstap_counter, FOREACH_DNSTAP_COUNTER) +CREATE_DIMENSIONS(log_emitted, FOREACH_LOG_EMITTED) +CREATE_DIMENSIONS(log_dropped, FOREACH_LOG_DROPPED) +CREATE_DIMENSIONS(max, FOREACH_MAX) +CREATE_NAME_MAP(dnstap_counter, FOREACH_DNSTAP_COUNTER) + +/*! + * \brief Creates all counters for module. + * + * \param mod Module handle for the counters. + * + */ +int dnstap_create_counters(knotd_mod_t *mod); + +/*! + * \brief cleans up the counters for the module. + * + * \param mod Module handle for the counters. + */ +void dnstap_delete_counters(knotd_mod_t *mod); \ No newline at end of file diff --git a/src/knot/modules/probe/probe.c b/src/knot/modules/probe/probe.c index 963fc7c21e..af92a06c66 100644 --- a/src/knot/modules/probe/probe.c +++ b/src/knot/modules/probe/probe.c @@ -80,8 +80,7 @@ static knotd_state_t export(knotd_state_t state, knot_pkt_t *pkt, ATOMIC_SET(ctx->last_times[idx], now_ns); // Prepare data sources. 
- struct sockaddr_storage buff; - const struct sockaddr_storage *local = knotd_qdata_local_addr(qdata, &buff); + const struct sockaddr_storage *local = knotd_qdata_local_addr(qdata); const struct sockaddr_storage *remote = knotd_qdata_remote_addr(qdata); bool tcp = !(qdata->params->flags & KNOTD_QUERY_FLAG_LIMIT_SIZE); diff --git a/src/knot/modules/queryacl/queryacl.c b/src/knot/modules/queryacl/queryacl.c index e787083222..9711769426 100644 --- a/src/knot/modules/queryacl/queryacl.c +++ b/src/knot/modules/queryacl/queryacl.c @@ -52,8 +52,7 @@ static knotd_state_t queryacl_process(knotd_state_t state, knot_pkt_t *pkt, } if (ctx->allow_iface.count > 0) { - struct sockaddr_storage buff; - const struct sockaddr_storage *addr = knotd_qdata_local_addr(qdata, &buff); + const struct sockaddr_storage *addr = knotd_qdata_local_addr(qdata); if (!knotd_conf_addr_range_match(&ctx->allow_iface, addr)) { qdata->rcode = KNOT_RCODE_NOTAUTH; return KNOTD_STATE_FAIL; diff --git a/src/knot/modules/stats/stats.c b/src/knot/modules/stats/stats.c index cedbd144ce..5024e8d0b4 100644 --- a/src/knot/modules/stats/stats.c +++ b/src/knot/modules/stats/stats.c @@ -32,6 +32,8 @@ #define MOD_QTYPE "\x0A""query-type" #define MOD_QSIZE "\x0A""query-size" #define MOD_RSIZE "\x0A""reply-size" +#define MOD_NO_RESP "\x07""no-resp" +#define MOD_RESP_TIME "\x09""resp-time" #define OTHER "other" @@ -49,6 +51,8 @@ const yp_item_t stats_conf[] = { { MOD_QTYPE, YP_TBOOL, YP_VNONE }, { MOD_QSIZE, YP_TBOOL, YP_VNONE }, { MOD_RSIZE, YP_TBOOL, YP_VNONE }, + { MOD_NO_RESP, YP_TBOOL, YP_VNONE }, + { MOD_RESP_TIME, YP_TBOOL, YP_VBOOL = { true } }, { NULL } }; @@ -66,6 +70,8 @@ enum { CTR_QTYPE, CTR_QSIZE, CTR_RSIZE, + CTR_NO_RESP, + CTR_RESP_TIME, }; typedef struct { @@ -82,6 +88,8 @@ typedef struct { bool qtype; bool qsize; bool rsize; + bool no_resp; + bool resp_time; } stats_t; typedef struct { @@ -171,6 +179,21 @@ static char *resp_bytes_to_str(uint32_t idx, uint32_t count) } } +enum { + WITH_FAILURE, + WITHOUT_FAILURE, + NO_RESP_TYPE_COUNT +}; + +static char *no_resp_to_str(uint32_t idx, uint32_t count) +{ + switch (idx) { + case WITH_FAILURE: return strdup("failed"); + case WITHOUT_FAILURE: return strdup("success"); + default: assert(0); return NULL; + } +} + enum { EDNS_REQ = 0, EDNS_RESP, @@ -334,6 +357,29 @@ static char *rsize_to_str(uint32_t idx, uint32_t count) return size_to_str(idx, count); } +static uint32_t resp_time_bucket[] = { 0, 5, 10, 20, 50, 100, 150, 200, 250, 500, 750, 1000, 1300, 1500, 1800, 2000, 3000, 5000, 0xFFFFFFFF }; +static char *resp_time_to_str(uint32_t idx, uint32_t count) +{ + char str[64]; + if (idx < count - 1) { + snprintf(str, sizeof(str), "%u-%u", resp_time_bucket[idx], resp_time_bucket[idx+1]); + } else { + snprintf(str, sizeof(str), "%u-max", resp_time_bucket[idx]); + } + return strdup(str); +} + +static int resp_time_to_bucket_id(uint32_t resp_time) { + for (int i = 1; i < sizeof(resp_time_bucket)/sizeof(uint32_t); i++) + { + if (resp_time <= resp_time_bucket[i]) { + return i-1; + } + } + + return 0; +} + static const ctr_desc_t ctr_descs[] = { #define item(macro, name, count) \ [CTR_##macro] = { MOD_##macro, offsetof(stats_t, name), (count), name##_to_str } @@ -350,6 +396,8 @@ static const ctr_desc_t ctr_descs[] = { item(QTYPE, qtype, QTYPE__COUNT), item(QSIZE, qsize, QSIZE_MAX_IDX + 1), item(RSIZE, rsize, RSIZE_MAX_IDX + 1), + item(NO_RESP, no_resp, NO_RESP_TYPE_COUNT), + item(RESP_TIME, resp_time, ((sizeof(resp_time_bucket)/sizeof(uint32_t)) - 1)), { NULL } }; @@ -452,6 +500,27 @@ static 
knotd_state_t update_counters(knotd_state_t state, knot_pkt_t *pkt, knot_pkt_size(pkt)); break; } + } else if (stats->resp_bytes == 0) { + knotd_mod_stats_incr(mod, tid, CTR_NO_RESP, state != KNOTD_STATE_FAIL ? WITHOUT_FAILURE : WITH_FAILURE, + 1); + } + + struct timespec now; + if (clock_gettime(CLOCK_REALTIME_COARSE, &now) != -1) { + // Calculate resp time + uint32_t time = (now.tv_sec - qdata->query_time.tv_sec) * 1000; + time += (now.tv_nsec - qdata->query_time.tv_nsec) / 1000000; + if (now.tv_nsec < qdata->query_time.tv_nsec) { + // time -= 1000; This results in overflow. Commenting this will round up instead of rounding down. + if (now.tv_sec <= qdata->query_time.tv_sec) { + // now is smaller than query time + time = 0; + } + } + + int id = resp_time_to_bucket_id(time); + knotd_mod_stats_incr(mod, tid, CTR_RESP_TIME, id, + 1); } // Get the extended response code. diff --git a/src/knot/nameserver/internet.h b/src/knot/nameserver/internet.h index 52afe62414..ace2adb21e 100644 --- a/src/knot/nameserver/internet.h +++ b/src/knot/nameserver/internet.h @@ -20,7 +20,7 @@ #include "knot/include/module.h" #include "knot/nameserver/process_query.h" -/*! \brief Don't follow CNAME/DNAME chain beyond this depth. */ +/*! \brief Don't follow CNAME/DNAME chain beyond this depth of 5. */ #define CNAME_CHAIN_MAX 5 /*! diff --git a/src/knot/nameserver/lqueue.c b/src/knot/nameserver/lqueue.c new file mode 100644 index 0000000000..16f83b3f94 --- /dev/null +++ b/src/knot/nameserver/lqueue.c @@ -0,0 +1,91 @@ +#include +#include "knot/include/lqueue.h" +#include + +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Watomic-alignment" + +int knotd_lockless_queue_create(knotd_lockless_queue_t **queue, KNOTD_LOCKLESS_QUEUE_COUNT_TYPE size) { + assert(size < 0xFFFFU); + size_t size_to_alloc = sizeof(knotd_lockless_queue_t) + ((size + 1) * sizeof(void*)); + if (posix_memalign( (void**)queue, 16, size_to_alloc) != 0) { + return ENOMEM; + } + + memset((void*)*queue, 0, sizeof(knotd_lockless_queue_t)); + (*queue)->size = size + 1; + return 0; +} + +void knotd_lockless_queue_delete(knotd_lockless_queue_t *queue) +{ + free(queue); +} + +int knotd_lockless_queue_enqueue(knotd_lockless_queue_t *queue, void *item, bool *first) +{ + // Make a reservation + knotd_lockless_queue_state_t state, target_state; + KNOTD_LOCKLESS_QUEUE_COUNT_TYPE prev_head, insert_pos; + + KNOT_ATOMIC_GET_RELAXED(&queue->state, state); + do + { + insert_pos = (state.head_reserved + 1) % queue->size; + if (insert_pos == state.tail) + { + return ENOMEM; // queue is full + } + + prev_head = state.head_reserved; + target_state = state; + target_state.head_reserved = insert_pos; + } while (!KNOT_ATOMIC_COMPARE_EXCHANGE_WEAK(&queue->state, state, target_state)); + + // save the object in reserved position + queue->items[insert_pos] = item; + + // Commit the progress, only if all previous reservations have committed + do + { + KNOT_ATOMIC_GET_RELAXED(&queue->state, state); + } while (state.head != prev_head); // Prev reservation is not yet committed + + do + { + target_state = state; + target_state.head = insert_pos; + } while (!KNOT_ATOMIC_COMPARE_EXCHANGE_WEAK(&queue->state, state, target_state)); + + *first = state.head == state.tail; + + return 0; +} + +void* knotd_lockless_queue_dequeue(knotd_lockless_queue_t *queue) { + knotd_lockless_queue_state_t state, target_state; + void *item; + + KNOT_ATOMIC_GET_RELAXED(&queue->state, state); + + do + { + if (state.head == state.tail) + { + return NULL; + } + + target_state = state; + 
target_state.tail = (target_state.tail + 1) % queue->size; + item = queue->items[target_state.tail]; + } while (!KNOT_ATOMIC_COMPARE_EXCHANGE_WEAK(&queue->state, state, target_state)); + + return item; +} + +KNOTD_LOCKLESS_QUEUE_COUNT_TYPE knotd_lockless_queue_count(knotd_lockless_queue_t *queue) { + knotd_lockless_queue_state_t state; + KNOT_ATOMIC_GET_RELAXED(&queue->state, state); + + return (queue->size + state.head - state.tail) % queue->size; +} \ No newline at end of file diff --git a/src/knot/nameserver/lstack.c b/src/knot/nameserver/lstack.c new file mode 100644 index 0000000000..a9633150b5 --- /dev/null +++ b/src/knot/nameserver/lstack.c @@ -0,0 +1,68 @@ +#include +#include "knot/include/lstack.h" +#include + +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Watomic-alignment" + +int knotd_lockless_stack_init(knotd_lockless_stack_t *stack) { + if (posix_memalign( (void**)&stack->head, 16, sizeof(knotd_lockless_stack_head_t)) != 0) { + return ENOMEM; + } + + memset((void*)stack->head, 0, sizeof(knotd_lockless_stack_head_t)); + knotd_lockless_stack_head_t head = {0}; + KNOT_ATOMIC_INIT(stack->head[0], head); + return 0; +} + +void knotd_lockless_stack_cleanup(knotd_lockless_stack_t *stack) { + free(stack->head); + stack->head = NULL; +} + +void knotd_lockless_stack_push(knotd_lockless_stack_t *stack, knotd_lockless_stack_node_t *node) { + knotd_lockless_stack_head_t expect, new; + assert(node->next == NULL); + + KNOT_ATOMIC_GET(stack->head, expect); + do + { + node->next = expect.next; + new.next = node; + new.count = expect.count + 1; + new.aba_cookie = expect.aba_cookie + 1; + } + while(!KNOT_ATOMIC_COMPARE_EXCHANGE_WEAK(stack->head, expect, new)); +} + +knotd_lockless_stack_node_t *knotd_lockless_stack_pop(knotd_lockless_stack_t *stack) { + knotd_lockless_stack_head_t expect, new; + + KNOT_ATOMIC_GET(stack->head, expect); + do + { + if (expect.next == NULL) + { + assert(expect.count == 0); + return NULL; + } + + new.next = expect.next->next; // DONOT free up stack nodes after pop, it can cause invalid memory access here. + new.count = expect.count - 1; + new.aba_cookie = expect.aba_cookie + 1; + } + while (!KNOT_ATOMIC_COMPARE_EXCHANGE_WEAK(stack->head, expect, new)); + + expect.next->next = NULL; + return expect.next; +} + +uint32_t knotd_lockless_stack_count(knotd_lockless_stack_t *stack) +{ + knotd_lockless_stack_head_t expect; + KNOT_ATOMIC_GET_RELAXED(stack->head, expect); + return expect.count; +} + +#pragma GCC diagnostic pop diff --git a/src/knot/nameserver/nsec_proofs.c b/src/knot/nameserver/nsec_proofs.c index 93e47ffc21..49872efd33 100644 --- a/src/knot/nameserver/nsec_proofs.c +++ b/src/knot/nameserver/nsec_proofs.c @@ -637,7 +637,12 @@ int nsec_append_rrsigs(knot_pkt_t *pkt, knotd_qdata_t *qdata, bool optional) { int ret = KNOT_EOK; uint16_t flags = optional ? KNOT_PF_NOTRUNC : KNOT_PF_NULL; - flags |= KNOT_PF_FREE; // Free all RRSIGs, they are synthesized + if (qdata->extra->zone->sign_ctx == NULL) + { + // Free all RRSIGs, they are synthesized + // For online sign module, it will freed as part of azuredb module. + flags |= KNOT_PF_FREE; + } flags |= KNOT_PF_ORIGTTL; /* Append RRSIGs for section. */ diff --git a/src/knot/nameserver/process_query.c b/src/knot/nameserver/process_query.c index 1489dc01d6..8ad684c784 100644 --- a/src/knot/nameserver/process_query.c +++ b/src/knot/nameserver/process_query.c @@ -32,6 +32,7 @@ #include "libknot/libknot.h" #include "contrib/macros.h" #include "contrib/mempattern.h" +#include "knot/nameserver/query_state.h" /*! 
\brief Accessor to query-specific data. */ #define QUERY_DATA(ctx) ((knotd_qdata_t *)(ctx)->data) @@ -64,6 +65,8 @@ static void query_data_init(knot_layer_t *ctx, knotd_qdata_params_t *params, data->extra = extra; data->rcode_ede = KNOT_EDNS_EDE_NONE; + clock_gettime(CLOCK_REALTIME_COARSE, &data->query_time); + /* Initialize lists. */ memset(extra, 0, sizeof(*extra)); init_list(&extra->wildcards); @@ -181,8 +184,42 @@ static int query_chaos(knot_pkt_t *pkt, knot_layer_t *ctx) return KNOT_STATE_DONE; } +/*! + * \brief Lookup zone from plan if available, else fallback to regular DB lookup. + */ +static zone_t *lookup_zone(struct query_plan *plan, knotd_qdata_t *qdata, knot_pkt_t *query, + knot_zonedb_t *db, const knot_dname_t *zone_name, bool is_suffix_match) +{ + struct query_step *step; + int next_state = KNOT_STATE_PRODUCE; + qdata->extra->zone_lookup_params.is_suffix_match = is_suffix_match; + qdata->extra->zone_lookup_params.zone_name = zone_name; + + zone_t *static_zone_response = is_suffix_match ? knot_zonedb_find_suffix(db, zone_name) : knot_zonedb_find(db, zone_name); + if (static_zone_response != NULL) { + return static_zone_response; + } + + if (plan != NULL) { + WALK_LIST(step, plan->stage[KNOTD_STAGE_ZONE_LOOKUP]) { + next_state = step->process(next_state, query, qdata, step->ctx); + if (next_state == KNOT_STATE_FAIL) { + break; + } + } + } + + if (next_state == KNOTD_STATE_ZONE_LOOKUPDONE) + { + return (zone_t*) qdata->extra->zone; + } + + return NULL; +} + /*! \brief Find zone for given question. */ -static const zone_t *answer_zone_find(const knot_pkt_t *query, knot_zonedb_t *zonedb) +static const zone_t *answer_zone_find(struct query_plan *plan, knotd_qdata_t *qdata, + knot_pkt_t *query, knot_zonedb_t *zonedb) { uint16_t qtype = knot_pkt_qtype(query); uint16_t qclass = knot_pkt_qclass(query); @@ -200,7 +237,7 @@ static const zone_t *answer_zone_find(const knot_pkt_t *query, knot_zonedb_t *zo */ if (qtype == KNOT_RRTYPE_DS) { const knot_dname_t *parent = knot_wire_next_label(qname, NULL); - zone = knot_zonedb_find_suffix(zonedb, parent); + zone = lookup_zone(plan, qdata, query, zonedb, parent, true); /* If zone does not exist, search for its parent zone, this will later result to NODATA answer. */ /*! \note This is not 100% right, it may lead to DS name for example @@ -211,10 +248,10 @@ static const zone_t *answer_zone_find(const knot_pkt_t *query, knot_zonedb_t *zo if (zone == NULL) { if (query_type(query) == KNOTD_QUERY_TYPE_NORMAL) { - zone = knot_zonedb_find_suffix(zonedb, qname); + zone = lookup_zone(plan, qdata, query, zonedb, qname, true); } else { // Direct match required. - zone = knot_zonedb_find(zonedb, qname); + zone = lookup_zone(plan, qdata, query, zonedb, qname, false); } } @@ -266,6 +303,32 @@ static int answer_edns_init(const knot_pkt_t *query, knot_pkt_t *resp, knot_edns_set_do(&qdata->opt_rr); } + /* Append Microsoft origin scope if requested. 
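lookup_zone above establishes what a KNOTD_STAGE_ZONE_LOOKUP module is expected to do: read the requested name and match mode from qdata->extra->zone_lookup_params, publish the result in qdata->extra->zone, and return KNOTD_STATE_ZONE_LOOKUPDONE on success. A minimal sketch of such a hook; find_dynamic_zone is hypothetical, and the sketch assumes access to the internal knotd_qdata_extra layout (process_query.h), as an in-tree module would have:

static knotd_state_t zone_lookup_hook(knotd_state_t state, knot_pkt_t *pkt,
                                      knotd_qdata_t *qdata, knotd_mod_t *mod)
{
	const knot_dname_t *name = qdata->extra->zone_lookup_params.zone_name;
	bool suffix = qdata->extra->zone_lookup_params.is_suffix_match;

	/* Hypothetical backend lookup honouring the requested match mode. */
	const zone_t *zone = find_dynamic_zone(mod, name, suffix);
	if (zone != NULL) {
		qdata->extra->zone = zone;            /* publish the result */
		return KNOTD_STATE_ZONE_LOOKUPDONE;   /* positive zone fetch */
	}
	return state;  /* no zone; the usual NODATA/REFUSED handling follows */
}

Such a hook would be registered with knotd_mod_hook(mod, KNOTD_STAGE_ZONE_LOOKUP, zone_lookup_hook), which the knotd_mod_hook change further below now permits.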
*/ + uint8_t *ms_origin_scope_opt = knot_pkt_edns_option(query, KNOT_EDNS_MICROSOFT_ORIGIN_SCOPE_CODE); + if (ms_origin_scope_opt) { + const uint8_t *ms_origin_scope_data = knot_edns_opt_get_data(ms_origin_scope_opt); + uint16_t ms_origin_scope_len = knot_edns_opt_get_length(ms_origin_scope_opt); + ret = knot_edns_add_option(&qdata->opt_rr, + KNOT_EDNS_MICROSOFT_ORIGIN_SCOPE_CODE, + ms_origin_scope_len, + ms_origin_scope_data, + qdata->mm); + if (ret != KNOT_EOK) { + return ret; + } + + /* Append billing id information */ + const uint8_t data_tag_val[MAX_DATA_TAG_LEN] = {0}; + ret = knot_edns_add_option(&qdata->opt_rr, + KNOT_EDNS_OPTION_DATA_TAG_CODE, + sizeof(MAX_DATA_TAG_LEN)-1, + data_tag_val, + qdata->mm); + if (ret != KNOT_EOK) { + return ret; + } + } + /* Append NSID if requested and available. */ if (knot_pkt_edns_option(query, KNOT_EDNS_OPTION_NSID) != NULL) { conf_val_t *nsid = &conf()->cache.srv_nsid; @@ -383,9 +446,9 @@ static int answer_edns_put(knot_pkt_t *resp, knotd_qdata_t *qdata) } /*! \brief Initialize response, sizes and find zone from which we're going to answer. */ -static int prepare_answer(knot_pkt_t *query, knot_pkt_t *resp, knot_layer_t *ctx) +static int prepare_answer(struct query_plan *plan, knotd_qdata_t *qdata, knot_pkt_t *query, + knot_pkt_t *resp, knot_layer_t *ctx) { - knotd_qdata_t *qdata = QUERY_DATA(ctx); server_t *server = qdata->params->server; /* Initialize response. */ @@ -403,8 +466,9 @@ static int prepare_answer(knot_pkt_t *query, knot_pkt_t *resp, knot_layer_t *ctx /* Update maximal answer size. */ bool has_limit = qdata->params->flags & KNOTD_QUERY_FLAG_LIMIT_SIZE; + uint16_t resp_size = KNOT_WIRE_MAX_PKTSIZE; if (has_limit) { - resp->max_size = KNOT_WIRE_MIN_PKTSIZE; + resp_size = KNOT_WIRE_MIN_PKTSIZE; if (knot_pkt_has_edns(query)) { uint16_t server_size; switch (knotd_qdata_remote_addr(qdata)->ss_family) { @@ -419,11 +483,10 @@ static int prepare_answer(knot_pkt_t *query, knot_pkt_t *resp, knot_layer_t *ctx } uint16_t client_size = knot_edns_get_payload(query->opt_rr); uint16_t transfer = MIN(client_size, server_size); - resp->max_size = MAX(resp->max_size, transfer); + resp_size = MAX(resp_size, transfer); } - } else { - resp->max_size = KNOT_WIRE_MAX_PKTSIZE; } + resp->max_size = MIN(resp->max_size, resp_size); /* All supported OPCODEs require a question. */ const knot_dname_t *qname = knot_pkt_qname(query); @@ -447,7 +510,7 @@ static int prepare_answer(knot_pkt_t *query, knot_pkt_t *resp, knot_layer_t *ctx process_query_qname_case_lower(query); /* Find zone for QNAME. 
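The reworked size negotiation just above clamps resp->max_size instead of overwriting it, so a stricter limit established earlier survives. Condensed into one hypothetical helper (KNOT_WIRE_MIN_PKTSIZE is 512, KNOT_WIRE_MAX_PKTSIZE 65535; MIN/MAX as in contrib/macros.h):

/* Condensed form of the negotiation in prepare_answer (sizes in bytes). */
static uint16_t negotiated_max_size(uint16_t current_max, bool udp_limited,
                                    bool has_edns, uint16_t client_payload,
                                    uint16_t server_payload)
{
	uint16_t size = KNOT_WIRE_MAX_PKTSIZE;        /* TCP: no negotiated cap */
	if (udp_limited) {
		size = KNOT_WIRE_MIN_PKTSIZE;         /* 512-byte UDP floor */
		if (has_edns) {
			uint16_t transfer = MIN(client_payload, server_payload);
			size = MAX(size, transfer);
		}
	}
	return MIN(current_max, size);                /* clamp, never raise */
}

For a UDP query advertising an EDNS payload of 1232 against a server limit of 4096, this yields MIN(current_max, MAX(512, 1232)) = 1232.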
*/ - qdata->extra->zone = answer_zone_find(query, server->zone_db); + qdata->extra->zone = answer_zone_find(plan, qdata, query, server->zone_db); if (qdata->extra->zone != NULL && qdata->extra->contents == NULL) { qdata->extra->contents = qdata->extra->zone->contents; } @@ -528,11 +591,19 @@ static int process_query_err(knot_layer_t *ctx, knot_pkt_t *pkt) return KNOT_STATE_DONE; } +#ifdef ENABLE_ASYNC_QUERY_HANDLING +#define NON_CONTINUABLE_STATE(next_state) ((next_state) == KNOT_STATE_FAIL || (next_state) == KNOT_STATE_ASYNC) +#define BREAK_IF_ASYNC(next_state) if ((next_state) == KNOT_STATE_ASYNC) { break; } +#else +#define NON_CONTINUABLE_STATE(next_state) ((next_state) == KNOT_STATE_FAIL) +#define BREAK_IF_ASYNC(next_state) +#endif + #define PROCESS_BEGIN(plan, step, next_state, qdata) \ if (plan != NULL) { \ - WALK_LIST(step, plan->stage[KNOTD_STAGE_BEGIN]) { \ - next_state = step->process(next_state, pkt, qdata, step->ctx); \ - if (next_state == KNOT_STATE_FAIL) { \ + WALK_LIST_RESUME((step), plan->stage[KNOTD_STAGE_BEGIN]) { \ + next_state = (step)->process(next_state, pkt, qdata, (step)->ctx); \ + if (NON_CONTINUABLE_STATE(next_state)) { \ goto finish; \ } \ } \ @@ -540,14 +611,44 @@ static int process_query_err(knot_layer_t *ctx, knot_pkt_t *pkt) #define PROCESS_END(plan, step, next_state, qdata) \ if (plan != NULL) { \ - WALK_LIST(step, plan->stage[KNOTD_STAGE_END]) { \ - next_state = step->process(next_state, pkt, qdata, step->ctx); \ + WALK_LIST_RESUME((step), plan->stage[KNOTD_STAGE_END]) { \ + next_state = (step)->process(next_state, pkt, qdata, (step)->ctx); \ if (next_state == KNOT_STATE_FAIL) { \ next_state = process_query_err(ctx, pkt); \ } \ + BREAK_IF_ASYNC(next_state); \ } \ } +static void init_state_machine(state_machine_t *state) { + memset(state, 0, sizeof(*state)); + state->process_query_next_state = KNOT_STATE_PRODUCE; +} + +#ifdef ENABLE_ASYNC_QUERY_HANDLING +static int complete_async_call(knotd_qdata_t *qdata) +{ + return qdata->params->async_completed_callback(qdata->params); +} + +static int async_operation_in_completed_callback(knotd_qdata_t *qdata, int state) +{ + assert(qdata->state); + state_machine_t *state_machine = qdata->state; + state_machine->process_query_next_state_in = state; + return complete_async_call(qdata); +} + +static int async_operation_completed_callback(knotd_qdata_t *qdata, int state) +{ + assert(qdata->state); + state_machine_t *state_machine = qdata->state; + state_machine->process_query_next_state = state; + + return complete_async_call(qdata); +} +#endif + static int process_query_out(knot_layer_t *ctx, knot_pkt_t *pkt) { assert(pkt && ctx); @@ -557,9 +658,10 @@ static int process_query_out(knot_layer_t *ctx, knot_pkt_t *pkt) knotd_qdata_t *qdata = QUERY_DATA(ctx); struct query_plan *plan = conf()->query_plan; struct query_plan *zone_plan = NULL; - struct query_step *step; - - int next_state = KNOT_STATE_PRODUCE; + state_machine_t *state = NULL; + struct query_step *step = NULL; + struct query_step **step_to_use = &step; + int next_state; /* Check parse state. */ knot_pkt_t *query = qdata->query; @@ -569,10 +671,48 @@ static int process_query_out(knot_layer_t *ctx, knot_pkt_t *pkt) goto finish; } - /* Preprocessing. 
*/ - if (prepare_answer(query, pkt, ctx) != KNOT_EOK) { - next_state = KNOT_STATE_FAIL; - goto finish; + state = qdata->state; + if (state == NULL) { + state = mm_alloc(ctx->mm, sizeof(*state)); + if (state == NULL) { + qdata->rcode = KNOT_RCODE_SERVFAIL; + next_state = KNOT_STATE_FAIL; + goto finish; + } + init_state_machine(state); + qdata->state = state; +#ifdef ENABLE_ASYNC_QUERY_HANDLING + qdata->async_completed = async_operation_completed_callback; + qdata->async_in_completed = async_operation_in_completed_callback; +#endif + } + step_to_use = &state->step; + + next_state = state->process_query_next_state; +#ifdef ENABLE_ASYNC_QUERY_HANDLING + assert(next_state != KNOT_STATE_ASYNC); /* at the beginning or resuming cant be in async */ +#endif + if (NON_CONTINUABLE_STATE(next_state)) { + if (state->process_query_state == PROCESS_QUERY_STATE_DONE_ZONE_PLAN_BEGIN) + { + /* Async state and failurs are result of query_* methods + * Go to query_* and recover execution from there. */ + goto run_query; + } + else + { + goto finish; + } + } + + STATE_MACHINE_RUN_STATE(state, next_state, KNOT_STATE_ASYNC, process_query_state, PROCESS_QUERY_STATE_DONE_PREPARE_ANSWER) { + /* Preprocessing. */ + if (prepare_answer(plan, qdata, query, pkt, ctx) != KNOT_EOK) { + next_state = KNOT_STATE_FAIL; + goto finish; + } + + STATE_MACHINE_COMPLETED_STATE(state, next_state, KNOT_STATE_ASYNC, process_query_state, PROCESS_QUERY_STATE_DONE_PREPARE_ANSWER); } if (qdata->extra->zone != NULL && qdata->extra->zone->query_plan != NULL) { @@ -580,75 +720,129 @@ static int process_query_out(knot_layer_t *ctx, knot_pkt_t *pkt) } /* Before query processing code. */ - PROCESS_BEGIN(plan, step, next_state, qdata); - PROCESS_BEGIN(zone_plan, step, next_state, qdata); - - /* Answer based on qclass. */ - if (next_state == KNOT_STATE_PRODUCE) { - switch (knot_pkt_qclass(pkt)) { - case KNOT_CLASS_CH: - next_state = query_chaos(pkt, ctx); - break; - case KNOT_CLASS_ANY: - case KNOT_CLASS_IN: - next_state = query_internet(pkt, ctx); - break; - default: - qdata->rcode = KNOT_RCODE_REFUSED; - next_state = KNOT_STATE_FAIL; - break; - } + STATE_MACHINE_RUN_STATE(state, next_state, KNOT_STATE_ASYNC, process_query_state, PROCESS_QUERY_STATE_DONE_PLAN_BEGIN) { + PROCESS_BEGIN(plan, *step_to_use, next_state, qdata); + STATE_MACHINE_COMPLETED_STATE(state, next_state, KNOT_STATE_ASYNC, process_query_state, PROCESS_QUERY_STATE_DONE_PLAN_BEGIN); } - /* Postprocessing. */ - if (next_state == KNOT_STATE_DONE || next_state == KNOT_STATE_PRODUCE) { - /* Restore original QNAME. */ - process_query_qname_case_restore(pkt, qdata); + STATE_MACHINE_RUN_STATE(state, next_state, KNOT_STATE_ASYNC, process_query_state, PROCESS_QUERY_STATE_DONE_ZONE_PLAN_BEGIN) { + PROCESS_BEGIN(zone_plan, *step_to_use, next_state, qdata); + STATE_MACHINE_COMPLETED_STATE(state, next_state, KNOT_STATE_ASYNC, process_query_state, PROCESS_QUERY_STATE_DONE_ZONE_PLAN_BEGIN); + } - /* Move to Additionals to add OPT and TSIG. */ - if (pkt->current != KNOT_ADDITIONAL) { - (void)knot_pkt_begin(pkt, KNOT_ADDITIONAL); +run_query: + STATE_MACHINE_RUN_STATE(state, next_state, KNOT_STATE_ASYNC, process_query_state, PROCESS_QUERY_STATE_DONE_QUERY) { + /* Answer based on qclass. 
*/ + if (next_state == KNOT_STATE_PRODUCE) { + switch (knot_pkt_qclass(pkt)) { + case KNOT_CLASS_CH: + next_state = query_chaos(pkt, ctx); + break; + case KNOT_CLASS_ANY: + case KNOT_CLASS_IN: + next_state = query_internet(pkt, ctx); + break; + default: + qdata->rcode = KNOT_RCODE_REFUSED; + next_state = KNOT_STATE_FAIL; + break; + } } + STATE_MACHINE_COMPLETED_STATE(state, next_state, KNOT_STATE_ASYNC, process_query_state, PROCESS_QUERY_STATE_DONE_QUERY); + } - /* Put OPT RR to the additional section. */ - if (answer_edns_put(pkt, qdata) != KNOT_EOK) { - qdata->rcode = KNOT_RCODE_FORMERR; - next_state = KNOT_STATE_FAIL; - goto finish; - } + STATE_MACHINE_RUN_STATE(state, next_state, KNOT_STATE_ASYNC, process_query_state, PROCESS_QUERY_STATE_DONE_POST_QUERY) { + /* Postprocessing. */ + if (next_state == KNOT_STATE_DONE || next_state == KNOT_STATE_PRODUCE) { + /* Restore original QNAME. */ + process_query_qname_case_restore(pkt, qdata); - /* Transaction security (if applicable). */ - if (process_query_sign_response(pkt, qdata) != KNOT_EOK) { - next_state = KNOT_STATE_FAIL; - goto finish; + /* Move to Additionals to add OPT and TSIG. */ + if (pkt->current != KNOT_ADDITIONAL) { + (void)knot_pkt_begin(pkt, KNOT_ADDITIONAL); + } + + /* Put OPT RR to the additional section. */ + if (answer_edns_put(pkt, qdata) != KNOT_EOK) { + qdata->rcode = KNOT_RCODE_FORMERR; + next_state = KNOT_STATE_FAIL; + goto finish; + } + + /* Transaction security (if applicable). */ + if (process_query_sign_response(pkt, qdata) != KNOT_EOK) { + next_state = KNOT_STATE_FAIL; + goto finish; + } } + STATE_MACHINE_COMPLETED_STATE(state, next_state, KNOT_STATE_ASYNC, process_query_state, PROCESS_QUERY_STATE_DONE_POST_QUERY); } finish: - switch (next_state) { - case KNOT_STATE_NOOP: - break; - case KNOT_STATE_FAIL: - /* Error processing. */ - next_state = process_query_err(ctx, pkt); - break; - case KNOT_STATE_FINAL: - /* Just skipped postprocessing. */ - next_state = KNOT_STATE_DONE; - break; - default: - set_rcode_to_packet(pkt, qdata); + STATE_MACHINE_RUN_STATE(state, next_state, KNOT_STATE_ASYNC, process_query_state, PROCESS_QUERY_STATE_DONE_HANDLE_ERROR) { + switch (next_state) { + case KNOT_STATE_NOOP: + break; + case KNOT_STATE_FAIL: + /* Error processing. */ + next_state = process_query_err(ctx, pkt); + break; + case KNOT_STATE_FINAL: + /* Just skipped postprocessing. */ + next_state = KNOT_STATE_DONE; + break; + default: + set_rcode_to_packet(pkt, qdata); + } + + STATE_MACHINE_COMPLETED_STATE(state, next_state, KNOT_STATE_ASYNC, process_query_state, PROCESS_QUERY_STATE_DONE_HANDLE_ERROR); } /* After query processing code. 
*/ - PROCESS_END(plan, step, next_state, qdata); - PROCESS_END(zone_plan, step, next_state, qdata); + STATE_MACHINE_RUN_STATE(state, next_state, KNOT_STATE_ASYNC, process_query_state, PROCESS_QUERY_STATE_DONE_PLAN_END) { + PROCESS_END(plan, *step_to_use, next_state, qdata); + STATE_MACHINE_COMPLETED_STATE(state, next_state, KNOT_STATE_ASYNC, process_query_state, PROCESS_QUERY_STATE_DONE_PLAN_END); + } + + STATE_MACHINE_RUN_STATE(state, next_state, KNOT_STATE_ASYNC, process_query_state, PROCESS_QUERY_STATE_DONE_ZONE_PLAN_END) { + PROCESS_END(zone_plan, *step_to_use, next_state, qdata); + STATE_MACHINE_COMPLETED_STATE(state, next_state, KNOT_STATE_ASYNC, process_query_state, PROCESS_QUERY_STATE_DONE_ZONE_PLAN_END); + } rcu_read_unlock(); + if (knot_layer_active_state(next_state)) { + /* + * Exiting the state machine with an active state will result in more produce calls which need to resume from beginning. + * Reset the state machine so it will execute all steps. + */ + if (state) { + init_state_machine(state); + } + } + return next_state; } +#ifdef ENABLE_ASYNC_QUERY_HANDLING +static int process_query_set_async_state(knot_layer_t *ctx, knot_pkt_t *pkt, int layer_state) +{ + assert(pkt && ctx); + knotd_qdata_t *qdata = QUERY_DATA(ctx); + if (qdata != NULL) { + state_machine_t *state = qdata->state; + if (state != NULL) { + state->process_query_next_state = layer_state; + if (layer_state == KNOT_STATE_FAIL) { + state->process_query_next_state_in = KNOTD_IN_STATE_ERROR; + } + } + } + + return layer_state; +} +#endif + bool process_query_acl_check(conf_t *conf, acl_action_t action, knotd_qdata_t *qdata) { @@ -944,6 +1138,9 @@ const knot_layer_api_t *process_query_layer(void) .finish = &process_query_finish, .consume = &process_query_in, .produce = &process_query_out, +#ifdef ENABLE_ASYNC_QUERY_HANDLING + .set_async_state = &process_query_set_async_state, +#endif }; return &api; } diff --git a/src/knot/nameserver/process_query.h b/src/knot/nameserver/process_query.h index 00e1790c03..c4bdfbc7ca 100644 --- a/src/knot/nameserver/process_query.h +++ b/src/knot/nameserver/process_query.h @@ -24,6 +24,9 @@ /* Query processing module implementation. */ const knot_layer_api_t *process_query_layer(void); +/*Note: temp length for data tag option*/ +#define MAX_DATA_TAG_LEN 25 + /*! \brief Query processing intermediate data. */ typedef struct knotd_qdata_extra { const zone_t *zone; /*!< Zone from which is answered. */ @@ -42,6 +45,16 @@ typedef struct knotd_qdata_extra { /* Extensions. */ void *ext; void (*ext_cleanup)(knotd_qdata_t *); /*!< Extensions cleanup callback. */ + + union + { + struct { + const knot_dname_t *zone_name; + bool is_suffix_match; + } zone_lookup_params; + }; + + int ext_result; /*!< Additional error code from module. Modules MUST return this value for : KNOTD_STAGE_NAME_LOOKUP */ } knotd_qdata_extra_t; /*! \brief Visited wildcard node list. 
*/ diff --git a/src/knot/nameserver/query_module.c b/src/knot/nameserver/query_module.c index 0effefe524..e7a59f78a3 100644 --- a/src/knot/nameserver/query_module.c +++ b/src/knot/nameserver/query_module.c @@ -107,7 +107,7 @@ int query_plan_step(struct query_plan *plan, knotd_stage_t stage, _public_ int knotd_mod_hook(knotd_mod_t *mod, knotd_stage_t stage, knotd_mod_hook_f hook) { - if (stage != KNOTD_STAGE_BEGIN && stage != KNOTD_STAGE_END) { + if (stage != KNOTD_STAGE_BEGIN && stage != KNOTD_STAGE_END && stage != KNOTD_STAGE_ZONE_LOOKUP) { return KNOT_EINVAL; } @@ -117,7 +117,7 @@ int knotd_mod_hook(knotd_mod_t *mod, knotd_stage_t stage, knotd_mod_hook_f hook) _public_ int knotd_mod_in_hook(knotd_mod_t *mod, knotd_stage_t stage, knotd_mod_in_hook_f hook) { - if (stage == KNOTD_STAGE_BEGIN || stage == KNOTD_STAGE_END) { + if (stage == KNOTD_STAGE_BEGIN || stage == KNOTD_STAGE_END || stage == KNOTD_STAGE_ZONE_LOOKUP) { return KNOT_EINVAL; } @@ -618,47 +618,31 @@ void knotd_conf_free(knotd_conf_t *conf) } _public_ -const struct sockaddr_storage *knotd_qdata_local_addr(knotd_qdata_t *qdata, - struct sockaddr_storage *buff) +const struct sockaddr_storage *knotd_qdata_local_addr(knotd_qdata_t *qdata) { - if (qdata == NULL) { + if (qdata == NULL || qdata->params == NULL) { return NULL; } - if (qdata->params->xdp_msg != NULL) { -#ifdef ENABLE_XDP - return (struct sockaddr_storage *)&qdata->params->xdp_msg->ip_to; -#else - assert(0); - return NULL; -#endif - } else { - socklen_t buff_len = sizeof(*buff); - if (getsockname(qdata->params->socket, (struct sockaddr *)buff, - &buff_len) != 0) { - return NULL; - } - return buff; + if (qdata->params->local != NULL && qdata->params->local->ss_family != AF_UNSPEC) { + return qdata->params->local; } + + return NULL; } _public_ const struct sockaddr_storage *knotd_qdata_remote_addr(knotd_qdata_t *qdata) { - if (qdata == NULL) { + if (qdata == NULL || qdata->params == NULL) { return NULL; } - if (qdata->params->xdp_msg != NULL) { -#ifdef ENABLE_XDP - return (struct sockaddr_storage *)&qdata->params->xdp_msg->ip_from; -#else - assert(0); - return NULL; -#endif - } else { + if (qdata->params->remote != NULL && qdata->params->remote->ss_family != AF_UNSPEC) { return qdata->params->remote; } + + return NULL; } _public_ diff --git a/src/knot/nameserver/query_module.h b/src/knot/nameserver/query_module.h index 5cc905b909..c1fec8aa88 100644 --- a/src/knot/nameserver/query_module.h +++ b/src/knot/nameserver/query_module.h @@ -26,8 +26,10 @@ #ifdef HAVE_ATOMIC #define ATOMIC_GET(src) __atomic_load_n(&(src), __ATOMIC_RELAXED) + #define ATOMIC_ADD(dst, val) __atomic_add_fetch(&(dst), (val), __ATOMIC_RELAXED) #else - #define ATOMIC_GET(src) (src) +#define ATOMIC_GET(src) (src) +#define ATOMIC_ADD(dst, val) ((dst) += (src)) #endif #define KNOTD_STAGES (KNOTD_STAGE_END + 1) diff --git a/src/knot/nameserver/query_state.h b/src/knot/nameserver/query_state.h new file mode 100644 index 0000000000..8b8b972bbe --- /dev/null +++ b/src/knot/nameserver/query_state.h @@ -0,0 +1,83 @@ +#pragma once + +typedef enum { + PROCESS_QUERY_STATE_BEGIN, + PROCESS_QUERY_STATE_DONE_PREPARE_ANSWER, + PROCESS_QUERY_STATE_DONE_PLAN_BEGIN, + PROCESS_QUERY_STATE_DONE_ZONE_PLAN_BEGIN, + PROCESS_QUERY_STATE_DONE_QUERY, + PROCESS_QUERY_STATE_DONE_POST_QUERY, + PROCESS_QUERY_STATE_DONE_HANDLE_ERROR, + PROCESS_QUERY_STATE_DONE_PLAN_END, + PROCESS_QUERY_STATE_DONE_ZONE_PLAN_END, +} process_query_state_t; + +typedef enum { + INTERNET_PROCESS_QUERY_STATE_BEGIN, + 
INTERNET_PROCESS_QUERY_STATE_DONE_PREPROCESS, +} internet_process_query_state_t; + +typedef enum { + ANSWER_QUERY_STATE_BEGIN, + ANSWER_QUERY_STATE_DONE_PREANSWER, + ANSWER_QUERY_STATE_DONE_ANSWER_BEGIN, + ANSWER_QUERY_STATE_DONE_SOLVE_ANSWER, + ANSWER_QUERY_STATE_DONE_SOLVE_ANSWER_DNSSEC, + ANSWER_QUERY_STATE_DONE_STAGE_ANSWER, + ANSWER_QUERY_STATE_DONE_AUTH_BEGIN, + ANSWER_QUERY_STATE_DONE_SOLVE_AUTH, + ANSWER_QUERY_STATE_DONE_SOLVE_AUTH_DNSSEC, + ANSWER_QUERY_STATE_DONE_STAGE_AUTH, + ANSWER_QUERY_STATE_DONE_ADDITIONAL_BEGIN, + ANSWER_QUERY_STATE_DONE_SOLVE_ADDITIONAL, + ANSWER_QUERY_STATE_DONE_SOLVE_AADDITIONAL_DNSSEC, + ANSWER_QUERY_STATE_DONE_STAGE_AADDITIONAL, + ANSWER_QUERY_STATE_DONE_SET_ERROR, +} answer_query_state_t; + +typedef enum { + SOLVE_ANSWER_STATE_BEGIN, + SOLVE_ANSWER_HANDLE_INCOMING_STATE, + SOLVE_ANSWER_SOLVE_NAME_FIRST, + SOLVE_ANSWER_SOLVE_NAME_FIRST_DONE, + SOLVE_ANSWER_SOLVE_NAME_FOLLOW, +} solve_answer_query_state_t; + +typedef enum { + SOLVE_NAME_STATE_BEGIN, + SOLVE_NAME_HANDLE_INCOMING_STATE, + SOLVE_NAME_STAGE_LOOKUP, + SOLVE_NAME_STAGE_LOOKUP_DONE, +} solve_name_query_state_t; + +/*! \brief State machine state data. + Preserves execution state of the function like + 1. Position within function + 2. Local variables whose values need to be preserved between resume. + TBD: Unions can be used to save space for functions that cant execute in parallel. */ +typedef struct { + struct query_step *step; + process_query_state_t process_query_state; + internet_process_query_state_t internet_process_query_state; + answer_query_state_t answer_query_state; + solve_answer_query_state_t solve_answer_state; + solve_name_query_state_t solve_name_state; + int process_query_next_state; + int process_query_next_state_in; + int solve_answer_old_state; + int solve_name_incoming_state; + bool solve_answer_loop_in_async; +} state_machine_t; + +#ifdef ENABLE_ASYNC_QUERY_HANDLING +#define STATE_MACHINE_RUN_STATE(state, next_state, async_state, sub_state, curr_state) \ + if (((state) == NULL) || ((next_state) != async_state && (state)->sub_state < (curr_state))) + +#define STATE_MACHINE_COMPLETED_STATE(state, next_state, async_state, sub_state, curr_state) \ + if (((state) != NULL) && (next_state) != async_state) { (state)->sub_state = (curr_state); } +#else +#define STATE_MACHINE_RUN_STATE(state, next_state, async_state, sub_state, curr_state) + +#define STATE_MACHINE_COMPLETED_STATE(state, next_state, async_state, sub_state, curr_state) +#endif + diff --git a/src/knot/query/layer.h b/src/knot/query/layer.h index 252d043ae8..21bc570bef 100644 --- a/src/knot/query/layer.h +++ b/src/knot/query/layer.h @@ -34,20 +34,34 @@ typedef enum { KNOT_STATE_DONE, //!< Finished. KNOT_STATE_FAIL, //!< Error. KNOT_STATE_FINAL, //!< Finished and finalized. +#ifdef ENABLE_ASYNC_QUERY_HANDLING + KNOT_LAYER_STATE_ASYNC = 100, //!< The request needs to be async handled. Value should match KNOT_STATE_ASYNC. +#endif } knot_layer_state_t; +#define knot_layer_active_state(state) ((state) == KNOT_STATE_PRODUCE || (state) == KNOT_STATE_FAIL) +#define knot_layer_send_state(state) ((state) != KNOT_STATE_FAIL && (state) != KNOT_STATE_NOOP) + typedef struct knot_layer_api knot_layer_api_t; /*! \brief Packet processing context. */ typedef struct { const knot_layer_api_t *api; //!< Layer API. - knot_mm_t *mm; //!< Processing memory context. + knot_mm_t *mm; //!< Processing memory context. This memory is setup from the req to the layer when request is processed. knot_layer_state_t state; //!< Processing state. 
void *data; //!< Module specific. tsig_ctx_t *tsig; //!< TODO: remove unsigned flags; //!< Custom flags. } knot_layer_t; +#define knot_layer_clear_req_data(layer) { \ + (layer).mm = NULL; \ + (layer).state = KNOT_STATE_NOOP; \ + (layer).data = NULL; \ + (layer).tsig = NULL; \ + (layer).flags = 0; \ +} + /*! \brief Packet processing module API. */ struct knot_layer_api { int (*begin)(knot_layer_t *ctx, void *params); @@ -55,6 +69,9 @@ struct knot_layer_api { int (*finish)(knot_layer_t *ctx); int (*consume)(knot_layer_t *ctx, knot_pkt_t *pkt); int (*produce)(knot_layer_t *ctx, knot_pkt_t *pkt); +#ifdef ENABLE_ASYNC_QUERY_HANDLING + int (*set_async_state)(knot_layer_t *ctx, knot_pkt_t *pkt, int layer_state); +#endif }; /*! \brief Helper for conditional layer call. */ @@ -133,3 +150,17 @@ inline static void knot_layer_produce(knot_layer_t *ctx, knot_pkt_t *pkt) { LAYER_CALL(ctx, produce, pkt); } + +#ifdef ENABLE_ASYNC_QUERY_HANDLING +/*! + * \brief Set the state from layer. + * + * \param ctx Layer context. + * \param pkt Data packet. + * \param state State to be set. + */ +inline static void knot_layer_set_async_state(knot_layer_t *ctx, knot_pkt_t *pkt, int state) +{ + LAYER_CALL(ctx, set_async_state, pkt, ctx->state); +} +#endif diff --git a/src/knot/server/dns-handler.c b/src/knot/server/dns-handler.c new file mode 100644 index 0000000000..135dd95972 --- /dev/null +++ b/src/knot/server/dns-handler.c @@ -0,0 +1,291 @@ +/* Copyright (C) 2021 CZ.NIC, z.s.p.o. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . + */ + +#define __APPLE_USE_RFC_3542 +#include +#include +#include +#include +#include +#include +#include +#include +#include +#ifdef HAVE_SYS_UIO_H // struct iovec (OpenBSD) +#include +#endif /* HAVE_SYS_UIO_H */ +#include +#include +#include "knot/server/dns-handler.h" + +#define DISPATCH_QUEUE_SIZE (8 * 1024) + +/*! + * \brief Process dns request for first time or resume processing if it was suspended due to async handling. + * + * \param dns_handler DNS Handler to process the request. + * \param dns_req DNS request to process for first time or after it is resumed from async delay state. + * + * \retval KNOT_EOK if succeeded. + */ +static int handle_dns_request_continue(dns_request_handler_context_t *dns_handler, dns_handler_request_t *dns_req) { + assert(dns_handler->layer.mm == dns_req->req_data.mm); + int ret = KNOT_EOK; + knot_pkt_t *ans = dns_req->handler_data.ans; +#ifdef ENABLE_ASYNC_QUERY_HANDLING + if (dns_req->handler_data.flag & DNS_HANDLER_REQUEST_FLAG_IS_CANCELLED) { + /* force state to be in failed state and execute produce. This guarantees module cleanup are performed */ + knot_layer_set_async_state(&dns_handler->layer, ans, KNOT_STATE_FAIL); + } +#endif + + /* Process answer. 
+	while (knot_layer_active_state(dns_handler->layer.state)) {
+		knot_layer_produce(&dns_handler->layer, ans);
+
+#ifdef ENABLE_ASYNC_QUERY_HANDLING
+		if (dns_req->handler_data.flag & DNS_HANDLER_REQUEST_FLAG_IS_CANCELLED) {
+			ret = KNOT_EOF;
+			break;
+		} else if (dns_handler->layer.state != KNOT_LAYER_STATE_ASYNC)
+#endif
+		{
+			/* Send, if response generation passed and wasn't ignored. */
+			if (dns_handler->send_result && ans->size > 0 && knot_layer_send_state(dns_handler->layer.state)) {
+				int sent = dns_handler->send_result(dns_handler, dns_req, ans->size);
+				if (sent != ans->size) {
+					ret = KNOT_EOF;
+					break;
+				}
+			}
+		}
+	}
+
+#ifdef ENABLE_ASYNC_QUERY_HANDLING
+	if (dns_handler->layer.state != KNOT_LAYER_STATE_ASYNC) {
+#endif
+		/* Send response only if finished successfully. */
+		if (dns_handler->layer.state == KNOT_STATE_DONE) {
+			dns_req->req_data.tx->iov_len = ans->size;
+		} else {
+			dns_req->req_data.tx->iov_len = 0;
+		}
+
+		/* Reset after processing. */
+		knot_layer_finish(&dns_handler->layer);

+		/* Flush per-query memory (including query and answer packets). */
+		mp_flush(dns_handler->layer.mm->ctx);
+#ifdef ENABLE_ASYNC_QUERY_HANDLING
+	} else {
+		dns_req->handler_data.flag |= DNS_HANDLER_REQUEST_FLAG_IS_ASYNC;
+		knot_layer_backup_to_dns_handler_request(dns_handler->layer, *dns_req);
+	}
+#endif
+
+	return ret;
+}
+
+#ifdef ENABLE_ASYNC_QUERY_HANDLING
+/*!
+ * \brief Resume processing of a request that was interrupted in a produce call and is currently in the async delayed state.
+ *
+ * \param dns_handler DNS request handler context.
+ * \param dns_req Request to be resumed.
+ *
+ * \retval KNOT_EOK if succeeded.
+ */
+static int handle_dns_request_resume(dns_request_handler_context_t *dns_handler, dns_handler_request_t *dns_req)
+{
+	assert(dns_handler_request_is_async(*dns_req));
+	knot_layer_backup_from_dns_handler_request(dns_handler->layer, *dns_req);
+	dns_req->handler_data.flag &= ~DNS_HANDLER_REQUEST_FLAG_IS_ASYNC;
+	dns_handler->layer.state = KNOT_STATE_PRODUCE; /* The state is ignored by produce itself, but it lets handle_dns_request_continue restart the produce loop. */
+
+	int ret = handle_dns_request_continue(dns_handler, dns_req);
+	if (!dns_handler_request_is_async(*dns_req)) {
+		// The request is completed; notify the network layer.
+		dns_handler->async_complete(dns_handler, dns_req);
+	}
+	return ret;
+}
+
+/*!
+ * \brief Callback from knot_layer_t indicating the async request is complete.
+ * NOTE: This WILL BE called on a thread different from the one that owns the dns_request_handler_context_t.
+ * Any step that could race with the dns_request_handler_context_t thread has to be protected by a lock.
+ *
+ * \param params Params of the knot_layer_t which completed the async operation.
+ */
+static int dns_handler_notify_async_completed(knotd_qdata_params_t *params)
+{
+	dns_handler_request_t *dns_req = params->dns_req;
+	uint64_t value = 1;
+
+	// Capture the handle first: as soon as the req is put in the queue, ownership of the req moves to the queue and the req can be dispatched and cleaned up.
+	int async_notify_handle = dns_req->handler_data.dns_handler_ctx->async_notify_handle;
+
+	bool first = false;
+	_unused_ int rc = knotd_lockless_queue_enqueue(dns_req->handler_data.dns_handler_ctx->async_completed_reqs, dns_req, &first);
+	assert(rc == 0);
+
+	if (first && write(async_notify_handle, &value, sizeof(value)) == -1) {
+		/* The request is queued, we just did not wake up the async handler; the next notification might. */
+		return KNOT_ESYSTEM;
+	} else {
+		return KNOT_EOK;
+	}
+}
+
+/*!
+ * \brief Continue processing async completed requests.
+ * The network layer is expected to call this function when the handle from dns_request_handler_context_get_async_notify_handle is signaled.
+ *
+ * \param dns_handler_ctx DNS request handler context.
+ */
+void handle_dns_request_async_completed_queries(dns_request_handler_context_t *dns_handler_ctx)
+{
+	/* Clean up the read context. */
+	uint8_t buff[8];
+	/* Consume the data from the async notification handle. */
+	_unused_ int unused = read(dns_handler_ctx->async_notify_handle, buff, sizeof(buff));
+
+	dns_handler_request_t *dns_req;
+	while ((dns_req = knotd_lockless_queue_dequeue(dns_handler_ctx->async_completed_reqs))) {
+		handle_dns_request_resume(dns_handler_ctx, dns_req);
+	}
+}
+#endif
+
+/*!
+ * \brief Initialize the DNS request handler.
+ *
+ * \param dns_handler DNS handler to be initialized.
+ * \param server Server to be used in this DNS request handler.
+ * \param thread_id ID of the thread that will invoke this dns_handler.
+ * \param flags DNS request flags to be used in this handler.
+ * \param send_result Optional. Callback to send results to the network layer. If none is provided, only the final result will be available in the tx of the request.
+ * \param async_complete Notification when an async query is completed.
+ *
+ * \retval KNOT_EOK on success.
+ */
+int initialize_dns_handle(
+	dns_request_handler_context_t *dns_handler,
+	server_t *server,
+	int thread_id,
+	uint8_t flags,
+	send_produced_result send_result
+#ifdef ENABLE_ASYNC_QUERY_HANDLING
+	,async_query_completed_callback async_complete
+#endif
+) {
+	assert(flags & KNOTD_QUERY_FLAG_LIMIT_SIZE || send_result);
+	dns_handler->server = server;
+	dns_handler->thread_id = thread_id;
+	dns_handler->flags = flags;
+	dns_handler->send_result = send_result;
+	knot_layer_init(&dns_handler->layer, NULL, process_query_layer());
+
+#ifdef ENABLE_ASYNC_QUERY_HANDLING
+	dns_handler->async_complete = async_complete;
+	dns_handler->async_notify_handle = eventfd(0, EFD_CLOEXEC | EFD_NONBLOCK);
+	if (dns_handler->async_notify_handle == -1) {
+		return KNOT_ESYSTEM;
+	}
+
+	int ret;
+	if ((ret = knotd_lockless_queue_create(&dns_handler->async_completed_reqs, DISPATCH_QUEUE_SIZE))) {
+		return ret;
+	}
+#endif
+
+	return KNOT_EOK;
+}
+
+/*!
+ * \brief Clean up the DNS request handler.
+ *
+ * \param dns_handler DNS handler to be cleaned up.
+ */
+void cleanup_dns_handle(dns_request_handler_context_t *dns_handler) {
+#ifdef ENABLE_ASYNC_QUERY_HANDLING
+	if (dns_handler->async_notify_handle != -1) {
+		close(dns_handler->async_notify_handle);
+		dns_handler->async_notify_handle = -1;
+	}
+
+	knotd_lockless_queue_delete(dns_handler->async_completed_reqs);
+#endif
+}
+
+/*!
+ * \brief Handle a DNS request.
+ *
+ * \param dns_handler DNS handler to be used.
+ * \param dns_req DNS request to be processed.
+ *
+ * \retval KNOT_EOK on success.
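+ *
+ * A minimal sketch of how a network layer may drive this call (mgr, handler
+ * and client_fd are illustrative names, not part of this API; the async
+ * check applies only with ENABLE_ASYNC_QUERY_HANDLING):
+ *
+ * \code
+ * network_dns_request_t *req = mgr->allocate_network_request_func(mgr);
+ * req->dns_req.req_data.fd = client_fd;
+ * // fill req->dns_req.req_data.rx with the received wire data, then:
+ * int ret = handle_dns_request(&handler, &req->dns_req);
+ * if (!dns_handler_request_is_async(req->dns_req)) {
+ *         // the response, if any, is now in req->dns_req.req_data.tx
+ *         mgr->free_network_request_func(mgr, req);
+ * }
+ * \endcode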
+ */
+int handle_dns_request(dns_request_handler_context_t *dns_handler, dns_handler_request_t *dns_req)
+{
+	dns_handler_request_clear_handler_data(*dns_req);
+	knot_layer_clear_req_data(dns_handler->layer);
+
+	// Use the memory from the req for this query processing.
+	dns_handler->layer.mm = dns_req->req_data.mm;
+	dns_req->handler_data.dns_handler_ctx = dns_handler;
+
+	// Allocate params from the request memory.
+	knotd_qdata_params_t *params = mm_alloc(dns_handler->layer.mm, sizeof(*params));
+	if (!params) {
+		// Failed to allocate, just fail the call.
+		dns_req->req_data.tx->iov_len = 0;
+		return KNOT_ESPACE;
+	}
+
+#ifdef ENABLE_ASYNC_QUERY_HANDLING
+	dns_req->handler_data.flag &= ~(DNS_HANDLER_REQUEST_FLAG_IS_ASYNC | DNS_HANDLER_REQUEST_FLAG_IS_CANCELLED);
+	params->async_completed_callback = dns_handler_notify_async_completed;
+#endif
+
+	/* Create query processing parameter. */
+	params->remote = &dns_req->req_data.source_addr;
+	params->local = &dns_req->req_data.target_addr;
+	params->flags = dns_handler->flags;
+	params->socket = dns_req->req_data.fd;
+	params->server = dns_handler->server;
+	params->xdp_msg = dns_req->req_data.xdp_msg;
+	params->thread_id = dns_handler->thread_id;
+	params->dns_req = dns_req;
+	dns_req->handler_data.params = params;
+
+	/* Start query processing. */
+	knot_layer_begin(&dns_handler->layer, params);
+
+	/* Create packets. */
+	knot_pkt_t *query = knot_pkt_new(dns_req->req_data.rx->iov_base, dns_req->req_data.rx->iov_len, dns_handler->layer.mm);
+	dns_req->handler_data.ans = knot_pkt_new(dns_req->req_data.tx->iov_base, dns_req->req_data.tx->iov_len, dns_handler->layer.mm);
+
+	/* Input packet. */
+	int ret = knot_pkt_parse(query, 0);
+	if (ret != KNOT_EOK && query->parsed > 0) { // parsing failed (e.g. 2x OPT)
+		query->parsed--; // artificially decreasing "parsed" leads to FORMERR
+	}
+	knot_layer_consume(&dns_handler->layer, query);
+
+	return handle_dns_request_continue(dns_handler, dns_req);
+}
diff --git a/src/knot/server/dns-handler.h b/src/knot/server/dns-handler.h
new file mode 100644
index 0000000000..8cb7f08593
--- /dev/null
+++ b/src/knot/server/dns-handler.h
@@ -0,0 +1,223 @@
+/* Copyright (C) 2021 CZ.NIC, z.s.p.o.
+
+   This program is free software: you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation, either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ */
+#pragma once
+
+#include "contrib/macros.h"
+#include "contrib/mempattern.h"
+#include "contrib/sockaddr.h"
+#include "contrib/ucw/mempool.h"
+#include "knot/common/fdset.h"
+#include "knot/nameserver/process_query.h"
+#include "knot/query/layer.h"
+#include "knot/server/server.h"
+#include "libknot/xdp/xdp.h"
+#ifdef ENABLE_ASYNC_QUERY_HANDLING
+#include "knot/include/lqueue.h"
+#include
+#endif
+
+/* Buffer identifiers. */
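+/* RX indexes the receive buffer and TX the transmit (response) buffer;
+   the request manager allocates both with its configured buffer size. */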
+enum {
+	RX = 0,
+	TX = 1,
+	NBUFS = 2
+};
+
+typedef struct dns_request_handler_context dns_request_handler_context_t;
+typedef struct dns_handler_request dns_handler_request_t;
+typedef int (*send_produced_result)(dns_request_handler_context_t *net, dns_handler_request_t *req, size_t size);
+
+#ifdef ENABLE_ASYNC_QUERY_HANDLING
+typedef void (*async_query_completed_callback)(dns_request_handler_context_t *net, dns_handler_request_t *req);
+
+/*! \brief DNS request handler flags. */
+typedef enum dns_handler_request_flag {
+	DNS_HANDLER_REQUEST_FLAG_IS_ASYNC = (1 << 0), /*!< The request is currently being handled asynchronously. */
+	DNS_HANDLER_REQUEST_FLAG_IS_CANCELLED = (1 << 1), /*!< The request has been cancelled. */
+} dns_handler_request_flag_t;
+#endif
+
+/*! \brief DNS request handler context data. */
+struct dns_request_handler_context {
+#ifdef ENABLE_ASYNC_QUERY_HANDLING
+	knotd_lockless_queue_t *async_completed_reqs; /*!< Requests which were asynchronously completed by modules, but whose processing has not yet resumed. */
+	async_query_completed_callback async_complete; /*!< Callback to the network layer to indicate that a query in async state is completed. */
+	int async_notify_handle; /*!< Handle used by the DNS request handling base layer to notify that there are requests pending async handling. */
+#endif
+	knot_layer_t layer; /*!< Query processing layer. */
+	server_t *server; /*!< Name server structure. */
+	send_produced_result send_result; /*!< Sends the results produced.
+	                                       If this is null, the sender handles the response after completion of DNS request handling
+	                                       and sends only a single result. */
+	unsigned thread_id; /*!< Thread identifier. */
+	uint8_t flags; /*!< Flags for how the DNS request handler should handle requests. */
+};
+
+/*! \brief Network request data from the network layer. */
+typedef struct dns_handler_network_layer_request {
+	struct sockaddr_storage source_addr; /*!< Source address. */
+	struct sockaddr_storage target_addr; /*!< Target address. */
+	struct iovec *rx; /*!< Received iovec. */
+	struct iovec *tx; /*!< Send iovec. */
+	struct knot_xdp_msg *xdp_msg; /*!< XDP message. */
+	knot_mm_t *mm; /*!< Processing memory context. */
+	int fd; /*!< Handle for the network request. */
+} dns_handler_network_layer_request_t;
+
+/*! \brief Network handler data to process the request. */
+typedef struct dns_handler_request_data {
+#ifdef ENABLE_ASYNC_QUERY_HANDLING
+	dns_handler_request_flag_t flag; /*!< Flags for the req. */
+#endif
+	knot_pkt_t *ans; /*!< Answer for the req. */
+	dns_request_handler_context_t *dns_handler_ctx; /*!< DNS request handler context for the req. */
+	knotd_qdata_params_t *params; /*!< Params for this req. */
+	struct {
+		knot_layer_state_t state; //!< Processing state.
+		void *data;               //!< Module specific.
+		tsig_ctx_t *tsig;         //!< TODO: remove
+		unsigned flags;           //!< Custom flags.
+	} layer_data_backup_on_async_stop; //!< Layer data backup while the req is suspended for async processing. Valid only while the req is suspended.
+} dns_handler_request_data_t;
+
+/*! \brief DNS handler request data. */
+struct dns_handler_request {
+	dns_handler_network_layer_request_t req_data; /*!< Data from the network layer. Only data here can be exchanged between the network layer and the DNS handler. */
+	dns_handler_request_data_t handler_data; /*!< Data from the DNS request handler. This data should be treated as private to the DNS handler; the network handler should not use it. */
+};
+
+/*!
+ * \brief Initialize the DNS request handler.
+ *
+ * \param dns_handler DNS handler to be initialized.
+ * \param server Server to be used in this DNS request handler. + * \param thread_id ID of the thread that will invoke this dns_handler. + * \param flags DNS request flags to be used in this handler. + * \param send_result Optional. Callback method to send results to network layer. If none provided, only final result will be available in tx of request. + * \param async_complete Notification when async query is completed. + * + * \retval KNOT_EOK if success. + */ +int initialize_dns_handle( + dns_request_handler_context_t *dns_handler, + server_t *server, + int thread_id, + uint8_t flags, + send_produced_result send_result +#ifdef ENABLE_ASYNC_QUERY_HANDLING + ,async_query_completed_callback async_complete +#endif +); + +/*! + * \brief Cleanup dns request handler. + * + * \param dns_handler DNS handler to be cleaned up. + */ +void cleanup_dns_handle(dns_request_handler_context_t *dns_handler); + +/*! + * \brief handles dns request. + * + * \param dns_handler DNS handler to be used. + * \param dns_req DNS request to be processed. + * + * \retval KNOT_EOK if success. + */ +int handle_dns_request(dns_request_handler_context_t *dns_handler, dns_handler_request_t *dns_req); + +/*! + * \brief Clear the request with any previously processed query state information. + * + * \param dns_req DNS request that needs to be reset. + */ +#define dns_handler_request_clear_handler_data(dns_req) memset(&((dns_req).handler_data), 0, sizeof(dns_handler_request_data_t)) + +#ifdef ENABLE_ASYNC_QUERY_HANDLING +/*! + * \brief Get the async handle that needs to be used for monitoring pending async completed requests. + * + * \param dns_handler dns request handler context. + * + * \retval Poll handle for monitoring existence of queries in async queue ready to execute. + */ +#define dns_request_handler_context_get_async_notify_handle(dns_handler) ((dns_handler)->async_notify_handle) + +/*! + * \brief Gets if a DNS request is in async state. + * + * \param dns_req DNS request whose async state needs to be checked. + * + * \retval true if the request is in async state. + */ +#define dns_handler_request_is_async(dns_req) ((dns_req).handler_data.flag & DNS_HANDLER_REQUEST_FLAG_IS_ASYNC) + +/*! + * \brief Gets if a DNS request is in cancelled state. + * + * \param dns_req DNS request whose async state needs to be checked. + * + * \retval true if the request is in cancelled state. + */ +#define dns_handler_request_is_cancelled(dns_req) ((dns_req).handler_data.flag & DNS_HANDLER_REQUEST_FLAG_IS_CANCELLED) + +/*! + * \brief Cancels the request from being processed. + * + * \param dns_req DNS request whose async state needs to be changed. + */ +#define dns_handler_cancel_request(dns_req) ((dns_req).handler_data.flag |= DNS_HANDLER_REQUEST_FLAG_IS_CANCELLED) + +/*! + * \brief Handle DNS async completed queries in this dns handler. + * + * \param dns_handler dns request handler context. + */ +void handle_dns_request_async_completed_queries(dns_request_handler_context_t *dns_handler); + +#endif + +/*! + * \brief Backup the layer state into request itself, + * \brief to allow layer to process other requests while current request is delayed by async processing. + * + * \param layer Layer whose states need to be preserved. + * \param dns_req DNS request that needs to be used to preserve layer state. 
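+ *
+ * A minimal sketch of the intended pairing (the handler variable is
+ * illustrative, not part of this API):
+ *
+ * \code
+ * // suspend: move the layer state into the request
+ * knot_layer_backup_to_dns_handler_request(handler->layer, *dns_req);
+ * // ... the layer may now process other requests ...
+ * // resume: move the layer state back from the request
+ * knot_layer_backup_from_dns_handler_request(handler->layer, *dns_req);
+ * \endcode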
+ */
+#define knot_layer_backup_to_dns_handler_request(layer, dns_req) { \
+	assert((layer).mm == (dns_req).req_data.mm); \
+	(dns_req).handler_data.layer_data_backup_on_async_stop.state = (layer).state; \
+	(dns_req).handler_data.layer_data_backup_on_async_stop.data = (layer).data; \
+	(dns_req).handler_data.layer_data_backup_on_async_stop.tsig = (layer).tsig; \
+	(dns_req).handler_data.layer_data_backup_on_async_stop.flags = (layer).flags; \
+}
+
+/*!
+ * \brief Restore the layer state from the request,
+ * \brief to resume processing a request that was previously delayed due to async handling.
+ *
+ * \param layer Layer whose state needs to be restored.
+ * \param dns_req DNS request that needs to be used to restore the layer state.
+ */
+#define knot_layer_backup_from_dns_handler_request(layer, dns_req) { \
+	(layer).mm = (dns_req).req_data.mm; \
+	(layer).state = (dns_req).handler_data.layer_data_backup_on_async_stop.state; \
+	(layer).data = (dns_req).handler_data.layer_data_backup_on_async_stop.data; \
+	(layer).tsig = (dns_req).handler_data.layer_data_backup_on_async_stop.tsig; \
+	(layer).flags = (dns_req).handler_data.layer_data_backup_on_async_stop.flags; \
+}
+
diff --git a/src/knot/server/network_req_manager.c b/src/knot/server/network_req_manager.c
new file mode 100644
index 0000000000..d357ac5212
--- /dev/null
+++ b/src/knot/server/network_req_manager.c
@@ -0,0 +1,539 @@
+#include "knot/server/network_req_manager.h"
+#include "contrib/memcheck.h"
+
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Watomic-alignment"
+
+/*! \brief Basic network request manager data. */
+typedef struct network_dns_request_manager_basic {
+	network_dns_request_manager_t base; //!< Base network request manager function pointers.
+
+	size_t buffer_size; //!< Buffer size used in rx/tx while allocating the DNS request.
+	knot_mm_t query_processing_mm; //!< Query processing mm for the requests allocated. There is only one per network manager, hence async requests are not supported.
+} network_dns_request_manager_basic_t;
+
+/*!
+ * \brief Free a network request that was allocated with malloc.
+ *
+ * \param mgr Network request manager used for allocating the req.
+ * \param req Request to be freed.
+ */
+static void network_dns_request_manager_basic_free_req(network_dns_request_manager_t *mgr, network_dns_request_t *req) {
+	if (req) {
+		for (unsigned i = 0; i < NBUFS; ++i) {
+			free(req->iov[i].iov_base);
+		}
+		free(req);
+	}
+}
+
+/*!
+ * \brief Allocate memory for data associated with a request.
+ *
+ * \param mgr Network request manager to be used.
+ * \param size Size of memory to allocate.
+ *
+ * \retval Memory allocated, NULL if failed.
+ */
+static void* network_dns_request_manager_basic_allocate_mem(network_dns_request_manager_t *mgr, size_t size) {
+	return malloc(size);
+}
+
+/*!
+ * \brief Free memory for data associated with a request, previously allocated using allocate_mem_func.
+ *
+ * \param mgr Network request manager to be used.
+ * \param mem Memory previously allocated using allocate_mem_func.
+ */
+void network_dns_request_manager_basic_free_mem(struct network_dns_request_manager *mgr, void *mem) {
+	free(mem);
+}
+
+/*!
+ * \brief Allocate a request.
+ *
+ * \param mgr Network request manager to be used.
+ *
+ * \retval Request allocated, NULL if failed.
+ */
+static network_dns_request_t* network_dns_request_manager_basic_allocate_req(network_dns_request_manager_t *mgr) {
+	network_dns_request_manager_basic_t *this = caa_container_of(mgr, network_dns_request_manager_basic_t, base);
+	network_dns_request_t *req = NULL;
+	req = calloc(1, sizeof(network_dns_request_t));
+	if (req == NULL) {
+		return NULL;
+	}
+
+	for (unsigned i = 0; i < NBUFS; ++i) {
+		req->iov[i].iov_base = malloc(this->buffer_size);
+		if (req->iov[i].iov_base == NULL) {
+			network_dns_request_manager_basic_free_req(&this->base, req);
+			return NULL;
+		}
+		req->iov[i].iov_len = this->buffer_size;
+	}
+
+	req->dns_req.req_data.rx = &req->iov[RX];
+	req->dns_req.req_data.tx = &req->iov[TX];
+	req->dns_req.req_data.mm = &this->query_processing_mm;
+	req->dns_req.req_data.xdp_msg = NULL;
+
+	return req;
+}
+
+/*!
+ * \brief Reset a request to handle a new request.
+ *
+ * \param mgr Network request manager to be used.
+ * \param req Request to be reset.
+ */
+static void network_dns_request_manager_basic_reset_request(network_dns_request_manager_t *mgr, network_dns_request_t *req) {
+	network_dns_request_manager_basic_t *this = caa_container_of(mgr, network_dns_request_manager_basic_t, base);
+	req->iov[RX].iov_len = this->buffer_size;
+	req->iov[TX].iov_len = this->buffer_size;
+
+	// The buffers are reused; mark them uninitialized so data from the previous request is not considered valid for the new one.
+	VALGRIND_MAKE_MEM_UNDEFINED(req->iov[RX].iov_base, this->buffer_size);
+	VALGRIND_MAKE_MEM_UNDEFINED(req->iov[TX].iov_base, this->buffer_size);
+
+	dns_handler_request_clear_handler_data(req->dns_req);
+}
+
+/*!
+ * \brief Delete the request manager.
+ *
+ * \param mgr Network request manager to be deleted.
+ */
+static void network_dns_request_manager_basic_delete(network_dns_request_manager_t *mgr) {
+	network_dns_request_manager_basic_t *this = caa_container_of(mgr, network_dns_request_manager_basic_t, base);
+	mp_delete(this->query_processing_mm.ctx);
+	memset(this, 0, sizeof(*this));
+	free(this);
+}
+
+/*! \brief Knot_mm based network request manager data. */
+typedef struct network_dns_request_manager_knot_mm {
+	network_dns_request_manager_t base; //!< Base network request manager function pointers.
+
+	size_t buffer_size; //!< Buffer size used in rx/tx while allocating the DNS request.
+	knot_mm_t req_allocation_mm; //!< mm used for request allocation only. All memory for requests is freed when the request manager is destroyed.
+	knot_mm_t query_processing_mm; //!< Query processing mm for the requests allocated. There is only one per network manager, hence async requests are not supported.
+} network_dns_request_manager_knot_mm_t;
+
+/*!
+ * \brief Free a network request allocated using knot_mm.
+ *
+ * \param mgr Network request manager used for allocating the req.
+ * \param req Request to be freed.
+ */
+static void network_dns_request_manager_knot_mm_free_req(network_dns_request_manager_t *mgr, network_dns_request_t *req) {
+	_unused_ network_dns_request_manager_knot_mm_t *this = caa_container_of(mgr, network_dns_request_manager_knot_mm_t, base);
+
+	// An individual request can't be freed, but mark it as not accessible.
+	VALGRIND_MAKE_MEM_NOACCESS(req->iov[RX].iov_base, this->buffer_size);
+	VALGRIND_MAKE_MEM_NOACCESS(req->iov[TX].iov_base, this->buffer_size);
+	VALGRIND_MAKE_MEM_NOACCESS(req, sizeof(network_dns_request_t));
+}
+
+/*!
+ * \brief Allocate a request.
+ *
+ * \param mgr Network request manager to be used.
+ *
+ * \retval Request allocated, NULL if failed.
+ */ +static network_dns_request_t* network_dns_request_manager_knot_mm_allocate_req(network_dns_request_manager_t *mgr) { + network_dns_request_manager_knot_mm_t *this = caa_container_of(mgr, network_dns_request_manager_knot_mm_t, base); + network_dns_request_t *req = NULL; + req = mm_calloc(&this->req_allocation_mm, 1, sizeof(network_dns_request_t)); + if (req == NULL) { + return NULL; + } + + for (unsigned i = 0; i < NBUFS; ++i) { + req->iov[i].iov_base = mm_alloc(&this->req_allocation_mm, this->buffer_size); + if (req->iov[i].iov_base == NULL) { + network_dns_request_manager_knot_mm_free_req(&this->base, req); + return NULL; + } + req->iov[i].iov_len = this->buffer_size; + } + + req->dns_req.req_data.rx = &req->iov[RX]; + req->dns_req.req_data.tx = &req->iov[TX]; + req->dns_req.req_data.mm = &this->query_processing_mm; + req->dns_req.req_data.xdp_msg = NULL; + + return req; +} + +/*! + * \brief Allocate memory for data associated with request. + * + * \param mgr Network request manager to be used. + * \param size size of memory to allocate. + * + * \retval Memory allocated. NULL if failed. + */ +static void* network_dns_request_manager_knot_mm_allocate_mem(network_dns_request_manager_t *mgr, size_t size) { + network_dns_request_manager_knot_mm_t *this = caa_container_of(mgr, network_dns_request_manager_knot_mm_t, base); + return mm_alloc(&this->req_allocation_mm, size); +} + +/*! + * \brief Free memory for data associated with request previously allocated using allocate_mem_func. + * + * \param mgr Network request manager to be used. + * \param mem Memory previously allocated using allocate_mem_func. + */ +void network_dns_request_manager_knot_mm_free_mem(struct network_dns_request_manager *mgr, void *mem) { + // Individual allocation cannot be freed when done from knot_mm. +} + +/*! + * \brief Reset request to handle new request. + * + * \param mgr Network request manager to be used. + * \param req Request to be reset. + */ +static void network_dns_request_manager_knot_mm_reset_request(network_dns_request_manager_t *mgr, network_dns_request_t *req) { + network_dns_request_manager_knot_mm_t *this = caa_container_of(mgr, network_dns_request_manager_knot_mm_t, base); + req->iov[RX].iov_len = this->buffer_size; + req->iov[TX].iov_len = this->buffer_size; + + // Reusing buffer, make buffer not initialized to avoid previous request data considered valid for new request. + VALGRIND_MAKE_MEM_UNDEFINED(req->iov[RX].iov_base, this->buffer_size); + VALGRIND_MAKE_MEM_UNDEFINED(req->iov[TX].iov_base, this->buffer_size); + + dns_handler_request_clear_handler_data(req->dns_req); +} + +/*! + * \brief Delete the request manager. + * + * \param mgr Network request manager to be deleted. + */ +static void network_dns_request_manager_knot_mm_delete(network_dns_request_manager_t *mgr) { + network_dns_request_manager_knot_mm_t *this = caa_container_of(mgr, network_dns_request_manager_knot_mm_t, base); + mp_delete(this->req_allocation_mm.ctx); + mp_delete(this->query_processing_mm.ctx); + memset(this, 0, sizeof(*this)); + free(this); +} + +#ifdef ENABLE_ASYNC_QUERY_HANDLING +#include "knot/include/lstack.h" + +/*! \brief Pooled network request manager data. */ +typedef struct network_dns_request_pool_manager { + network_dns_request_manager_t base; //!< Base network request manager function pointers. + + size_t buffer_size; //!< Buffer size used in rx/tx while allocating the DNS request. + size_t memory_size; //!< memory size to use for processing DNS request. 
+	knot_mm_t req_allocation_mm; //!< mm used for request allocation only. All memory for requests is freed when the request manager is destroyed.
+	knotd_lockless_stack_t free_pool; //!< Stack of freed requests (the free pool).
+	atomic_shared_dns_request_manager_t *shared_req_mgr; //!< Shared resource manager.
+} network_dns_request_pool_manager_t;
+
+/*!
+ * \brief Free a network request by returning it to the free pool.
+ *
+ * \param mgr Network request manager used for allocating the req.
+ * \param req Request to be freed.
+ */
+static void network_dns_request_pool_manager_free_req(network_dns_request_manager_t *mgr, network_dns_request_t *req) {
+	network_dns_request_pool_manager_t *this = caa_container_of(mgr, network_dns_request_pool_manager_t, base);
+	assert(req->free_list_node.next == NULL);
+
+	// Reset the request for reuse and put it in the pool.
+	mgr->restore_network_request_func(mgr, req);
+
+	// Make the request memory inaccessible,
+	VALGRIND_MAKE_MEM_NOACCESS(req->iov[RX].iov_base, this->buffer_size);
+	VALGRIND_MAKE_MEM_NOACCESS(req->iov[TX].iov_base, this->buffer_size);
+	VALGRIND_MAKE_MEM_NOACCESS(req, sizeof(network_dns_request_t));
+	// except free_list_node, which is needed for maintaining the free list.
+	VALGRIND_MAKE_MEM_UNDEFINED(&req->free_list_node, sizeof(req->free_list_node));
+
+	// Put the request in the free pool.
+	knotd_lockless_stack_push(&this->free_pool, &req->free_list_node);
+}
+
+/*!
+ * \brief Allocate a request from the pool.
+ *
+ * \param mgr Network request manager to be used.
+ *
+ * \retval Request allocated, NULL if failed.
+ */
+static network_dns_request_t* network_dns_request_pool_manager_allocate_req(network_dns_request_manager_t *mgr) {
+	network_dns_request_pool_manager_t *this = caa_container_of(mgr, network_dns_request_pool_manager_t, base);
+	knotd_lockless_stack_node_t *free_node = knotd_lockless_stack_pop(&this->free_pool);
+	if (free_node == NULL) {
+		return NULL;
+	}
+
+	network_dns_request_t* req = caa_container_of(free_node, network_dns_request_t, free_list_node);
+	// Make everything in req available to read; the buffers are available but not initialized.
+	// The request was initialized as part of creation, but we intentionally made it inaccessible while it was in the free list.
+	VALGRIND_MAKE_MEM_DEFINED(req, sizeof(network_dns_request_t));
+	VALGRIND_MAKE_MEM_UNDEFINED(req->iov[RX].iov_base, this->buffer_size);
+	VALGRIND_MAKE_MEM_UNDEFINED(req->iov[TX].iov_base, this->buffer_size);
+	return req;
+}
+
+/*!
+ * \brief Allocate a request.
+ *
+ * \param this Network request manager to be used.
+ *
+ * \retval Request allocated, NULL if failed.
+ */
+static network_dns_request_t* network_dns_request_pool_manager_real_allocate(network_dns_request_pool_manager_t *this) {
+	network_dns_request_t *req = NULL;
+	req = mm_calloc(&this->req_allocation_mm, 1, sizeof(network_dns_request_t));
+	if (req == NULL) {
+		return NULL;
+	}
+
+	for (unsigned i = 0; i < NBUFS; ++i) {
+		req->iov[i].iov_base = mm_alloc(&this->req_allocation_mm, this->buffer_size);
+		if (req->iov[i].iov_base == NULL) {
+			network_dns_request_pool_manager_free_req(&this->base, req);
+			return NULL;
+		}
+		req->iov[i].iov_len = this->buffer_size;
+	}
+
+	req->dns_req.req_data.rx = &req->iov[RX];
+	req->dns_req.req_data.tx = &req->iov[TX];
+	req->dns_req.req_data.mm = mm_calloc(&this->req_allocation_mm, 1, sizeof(knot_mm_t));
+	if (req->dns_req.req_data.mm == NULL) {
+		return NULL;
+	}
+	mm_ctx_mempool(req->dns_req.req_data.mm, this->memory_size);
+	req->dns_req.req_data.xdp_msg = NULL;
+
+	return req;
+}
+
+/*!
+ * \brief Allocate memory for data associated with request. + * + * \param mgr Network request manager to be used. + * \param size size of memory to allocate. + * + * \retval Memory allocated. NULL if failed. + */ +static void* network_dns_request_pool_manager_allocate_mem(network_dns_request_manager_t *mgr, size_t size) { + // Don't allocate from knot_mm. It will lead to race condition as pool manager is shared by threads. + // Only DNS requests are preallocated to avoid race condition. Individual memory should be allocated using malloc, + return malloc(size); +} + +/*! + * \brief Free memory for data associated with request previously allocated using allocate_mem_func. + * + * \param mgr Network request manager to be used. + * \param mem Memory previously allocated using allocate_mem_func. + */ +void network_dns_request_pool_manager_free_mem(struct network_dns_request_manager *mgr, void *mem) { + free(mem); +} + +/*! + * \brief Reset request to handle new request. + * + * \param mgr Network request manager to be used. + * \param req Request to be reset. + */ +static void network_dns_request_pool_manager_reset_request(network_dns_request_manager_t *mgr, network_dns_request_t *req) { + network_dns_request_pool_manager_t *this = caa_container_of(mgr, network_dns_request_pool_manager_t, base); + req->iov[RX].iov_len = this->buffer_size; + req->iov[TX].iov_len = this->buffer_size; + + // Reusing buffer, make buffer not initialized to avoid previous request data considered valid for new request. + VALGRIND_MAKE_MEM_UNDEFINED(req->iov[RX].iov_base, this->buffer_size); + VALGRIND_MAKE_MEM_UNDEFINED(req->iov[TX].iov_base, this->buffer_size); + + dns_handler_request_clear_handler_data(req->dns_req); +} + +/*! + * \brief Delete the request manager. + * + * \param mgr Network request manager to be deleted. + */ +static void network_dns_request_pool_manager_delete(network_dns_request_manager_t *mgr) { + network_dns_request_pool_manager_t *this = caa_container_of(mgr, network_dns_request_pool_manager_t, base); + struct shared_dns_request_manager expect, new_value = {0}; + do { + KNOT_ATOMIC_GET(this->shared_req_mgr, expect); + new_value.ref_count = expect.ref_count - 1; + if (expect.ref_count == 1) { + // last reference + new_value.req_mgr = NULL; + } else { + new_value.req_mgr = expect.req_mgr; + } + } while (!KNOT_ATOMIC_COMPARE_EXCHANGE_WEAK(this->shared_req_mgr, expect, new_value)); + + if ( new_value.ref_count == 0) { + // free the request manager + mp_delete(this->req_allocation_mm.ctx); + knotd_lockless_stack_cleanup(&this->free_pool); + memset(this, 0, sizeof(*this)); + free(this); + } +} +#endif + +/*! + * \brief Creates the network manager which allocates the DNS requests using malloc/free. + * \brief This request manager uses a single knot_mm for all requests allocated. So can't be used for async. + * + * \param buffer_size Buffer size to be used for dns request/response. + * \param memory_size Memory size to be used when handling the DNS request. + * + * \retval DNS request manager on success. NULL otherwise. 
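+ *
+ * A minimal usage sketch (error handling elided; the sizes shown match the
+ * TCP handler's use but are otherwise illustrative assumptions):
+ *
+ * \code
+ * network_dns_request_manager_t *mgr =
+ *         network_dns_request_manager_basic_create(KNOT_WIRE_MAX_PKTSIZE,
+ *                                                  16 * MM_DEFAULT_BLKSIZE);
+ * network_dns_request_t *req = mgr->allocate_network_request_func(mgr);
+ * // ... fill req->dns_req.req_data.rx and process the request ...
+ * mgr->restore_network_request_func(mgr, req);   // recycle for the next query
+ * mgr->free_network_request_func(mgr, req);      // or release it entirely
+ * mgr->delete_req_manager(mgr);
+ * \endcode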
+ */ +network_dns_request_manager_t *network_dns_request_manager_basic_create(size_t buffer_size, size_t memory_size) { + network_dns_request_manager_basic_t *this = calloc(1, sizeof(*this)); + if (this == NULL) { + return NULL; + } + + this->base.allocate_network_request_func = network_dns_request_manager_basic_allocate_req; + this->base.allocate_mem_func = network_dns_request_manager_basic_allocate_mem; + this->base.restore_network_request_func = network_dns_request_manager_basic_reset_request; + this->base.free_network_request_func = network_dns_request_manager_basic_free_req; + this->base.free_mem_func = network_dns_request_manager_basic_free_mem; + this->base.delete_req_manager = network_dns_request_manager_basic_delete; + + this->buffer_size = buffer_size; + mm_ctx_mempool(&this->query_processing_mm, memory_size); + + return &this->base; +} + +/*! + * \brief Creates the network manager which allocates the DNS requests using knot_mm_t. + * \brief This request manager uses a single knot_mm for all requests allocated. So can't be used for async. + * \brief Since knot_mm_t does not support free, any request allocated using this manager will be freed when this manager is destroyed. + * + * \param buffer_size Buffer size to be used for dns request/response. + * \param memory_size Memory size to be used when handling the DNS request. + * + * \retval DNS request manager on success. NULL otherwise. + */ +network_dns_request_manager_t *network_dns_request_manager_knot_mm_create(size_t buffer_size, size_t memory_size) { + network_dns_request_manager_knot_mm_t *this = calloc(1, sizeof(*this)); + if (this == NULL) { + return NULL; + } + + this->base.allocate_network_request_func = network_dns_request_manager_knot_mm_allocate_req; + this->base.allocate_mem_func = network_dns_request_manager_knot_mm_allocate_mem; + this->base.restore_network_request_func = network_dns_request_manager_knot_mm_reset_request; + this->base.free_network_request_func = network_dns_request_manager_knot_mm_free_req; + this->base.free_mem_func = network_dns_request_manager_knot_mm_free_mem; + this->base.delete_req_manager = network_dns_request_manager_knot_mm_delete; + + this->buffer_size = buffer_size; + mm_ctx_mempool(&this->req_allocation_mm, buffer_size); + mm_ctx_mempool(&this->query_processing_mm, memory_size); + + return &this->base; +} + +#ifdef ENABLE_ASYNC_QUERY_HANDLING +/*! + * \brief Creates the network manager which allocates the DNS requests and manages the pool. + * \brief Any freed request will be added to free pool, and hence memory is not released. + * \brief Deleting the request manager frees the memory. + * + * \param shared_req_mgr Shared request pool manager. Should be initialized with init_shared_req_mgr. + * \param buffer_size Buffer size to be used for dns request/response. + * \param memory_size Memory size to be used when handling the DNS request. + * \param pool_size Number of requests to maintain in the pool initially. + * + * \retval DNS request manager on success. NULL otherwise. + */ +network_dns_request_manager_t* network_dns_request_pool_manager_create(atomic_shared_dns_request_manager_t *shared_req_mgr, size_t buffer_size, size_t memory_size, size_t pool_size) { + struct shared_dns_request_manager expect, new_value = {0}; + void *is_being_initialized_value = (void*)shared_req_mgr; + + // atomically initialize the structure. On input created_pool points to NULL. + // If shared_req_mgr points to NULL, make it point to itself and proceed to creation. 
+ // If shared_req_mgr points to itself, wait until it completes. + do { + KNOT_ATOMIC_GET(shared_req_mgr, expect); + if (expect.req_mgr == NULL) { + // not initialized. Try to take a lock. + new_value.req_mgr = is_being_initialized_value; + new_value.ref_count = 0; + if (KNOT_ATOMIC_COMPARE_EXCHANGE_WEAK(shared_req_mgr, expect, new_value)) { + // we got the lock to create it. + break; + } + } + else if (expect.req_mgr == is_being_initialized_value) { + // still being initialized + struct timespec ten_ms = { 0, 10000000}; + nanosleep(&ten_ms, &ten_ms); + } + else { + new_value.req_mgr = expect.req_mgr; + new_value.ref_count = expect.ref_count + 1; + if (KNOT_ATOMIC_COMPARE_EXCHANGE_WEAK(shared_req_mgr, expect, new_value)) { + // we got the reference to the mgr + return expect.req_mgr; + } + } + } + while (true); + + new_value.req_mgr = NULL; + new_value.ref_count = 0; + network_dns_request_pool_manager_t *this = calloc(1, sizeof(*this)); + if (this == NULL) { + KNOT_ATOMIC_INIT(*shared_req_mgr, new_value); + return NULL; + } + + this->base.allocate_network_request_func = network_dns_request_pool_manager_allocate_req; + this->base.allocate_mem_func = network_dns_request_pool_manager_allocate_mem; + this->base.restore_network_request_func = network_dns_request_pool_manager_reset_request; + this->base.free_network_request_func = network_dns_request_pool_manager_free_req; + this->base.free_mem_func = network_dns_request_pool_manager_free_mem; + this->base.delete_req_manager = network_dns_request_pool_manager_delete; + + this->shared_req_mgr = shared_req_mgr; + this->buffer_size = buffer_size; + this->memory_size = memory_size; + + if (knotd_lockless_stack_init(&this->free_pool) != 0) { + free(this); + KNOT_ATOMIC_INIT(*shared_req_mgr, new_value); + return NULL; + } + + mm_ctx_mempool(&this->req_allocation_mm, buffer_size); + + for (size_t i = 0; i < pool_size; i++) { + network_dns_request_t* req = network_dns_request_pool_manager_real_allocate(this); + if (req == NULL) { + mp_delete(this->req_allocation_mm.ctx); + free(this); + KNOT_ATOMIC_INIT(*shared_req_mgr, new_value); + return NULL; + } + knotd_lockless_stack_push(&this->free_pool, &req->free_list_node); + } + + new_value.req_mgr = &this->base; + new_value.ref_count = 1; + KNOT_ATOMIC_INIT(*shared_req_mgr, new_value); + return new_value.req_mgr; +} +#endif diff --git a/src/knot/server/network_req_manager.h b/src/knot/server/network_req_manager.h new file mode 100644 index 0000000000..71c8b33019 --- /dev/null +++ b/src/knot/server/network_req_manager.h @@ -0,0 +1,90 @@ +#pragma once +#include +#include "knot/server/dns-handler.h" +#ifdef ENABLE_ASYNC_QUERY_HANDLING +#include "knot/include/lstack.h" +#endif +#ifdef KNOT_ENABLE_NUMA +#include +#define KNOT_MAX_NUMA 16 +#else +#define KNOT_MAX_NUMA 1 +#endif +#include "knot/common/log.h" + +/*! \brief Control message to fit IP_PKTINFO or IPv6_RECVPKTINFO. */ +typedef union { + struct cmsghdr cmsg; + uint8_t buf[CMSG_SPACE(sizeof(struct in6_pktinfo))]; +} cmsg_pktinfo_t; + +/*! \brief DNS request structure allocated for networking layer. */ +typedef struct network_dns_request { +#ifdef ENABLE_ASYNC_QUERY_HANDLING + knotd_lockless_stack_node_t free_list_node;//!< Lockless stack node. +#endif + dns_handler_request_t dns_req; //!< dns request part for the handler. + struct iovec iov[NBUFS]; //!< IOV used in network API for this DNS request. + size_t msg_namelen_received; //!< Message name length received. + size_t msg_controllen_received; //!< Message control length received. 
+ cmsg_pktinfo_t pktinfo; //!< Request's DNS cmsg info. +} network_dns_request_t; + +/*! \brief Network request manager that handles allocation/deallocation/reset. */ +typedef struct network_dns_request_manager { + network_dns_request_t* (*allocate_network_request_func)(struct network_dns_request_manager *); //!< allocate request call. + void* (*allocate_mem_func)(struct network_dns_request_manager *, size_t); //!< allocate memory for request call. + void (*restore_network_request_func)(struct network_dns_request_manager *, network_dns_request_t *); //!< Restore request state after a query is executed to prepare for next request. + void (*free_network_request_func)(struct network_dns_request_manager *, network_dns_request_t *); //!< Free previously allocated request. + void (*free_mem_func)(struct network_dns_request_manager *, void *); //!< Free memory allocated in allocate_mem_func. + void (*delete_req_manager)(struct network_dns_request_manager *); //!< Delete the dns request manager. +} network_dns_request_manager_t; + +/*! + * \brief Creates the network manager which allocates the DNS requests using malloc/free. + * \brief This request manager uses a single knot_mm for all requests allocated. So can't be used for async. + * + * \param buffer_size Buffer size to be used for dns request/response. + * \param memory_size Memory size to be used when handling the DNS request. + * + * \retval DNS request manager on success. NULL otherwise. + */ +network_dns_request_manager_t *network_dns_request_manager_basic_create(size_t buffer_size, size_t memory_size); + +/*! + * \brief Creates the network manager which allocates the DNS requests using knot_mm_t. + * \brief This request manager uses a single knot_mm for all requests allocated. So can't be used for async. + * \brief Since knot_mm_t does not support free, any request allocated using this manager will be freed when this manager is destroyed. + * + * \param buffer_size Buffer size to be used for dns request/response. + * \param memory_size Memory size to be used when handling the DNS request. + * + * \retval DNS request manager on success. NULL otherwise. + */ +network_dns_request_manager_t *network_dns_request_manager_knot_mm_create(size_t buffer_size, size_t memory_size); + +#ifdef ENABLE_ASYNC_QUERY_HANDLING +struct shared_dns_request_manager { + KNOT_ALIGN(16) + network_dns_request_manager_t *req_mgr; + int ref_count; +}; + +typedef KNOT_ATOMIC struct shared_dns_request_manager atomic_shared_dns_request_manager_t; + +#define init_shared_req_mgr(shared_req_mgr) { struct shared_dns_request_manager __t = {0}; KNOT_ATOMIC_INIT(shared_req_mgr, __t); } + +/*! + * \brief Creates the network manager which allocates the DNS requests and manages the pool. + * \brief Any freed request will be added to free pool, and hence memory is not released. + * \brief Deleting the request manager frees the memory. + * + * \param shared_req_mgr Shared request pool manager. Should be initialized with init_shared_req_mgr. + * \param buffer_size Buffer size to be used for dns request/response. + * \param memory_size Memory size to be used when handling the DNS request. + * \param pool_size Number of requests to maintain in the pool initially. + * + * \retval DNS request manager on success. NULL otherwise. 
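+ *
+ * A minimal per-thread usage sketch (one shared manager per NUMA node, as the
+ * TCP handler does; the pool size of 1024 is an illustrative assumption):
+ *
+ * \code
+ * static atomic_shared_dns_request_manager_t shared_mgr;
+ * init_shared_req_mgr(shared_mgr);  // once, before the worker threads start
+ *
+ * // in each worker thread:
+ * network_dns_request_manager_t *mgr =
+ *         network_dns_request_pool_manager_create(&shared_mgr,
+ *                 KNOT_WIRE_MAX_PKTSIZE, 16 * MM_DEFAULT_BLKSIZE, 1024);
+ * // ... allocate and free requests through mgr ...
+ * mgr->delete_req_manager(mgr);     // drops one reference; the last one frees
+ * \endcode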
+ */ +network_dns_request_manager_t *network_dns_request_pool_manager_create(atomic_shared_dns_request_manager_t *shared_req_mgr, size_t buffer_size, size_t memory_size, size_t pool_size); +#endif diff --git a/src/knot/server/server.c b/src/knot/server/server.c index bac00a9dbe..396133df07 100644 --- a/src/knot/server/server.c +++ b/src/knot/server/server.c @@ -43,6 +43,9 @@ #include "contrib/os.h" #include "contrib/sockaddr.h" #include "contrib/trim.h" +#ifdef KNOT_ENABLE_NUMA +#include +#endif #ifdef ENABLE_XDP #include @@ -1085,7 +1088,25 @@ static int set_handler(server_t *server, int index, unsigned size, runnable_t ru static int configure_threads(conf_t *conf, server_t *server) { - int ret = set_handler(server, IO_UDP, conf->cache.srv_udp_threads, udp_master); + int ret; +#ifdef ENABLE_ASYNC_QUERY_HANDLING + bool use_numa = conf->cache.numa_enabled; +#ifdef KNOT_ENABLE_NUMA + use_numa = use_numa && (-1 != numa_available()); +#endif + + ret = init_udp_async(conf->cache.udp_srv_async_reqs, use_numa); + if (ret != KNOT_EOK) { + return ret; + } + + ret = init_tcp_async(conf->cache.tcp_srv_async_reqs, use_numa); + if (ret != KNOT_EOK) { + return ret; + } +#endif + + ret = set_handler(server, IO_UDP, conf->cache.srv_udp_threads, udp_master); if (ret != KNOT_EOK) { return ret; } diff --git a/src/knot/server/tcp-handler.c b/src/knot/server/tcp-handler.c index 978a82c022..524e060441 100644 --- a/src/knot/server/tcp-handler.c +++ b/src/knot/server/tcp-handler.c @@ -42,20 +42,25 @@ #include "contrib/sockaddr.h" #include "contrib/time.h" #include "contrib/ucw/mempool.h" +#include "knot/server/dns-handler.h" +#include "knot/server/network_req_manager.h" +#include "knot/common/stats.h" /*! \brief TCP context data. */ typedef struct tcp_context { - knot_layer_t layer; /*!< Query processing layer. */ - server_t *server; /*!< Name server structure. */ - struct iovec iov[2]; /*!< TX/RX buffers. */ + network_dns_request_manager_t *req_mgr; /*!< DNS request manager. */ + dns_request_handler_context_t dns_handler; /*!< DNS request handler context. */ + network_dns_request_t *tcp_req; /*!< DNS request. */ unsigned client_threshold; /*!< Index of first TCP client. */ struct timespec last_poll_time; /*!< Time of the last socket poll. */ bool is_throttled; /*!< TCP connections throttling switch. */ fdset_t set; /*!< Set of server/client sockets. */ - unsigned thread_id; /*!< Thread identifier. */ unsigned max_worker_fds; /*!< Max TCP clients per worker configuration + no. of ifaces. */ int idle_timeout; /*!< [s] TCP idle timeout configuration. */ int io_timeout; /*!< [ms] TCP send/recv timeout configuration. */ +#ifdef ENABLE_ASYNC_QUERY_HANDLING + int async_fd; /*!< Async notification file descriptor. */ +#endif } tcp_context_t; #define TCP_SWEEP_INTERVAL 2 /*!< [secs] granularity of connection sweeping. */ @@ -87,9 +92,17 @@ static void client_addr(const struct sockaddr_storage *ss, char *out, size_t out } /*! \brief Sweep TCP connection. */ -static fdset_sweep_state_t tcp_sweep(fdset_t *set, int fd, _unused_ void *data) +static fdset_sweep_state_t tcp_sweep(fdset_t *set, int fd, _unused_ void *ctx, _unused_ void *data) { - assert(set && fd >= 0); + assert(set && fd >= 0 && data != NULL); + + _unused_ tcp_context_t *tcp = (tcp_context_t*)data; +#ifdef ENABLE_ASYNC_QUERY_HANDLING + network_dns_request_t *req = (network_dns_request_t *) ctx; + if (req != NULL) { + dns_handler_cancel_request(req->dns_req); + } +#endif /* Best-effort, name and shame. 
*/ struct sockaddr_storage ss = { 0 }; @@ -103,16 +116,6 @@ static fdset_sweep_state_t tcp_sweep(fdset_t *set, int fd, _unused_ void *data) return FDSET_SWEEP; } -static bool tcp_active_state(int state) -{ - return (state == KNOT_STATE_PRODUCE || state == KNOT_STATE_FAIL); -} - -static bool tcp_send_state(int state) -{ - return (state != KNOT_STATE_FAIL && state != KNOT_STATE_NOOP); -} - static void tcp_log_error(struct sockaddr_storage *ss, const char *operation, int ret) { /* Don't log ECONN as it usually means client closed the connection. */ @@ -156,70 +159,41 @@ static unsigned tcp_set_ifaces(const iface_t *ifaces, size_t n_ifaces, return fdset_get_length(fds); } -static int tcp_handle(tcp_context_t *tcp, int fd, struct iovec *rx, struct iovec *tx) +static int tcp_handle(tcp_context_t *tcp, _unused_ unsigned idx) { + network_dns_request_t *tcp_req = tcp->tcp_req; /* Get peer name. */ - struct sockaddr_storage ss; socklen_t addrlen = sizeof(struct sockaddr_storage); - if (getpeername(fd, (struct sockaddr *)&ss, &addrlen) != 0) { + if (getpeername(tcp_req->dns_req.req_data.fd, (struct sockaddr *)&tcp_req->dns_req.req_data.source_addr, &addrlen) != 0) { return KNOT_EADDRNOTAVAIL; } - /* Create query processing parameter. */ - knotd_qdata_params_t params = { - .remote = &ss, - .socket = fd, - .server = tcp->server, - .thread_id = tcp->thread_id - }; + addrlen = sizeof(struct sockaddr_storage); + if (getsockname(tcp_req->dns_req.req_data.fd, (struct sockaddr *)&tcp_req->dns_req.req_data.target_addr, &addrlen) != 0) { + return KNOT_EADDRNOTAVAIL; + } - rx->iov_len = KNOT_WIRE_MAX_PKTSIZE; - tx->iov_len = KNOT_WIRE_MAX_PKTSIZE; + tcp->req_mgr->restore_network_request_func(tcp->req_mgr, tcp_req); /* Receive data. */ - int recv = net_dns_tcp_recv(fd, rx->iov_base, rx->iov_len, tcp->io_timeout); + int recv = net_dns_tcp_recv(tcp_req->dns_req.req_data.fd, tcp_req->dns_req.req_data.rx->iov_base, tcp_req->dns_req.req_data.rx->iov_len, tcp->io_timeout); if (recv > 0) { - rx->iov_len = recv; + tcp_req->dns_req.req_data.rx->iov_len = recv; } else { - tcp_log_error(&ss, "receive", recv); + tcp_log_error(&tcp_req->dns_req.req_data.source_addr, "receive", recv); return KNOT_EOF; } - /* Initialize processing layer. */ - knot_layer_begin(&tcp->layer, ¶ms); - - /* Create packets. */ - knot_pkt_t *ans = knot_pkt_new(tx->iov_base, tx->iov_len, tcp->layer.mm); - knot_pkt_t *query = knot_pkt_new(rx->iov_base, rx->iov_len, tcp->layer.mm); + int ret = handle_dns_request(&tcp->dns_handler, &tcp_req->dns_req); +#ifdef ENABLE_ASYNC_QUERY_HANDLING + if (dns_handler_request_is_async(tcp_req->dns_req)) { + // Save the request on tcp connection context + fdset_set_ctx(&tcp->set, idx, tcp->tcp_req); - /* Input packet. */ - int ret = knot_pkt_parse(query, 0); - if (ret != KNOT_EOK && query->parsed > 0) { // parsing failed (e.g. 2x OPT) - query->parsed--; // artificially decreasing "parsed" leads to FORMERR - } - knot_layer_consume(&tcp->layer, query); - - /* Resolve until NOOP or finished. */ - while (tcp_active_state(tcp->layer.state)) { - knot_layer_produce(&tcp->layer, ans); - /* Send, if response generation passed and wasn't ignored. */ - if (ans->size > 0 && tcp_send_state(tcp->layer.state)) { - int sent = net_dns_tcp_send(fd, ans->wire, ans->size, - tcp->io_timeout, NULL); - if (sent != ans->size) { - tcp_log_error(&ss, "send", sent); - ret = KNOT_EOF; - break; - } - } + // Release it + tcp->tcp_req = NULL; } - - /* Reset after processing. 
 */
-	knot_layer_finish(&tcp->layer);
-
-	/* Flush per-query memory (including query and answer packets). */
-	mp_flush(tcp->layer.mm->ctx);
-
+#endif
 	return ret;
 }
@@ -243,8 +217,18 @@ static void tcp_event_accept(tcp_context_t *tcp, unsigned i)
 static int tcp_event_serve(tcp_context_t *tcp, unsigned i)
 {
-	int ret = tcp_handle(tcp, fdset_get_fd(&tcp->set, i),
-	                     &tcp->iov[0], &tcp->iov[1]);
+	if (tcp->tcp_req == NULL) {
+		// The previous TCP req went async; a new request structure is needed to process this request.
+		tcp->tcp_req = tcp->req_mgr->allocate_network_request_func(tcp->req_mgr);
+
+		if (tcp->tcp_req == NULL) {
+			server_stats_increment_counter(server_stats_tcp_no_req_obj, 1);
+			return KNOT_EOK; // no free request object, skip processing for now
+		}
+	}
+
+	tcp->tcp_req->dns_req.req_data.fd = fdset_get_fd(&tcp->set, i);
+	int ret = tcp_handle(tcp, i);
 	if (ret == KNOT_EOK) {
 		/* Update socket activity timer. */
 		(void)fdset_set_watchdog(&tcp->set, i, tcp->idle_timeout);
@@ -277,28 +261,118 @@ static void tcp_wait_for_events(tcp_context_t *tcp)
 		unsigned int idx = fdset_it_get_idx(&it);
 		if (fdset_it_is_error(&it)) {
 			should_close = (idx >= tcp->client_threshold);
+#ifdef ENABLE_ASYNC_QUERY_HANDLING
+		} else if (idx == tcp->client_threshold - 1) {
+			// Async completion notification
+			server_stats_increment_counter(server_stats_tcp_async_done, 1);
+			handle_dns_request_async_completed_queries(&tcp->dns_handler);
+#endif
 		} else if (fdset_it_is_pollin(&it)) {
 			/* Master sockets - new connection to accept. */
 			if (idx < tcp->client_threshold) {
 				/* Don't accept more clients than configured. */
 				if (fdset_get_length(set) < tcp->max_worker_fds) {
+					server_stats_increment_counter(server_stats_tcp_accept, 1);
 					tcp_event_accept(tcp, idx);
 				}
 			/* Client sockets - already accepted connection or
 			   closed connection :-( */
-			} else if (tcp_event_serve(tcp, idx) != KNOT_EOK) {
-				should_close = true;
+#ifdef ENABLE_ASYNC_QUERY_HANDLING
+			} else if (fdset_get_ctx(&tcp->set, idx) != NULL) {
+				// Received another request before the current one completed; ignore it for now.
+				// TODO: implement fuller async handling here.
+				server_stats_increment_counter(server_stats_tcp_multiple_req, 1);
+#endif
+			} else {
+				server_stats_increment_counter(server_stats_tcp_received, 1);
+				if (tcp_event_serve(tcp, idx) != KNOT_EOK) {
+					should_close = true;
+				}
 			}
 		}
 		/* Evaluate. */
 		if (should_close) {
+#ifdef ENABLE_ASYNC_QUERY_HANDLING
+			network_dns_request_t *req = (network_dns_request_t *) fdset_it_get_ctx(&it);
+			if (req != NULL) {
+				dns_handler_cancel_request(req->dns_req);
+			}
+#endif
			fdset_it_remove(&it);
 		}
 	}
 	fdset_it_commit(&it);
 }

+static int tcp_send_produced_result(dns_request_handler_context_t *dns_handler, dns_handler_request_t *req, size_t size) {
+	tcp_context_t *tcp = caa_container_of(dns_handler, tcp_context_t, dns_handler);
+	int sent = net_dns_tcp_send(req->req_data.fd, req->req_data.tx->iov_base, size,
+	                            tcp->io_timeout, NULL);
+	if (sent != size) {
+		tcp_log_error(&req->req_data.source_addr, "send", sent);
+	}
+
+	return sent;
+}
+
+#ifdef ENABLE_ASYNC_QUERY_HANDLING
+static bool use_numa = false;
+static bool tcp_use_async = false;
+static atomic_shared_dns_request_manager_t tcp_shared_req_mgr[KNOT_MAX_NUMA];
+static size_t tcp_req_pool_size;
+
+/*!
+ * \brief Initialize TCP async handling.
+ *
+ * \param pool_size Request pool size.
+ * \param numa_enabled Indicates if NUMA is available.
+ *
+ * \retval KNOT_EOK on success.
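+ *
+ * Expected call order, as wired up in configure_threads() (shown here for
+ * illustration only):
+ *
+ * \code
+ * // once, during server configuration, before the TCP workers start:
+ * init_tcp_async(conf->cache.tcp_srv_async_reqs, use_numa);
+ * // each worker thread then runs tcp_master(), which reads the pool
+ * // size and NUMA flag recorded here.
+ * \endcode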
+ */ +int init_tcp_async(size_t pool_size, bool numa_enabled) { + for (int i = 0; i < KNOT_MAX_NUMA; i++) { + init_shared_req_mgr(tcp_shared_req_mgr[i]); + } + tcp_req_pool_size = pool_size; + tcp_use_async = true; + use_numa = numa_enabled; + return KNOT_EOK; +} + +static void tcp_async_query_completed_callback(dns_request_handler_context_t *net, dns_handler_request_t *req) { + tcp_context_t *tcp = caa_container_of(net, tcp_context_t, dns_handler); + network_dns_request_t *tcp_req = caa_container_of(req, network_dns_request_t, dns_req); + + if (!dns_handler_request_is_cancelled(tcp_req->dns_req)) { + bool err = false; + // Send the response + if (req->req_data.tx->iov_len > 0) { + int size = req->req_data.tx->iov_len; + int sent = net_dns_tcp_send(req->req_data.fd, req->req_data.tx->iov_base, size, + tcp->io_timeout, NULL); + if (sent != size) { + tcp_log_error(&req->req_data.source_addr, "send", sent); + err = true; + } + } + + // Cleanup async req from fd to allow fd receive more request + int idx = fdset_get_index_for_fd(&tcp->set, req->req_data.fd); + fdset_set_ctx(&tcp->set, idx, NULL); + + if (!err) { + fdset_set_watchdog(&tcp->set, idx, tcp->idle_timeout); + } + } + + // Free the request + tcp->req_mgr->free_network_request_func(tcp->req_mgr, tcp_req); +} +#endif + + + int tcp_master(dthread_t *thread) { if (thread == NULL || thread->data == NULL) { @@ -319,28 +393,52 @@ int tcp_master(dthread_t *thread) } #endif - int ret = KNOT_EOK; + _unused_ int numa_node = 0; +#ifdef KNOT_ENABLE_NUMA + if (use_numa) + { + unsigned cpu = dt_online_cpus(); + if (cpu > 1) { + unsigned cpu_mask = (dt_get_id(thread) % cpu); + dt_setaffinity(thread, &cpu_mask, 1); + int cpu_numa_node = numa_node_of_cpu(cpu_mask); + numa_node = cpu_numa_node % KNOT_MAX_NUMA; + log_info("TCP thread %d using numa %d, original %d", thread_id, numa_node, cpu_numa_node); + } + } +#endif - /* Create big enough memory cushion. */ - knot_mm_t mm; - mm_ctx_mempool(&mm, 16 * MM_DEFAULT_BLKSIZE); + int ret = KNOT_EOK; /* Create TCP answering context. */ - tcp_context_t tcp = { - .server = handler->server, - .is_throttled = false, - .thread_id = thread_id, - }; - knot_layer_init(&tcp.layer, &mm, process_query_layer()); - - /* Create iovec abstraction. */ - for (unsigned i = 0; i < 2; ++i) { - tcp.iov[i].iov_len = KNOT_WIRE_MAX_PKTSIZE; - tcp.iov[i].iov_base = malloc(tcp.iov[i].iov_len); - if (tcp.iov[i].iov_base == NULL) { - ret = KNOT_ENOMEM; - goto finish; - } + tcp_context_t tcp = {0}; + tcp.req_mgr = +#ifdef ENABLE_ASYNC_QUERY_HANDLING + tcp_use_async ? + network_dns_request_pool_manager_create(&tcp_shared_req_mgr[numa_node], KNOT_WIRE_MAX_PKTSIZE, 16 * MM_DEFAULT_BLKSIZE, tcp_req_pool_size) : +#endif + network_dns_request_manager_basic_create(KNOT_WIRE_MAX_PKTSIZE, 16 * MM_DEFAULT_BLKSIZE); + if (tcp.req_mgr == NULL) { + goto finish; + } + + ret = initialize_dns_handle( + &tcp.dns_handler, + handler->server, + thread_id, + 0, + tcp_send_produced_result +#ifdef ENABLE_ASYNC_QUERY_HANDLING + ,tcp_async_query_completed_callback +#endif + ); + if (ret != KNOT_EOK) { + goto finish; + } + + tcp.tcp_req = tcp.req_mgr->allocate_network_request_func(tcp.req_mgr); + if (tcp.tcp_req == NULL) { + goto finish; } /* Initialize sweep interval and TCP configuration. */ @@ -361,6 +459,15 @@ int tcp_master(dthread_t *thread) goto finish; /* Terminate on zero interfaces. 
 	}
 
+#ifdef ENABLE_ASYNC_QUERY_HANDLING
+	tcp.async_fd = dns_request_handler_context_get_async_notify_handle(&tcp.dns_handler);
+	if (fdset_add(&tcp.set, tcp.async_fd, FDSET_POLLIN, NULL) < 0) {
+		goto finish;
+	}
+
+	tcp.client_threshold++;
+#endif
+
 	for (;;) {
 		/* Check for cancellation. */
 		if (dt_is_cancelled(thread)) {
@@ -372,16 +479,27 @@ int tcp_master(dthread_t *thread)
 
 		/* Sweep inactive clients and refresh TCP configuration. */
 		if (tcp.last_poll_time.tv_sec >= next_sweep.tv_sec) {
-			fdset_sweep(&tcp.set, &tcp_sweep, NULL);
+			fdset_sweep(&tcp.set, &tcp_sweep, &tcp);
 			update_sweep_timer(&next_sweep);
 			update_tcp_conf(&tcp);
 		}
 	}
 
finish:
-	free(tcp.iov[0].iov_base);
-	free(tcp.iov[1].iov_base);
-	mp_delete(mm.ctx);
+#ifdef ENABLE_ASYNC_QUERY_HANDLING
+	{
+		// Give in-flight async queries a grace period to complete before teardown.
+		struct timespec five_sec = { 5, 0 };
+		nanosleep(&five_sec, &five_sec);
+	}
+#endif
+
+	if (tcp.tcp_req != NULL) {
+		tcp.req_mgr->free_network_request_func(tcp.req_mgr, tcp.tcp_req);
+	}
+	if (tcp.req_mgr != NULL) {
+		tcp.req_mgr->delete_req_manager(tcp.req_mgr);
+	}
+	cleanup_dns_handle(&tcp.dns_handler);
 	fdset_clear(&tcp.set);
 
 	return ret;
diff --git a/src/knot/server/tcp-handler.h b/src/knot/server/tcp-handler.h
index b60ce8f1f7..ceecc0c573 100644
--- a/src/knot/server/tcp-handler.h
+++ b/src/knot/server/tcp-handler.h
@@ -28,6 +28,18 @@
 
 #define TCP_BACKLOG_SIZE 10 /*!< TCP listen backlog size. */
 
+#ifdef ENABLE_ASYNC_QUERY_HANDLING
+/*!
+ * \brief Initialize TCP async support.
+ *
+ * \param pool_size Request pool size.
+ * \param numa_enabled Indicates whether NUMA is available.
+ *
+ * \retval KNOT_EOK on success.
+ */
+int init_tcp_async(size_t pool_size, bool numa_enabled);
+#endif
+
 /*!
 * \brief TCP handler thread runnable.
 *
diff --git a/src/knot/server/udp-handler.c b/src/knot/server/udp-handler.c
index 60b771c2eb..a417a4352d 100644
--- a/src/knot/server/udp-handler.c
+++ b/src/knot/server/udp-handler.c
@@ -38,91 +38,20 @@
 #include "knot/nameserver/process_query.h"
 #include "knot/query/layer.h"
 #include "knot/server/server.h"
+#include "knot/server/dns-handler.h"
 #include "knot/server/udp-handler.h"
 #include "knot/server/xdp-handler.h"
-
-/* Buffer identifiers. */
-enum {
-	RX = 0,
-	TX = 1,
-	NBUFS = 2
-};
+#include 
+#include "knot/server/network_req_manager.h"
+#include "knot/common/stats.h"
 
 /*! \brief UDP context data. */
 typedef struct {
-	knot_layer_t layer; /*!< Query processing layer. */
-	server_t *server;   /*!< Name server structure. */
-	unsigned thread_id; /*!< Thread identifier. */
+	dns_request_handler_context_t dns_handler; /*!< DNS request handler context. */
+	network_dns_request_manager_t *req_mgr;
 } udp_context_t;
 
-static bool udp_state_active(int state)
-{
-	return (state == KNOT_STATE_PRODUCE || state == KNOT_STATE_FAIL);
-}
-
-static void udp_handle(udp_context_t *udp, int fd, struct sockaddr_storage *ss,
-                       struct iovec *rx, struct iovec *tx, struct knot_xdp_msg *xdp_msg)
-{
-	/* Create query processing parameter. */
-	knotd_qdata_params_t params = {
-		.remote = ss,
-		.flags = KNOTD_QUERY_FLAG_NO_AXFR | KNOTD_QUERY_FLAG_NO_IXFR | /* No transfers. */
-		         KNOTD_QUERY_FLAG_LIMIT_SIZE, /* Enforce UDP packet size limit. */
-		.socket = fd,
-		.server = udp->server,
-		.xdp_msg = xdp_msg,
-		.thread_id = udp->thread_id
-	};
-
-	/* Start query processing. */
-	knot_layer_begin(&udp->layer, &params);
-
-	/* Create packets. */
-	knot_pkt_t *query = knot_pkt_new(rx->iov_base, rx->iov_len, udp->layer.mm);
-	knot_pkt_t *ans = knot_pkt_new(tx->iov_base, tx->iov_len, udp->layer.mm);
-
-	/* Input packet. */
-	int ret = knot_pkt_parse(query, 0);
-	if (ret != KNOT_EOK && query->parsed > 0) { // parsing failed (e.g. 2x OPT)
-		query->parsed--; // artificially decreasing "parsed" leads to FORMERR
-	}
-	knot_layer_consume(&udp->layer, query);
-
-	/* Process answer. */
-	while (udp_state_active(udp->layer.state)) {
-		knot_layer_produce(&udp->layer, ans);
-	}
-
-	/* Send response only if finished successfully. */
-	if (udp->layer.state == KNOT_STATE_DONE) {
-		tx->iov_len = ans->size;
-	} else {
-		tx->iov_len = 0;
-	}
-
-	/* Reset after processing. */
-	knot_layer_finish(&udp->layer);
-
-	/* Flush per-query memory (including query and answer packets). */
-	mp_flush(udp->layer.mm->ctx);
-}
-
-typedef struct {
-	void* (*udp_init)(void *);
-	void (*udp_deinit)(void *);
-	int (*udp_recv)(int, void *);
-	void (*udp_handle)(udp_context_t *, void *);
-	void (*udp_send)(void *);
-	void (*udp_sweep)(void *); // Optional
-} udp_api_t;
-
-/*! \brief Control message to fit IP_PKTINFO or IPv6_RECVPKTINFO. */
-typedef union {
-	struct cmsghdr cmsg;
-	uint8_t buf[CMSG_SPACE(sizeof(struct in6_pktinfo))];
-} cmsg_pktinfo_t;
-
-static void udp_pktinfo_handle(const struct msghdr *rx, struct msghdr *tx)
+static void udp_pktinfo_handle(const struct msghdr *rx, struct msghdr *tx, int fd, struct sockaddr_storage *target_addr)
 {
 	tx->msg_controllen = rx->msg_controllen;
 	if (tx->msg_controllen > 0) {
@@ -135,6 +64,13 @@ static void udp_pktinfo_handle(const struct msghdr *rx, struct msghdr *tx)
 #if defined(__linux__) || defined(__APPLE__)
 	struct cmsghdr *cmsg = CMSG_FIRSTHDR(tx);
 	if (cmsg == NULL) {
+		/* No pktinfo: the socket is not bound to the ANY address; get the socket's own IP. */
+		socklen_t sock_len = sizeof(*target_addr);
+		if (getsockname(fd, (struct sockaddr *)target_addr,
+		                &sock_len) != 0) {
+			/* getsockname() failed; clear the address. */
+			memset(target_addr, 0, sizeof(*target_addr));
+		}
 		return;
 	}
 
@@ -143,47 +79,84 @@
 		struct in_pktinfo *info = (struct in_pktinfo *)CMSG_DATA(cmsg);
 		info->ipi_spec_dst = info->ipi_addr;
 		info->ipi_ifindex = 0;
+		struct sockaddr_in *target_socket_v4 = (struct sockaddr_in *)target_addr;
+		target_socket_v4->sin_family = AF_INET;
+		target_socket_v4->sin_port = -1; // TBD: fill in if the port is needed later.
+		target_socket_v4->sin_addr = info->ipi_addr;
 	} else if (cmsg->cmsg_level == IPPROTO_IPV6 && cmsg->cmsg_type == IPV6_PKTINFO) {
 		struct in6_pktinfo *info = (struct in6_pktinfo *)CMSG_DATA(cmsg);
 		info->ipi6_ifindex = 0;
+		struct sockaddr_in6 *target_socket_v6 = (struct sockaddr_in6 *)target_addr;
+		target_socket_v6->sin6_family = AF_INET6;
+		target_socket_v6->sin6_port = -1; // TBD: fill in if the port is needed later.
+		target_socket_v6->sin6_addr = info->ipi6_addr;
 	}
 #endif
 }
 
 /* UDP recvfrom() request struct. */
 struct udp_recvfrom {
-	int fd;
-	struct sockaddr_storage addr;
+	network_dns_request_t *udp_req;
 	struct msghdr msg[NBUFS];
-	struct iovec iov[NBUFS];
-	uint8_t buf[NBUFS][KNOT_WIRE_MAX_PKTSIZE];
-	cmsg_pktinfo_t pktinfo;
+	network_dns_request_manager_t *req_mgr;
 };
 
-static void *udp_recvfrom_init(_unused_ void *xdp_sock)
+static inline void udp_set_msghdr_from_req(struct msghdr *msg, network_dns_request_t *req, int rxtx) {
+	msg->msg_name = &req->dns_req.req_data.source_addr;
+	msg->msg_namelen = sizeof(struct sockaddr_storage);
+	msg->msg_iov = &req->iov[rxtx];
+	msg->msg_iovlen = 1;
+	msg->msg_control = &req->pktinfo.cmsg;
+	msg->msg_controllen = sizeof(cmsg_pktinfo_t);
+}
+
+/*!
+ * \brief Sets the DNS request for the msghdr.
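+ *
+ * Both the RX and TX msghdr of the context are pointed at the request's
+ * buffers, so recvmsg()/sendmsg() operate directly on the request's iovecs.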
+ * + * \param udp_recv udp_recvfrom context which needs to be udpated. + * \param req Request to be setup on msghdr. + */ +static void udp_recvfrom_set_request(struct udp_recvfrom *udp_recv, network_dns_request_t *req) { + udp_recv->udp_req = req; + for (unsigned i = 0; i < NBUFS; ++i) { + udp_set_msghdr_from_req(&udp_recv->msg[i], req, i); + } +} + +typedef struct { + void* (*udp_init)(void *, network_dns_request_manager_t *req_mgr); + void (*udp_deinit)(void *); + int (*udp_recv)(int, void *); + void (*udp_handle)(udp_context_t *, void *); + void (*udp_send)(void *); + void (*udp_sweep)(void *); // Optional +} udp_api_t; + + +static void *udp_recvfrom_init(_unused_ void *xdp_sock, network_dns_request_manager_t *req_mgr) { struct udp_recvfrom *rq = malloc(sizeof(struct udp_recvfrom)); if (rq == NULL) { return NULL; } memset(rq, 0, sizeof(struct udp_recvfrom)); + rq->req_mgr = req_mgr; - for (unsigned i = 0; i < NBUFS; ++i) { - rq->iov[i].iov_base = rq->buf + i; - rq->iov[i].iov_len = KNOT_WIRE_MAX_PKTSIZE; - rq->msg[i].msg_name = &rq->addr; - rq->msg[i].msg_namelen = sizeof(rq->addr); - rq->msg[i].msg_iov = &rq->iov[i]; - rq->msg[i].msg_iovlen = 1; - rq->msg[i].msg_control = &rq->pktinfo.cmsg; - rq->msg[i].msg_controllen = sizeof(rq->pktinfo); + network_dns_request_t *udp_req = req_mgr->allocate_network_request_func(req_mgr); + if (udp_req == NULL) { + free(rq); + return NULL; } + + udp_recvfrom_set_request(rq, udp_req); + return rq; } static void udp_recvfrom_deinit(void *d) { struct udp_recvfrom *rq = d; + rq->req_mgr->free_network_request_func(rq->req_mgr, rq->udp_req); free(rq); } @@ -191,14 +164,25 @@ static int udp_recvfrom_recv(int fd, void *d) { /* Reset max lengths. */ struct udp_recvfrom *rq = (struct udp_recvfrom *)d; - rq->iov[RX].iov_len = KNOT_WIRE_MAX_PKTSIZE; + if (rq->udp_req) { + // we are reusing the request, reset it + rq->req_mgr->restore_network_request_func(rq->req_mgr, rq->udp_req); + } + else { + rq->udp_req = rq->req_mgr->allocate_network_request_func(rq->req_mgr); + if (rq->udp_req == NULL) { + // We could not allocate a request + server_stats_increment_counter(server_stats_udp_no_req_obj, 1); + return 0; // Dont process incoming, let the async handler free a request. + } + } rq->msg[RX].msg_namelen = sizeof(struct sockaddr_storage); - rq->msg[RX].msg_controllen = sizeof(rq->pktinfo); + rq->msg[RX].msg_controllen = sizeof(cmsg_pktinfo_t); int ret = recvmsg(fd, &rq->msg[RX], MSG_DONTWAIT); if (ret > 0) { - rq->fd = fd; - rq->iov[RX].iov_len = ret; + rq->udp_req->dns_req.req_data.fd = fd; + rq->udp_req->iov[RX].iov_len = ret; return 1; } @@ -211,20 +195,33 @@ static void udp_recvfrom_handle(udp_context_t *ctx, void *d) /* Prepare TX address. */ rq->msg[TX].msg_namelen = rq->msg[RX].msg_namelen; - rq->iov[TX].iov_len = KNOT_WIRE_MAX_PKTSIZE; - udp_pktinfo_handle(&rq->msg[RX], &rq->msg[TX]); + udp_pktinfo_handle(&rq->msg[RX], &rq->msg[TX], rq->udp_req->dns_req.req_data.fd, &rq->udp_req->dns_req.req_data.target_addr); /* Process received pkt. 
-	udp_handle(ctx, rq->fd, &rq->addr, &rq->iov[RX], &rq->iov[TX], NULL);
+	handle_dns_request(&ctx->dns_handler, &rq->udp_req->dns_req);
+
+#ifdef ENABLE_ASYNC_QUERY_HANDLING
+	if (dns_handler_request_is_async(rq->udp_req->dns_req)) {
+		// Save the UDP source state.
+		rq->udp_req->msg_namelen_received = rq->msg[RX].msg_namelen;
+		rq->udp_req->msg_controllen_received = rq->msg[RX].msg_controllen;
+		// Detach the request; the async completion callback now owns it.
+		rq->udp_req = NULL;
+	}
+#endif
+}
+
+static void udp_send_single_response(network_dns_request_t *udp_req, struct msghdr *msghdr_tx) {
+	if (udp_req->iov[TX].iov_len > 0) {
+		(void)sendmsg(udp_req->dns_req.req_data.fd, msghdr_tx, 0);
+	}
 }
 
 static void udp_recvfrom_send(void *d)
 {
 	struct udp_recvfrom *rq = d;
-	if (rq->iov[TX].iov_len > 0) {
-		(void)sendmsg(rq->fd, &rq->msg[TX], 0);
-	}
+	udp_send_single_response(rq->udp_req, &rq->msg[TX]);
 }
 
 _unused_
@@ -239,43 +236,48 @@ static udp_api_t udp_recvfrom_api = {
 
 #ifdef ENABLE_RECVMMSG
 /* UDP recvmmsg() request struct. */
 struct udp_recvmmsg {
-	int fd;
-	struct sockaddr_storage addrs[RECVMMSG_BATCHLEN];
-	char *iobuf[NBUFS];
-	struct iovec *iov[NBUFS];
+	network_dns_request_t *udp_reqs[RECVMMSG_BATCHLEN];
 	struct mmsghdr *msgs[NBUFS];
 	unsigned rcvd;
-	knot_mm_t mm;
-	cmsg_pktinfo_t pktinfo[RECVMMSG_BATCHLEN];
+	network_dns_request_manager_t *req_mgr;
+	size_t udp_reqs_available;
+	bool udp_reqs_fully_allocated;
+	int in_progress_fd;
 };
 
-static void *udp_recvmmsg_init(_unused_ void *xdp_sock)
-{
-	knot_mm_t mm;
-	mm_ctx_mempool(&mm, sizeof(struct udp_recvmmsg));
+/*!
+ * \brief Sets the DNS request as the req_index'th request in the mmsghdr.
+ *
+ * \param rq udp_recvmmsg context to be updated.
+ * \param req_index Index at which req is to be set in rq.
+ * \param req Request to be set up on the mmsghdr.
+ */
+static void udp_recvmmsg_set_request(struct udp_recvmmsg *rq, unsigned req_index, network_dns_request_t *req) {
+	rq->udp_reqs[req_index] = req;
+	for (unsigned i = 0; i < NBUFS; ++i) {
+		udp_set_msghdr_from_req(&rq->msgs[i][req_index].msg_hdr, req, i);
+	}
+}
 
-	struct udp_recvmmsg *rq = mm_alloc(&mm, sizeof(struct udp_recvmmsg));
+static void *udp_recvmmsg_init(_unused_ void *xdp_sock, network_dns_request_manager_t *req_mgr)
+{
+	struct udp_recvmmsg *rq = (struct udp_recvmmsg *)req_mgr->allocate_mem_func(req_mgr, sizeof(struct udp_recvmmsg));
 	memset(rq, 0, sizeof(*rq));
-	memcpy(&rq->mm, &mm, sizeof(knot_mm_t));
+	rq->req_mgr = req_mgr;
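+	/* Note that unlike the old per-loop buffers, the mmsghdr arrays below
+	 * point into request objects owned by req_mgr, so a batch entry can be
+	 * handed off to the async path without copying. */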
 
 	/* Initialize buffers. */
 	for (unsigned i = 0; i < NBUFS; ++i) {
-		rq->iobuf[i] = mm_alloc(&mm, KNOT_WIRE_MAX_PKTSIZE * RECVMMSG_BATCHLEN);
-		rq->iov[i] = mm_alloc(&mm, sizeof(struct iovec) * RECVMMSG_BATCHLEN);
-		rq->msgs[i] = mm_alloc(&mm, sizeof(struct mmsghdr) * RECVMMSG_BATCHLEN);
+		rq->msgs[i] = req_mgr->allocate_mem_func(req_mgr, sizeof(struct mmsghdr) * RECVMMSG_BATCHLEN);
 		memset(rq->msgs[i], 0, sizeof(struct mmsghdr) * RECVMMSG_BATCHLEN);
-		for (unsigned k = 0; k < RECVMMSG_BATCHLEN; ++k) {
-			rq->iov[i][k].iov_base = rq->iobuf[i] + k * KNOT_WIRE_MAX_PKTSIZE;
-			rq->iov[i][k].iov_len = KNOT_WIRE_MAX_PKTSIZE;
-			rq->msgs[i][k].msg_hdr.msg_iov = rq->iov[i] + k;
-			rq->msgs[i][k].msg_hdr.msg_iovlen = 1;
-			rq->msgs[i][k].msg_hdr.msg_name = rq->addrs + k;
-			rq->msgs[i][k].msg_hdr.msg_namelen = sizeof(struct sockaddr_storage);
-			rq->msgs[i][k].msg_hdr.msg_control = &rq->pktinfo[k].cmsg;
-			rq->msgs[i][k].msg_hdr.msg_controllen = sizeof(cmsg_pktinfo_t);
-		}
 	}
 
+	for (unsigned k = 0; k < RECVMMSG_BATCHLEN; ++k) {
+		udp_recvmmsg_set_request(rq, k, req_mgr->allocate_network_request_func(req_mgr));
+	}
+
+	rq->udp_reqs_available = RECVMMSG_BATCHLEN;
+	rq->udp_reqs_fully_allocated = true;
+
 	return rq;
 }
 
@@ -283,7 +285,70 @@ static void udp_recvmmsg_deinit(void *d)
 {
 	struct udp_recvmmsg *rq = d;
 	if (rq != NULL) {
-		mp_delete(rq->mm.ctx);
+		for (unsigned k = 0; k < RECVMMSG_BATCHLEN; ++k) {
+			if (rq->udp_reqs[k] != NULL) {
+				rq->req_mgr->free_network_request_func(rq->req_mgr, rq->udp_reqs[k]);
+			}
+		}
+
+		if (rq->req_mgr != NULL) {
+			for (unsigned i = 0; i < NBUFS; ++i) {
+				rq->req_mgr->free_mem_func(rq->req_mgr, rq->msgs[i]);
+			}
+
+			rq->req_mgr->free_mem_func(rq->req_mgr, rq);
+		}
 	}
 }
 
+/*!
+ * \brief Tries to allocate a request for every NULL slot in the batch.
+ *        If an allocation fails, packs the allocated requests into the first
+ *        N slots and updates udp_reqs_available accordingly.
+ *
+ * \param rq udp_recvmmsg context to be updated.
+ */
+static void allocate_or_pack_udp_req(struct udp_recvmmsg *rq) {
+	int last_non_null_index = RECVMMSG_BATCHLEN - 1;
+	for (unsigned k = 0; k < RECVMMSG_BATCHLEN; ++k) {
+		if (rq->udp_reqs[k] == NULL) {
+			// Try to allocate a fresh request.
+			network_dns_request_t *new_req = rq->req_mgr->allocate_network_request_func(rq->req_mgr);
+			if (new_req != NULL) {
+				udp_recvmmsg_set_request(rq, k, new_req);
+			} else {
+				// Allocation failed; move an allocated request from the end of the batch into this slot.
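+				// E.g. with a batch of 4 holding [req, NULL, req, NULL],
+				// the request in slot 2 is moved into slot 1, leaving
+				// [req, req, NULL, NULL] and udp_reqs_available == 2.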
+
+				while (last_non_null_index > k && rq->udp_reqs[last_non_null_index] == NULL) {
+					last_non_null_index--;
+				}
+
+				if (last_non_null_index > k) {
+					// Found an allocated request after the current slot; move it here.
+					udp_recvmmsg_set_request(rq, k, rq->udp_reqs[last_non_null_index]);
+					rq->udp_reqs[last_non_null_index] = NULL;
+				} else {
+					break;
+				}
+			}
+		}
+	}
+
+	// At this point, the requests are either packed to the front or fully allocated.
+	int reqs_allocated = RECVMMSG_BATCHLEN;
+	while (reqs_allocated > 0 && rq->udp_reqs[reqs_allocated - 1] == NULL) {
+		reqs_allocated--;
+	}
+
+	rq->udp_reqs_available = reqs_allocated;
+	rq->udp_reqs_fully_allocated = (reqs_allocated == RECVMMSG_BATCHLEN);
+
+	if (!rq->udp_reqs_fully_allocated) {
+		server_stats_increment_counter(server_stats_udp_req_batch_limited, 1);
+	}
+
+	if (reqs_allocated == 0) {
+		server_stats_increment_counter(server_stats_udp_no_req_obj, 1);
+	}
+}
 
@@ -291,10 +356,17 @@ static int udp_recvmmsg_recv(int fd, void *d)
 {
 	struct udp_recvmmsg *rq = d;
 
-	int n = recvmmsg(fd, rq->msgs[RX], RECVMMSG_BATCHLEN, MSG_DONTWAIT, NULL);
+	if (!rq->udp_reqs_fully_allocated) {
+		allocate_or_pack_udp_req(rq);
+	}
+
+	int n = recvmmsg(fd, rq->msgs[RX], rq->udp_reqs_available, MSG_DONTWAIT, NULL);
 	if (n > 0) {
-		rq->fd = fd;
+		for (int i = 0; i < n; i++) {
+			rq->udp_reqs[i]->dns_req.req_data.fd = fd;
+		}
 		rq->rcvd = n;
+		rq->in_progress_fd = fd;
 	}
 	return n;
 }
 
@@ -306,12 +378,27 @@ static void udp_recvmmsg_handle(udp_context_t *ctx, void *d)
 
 	/* Handle each received msg. */
 	for (unsigned i = 0; i < rq->rcvd; ++i) {
 		struct iovec *rx = rq->msgs[RX][i].msg_hdr.msg_iov;
-		struct iovec *tx = rq->msgs[TX][i].msg_hdr.msg_iov;
 		rx->iov_len = rq->msgs[RX][i].msg_len; /* Received bytes. */
 
-		udp_pktinfo_handle(&rq->msgs[RX][i].msg_hdr, &rq->msgs[TX][i].msg_hdr);
+		udp_pktinfo_handle(&rq->msgs[RX][i].msg_hdr, &rq->msgs[TX][i].msg_hdr, rq->udp_reqs[i]->dns_req.req_data.fd, &rq->udp_reqs[i]->dns_req.req_data.target_addr);
+
+		handle_dns_request(&ctx->dns_handler, &rq->udp_reqs[i]->dns_req);
+	}
 
-		udp_handle(ctx, rq->fd, rq->addrs + i, rx, tx, NULL);
+	/* Set up the response for each received msg. */
+	for (unsigned i = 0; i < rq->rcvd; ++i) {
+		struct iovec *tx = rq->msgs[TX][i].msg_hdr.msg_iov;
+#ifdef ENABLE_ASYNC_QUERY_HANDLING
+		if (dns_handler_request_is_async(rq->udp_reqs[i]->dns_req)) {
+			// Save the UDP source state.
+			rq->udp_reqs[i]->msg_namelen_received = rq->msgs[RX][i].msg_hdr.msg_namelen;
+			rq->udp_reqs[i]->msg_controllen_received = rq->msgs[RX][i].msg_hdr.msg_controllen;
+
+			tx->iov_len = 0; // An asynced request has nothing to send yet.
+			rq->udp_reqs[i] = NULL;
+			rq->udp_reqs_fully_allocated = false;
+		}
+#endif
 		rq->msgs[TX][i].msg_len = tx->iov_len;
 		rq->msgs[TX][i].msg_hdr.msg_namelen = 0;
 		if (tx->iov_len > 0) {
@@ -324,18 +411,17 @@ static void udp_recvmmsg_send(void *d)
 {
 	struct udp_recvmmsg *rq = d;
-	(void)sendmmsg(rq->fd, rq->msgs[TX], rq->rcvd, 0);
+	(void)sendmmsg(rq->in_progress_fd, rq->msgs[TX], rq->rcvd, 0);
 	for (unsigned i = 0; i < rq->rcvd; ++i) {
-		/* Reset buffer size and address len. */
-		struct iovec *rx = rq->msgs[RX][i].msg_hdr.msg_iov;
-		struct iovec *tx = rq->msgs[TX][i].msg_hdr.msg_iov;
-		rx->iov_len = KNOT_WIRE_MAX_PKTSIZE; /* Reset RX buflen */
-		tx->iov_len = KNOT_WIRE_MAX_PKTSIZE;
-
-		memset(rq->addrs + i, 0, sizeof(struct sockaddr_storage));
-		rq->msgs[RX][i].msg_hdr.msg_namelen = sizeof(struct sockaddr_storage);
-		rq->msgs[TX][i].msg_hdr.msg_namelen = sizeof(struct sockaddr_storage);
-		rq->msgs[RX][i].msg_hdr.msg_controllen = sizeof(cmsg_pktinfo_t);
+		if (rq->udp_reqs[i] != NULL) {
+			/* Reset buffer size and address len. */
+			rq->req_mgr->restore_network_request_func(rq->req_mgr, rq->udp_reqs[i]);
+
+			memset(&rq->udp_reqs[i]->dns_req.req_data.source_addr, 0, sizeof(struct sockaddr_storage));
+			rq->msgs[RX][i].msg_hdr.msg_namelen = sizeof(struct sockaddr_storage);
+			rq->msgs[TX][i].msg_hdr.msg_namelen = sizeof(struct sockaddr_storage);
+			rq->msgs[RX][i].msg_hdr.msg_controllen = sizeof(cmsg_pktinfo_t);
+		}
 	}
 }
 
@@ -350,7 +436,7 @@ static udp_api_t udp_recvmmsg_api = {
 
 #ifdef ENABLE_XDP
 
-static void *xdp_recvmmsg_init(void *xdp_sock)
+static void *xdp_recvmmsg_init(void *xdp_sock, _unused_ network_dns_request_manager_t *req_mgr)
 {
 	return xdp_handle_init(xdp_sock);
 }
 
@@ -367,7 +453,7 @@ static int xdp_recvmmsg_recv(_unused_ int fd, void *d)
 
 static void xdp_recvmmsg_handle(udp_context_t *ctx, void *d)
 {
-	xdp_handle_msgs(d, &ctx->layer, ctx->server, ctx->thread_id);
+	xdp_handle_msgs(d, &ctx->dns_handler.layer, ctx->dns_handler.server, ctx->dns_handler.thread_id);
 }
 
 static void xdp_recvmmsg_send(void *d)
@@ -453,6 +539,47 @@ static unsigned udp_set_ifaces(const server_t *server, size_t n_ifaces, fdset_t
 	return fdset_get_length(fds);
 }
 
+#ifdef ENABLE_ASYNC_QUERY_HANDLING
+static bool use_numa = false;
+static bool udp_use_async = false;
+static atomic_shared_dns_request_manager_t udp_shared_req_mgr[KNOT_MAX_NUMA];
+static size_t udp_req_pool_size;
+
+/*!
+ * \brief Initialize UDP async support.
+ *
+ * \param pool_size Request pool size.
+ * \param numa_enabled Indicates whether NUMA is available.
+ *
+ * \retval KNOT_EOK on success.
+ */
+int init_udp_async(size_t pool_size, bool numa_enabled) {
+	for (int i = 0; i < KNOT_MAX_NUMA; i++) {
+		init_shared_req_mgr(udp_shared_req_mgr[i]);
+	}
+	udp_req_pool_size = pool_size;
+	udp_use_async = true;
+	use_numa = numa_enabled;
+	return KNOT_EOK;
+}
+
+static void udp_async_query_completed_callback(dns_request_handler_context_t *net, dns_handler_request_t *req) {
+	udp_context_t *udp = caa_container_of(net, udp_context_t, dns_handler);
+	network_dns_request_t *udp_req = caa_container_of(req, network_dns_request_t, dns_req);
+
+	// Prepare the response and send it.
+	struct msghdr txmsg;
+	udp_set_msghdr_from_req(&txmsg, udp_req, TX);
+	txmsg.msg_namelen = udp_req->msg_namelen_received;
+	txmsg.msg_controllen = udp_req->msg_controllen_received;
+	txmsg.msg_control = (txmsg.msg_controllen != 0) ? &udp_req->pktinfo.cmsg : NULL;
+	udp_send_single_response(udp_req, &txmsg);
+
+	// Free the request.
+	udp->req_mgr->free_network_request_func(udp->req_mgr, udp_req);
+}
+#endif
+
 int udp_master(dthread_t *thread)
 {
 	if (thread == NULL || thread->data == NULL) {
@@ -466,13 +593,25 @@ int udp_master(dthread_t *thread)
 		return KNOT_EOK;
 	}
 
+	_unused_ int numa_node = 0;
+
 	/* Set thread affinity to CPU core (same for UDP and XDP). */
 	unsigned cpu = dt_online_cpus();
 	if (cpu > 1) {
 		unsigned cpu_mask = (dt_get_id(thread) % cpu);
 		dt_setaffinity(thread, &cpu_mask, 1);
+#ifdef KNOT_ENABLE_NUMA
+		if (use_numa) {
+			int cpu_numa_node = numa_node_of_cpu(cpu_mask);
+			numa_node = cpu_numa_node % KNOT_MAX_NUMA;
+			log_info("UDP thread %d using numa %d, original %d", thread_id, numa_node, cpu_numa_node);
+		}
+#endif
 	}
 
+	/* Create UDP answering context. */
+	udp_context_t udp = {0};
+
 	/* Choose processing API. */
 	udp_api_t *api = NULL;
 	if (is_xdp_thread(handler->server, thread_id)) {
@@ -484,28 +623,51 @@ int udp_master(dthread_t *thread)
 	} else {
 #ifdef ENABLE_RECVMMSG
 		api = &udp_recvmmsg_api;
+		udp.req_mgr =
+#ifdef ENABLE_ASYNC_QUERY_HANDLING
+			udp_use_async ?
+			network_dns_request_pool_manager_create(&udp_shared_req_mgr[numa_node], KNOT_WIRE_MAX_UDP_PKTSIZE, 16 * MM_DEFAULT_BLKSIZE, udp_req_pool_size) :
+#endif
+			network_dns_request_manager_knot_mm_create(KNOT_WIRE_MAX_UDP_PKTSIZE, 16 * MM_DEFAULT_BLKSIZE);
#else
 		api = &udp_recvfrom_api;
+		udp.req_mgr =
+#ifdef ENABLE_ASYNC_QUERY_HANDLING
+			udp_use_async ?
+			network_dns_request_pool_manager_create(&udp_shared_req_mgr[numa_node], KNOT_WIRE_MAX_UDP_PKTSIZE, 16 * MM_DEFAULT_BLKSIZE, udp_req_pool_size) :
+#endif
+			network_dns_request_manager_basic_create(KNOT_WIRE_MAX_UDP_PKTSIZE, 16 * MM_DEFAULT_BLKSIZE);
+#endif
 	}
+	assert(udp.req_mgr != NULL);
 
 	void *api_ctx = NULL;
 
-	/* Create big enough memory cushion. */
-	knot_mm_t mm;
-	mm_ctx_mempool(&mm, 16 * MM_DEFAULT_BLKSIZE);
-
-	/* Create UDP answering context. */
-	udp_context_t udp = {
-		.server = handler->server,
-		.thread_id = thread_id,
-	};
-	knot_layer_init(&udp.layer, &mm, process_query_layer());
+	/* Initialize UDP answering context. */
+	if (initialize_dns_handle(
+		&udp.dns_handler,
+		handler->server,
+		thread_id,
+		KNOTD_QUERY_FLAG_NO_AXFR | KNOTD_QUERY_FLAG_NO_IXFR | /* No transfers. */
+		KNOTD_QUERY_FLAG_LIMIT_SIZE, /* Enforce UDP packet size limit. */
+		NULL
+#ifdef ENABLE_ASYNC_QUERY_HANDLING
+		, udp_async_query_completed_callback
+#endif
+	) != KNOT_EOK) {
+		goto finish;
+	}
 
 	/* Allocate descriptors for the configured interfaces. */
 	void *xdp_socket = NULL;
 	size_t nifs = handler->server->n_ifaces;
+	size_t fds_size = handler->server->n_ifaces;
+#ifdef ENABLE_ASYNC_QUERY_HANDLING
+	if (udp_use_async) {
+		fds_size++;
+	}
+#endif
 	fdset_t fds;
-	if (fdset_init(&fds, nifs) != KNOT_EOK) {
+	if (fdset_init(&fds, fds_size) != KNOT_EOK) {
 		goto finish;
 	}
 	unsigned nfds = udp_set_ifaces(handler->server, nifs, &fds,
@@ -514,8 +676,15 @@ int udp_master(dthread_t *thread)
 		goto finish;
 	}
 
+#ifdef ENABLE_ASYNC_QUERY_HANDLING
+	int async_completed_notification = dns_request_handler_context_get_async_notify_handle(&udp.dns_handler);
+	if (fdset_add(&fds, async_completed_notification, FDSET_POLLIN, NULL) < 0) {
+		goto finish;
+	}
+#endif
+
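+	/* The async notify handle is polled together with the listening sockets:
+	 * the DNS handler signals it whenever a deferred query completes, which
+	 * wakes this loop to process the completed queries. */
+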
 	/* Initialize the networking API. */
-	api_ctx = api->udp_init(xdp_socket);
+	api_ctx = api->udp_init(xdp_socket, udp.req_mgr);
 	if (api_ctx == NULL) {
 		goto finish;
 	}
@@ -536,9 +705,20 @@ int udp_master(dthread_t *thread)
 			if (!fdset_it_is_pollin(&it)) {
 				continue;
 			}
-			if (api->udp_recv(fdset_it_get_fd(&it), api_ctx) > 0) {
-				api->udp_handle(&udp, api_ctx);
-				api->udp_send(api_ctx);
+			int ready_handle = fdset_it_get_fd(&it);
+#ifdef ENABLE_ASYNC_QUERY_HANDLING
+			if (ready_handle == async_completed_notification) {
+				server_stats_increment_counter(server_stats_udp_async_done, 1);
+				handle_dns_request_async_completed_queries(&udp.dns_handler);
+			} else
+#endif
+			{
+				server_stats_increment_counter(server_stats_udp_received, 1);
+				if (api->udp_recv(ready_handle, api_ctx) > 0) {
+					api->udp_handle(&udp, api_ctx);
+					api->udp_send(api_ctx);
+				}
 			}
 		}
 
@@ -549,8 +729,18 @@ int udp_master(dthread_t *thread)
 	}
 
finish:
+#ifdef ENABLE_ASYNC_QUERY_HANDLING
+	{
+		// Give in-flight async queries a grace period to complete before teardown.
+		struct timespec five_sec = { 5, 0 };
+		nanosleep(&five_sec, &five_sec);
+	}
+#endif
+
+	cleanup_dns_handle(&udp.dns_handler);
 	api->udp_deinit(api_ctx);
-	mp_delete(mm.ctx);
+	if (udp.req_mgr) {
+		udp.req_mgr->delete_req_manager(udp.req_mgr);
+	}
 	fdset_clear(&fds);
 
 	return KNOT_EOK;
diff --git a/src/knot/server/udp-handler.h b/src/knot/server/udp-handler.h
index b09e43e14f..4d4ff3129f 100644
--- a/src/knot/server/udp-handler.h
+++ b/src/knot/server/udp-handler.h
@@ -28,6 +28,18 @@
 
 #define RECVMMSG_BATCHLEN 10 /*!< Default recvmmsg() batch size. */
 
+#ifdef ENABLE_ASYNC_QUERY_HANDLING
+/*!
+ * \brief Initialize UDP async support.
+ *
+ * \param pool_size Request pool size.
+ * \param numa_enabled Indicates whether NUMA is available.
+ *
+ * \retval KNOT_EOK on success.
+ */
+int init_udp_async(size_t pool_size, bool numa_enabled);
+#endif
+
 /*!
 * \brief UDP handler thread runnable.
 *
diff --git a/src/knot/zone/node.c b/src/knot/zone/node.c
index 291454bd76..f149a644e1 100644
--- a/src/knot/zone/node.c
+++ b/src/knot/zone/node.c
@@ -17,14 +17,19 @@
 #include "knot/zone/node.h"
 #include "libknot/libknot.h"
 
-void additional_clear(additional_t *additional)
+void additional_clear_mm(additional_t *additional, knot_mm_t *mm)
 {
 	if (additional == NULL) {
 		return;
 	}
 
-	free(additional->glues);
-	free(additional);
+	mm_free(mm, additional->glues);
+	mm_free(mm, additional);
+}
+
+void additional_clear(additional_t *additional)
+{
+	additional_clear_mm(additional, NULL);
 }
 
 bool additional_equal(additional_t *a, additional_t *b)
@@ -265,14 +270,14 @@ bool binode_additionals_unchanged(zone_node_t *node, zone_node_t *counterpart)
 	return true;
 }
 
-void node_free_rrsets(zone_node_t *node, knot_mm_t *mm)
+void node_free_rrsets_mm(zone_node_t *node, knot_mm_t *mm, bool use_mm_for_additional)
 {
 	if (node == NULL) {
 		return;
 	}
 
 	for (uint16_t i = 0; i < node->rrset_count; ++i) {
-		additional_clear(node->rrs[i].additional);
+		additional_clear_mm(node->rrs[i].additional, use_mm_for_additional ?
+		                    mm : NULL);
 		rr_data_clear(&node->rrs[i], mm);
 	}
 
@@ -281,6 +286,11 @@ void node_free_rrsets(zone_node_t *node, knot_mm_t *mm)
 	node->rrset_count = 0;
 }
 
+void node_free_rrsets(zone_node_t *node, knot_mm_t *mm)
+{
+	node_free_rrsets_mm(node, mm, false);
+}
+
 void node_free(zone_node_t *node, knot_mm_t *mm)
 {
 	if (node == NULL) {
@@ -335,7 +345,7 @@ int node_add_rrset(zone_node_t *node, const knot_rrset_t *rrset, knot_mm_t *mm)
 	return add_rrset_no_merge(node, rrset, mm);
 }
 
-void node_remove_rdataset(zone_node_t *node, uint16_t type)
+void node_remove_rdataset_mm(zone_node_t *node, uint16_t type, knot_mm_t *mm)
 {
 	if (node == NULL) {
 		return;
@@ -346,10 +356,10 @@
 	for (int i = 0; i < node->rrset_count; ++i) {
 		if (node->rrs[i].type == type) {
 			if (!binode_additional_shared(node, type)) {
-				additional_clear(node->rrs[i].additional);
+				additional_clear_mm(node->rrs[i].additional, mm);
 			}
 			if (!binode_rdata_shared(node, type)) {
-				rr_data_clear(&node->rrs[i], NULL);
+				rr_data_clear(&node->rrs[i], mm);
 			}
 			memmove(node->rrs + i, node->rrs + i + 1,
 			        (node->rrset_count - i - 1) * sizeof(struct rr_data));
@@ -359,6 +369,27 @@
 		}
 	}
 }
 
+void node_remove_rdataset(zone_node_t *node, uint16_t type)
+{
+	node_remove_rdataset_mm(node, type, NULL);
+}
+
+int node_add_rrset_additional(zone_node_t *node, uint16_t type, additional_t *additional)
+{
+	if (node == NULL) {
+		return KNOT_EINVAL;
+	}
+
+	for (uint16_t i = 0; i < node->rrset_count; ++i) {
+		if (node->rrs[i].type == type) {
+			node->rrs[i].additional = additional;
+			return KNOT_EOK;
+		}
+	}
+
+	return KNOT_EINVAL;
+}
+
 int node_remove_rrset(zone_node_t *node, const knot_rrset_t *rrset, knot_mm_t *mm)
 {
 	if (node == NULL || rrset == NULL) {
diff --git a/src/knot/zone/node.h b/src/knot/zone/node.h
index d30cc6e1c4..fe1934e865 100644
--- a/src/knot/zone/node.h
+++ b/src/knot/zone/node.h
@@ -109,6 +109,14 @@ enum node_flags {
 typedef void (*node_addrem_cb)(zone_node_t *, void *);
 typedef zone_node_t *(*node_new_cb)(const knot_dname_t *, void *);
 
+/*!
+ * \brief Clears additional structure.
+ *
+ * \param additional Additional to clear.
+ * \param mm Memory context to use.
+ */
+void additional_clear_mm(additional_t *additional, knot_mm_t *mm);
+
 /*!
 * \brief Clears additional structure.
 *
@@ -199,6 +207,16 @@ bool binode_additional_shared(zone_node_t *node, uint16_t type);
 */
 bool binode_additionals_unchanged(zone_node_t *node, zone_node_t *counterpart);
 
+/*!
+ * \brief Destroys allocated data within the node
+ *        structure, but not the node itself.
+ *
+ * \param node Node that contains data to be destroyed.
+ * \param mm Memory context to use.
+ * \param use_mm_for_additional Use mm to free the additional data.
+ */
+void node_free_rrsets_mm(zone_node_t *node, knot_mm_t *mm, bool use_mm_for_additional);
+
 /*!
 * \brief Destroys allocated data within the node
 *        structure, but not the node itself.
@@ -231,6 +249,26 @@ void node_free(zone_node_t *node, knot_mm_t *mm)
 */
 int node_add_rrset(zone_node_t *node, const knot_rrset_t *rrset, knot_mm_t *mm);
 
+/*!
+ * \brief Adds additional data to the node. Only the pointer is saved.
+ *
+ * \param node Node to add the additional data to.
+ * \param type Type of the RR to which the additional data is to be added.
+ * \param additional Additional data to be added.
+ *
+ * \return KNOT_E*
+ */
+int node_add_rrset_additional(zone_node_t *node, uint16_t type, additional_t *additional);
+
+/*!
+ * \brief Removes data for given RR type from node. + * + * \param node Node we want to delete from. + * \param type RR type to delete. + * \param mm Memory context to use. + */ +void node_remove_rdataset_mm(zone_node_t *node, uint16_t type, knot_mm_t *mm); + /*! * \brief Removes data for given RR type from node. * diff --git a/src/knot/zone/zone.c b/src/knot/zone/zone.c index a827eb0e34..09be1b36a3 100644 --- a/src/knot/zone/zone.c +++ b/src/knot/zone/zone.c @@ -155,15 +155,15 @@ flush_journal_replan: return ret; } -zone_t* zone_new(const knot_dname_t *name) +zone_t* zone_new_mm(const knot_dname_t *name, knot_mm_t *mm) { - zone_t *zone = malloc(sizeof(zone_t)); + zone_t *zone = mm_alloc(mm, sizeof(zone_t)); if (zone == NULL) { return NULL; } memset(zone, 0, sizeof(zone_t)); - zone->name = knot_dname_copy(name, NULL); + zone->name = knot_dname_copy(name, mm); if (zone->name == NULL) { free(zone); return NULL; @@ -188,6 +188,11 @@ zone_t* zone_new(const knot_dname_t *name) return zone; } +zone_t* zone_new(const knot_dname_t *name) +{ + return zone_new_mm(name, NULL); +} + void zone_control_clear(zone_t *zone) { if (zone == NULL) { @@ -199,7 +204,7 @@ void zone_control_clear(zone_t *zone) zone->control_update = NULL; } -void zone_free(zone_t **zone_ptr) +void zone_free_mm(zone_t **zone_ptr, knot_mm_t *mm) { if (zone_ptr == NULL || *zone_ptr == NULL) { return; @@ -209,7 +214,7 @@ void zone_free(zone_t **zone_ptr) zone_events_deinit(zone); - knot_dname_free(zone->name, NULL); + knot_dname_free(zone->name, mm); free_ddns_queue(zone); pthread_mutex_destroy(&zone->ddns_lock); @@ -231,10 +236,15 @@ void zone_free(zone_t **zone_ptr) conf_deactivate_modules(&zone->query_modules, &zone->query_plan); - free(zone); + mm_free(mm, zone); *zone_ptr = NULL; } +void zone_free(zone_t **zone_ptr) +{ + zone_free_mm(zone_ptr, NULL); +} + void zone_reset(conf_t *conf, zone_t *zone) { if (zone == NULL) { diff --git a/src/knot/zone/zone.h b/src/knot/zone/zone.h index 6696efa471..bca86af5f3 100644 --- a/src/knot/zone/zone.h +++ b/src/knot/zone/zone.h @@ -25,6 +25,7 @@ #include "knot/updates/changesets.h" #include "knot/zone/contents.h" #include "knot/zone/timers.h" +#include "knot/dnssec/zone-keys.h" #include "libknot/dname.h" #include "libknot/packet/pkt.h" @@ -42,6 +43,8 @@ typedef enum { ZONE_FORCE_ZSK_ROLL = 1 << 4, /*!< Force ZSK rollover. */ ZONE_IS_CATALOG = 1 << 5, /*!< This is a catalog. */ ZONE_IS_CAT_MEMBER = 1 << 6, /*!< This zone exists according to a catalog. */ + ZONE_DNSSEC_ENABLED = 1 << 14, /*!< Dnssec is enabled for this zone. */ + ZONE_EPHEMERAL = 1 << 15,/*!< Ephemeral zone which is not persisted after query processing */ } zone_flag_t; /*! @@ -104,11 +107,23 @@ typedef struct zone /*! \brief Preferred master for remote operation. */ struct sockaddr_storage *preferred_master; + /*! \brief Zone signing context and keys. (for DNSSEC onlinesign) */ + zone_sign_ctx_t *sign_ctx; + /*! \brief Query modules. */ list_t query_modules; struct query_plan *query_plan; } zone_t; +/*!--- + * \brief Creates new zone with emtpy zone content and marks it as ephemeral. If mm is passed, allocates the zone using mm + * + * \param name Zone name. + * + * \return The initialized zone structure or NULL if an error occurred. + */ +zone_t* zone_new_mm(const knot_dname_t *name, knot_mm_t *mm); + /*! * \brief Creates new zone with emtpy zone content. * @@ -118,6 +133,15 @@ typedef struct zone */ zone_t* zone_new(const knot_dname_t *name); +/*! + * \brief Deallocates the zone structure created with zone_new_mm. 
+ *
+ * \note The function also deallocates all bound structures (contents, etc.).
+ *
+ * \param zone_ptr Zone to be freed.
+ * \param mm Memory context the zone was allocated from (NULL for heap allocation).
+ */
+void zone_free_mm(zone_t **zone_ptr, knot_mm_t *mm);
+
 /*!
 * \brief Deallocates the zone structure.
 *
@@ -217,4 +241,4 @@ int zone_set_lastsigned_serial(zone_t *zone, uint32_t serial);
 
 int zone_get_lastsigned_serial(zone_t *zone, uint32_t *serial);
 
-int slave_zone_serial(zone_t *zone, conf_t *conf, uint32_t *serial);
+int slave_zone_serial(zone_t *zone, conf_t *conf, uint32_t *serial);
\ No newline at end of file
diff --git a/src/libdnssec/key.h b/src/libdnssec/key.h
index e113028c13..50269e77e6 100644
--- a/src/libdnssec/key.h
+++ b/src/libdnssec/key.h
@@ -215,6 +215,17 @@ int dnssec_key_set_rdata(dnssec_key_t *key, const dnssec_binary_t *rdata);
 */
 int dnssec_key_load_pkcs8(dnssec_key_t *key, const dnssec_binary_t *pem);
 
+/*!
+ * Load a PKCS #8 private key in the DER format.
+ *
+ * At least an algorithm must be set prior to calling this function.
+ *
+ * The function will create public key, unless it was already set (using
+ * \ref dnssec_key_set_pubkey or \ref dnssec_key_set_rdata). If the public key
+ * was set, the function will prevent loading of non-matching private key.
+ */
+int dnssec_key_load_pkcs8_der(dnssec_key_t *key, const dnssec_binary_t *der);
+
 /*!
 * Check if the key can be used for signing.
 */
diff --git a/src/libdnssec/key/simple.c b/src/libdnssec/key/simple.c
index 10126ccf36..e2695208b8 100644
--- a/src/libdnssec/key/simple.c
+++ b/src/libdnssec/key/simple.c
@@ -53,3 +53,31 @@ int dnssec_key_load_pkcs8(dnssec_key_t *key, const dnssec_binary_t *pem)
 
 	return DNSSEC_EOK;
 }
+
+/* -- public API ----------------------------------------------------------- */
+
+_public_
+int dnssec_key_load_pkcs8_der(dnssec_key_t *key, const dnssec_binary_t *der)
+{
+	if (!key || !der || !der->data) {
+		return DNSSEC_EINVAL;
+	}
+
+	if (dnssec_key_get_algorithm(key) == 0) {
+		return DNSSEC_INVALID_KEY_ALGORITHM;
+	}
+
+	gnutls_privkey_t privkey = NULL;
+	int r = dnssec_der_to_privkey(der, &privkey);
+	if (r != DNSSEC_EOK) {
+		return r;
+	}
+
+	r = key_set_private_key(key, privkey);
+	if (r != DNSSEC_EOK) {
+		gnutls_privkey_deinit(privkey);
+		return r;
+	}
+
+	return DNSSEC_EOK;
+}
diff --git a/src/libdnssec/pem.c b/src/libdnssec/pem.c
index fa463f69d8..c2f8a68116 100644
--- a/src/libdnssec/pem.c
+++ b/src/libdnssec/pem.c
@@ -54,6 +54,49 @@ int dnssec_pem_to_x509(const dnssec_binary_t *pem, gnutls_x509_privkey_t *key)
 	return DNSSEC_EOK;
 }
 
+static int dnssec_der_to_x509(const dnssec_binary_t *der, gnutls_x509_privkey_t *key)
+{
+	if (!der || !key) {
+		return DNSSEC_EINVAL;
+	}
+
+	/* The input is expected to be base64-encoded DER; decode it first. */
+	gnutls_datum_t data = binary_to_datum(der);
+	gnutls_datum_t result;
+	int r = gnutls_base64_decode2(&data, &result);
+	if (r != GNUTLS_E_SUCCESS) {
+		return DNSSEC_ENOMEM;
+	}
+
+	gnutls_x509_privkey_t _key = NULL;
+	r = gnutls_x509_privkey_init(&_key);
+	if (r != GNUTLS_E_SUCCESS) {
+		gnutls_free(result.data);
+		return DNSSEC_ENOMEM;
+	}
+
+	int format = GNUTLS_X509_FMT_DER;
+	char *password = NULL;
+	int flags = GNUTLS_PKCS_PLAIN;
+	r = gnutls_x509_privkey_import_pkcs8(_key, &result, format, password, flags);
+	if (r != GNUTLS_E_SUCCESS) {
+		gnutls_x509_privkey_deinit(_key);
+		gnutls_free(result.data);
+		return DNSSEC_PKCS8_IMPORT_ERROR;
+	}
+
+	gnutls_free(result.data);
+
+	*key = _key;
+
+	return DNSSEC_EOK;
+}
+
 _public_
 int dnssec_pem_to_privkey(const dnssec_binary_t *pem, gnutls_privkey_t *key)
 {
@@ -87,6 +130,39 @@ int dnssec_pem_to_privkey(const dnssec_binary_t *pem, gnutls_privkey_t *key)
 	return DNSSEC_EOK;
 }
 
+_public_
+int dnssec_der_to_privkey(const dnssec_binary_t *der, gnutls_privkey_t *key)
+{
+	if (!der || !key) {
+		return DNSSEC_EINVAL;
+	}
+
+	gnutls_x509_privkey_t key_x509 = NULL;
+	int r = dnssec_der_to_x509(der, &key_x509);
+	if (r != DNSSEC_EOK) {
+		return r;
+	}
+
+	gnutls_privkey_t key_abs = NULL;
+	r = gnutls_privkey_init(&key_abs);
+	if (r != GNUTLS_E_SUCCESS) {
+		gnutls_x509_privkey_deinit(key_x509);
+		return DNSSEC_ENOMEM;
+	}
+
+	int flags = GNUTLS_PRIVKEY_IMPORT_AUTO_RELEASE;
+	r = gnutls_privkey_import_x509(key_abs, key_x509, flags);
+	if (r != GNUTLS_E_SUCCESS) {
+		gnutls_x509_privkey_deinit(key_x509);
+		gnutls_privkey_deinit(key_abs);
+		return DNSSEC_ENOMEM;
+	}
+
+	*key = key_abs;
+
+	return DNSSEC_EOK;
+}
+
 static int try_export_pem(gnutls_x509_privkey_t key, dnssec_binary_t *pem)
 {
 	assert(key);
diff --git a/src/libdnssec/pem.h b/src/libdnssec/pem.h
index c84d87dba4..2c478dd3a5 100644
--- a/src/libdnssec/pem.h
+++ b/src/libdnssec/pem.h
@@ -50,6 +50,16 @@ int dnssec_pem_to_x509(const dnssec_binary_t *pem, gnutls_x509_privkey_t *key);
 */
 int dnssec_pem_to_privkey(const dnssec_binary_t *pem, gnutls_privkey_t *key);
 
+/*!
+ * Create GnuTLS private key from unencrypted DER data.
+ *
+ * \param[in] der DER binary data.
+ * \param[out] key Resulting private key.
+ *
+ * \return Error code, DNSSEC_EOK if successful.
+ */
+int dnssec_der_to_privkey(const dnssec_binary_t *der, gnutls_privkey_t *key);
+
 /*!
 * Export GnuTLS X.509 private key to PEM binary.
 *
diff --git a/src/libknot/packet/pkt.c b/src/libknot/packet/pkt.c
index 381a8dee6e..f4133c1929 100644
--- a/src/libknot/packet/pkt.c
+++ b/src/libknot/packet/pkt.c
@@ -745,7 +745,17 @@ static int parse_payload(knot_pkt_t *pkt, unsigned flags)
 
 	/* Check for trailing garbage. */
 	if (pkt->parsed < pkt->size) {
+#ifdef ENABLE_TRAILING_BYTES
+		/* Tolerate trailing bytes only if they are all zero. */
+		for (size_t trail = pkt->parsed; trail < pkt->size; trail++) {
+			if (pkt->wire[trail] != 0) {
+				return KNOT_ETRAIL;
+			}
+		}
+
+		pkt->size = pkt->parsed;
+#else
 		return KNOT_ETRAIL;
+#endif
 	}
 
 	return KNOT_EOK;
diff --git a/src/libknot/packet/wire.h b/src/libknot/packet/wire.h
index 698ac3dbfe..09cea85445 100644
--- a/src/libknot/packet/wire.h
+++ b/src/libknot/packet/wire.h
@@ -48,6 +48,7 @@ enum knot_wire_sizes {
 	KNOT_WIRE_QUESTION_MIN_SIZE = 5,
 	KNOT_WIRE_RR_MIN_SIZE = 11,
 	KNOT_WIRE_MIN_PKTSIZE = 512,
+	KNOT_WIRE_MAX_UDP_PKTSIZE = KNOT_MAX_UDP_REQRESP_SIZE_BYTES,
 	KNOT_WIRE_MAX_PKTSIZE = 65535,
 	KNOT_WIRE_MAX_PAYLOAD = KNOT_WIRE_MAX_PKTSIZE
 	                        - KNOT_WIRE_HEADER_SIZE
diff --git a/src/libknot/rrset.c b/src/libknot/rrset.c
index 110422f334..c7b0d2789c 100644
--- a/src/libknot/rrset.c
+++ b/src/libknot/rrset.c
@@ -146,18 +146,28 @@ int knot_rrset_rr_to_canonical(knot_rrset_t *rrset)
 
 	/* Convert owner for all RRSets. */
 	knot_dname_to_lower(rrset->owner);
 
+	return knot_rdata_to_canonical(rrset->rrs.rdata->data, rrset->rrs.rdata->len, rrset->type);
+}
+
+_public_
+int knot_rdata_to_canonical(uint8_t *data, uint16_t len, uint16_t type)
+{
+	if (data == NULL) {
+		return KNOT_EINVAL;
+	}
+
 	/* Convert DNAMEs in RDATA only for RFC4034 types. */
-	if (!knot_rrtype_should_be_lowercased(rrset->type)) {
+	if (!knot_rrtype_should_be_lowercased(type)) {
 		return KNOT_EOK;
 	}
 
-	const knot_rdata_descriptor_t *desc = knot_get_rdata_descriptor(rrset->type);
+	const knot_rdata_descriptor_t *desc = knot_get_rdata_descriptor(type);
 	if (desc->type_name == NULL) {
-		desc = knot_get_obsolete_rdata_descriptor(rrset->type);
+		desc = knot_get_obsolete_rdata_descriptor(type);
 	}
 
-	uint16_t rdlen = rrset->rrs.rdata->len;
-	uint8_t *pos = rrset->rrs.rdata->data;
+	uint16_t rdlen = len;
+	uint8_t *pos = data;
 	uint8_t *endpos = pos + rdlen;
 
 	/* No RDATA */
@@ -167,8 +177,8 @@
 
 	/* Otherwise, whole and not malformed RDATA are expected. */
 	for (int i = 0; desc->block_types[i] != KNOT_RDATA_WF_END; ++i) {
-		int type = desc->block_types[i];
-		switch (type) {
+		int block_type = desc->block_types[i];
+		switch (block_type) {
 		case KNOT_RDATA_WF_COMPRESSIBLE_DNAME:
 		case KNOT_RDATA_WF_DECOMPRESSIBLE_DNAME:
 		case KNOT_RDATA_WF_FIXED_DNAME:
@@ -187,8 +197,8 @@
 			break;
 		default:
 			/* Fixed size block */
-			assert(type > 0);
-			pos += type;
+			assert(block_type > 0);
+			pos += block_type;
 		}
 	}
 
diff --git a/src/libknot/rrset.h b/src/libknot/rrset.h
index fdc57196a2..5b0d0297dd 100644
--- a/src/libknot/rrset.h
+++ b/src/libknot/rrset.h
@@ -184,6 +184,22 @@ bool knot_rrset_is_nsec3rel(const knot_rrset_t *rr);
 */
 int knot_rrset_rr_to_canonical(knot_rrset_t *rrset);
 
+/*!
+ * \brief Convert one RDATA into canonical format.
+ *
+ * RDATA domain names are converted only
+ * for types listed in RFC 4034, Section 6.2, except for NSEC (updated by
+ * RFC 6840, Section 5.1) and A6 (not supported).
+ *
+ * \warning This function expects either empty RDATA or full, not malformed
+ *          RDATA. If malformed RDATA is passed to this function, memory errors
+ *          may occur.
+ *
+ * \param data RDATA to convert.
+ * \param len Length of the RDATA.
+ * \param type Type of the RDATA.
+ */
+int knot_rdata_to_canonical(uint8_t *data, uint16_t len, uint16_t type);
+
 /*!
 * \brief Size of rrset in wire format.
 *
diff --git a/src/utils/Makefile.inc b/src/utils/Makefile.inc
index db45ce0bd5..b82abe318c 100644
--- a/src/utils/Makefile.inc
+++ b/src/utils/Makefile.inc
@@ -6,8 +6,8 @@ noinst_LTLIBRARIES += libknotus.la
 
 libknotus_la_CPPFLAGS = $(AM_CPPFLAGS) $(CFLAG_VISIBILITY) $(gnutls_CFLAGS) \
                         $(libedit_CFLAGS) $(libidn2_CFLAGS) $(libidn_CFLAGS) \
-                        $(libkqueue_CFLAGS) $(libnghttp2_CFLAGS) $(lmdb_CFLAGS)
-libknotus_la_LDFLAGS = $(AM_LDFLAGS) $(LDFLAG_EXCLUDE_LIBS)
+                        $(libkqueue_CFLAGS) $(libnghttp2_CFLAGS) $(lmdb_CFLAGS) $(fuzzer_CFLAGS)
+libknotus_la_LDFLAGS = $(AM_LDFLAGS) $(LDFLAG_EXCLUDE_LIBS) $(fuzzer_CFLAGS)
 libknotus_la_LIBADD = $(libidn2_LIBS) $(libidn_LIBS) $(libnghttp2_LIBS)
 
 libknotus_LIBS = libknotus.la libknot.la libdnssec.la $(libcontrib_LIBS) \
                  $(gnutls_LIBS) $(libedit_LIBS)