]> git.ipfire.org Git - thirdparty/knot-dns.git/commitdiff
init
authorDaniel Salzman <daniel.salzman@nic.cz>
Mon, 6 Oct 2025 13:18:12 +0000 (15:18 +0200)
committerDaniel Salzman <daniel.salzman@nic.cz>
Mon, 6 Oct 2025 13:21:09 +0000 (15:21 +0200)
74 files changed:
configure.ac
src/contrib/Makefile.inc
src/contrib/dnstap/message.c
src/contrib/dnstap/message.h
src/contrib/memcheck.h [new file with mode: 0644]
src/contrib/time.h
src/contrib/ucw/lists.h
src/contrib/ucw/mempool.c
src/knot/Makefile.inc
src/knot/common/fdset.c
src/knot/common/fdset.h
src/knot/common/qps_limiter.c [new file with mode: 0644]
src/knot/common/qps_limiter.h [new file with mode: 0644]
src/knot/common/stats.c
src/knot/common/stats.h
src/knot/conf/base.c
src/knot/conf/base.h
src/knot/conf/conf.c
src/knot/conf/conf.h
src/knot/conf/module.c
src/knot/conf/module.h
src/knot/conf/schema.c
src/knot/conf/schema.h
src/knot/dnssec/rrset-sign.c
src/knot/dnssec/rrset-sign.h
src/knot/dnssec/zone-keys.h
src/knot/include/atomic.h [new file with mode: 0644]
src/knot/include/lqueue.h [new file with mode: 0644]
src/knot/include/lstack.h [new file with mode: 0644]
src/knot/include/modcounter.h [new file with mode: 0644]
src/knot/include/module.h
src/knot/modules/delay/Makefile.inc [new file with mode: 0644]
src/knot/modules/delay/delay.c [new file with mode: 0644]
src/knot/modules/delay/delay.rst [new file with mode: 0644]
src/knot/modules/dnstap/Makefile.inc
src/knot/modules/dnstap/dnstap.c
src/knot/modules/dnstap/dnstap.rst
src/knot/modules/dnstap/dnstapcounter.c [new file with mode: 0644]
src/knot/modules/dnstap/dnstapcounter.h [new file with mode: 0644]
src/knot/modules/probe/probe.c
src/knot/modules/queryacl/queryacl.c
src/knot/modules/stats/stats.c
src/knot/nameserver/internet.h
src/knot/nameserver/lqueue.c [new file with mode: 0644]
src/knot/nameserver/lstack.c [new file with mode: 0644]
src/knot/nameserver/nsec_proofs.c
src/knot/nameserver/process_query.c
src/knot/nameserver/process_query.h
src/knot/nameserver/query_module.c
src/knot/nameserver/query_module.h
src/knot/nameserver/query_state.h [new file with mode: 0644]
src/knot/query/layer.h
src/knot/server/dns-handler.c [new file with mode: 0644]
src/knot/server/dns-handler.h [new file with mode: 0644]
src/knot/server/network_req_manager.c [new file with mode: 0644]
src/knot/server/network_req_manager.h [new file with mode: 0644]
src/knot/server/server.c
src/knot/server/tcp-handler.c
src/knot/server/tcp-handler.h
src/knot/server/udp-handler.c
src/knot/server/udp-handler.h
src/knot/zone/node.c
src/knot/zone/node.h
src/knot/zone/zone.c
src/knot/zone/zone.h
src/libdnssec/key.h
src/libdnssec/key/simple.c
src/libdnssec/pem.c
src/libdnssec/pem.h
src/libknot/packet/pkt.c
src/libknot/packet/wire.h
src/libknot/rrset.c
src/libknot/rrset.h
src/utils/Makefile.inc

index afe3e276dc6b176129f5117c8a1ce9dce49cde62..be6e1162a933f997a79053a267a3c44b2740902f 100644 (file)
@@ -386,12 +386,15 @@ static_modules_declars=""
 static_modules_init=""
 doc_modules=""
 
+KNOT_MODULE([azuredb],     "yes")
 KNOT_MODULE([cookies],     "yes")
 KNOT_MODULE([dnsproxy],    "yes", "non-shareable")
 KNOT_MODULE([dnstap],      "no")
+KNOT_MODULE([delay],       "no")
 KNOT_MODULE([geoip],       "yes")
 KNOT_MODULE([noudp],       "yes")
 KNOT_MODULE([onlinesign],  "yes", "non-shareable")
+KNOT_MODULE([azurednssec],     "yes")
 KNOT_MODULE([probe],       "yes")
 KNOT_MODULE([queryacl],    "yes")
 KNOT_MODULE([rrl],         "yes")
@@ -699,12 +702,30 @@ AC_LINK_IFELSE([AC_LANG_PROGRAM([[#include <sched.h>]], [[cpuset_t* set = cpuset
 [AC_DEFINE(HAVE_CPUSET_NETBSD, 1, [Define if cpuset_t and cpuset(3) exists.])])
 
 # Check for '__atomic' compiler builtin atomic functions.
+AC_SEARCH_LIBS([__atomic_compare_exchange_8], [atomic], [], [])
 AC_LINK_IFELSE(
   [AC_LANG_PROGRAM([[#include <stdint.h>]],
-                   [[uint64_t val = 0; __atomic_add_fetch(&val, 1, __ATOMIC_RELAXED);]])],
-  [AC_DEFINE(HAVE_ATOMIC, 1, [Define to 1 if you have '__atomic' functions.])]
+                   [[uint64_t val = 0;
+                   __atomic_store_n(&val, 0UL, __ATOMIC_SEQ_CST);
+                   uint64_t expect = __atomic_add_fetch(&val, 1, __ATOMIC_RELAXED);
+                   __atomic_compare_exchange(&val, &expect, &expect, 0, __ATOMIC_ACQ_REL, __ATOMIC_CONSUME);
+                   ]])],
+  [
+    AC_DEFINE(HAVE_ATOMIC, 1, [Define to 1 if you have '__atomic' functions.])
+    have_atomic=yes
+    have_atomic8_primitives=yes
+  ]
 )
 
+AC_CHECK_HEADER([stdalign.h], [AC_DEFINE(HAVE_STDALIGN, 1, [Define to 1 if you have stdalign.h])])
+AC_CHECK_HEADER([stdatomic.h], [
+  AC_DEFINE(HAVE_STDATOMIC, 1, [Define to 1 if you have stdatomic.h])
+  have_stdatomic=yes
+  have_atomic8_primitives=yes
+])
+
+AM_CONDITIONAL([HAVE_ATOMIC_PRIMITIVES], [test "x$have_atomic8_primitives" = "xyes"])
+
 # Check for '__sync' compiler builtin atomic functions.
 AC_LINK_IFELSE(
   [AC_LANG_PROGRAM([[#include <stdint.h>]],
@@ -745,6 +766,141 @@ AM_CONDITIONAL([HAVE_MAKEINFO], test "$MAKEINFO" != "false")
 filtered_cflags=$(echo -n "$CFLAGS" | sed 's/\W-f\S*-prefix-map=\S*\W/ /g')
 filtered_cppflags=$(echo -n "$CPPFLAGS" | sed 's/\W-f\S*-prefix-map=\S*\W/ /g')
 
+AC_ARG_WITH([memcheck],
+        AC_HELP_STRING([--with-memcheck=auto|yes|no|memcheck_include_folder], [Build with memcheck api. If valgrind/memcheck.h is not in path, define include path in CFLAGS or pass the valgrind/memcheck.h include folder for this parameter. [default=no]]),
+        [with_memcheck=$withval], [with_memcheck=no])
+
+AS_CASE([$with_memcheck],
+    [no], [],
+    [auto], [
+      AC_CHECK_HEADER([valgrind/memcheck.h], [with_memcheck=yes], [with_memcheck=no])
+    ],
+    [yes|*], [
+      AS_IF([test "x$with_memcheck" != "xyes"], [AC_SUBST(CPPFLAGS, "$CPPFLAGS -I$with_memcheck")])
+      AC_CHECK_HEADER([valgrind/memcheck.h], [], [AC_MSG_ERROR([--with-memcheck requires valgrind/memcheck.h header files. If valgrind/memcheck.h is not in path, define include path in CFLAGS or pass the valgrind/memcheck.h include folder for --with-memcheck parameter.])])
+      AC_DEFINE_UNQUOTED([KNOT_ENABLE_MEMCHECK],[1], [Memcheck enabled for this build (1)])
+      with_memcheck=yes
+    ])
+
+AC_ARG_WITH([numa],
+        AC_HELP_STRING([--with-numa=yes|no|numa_include_folder], [Build with numa api. If numa.h is not in path, define include path in CFLAGS or pass the numa.h include folder for this parameter. [default=no]]),
+        [with_numa=$withval], [with_numa=no])
+
+AS_CASE([$with_numa],
+    [no], [],
+    [yes|*], [
+      AC_SEARCH_LIBS([numa_node_of_cpu], [numa],
+      [
+        AC_DEFINE_UNQUOTED([KNOT_ENABLE_NUMA],[1], [NUMA enabled for this build (1)])
+        with_numa=yes
+      ],
+      [AC_MSG_ERROR([--with-numa requires numa.h header files. If numa.h is not in path, define include path in CFLAGS or pass the numa.h include folder for --with-numa parameter.])],
+      [])
+    ])
+
+AC_ARG_WITH([udp-max-size],
+        AC_HELP_STRING([--with-udp-max-size=size], [Maximum supported size of a UDP request/response. [default=65535]]),
+        [udp_max_size=$withval], [udp_max_size=65535])
+AC_DEFINE_UNQUOTED([KNOT_MAX_UDP_REQRESP_SIZE_BYTES],[$udp_max_size], [Value of udp max message size])
+
+AC_ARG_ENABLE([throttle-dnstap-logs],
+    AS_HELP_STRING([--enable-throttle-dnstap-logs=auto|yes|no], [enable throttling support for dnstap logs [default=auto]]),
+    [enable_throttle_dnstap_logs="$enableval"], [enable_throttle_dnstap_logs=auto])
+
+AS_CASE([$enable_throttle_dnstap_logs],
+   [auto],[
+     AS_IF([test "x$have_atomic8_primitives" = xyes], [
+       AC_DEFINE([ENABLE_THROTTLE_DNSTAP_LOGS], [1], [Define to 1 to enable support for throttling dnstap logs.])
+       enable_throttle_dnstap_logs=yes
+     ],
+     [enable_throttle_dnstap_logs=no])
+   ],
+   [yes],[
+     AS_IF([test "x$have_atomic8_primitives" = xyes], [
+       AC_DEFINE([ENABLE_THROTTLE_DNSTAP_LOGS], [1], [Define to 1 to enable support for throttling dnstap logs.])
+     ],
+     [AC_MSG_ERROR([Throttling support needs __atomic or stdatomic support])])
+   ],
+   [no],[],
+   [*], [AC_MSG_ERROR([Invalid value of --enable-throttle-dnstap-logs.]
+ )])
+
+ AC_ARG_ENABLE([async-query],
+    AS_HELP_STRING([--enable-async-query=yes|no], [Enable asynchronous query handling [default=no]]), [], [enable_async_query=no])
+
+AS_CASE([$enable_async_query],
+   [yes],[
+     AC_DEFINE([ENABLE_ASYNC_QUERY_HANDLING], [], [Enable support for handling queries asynchronously])
+     AS_IF([test "x$have_atomic8_primitives" != "xyes"],
+       [AC_MSG_ERROR([Async query support needs __atomic or stdatomic support])])
+   ],
+   [no],[],
+   [*], [AC_MSG_ERROR([Invalid value of --enable-async-query.]
+ )])
+
+AS_IF([test "x$STATIC_MODULE_delay" = "xyes" -o "x$SHARED_MODULE_delay" = "xyes"],[
+    AS_IF([test "x$enable_async_query" != "xyes"], [
+      AC_MSG_ERROR([delay module requires async query support in knot. Enable with --enable-async-query])
+    ])
+])
+
+AC_ARG_ENABLE([trailing-bytes],
+    AS_HELP_STRING([--enable-trailing-bytes=yes|no], [enable trailing bytes in DNS query [default=no]]),
+    [enable_trailing_bytes="$enableval"], [enable_trailing_bytes=no])
+
+AS_IF([test "$enable_trailing_bytes" = yes], [AC_DEFINE([ENABLE_TRAILING_BYTES], [1], [Define to 1 to enable support for queries with additional trailing 0 bytes.])])
+
+AC_ARG_WITH([unbound],
+        AC_HELP_STRING([--with-unbound=yes|no|unbound_folder], [Build with libunbound api. If unbound.h is not in path, define include path in CFLAGS or pass the unbound.h include folder for this parameter. [default=no]]),
+        [with_unbound=$withval], [with_unbound=no])
+
+AS_CASE([$with_unbound],
+    [no], [],
+    [yes|*], [
+      AS_IF([test "x$with_unbound" != "xyes"],
+      [
+        AC_SUBST(CPPFLAGS, "$CPPFLAGS -I$with_unbound/include")
+        AC_SUBST(LDFLAGS, "$LDFLAGS -L$with_unbound/lib")
+      ])
+      AC_CHECK_HEADER([unbound.h], [], [AC_MSG_ERROR([--with-unbound requires unbound.h header files. If unbound.h is not in path, define include path in CFLAGS or pass the unbound.h include folder for --with-unbound parameter.])])
+      AC_SEARCH_LIBS([ub_resolve_async], [unbound], [],
+        [
+          AC_MSG_ERROR([--with-unbound requires unbound.h header and library files. If unbound.h is not in path, define include path in CFLAGS or pass the unbound.h include folder for --with-unbound parameter.])
+        ])
+      AC_DEFINE_UNQUOTED([KNOT_ENABLE_UNBOUND],[1], [unbound enabled for this build (1)])
+      with_unbound=yes
+    ])
+
+with_aliasrr=no
+AS_IF([test "x$enable_async_query" = "xyes"] && [test "x$SUPPORTS_MODULE_azuredb" = "xyes"] && [test "x$with_unbound" = "xyes"],
+  [
+    AM_CONDITIONAL([HAVE_ALIASRR], [true])
+    AC_DEFINE_UNQUOTED([KNOT_ENABLE_ALIASRR],[1], [AliasRR enabled for this build (1)])
+    with_aliasrr=yes
+  ],
+  AM_CONDITIONAL([HAVE_ALIASRR], [false]))
+
+# TrafficManager support
+AC_ARG_ENABLE([trafficmanager],
+    AS_HELP_STRING([--enable-trafficmanager=yes|no], [enable TrafficManager module [default=no]]),
+    [enable_trafficmanager="$enableval"], [enable_trafficmanager=no])
+
+libnethost_CFLAGS=
+libnethost_LIBS=
+trafficmanager_CFLAGS=
+AS_IF([test "$enable_daemon" = "no"],[enable_trafficmanager=no])
+AS_IF([test "$enable_trafficmanager" = yes], [
+  NETCOREAPP_VERSION=$(ls /usr/share/dotnet/packs/Microsoft.NETCore.App.Host.linux-x64 | tail -n 1)
+  libnethost_CFLAGS="-I/usr/share/dotnet/packs/Microsoft.NETCore.App.Host.linux-x64/$NETCOREAPP_VERSION/runtimes/linux-x64/native"
+  libnethost_LIBS=""
+  trafficmanager_CFLAGS="-I../microsoft/trafficmanager/PolicyProvider.Rescue.Native/include"
+  AC_SUBST([libnethost_CFLAGS])
+  AC_SUBST([libnethost_LIBS])
+  AC_SUBST([trafficmanager_CFLAGS])
+])
+AS_IF([test "$enable_trafficmanager" = yes], [AC_DEFINE([USE_TRAFFICMANAGER], [1], [Define to 1 to enable TrafficManager module.])])
+AM_CONDITIONAL([USE_TRAFFICMANAGER], [test "$enable_trafficmanager" = yes])
+
 result_msg_base="  Knot DNS $VERSION
 
     Target:   $host_os $host_cpu $endianity
@@ -762,6 +918,8 @@ result_msg_base="  Knot DNS $VERSION
     Storage dir: ${storage_dir}
     Config dir:  ${config_dir}
     Module dir:  ${module_dir}
+    With valgrind memcheck: ${with_memcheck}
+    UDP Max Size: ${udp_max_size}
 
     Static modules: ${static_modules}
     Shared modules: ${shared_modules}
@@ -774,6 +932,11 @@ result_msg_base="  Knot DNS $VERSION
     Use recvmmsg:           ${enable_recvmmsg}
     Use SO_REUSEPORT(_LB):  ${enable_reuseport}
     XDP support:            ${enable_xdp}
+    Async Query Handling:   ${enable_async_query}
+    AzureDB Module:         ${SUPPORTS_MODULE_azuredb}
+    AliasRR Support:        ${with_aliasrr}
+    Unbound library support ${with_unbound}
+    NUMA Support            ${with_numa}
     Socket polling:         ${socket_polling}
     Memory allocator:       ${with_memory_allocator}
     Fast zone parser:       ${enable_fastparser}
@@ -787,6 +950,9 @@ result_msg_base="  Knot DNS $VERSION
     Ed25519 support:        ${enable_ed25519}
     Ed448 support:          ${enable_ed448}
     Reproducible signing:   ${enable_repro_signing}
+    Throttle sup for dnstap ${enable_throttle_dnstap_logs}
+    Allow Trail Query bytes ${enable_trailing_bytes}
+    Traffic Manager         ${enable_trafficmanager}
     Code coverage:          ${enable_code_coverage}
     Sanitizer:              ${with_sanitizer}
     LibFuzzer:              ${with_fuzzer}
index ee987071d893ccda78f9d7fe2869159269a1e2e4..f71d136c23e1a25d43475e160cdf6b7fe5b57798 100644 (file)
@@ -1,7 +1,7 @@
 noinst_LTLIBRARIES += libcontrib.la
 
-libcontrib_la_CPPFLAGS = $(AM_CPPFLAGS) $(CFLAG_VISIBILITY)
-libcontrib_la_LDFLAGS  = $(AM_LDFLAGS) $(LDFLAG_EXCLUDE_LIBS)
+libcontrib_la_CPPFLAGS = $(AM_CPPFLAGS) $(CFLAG_VISIBILITY) ${fuzzer_CFLAGS}
+libcontrib_la_LDFLAGS  = $(AM_LDFLAGS) $(LDFLAG_EXCLUDE_LIBS) ${fuzzer_CFLAGS}
 libcontrib_la_LIBADD   = $(pthread_LIBS)
 libcontrib_LIBS        = libcontrib.la
 if USE_GNUTLS_MEMSET
@@ -56,6 +56,7 @@ libcontrib_la_SOURCES = \
        contrib/tolower.h                       \
        contrib/trim.h                          \
        contrib/wire_ctx.h                      \
+       contrib/memcheck.h                      \
        contrib/openbsd/siphash.c               \
        contrib/openbsd/siphash.h               \
        contrib/openbsd/strlcat.c               \
@@ -133,7 +134,7 @@ libdnstap_la_CPPFLAGS = $(AM_CPPFLAGS) $(CFLAG_VISIBILITY) $(DNSTAP_CFLAGS)
 libdnstap_la_LDFLAGS  = $(AM_LDFLAGS) $(LDFLAG_EXCLUDE_LIBS)
 libdnstap_LIBS        = libdnstap.la $(DNSTAP_LIBS)
 
-SUFFIXES = .proto .pb-c.c .pb-c.h
+SUFFIXES = .proto .pb-c.c .pb-c.h .c .pp
 
 .proto.pb-c.c:
        $(AM_V_GEN)@PROTOC_C@ --c_out=. -I$(srcdir) $<
@@ -141,6 +142,10 @@ SUFFIXES = .proto .pb-c.c .pb-c.h
 .proto.pb-c.h:
        $(AM_V_GEN)@PROTOC_C@ --c_out=. -I$(srcdir) $<
 
+# Allows the creation of preprocessed file using "make <file-name-without-c>.pp"
+.c.pp:
+       $(COMPILE) -o $@ -E $<
+
 libdnstap_la_SOURCES = \
        contrib/dnstap/convert.c        \
        contrib/dnstap/convert.h        \
index a5f798e2b7df56cc28bc758d745469b839b5bb7c..a9057609b5e2ed0b5550042fb8a7040da6591d7f 100644 (file)
@@ -73,7 +73,10 @@ int dt_message_fill(Dnstap__Message             *m,
                     const int                   protocol,
                     const void                  *wire,
                     const size_t                len_wire,
-                    const struct timespec       *mtime)
+                    const struct timespec       *mtime,
+                                       const void                  *wire2,
+                                       const size_t                len_wire2,
+                    const struct timespec       *mtime2)
 {
        if (m == NULL) {
                return KNOT_EINVAL;
@@ -124,6 +127,20 @@ int dt_message_fill(Dnstap__Message             *m,
                        m->has_response_time_sec = 1;
                        m->has_response_time_nsec = 1;
                }
+
+               if (len_wire2 > 0) {
+                       // Message.query_message
+                       m->query_message.len = len_wire2;
+                       m->query_message.data = (uint8_t *)wire2;
+                       m->has_query_message = 1;
+                       // Message.query_time_sec, Message.query_time_nsec
+                       if (mtime != NULL) {
+                               m->query_time_sec = mtime2->tv_sec;
+                               m->query_time_nsec = mtime2->tv_nsec;
+                               m->has_query_time_sec = 1;
+                               m->has_query_time_nsec = 1;
+                       }
+               }
        }
 
        return KNOT_EOK;
index b9e3aff66b4dade78cbd712a3a088fc0964bef48..425e01b8f2609698b1cb140b0e7665b0e7bd46db 100644 (file)
  *     Length in bytes of 'wire'.
  * \param mtime
  *     Message time. May be NULL.
+ * \param wire2
+ * Wire-format request message received (used only when logging query and response in same message)
+ * \param len_wire2
+ * Length in bytes of 'wire2'.
+ * \param mtime2
+ * Request message time.
  *
  * \retval KNOT_EOK
  * \retval KNOT_EINVAL
@@ -60,4 +66,7 @@ int dt_message_fill(Dnstap__Message             *m,
                     const int                   protocol,
                     const void                  *wire,
                     const size_t                len_wire,
-                    const struct timespec       *mtime);
+                    const struct timespec       *mtime,
+                    const void                  *wire2,
+                    const size_t                len_wire2,
+                    const struct timespec       *mtime2);
diff --git a/src/contrib/memcheck.h b/src/contrib/memcheck.h
new file mode 100644 (file)
index 0000000..96d326a
--- /dev/null
@@ -0,0 +1,10 @@
+#pragma once
+#ifdef KNOT_ENABLE_MEMCHECK
+#include <valgrind/memcheck.h>
+#include <valgrind/valgrind.h>
+#else
+#define RUNNING_ON_VALGRIND 0
+#define VALGRIND_MAKE_MEM_NOACCESS(...)
+#define VALGRIND_MAKE_MEM_UNDEFINED(...)
+#define VALGRIND_MAKE_MEM_DEFINED(...)
+#endif
index 019b8c3fbac7f58ead81719bec981a82973d11ba..22c1a13f417c97588df33874624d442d2d408a64 100644 (file)
 #include <time.h>
 #include <inttypes.h>
 
+#ifndef CLOCK_REALTIME_COARSE
+#define CLOCK_REALTIME_COARSE CLOCK_REALTIME
+#endif
+
 #ifdef __APPLE__
  #define st_mtim st_mtimespec
 #endif
index ee06e7d15efc4b5fb413e03d2a46c0f5b6ac9e13..03149e5aaccde150a0c764180d572d29fcb8a694 100644 (file)
@@ -25,6 +25,8 @@ typedef struct list {
 #define TAIL(list) ((void *)((list).tail.prev))
 #define WALK_LIST(n,list) for(n=HEAD(list);(NODE (n))->next; \
                                n=(void *)((NODE (n))->next))
+#define WALK_LIST_RESUME(n,list) for((n)= (void *)((NODE (n)) ? (NODE(n))->next : HEAD(list)); ((NODE (n)) && (NODE (n))->next) || ((n) = NULL); \
+                               (n)=(void *)((NODE (n))->next))
 #define WALK_LIST_DELSAFE(n,nxt,list) \
      for(n=HEAD(list); (nxt=(void *)((NODE (n))->next)); n=(void *) nxt)
 /* WALK_LIST_FIRST supposes that called code removes each processed node */
index 8e835c117f611869eaa0765409295b8087300b2d..d93eca056a39bc00744a111bbfc4e5c6de479d64 100644 (file)
@@ -18,6 +18,7 @@
 #include <assert.h>
 #include "contrib/asan.h"
 #include "contrib/macros.h"
+#include "contrib/memcheck.h"
 #include "contrib/ucw/mempool.h"
 
 /** \todo This shouldn't be precalculated, but computed on load. */
@@ -198,6 +199,8 @@ mp_flush(struct mempool *pool)
                if ((uint8_t *)chunk - chunk->size == (uint8_t *)pool) {
                        break;
                }
+               /* Inform valgrind all data in this chunk are no longer initialized to ensure next allocator initializes before reading */
+               VALGRIND_MAKE_MEM_UNDEFINED((uint8_t *)chunk - chunk->size, chunk->size);
                next = chunk->next;
                chunk->next = pool->unused;
                ASAN_POISON_MEMORY_REGION(chunk, sizeof(struct mempool_chunk));
@@ -207,6 +210,8 @@ mp_flush(struct mempool *pool)
        pool->state.last[0] = chunk;
        if (chunk) {
                pool->state.free[0] = chunk->size - sizeof(*pool);
+               /* Inform valgrind all data in this pool are no longer initialized to ensure next allocator initializes before reading */
+               VALGRIND_MAKE_MEM_UNDEFINED((uint8_t *)chunk - chunk->size + sizeof(*pool), chunk->size - sizeof(*pool));
                ASAN_POISON_MEMORY_REGION(chunk, sizeof(struct mempool_chunk));
        } else {
                pool->state.free[0] = 0;
@@ -297,6 +302,7 @@ mp_alloc(struct mempool *pool, unsigned size)
                ptr = mp_alloc_internal(pool, size);
        }
        ASAN_UNPOISON_MEMORY_REGION(ptr, size);
+       VALGRIND_MAKE_MEM_UNDEFINED(ptr, size);
        return ptr;
 }
 
@@ -311,6 +317,7 @@ mp_alloc_noalign(struct mempool *pool, unsigned size)
                ptr = mp_alloc_internal(pool, size);
        }
        ASAN_UNPOISON_MEMORY_REGION(ptr, size);
+       VALGRIND_MAKE_MEM_UNDEFINED(ptr, size);
        return ptr;
 }
 
index 335acacc899eb5699fa4c9f69f3dd8b524533898..39a75e90a9c6dd9bfb419c73679e9ddf8f8792b2 100644 (file)
@@ -1,7 +1,7 @@
 libknotd_la_CPPFLAGS = $(AM_CPPFLAGS) $(CFLAG_VISIBILITY) $(libkqueue_CFLAGS) \
                        $(liburcu_CFLAGS) $(lmdb_CFLAGS) $(systemd_CFLAGS) \
-                       -DKNOTD_MOD_STATIC
-libknotd_la_LDFLAGS  = $(AM_LDFLAGS) -export-symbols-regex '^knotd_'
+                       -DKNOTD_MOD_STATIC ${fuzzer_CFLAGS}
+libknotd_la_LDFLAGS  = $(AM_LDFLAGS) -export-symbols-regex '^knotd_' ${fuzzer_CFLAGS}
 libknotd_la_LIBADD   = $(dlopen_LIBS) $(libkqueue_LIBS) $(pthread_LIBS)
 libknotd_LIBS        = libknotd.la libknot.la libdnssec.la libzscanner.la \
                        $(libcontrib_LIBS) $(liburcu_LIBS) $(lmdb_LIBS) \
@@ -103,6 +103,7 @@ libknotd_la_SOURCES = \
        knot/nameserver/notify.h                \
        knot/nameserver/nsec_proofs.c           \
        knot/nameserver/nsec_proofs.h           \
+       knot/nameserver/query_state.h           \
        knot/nameserver/process_query.c         \
        knot/nameserver/process_query.h         \
        knot/nameserver/query_module.c          \
@@ -148,6 +149,10 @@ libknotd_la_SOURCES = \
        knot/journal/serialization.h            \
        knot/server/server.c                    \
        knot/server/server.h                    \
+       knot/server/dns-handler.c               \
+       knot/server/dns-handler.h               \
+       knot/server/network_req_manager.c               \
+       knot/server/network_req_manager.h               \
        knot/server/tcp-handler.c               \
        knot/server/tcp-handler.h               \
        knot/server/udp-handler.c               \
@@ -207,6 +212,16 @@ libknotd_la_SOURCES = \
        knot/zone/zonefile.c                    \
        knot/zone/zonefile.h
 
+if HAVE_ATOMIC_PRIMITIVES
+libknotd_la_SOURCES += \
+       knot/common/qps_limiter.c   \
+       knot/common/qps_limiter.h   \
+       knot/nameserver/lstack.c    \
+       knot/include/lstack.h       \
+       knot/nameserver/lqueue.c    \
+       knot/include/lqueue.h
+endif HAVE_ATOMIC_PRIMITIVES
+
 if HAVE_DAEMON
 noinst_LTLIBRARIES += libknotd.la
 pkgconfig_DATA     += knotd.pc
@@ -218,6 +233,7 @@ KNOTD_MOD_LDFLAGS  = $(AM_LDFLAGS) -module -shared -avoid-version
 pkglibdir = $(module_instdir)
 pkglib_LTLIBRARIES =
 
+# include $(srcdir)/knot/modules/azuredb/Makefile.inc
 include $(srcdir)/knot/modules/cookies/Makefile.inc
 include $(srcdir)/knot/modules/dnsproxy/Makefile.inc
 include $(srcdir)/knot/modules/dnstap/Makefile.inc
@@ -230,3 +246,5 @@ include $(srcdir)/knot/modules/rrl/Makefile.inc
 include $(srcdir)/knot/modules/stats/Makefile.inc
 include $(srcdir)/knot/modules/synthrecord/Makefile.inc
 include $(srcdir)/knot/modules/whoami/Makefile.inc
+include $(srcdir)/knot/modules/delay/Makefile.inc
+# include $(srcdir)/knot/modules/azurednssec/Makefile.inc
index 3514edc524f215b3ae12553f5ddc5c86f7043a80..475a4aa47cc0ee061495312073fae3a4b349e6b1 100644 (file)
@@ -128,6 +128,37 @@ int fdset_add(fdset_t *set, const int fd, const fdset_event_t events, void *ctx)
        return idx;
 }
 
+int fdset_set_ctx(fdset_t *set, unsigned i, void *ctx)
+{
+       if (i < set->n) {
+               set->ctx[i] = ctx;
+               return i;
+       } else {
+               return -1;
+       }
+}
+
+void* fdset_get_ctx(fdset_t *set, unsigned i)
+{
+       if (i < set->n) {
+               return set->ctx[i];
+       } else {
+               return NULL;
+       }
+}
+
+int fdset_set_ctx_on_fd(fdset_t *set, int fd, void *ctx)
+{
+       for (unsigned i = 0; i < set->n; i++) {
+               if (fdset_get_fd(set, i) == fd) {
+                       set->ctx[i] = ctx;
+                       return i;
+               }
+       }
+
+       return -1;
+}
+
 int fdset_remove(fdset_t *set, const unsigned idx)
 {
        if (set == NULL || idx >= set->n) {
@@ -326,7 +357,7 @@ void fdset_sweep(fdset_t *set, const fdset_sweep_cb_t cb, void *data)
                /* Check sweep state, remove if requested. */
                if (set->timeout[idx] > 0 && set->timeout[idx] <= now.tv_sec) {
                        const int fd = fdset_get_fd(set, idx);
-                       if (cb(set, fd, data) == FDSET_SWEEP) {
+                       if (cb(set, fd, set->ctx[idx], data) == FDSET_SWEEP) {
                                (void)fdset_remove(set, idx);
                                continue;
                        }
index 95a5c61e4975e22e296e8018b18de34130b3b955..2a3b662a2333a8268d9210874f045a1007441cdd 100644 (file)
@@ -96,7 +96,7 @@ typedef enum {
 } fdset_sweep_state_t;
 
 /*! \brief Sweep callback (set, index, data) */
-typedef fdset_sweep_state_t (*fdset_sweep_cb_t)(fdset_t *, int, void *);
+typedef fdset_sweep_state_t (*fdset_sweep_cb_t)(fdset_t *, int, void *ctx, void *arg);
 
 /*!
  * \brief Initialize fdset to given size.
@@ -128,6 +128,39 @@ void fdset_clear(fdset_t *set);
  */
 int fdset_add(fdset_t *set, const int fd, const fdset_event_t events, void *ctx);
 
+/*!
+ * \brief Set the context on the descriptor at i.
+ *
+ * \param set Target set.
+ * \param i Index to set ctx at.
+ *
+ * \retval index of the added fd if successful.
+ * \retval -1 on errors.
+ */
+int fdset_set_ctx(fdset_t *set, unsigned i, void *ctx);
+
+/*!
+ * \brief Get the context on the descriptor at i.
+ *
+ * \param set Target set.
+ * \param i Index to get ctx at.
+ *
+ * \retval context value.
+ */
+void* fdset_get_ctx(fdset_t *set, unsigned i);
+
+/*!
+ * \brief Clear the context for the given fd
+ *
+ * \param set Target set.
+ * \param fd Handle whose context needs to be cancelled.
+ * \param ctx Context (optional).
+ *
+ * \retval index of the fd if successful.
+ * \retval -1 on errors.
+ */
+int fdset_set_ctx_on_fd(fdset_t *set, int fd, void *ctx);
+
 /*!
  * \brief Remove and close file descriptor from watched set.
  *
@@ -201,6 +234,27 @@ inline static int fdset_get_fd(const fdset_t *set, const unsigned idx)
 #endif
 }
 
+/*!
+ * \brief Returns index for file descriptor.
+ *
+ * \param set  Target set.
+ * \param .
+ *
+ * \retval Index of the file descriptor. ret >= 0 for file descriptor found.
+ * \retval ret < 0 on errors.
+ */
+inline static int fdset_get_index_for_fd(const fdset_t *set, int fd)
+{
+       assert(set);
+       for (unsigned i = 0; i < set->n; i++) {
+               if (fdset_get_fd(set, i) == fd) {
+                       return i;
+               }
+       }
+
+       return -1;
+}
+
 /*!
  * \brief Returns number of file descriptors stored in set.
  *
@@ -277,6 +331,20 @@ inline static void fdset_it_next(fdset_it_t *it)
 #endif
 }
 
+/*!
+ * \brief Get context from the iterator.
+ *
+ * \param it  Target iterator.
+ *
+ * \return Context at the iterator.
+ */
+inline static void* fdset_it_get_ctx(fdset_it_t *it)
+{
+       assert(it);
+       unsigned idx = fdset_it_get_idx(it);
+       return fdset_get_ctx(it->set, idx);
+}
+
 /*!
  * \brief Remove file descriptor referenced by iterator from watched set.
  *
@@ -304,9 +372,9 @@ inline static void fdset_it_remove(fdset_it_t *it)
        /*       EVFILT_WRITE (1) -> -2                    */
        /* If not marked for delete then mark for delete.  */
 #if defined(__NetBSD__)
-       if ((signed short)it->set->ev[idx].filter >= 0) 
+       if ((signed short)it->set->ev[idx].filter >= 0)
 #else
-       if (it->set->ev[idx].filter < 0) 
+       if (it->set->ev[idx].filter < 0)
 #endif
        {
                it->set->ev[idx].filter = ~it->set->ev[idx].filter;
diff --git a/src/knot/common/qps_limiter.c b/src/knot/common/qps_limiter.c
new file mode 100644 (file)
index 0000000..b84ff5b
--- /dev/null
@@ -0,0 +1,83 @@
+#include "qps_limiter.h"
+#include "contrib/time.h"
+
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Watomic-alignment"
+
+int qps_limiter_init(qps_limiter_t *limiter)
+{
+       if (posix_memalign( (void**)&limiter->qps_limit, 16, sizeof(qps_limit_t)) != 0) {
+               return ENOMEM;
+       }
+
+       memset((void*)limiter->qps_limit, 0, sizeof(qps_limit_t));
+
+       struct timespec tv;
+       if (clock_gettime(CLOCK_REALTIME_COARSE, &tv)) {
+               return errno;
+       }
+
+       limiter->start_time = tv.tv_sec;
+
+       qps_limit_t limit = {0};
+
+       KNOT_ATOMIC_INIT(limiter->qps_limit[0], limit);
+
+       return 0;
+}
+
+void qps_limiter_cleanup(qps_limiter_t *limiter)
+{
+       free((void*)limiter->qps_limit);
+       limiter->qps_limit = NULL;
+}
+
+bool qps_limiter_is_allowed(qps_limiter_t *limiter, time_t time, bool is_err)
+{
+       qps_limit_t expect;
+       qps_limit_t new;
+
+       KNOT_ATOMIC_GET(limiter->qps_limit, expect);
+
+        /* time_t could be 64bit. Using 64bit time will cause 128bit atomic operations which is not optimal.
+           We can keep time as from start of program to ensure 32bit gives us 68 years of run without restart,
+               and still be 64bit atomic operations. */
+       uint32_t rel_time = time - limiter->start_time;
+
+       do
+       {
+               int diff = rel_time - expect.time;
+               if (diff <= 0) {
+                       /* time is old, need throttling */
+                       new.time = expect.time;
+                       if ( (expect.query_cnt < limiter->log_qps)
+                                || (is_err && expect.query_cnt < limiter->log_err_qps) ) {
+                               new.query_cnt = expect.query_cnt + 1;
+                       } else {
+                               return false;
+                       }
+               } else  {
+                       /* time is new, reset limit */
+                       new.time = rel_time;
+                       long time_adj = (long)diff * limiter->log_qps;
+                       if (expect.query_cnt < time_adj) {
+                               /* enough time passed to reset the time */
+                               new.query_cnt = 1;
+                       } else {
+                               /* already used more time previously */
+                               int spill = expect.query_cnt - time_adj;
+                               if ( (spill < limiter->log_qps)
+                                        || (is_err && spill < limiter->log_err_qps) ) {
+                                       new.query_cnt = spill + 1;
+                               } else {
+                                       return false;
+                               }
+                       }
+               }
+       }
+       while (!KNOT_ATOMIC_COMPARE_EXCHANGE_WEAK(limiter->qps_limit, expect, new));
+
+       return true;
+}
+
+#pragma GCC diagnostic pop
\ No newline at end of file
diff --git a/src/knot/common/qps_limiter.h b/src/knot/common/qps_limiter.h
new file mode 100644 (file)
index 0000000..4af7ac3
--- /dev/null
@@ -0,0 +1,42 @@
#pragma once
#include "knot/include/atomic.h"

#pragma pack(push, 1)
/*! \brief QPS limit state, packed so it can be updated as one 64-bit atomic value. */
typedef struct qps_limit {
	KNOT_ALIGN(8) uint32_t time;    /*!< Seconds relative to qps_limiter_t.start_time. */
	uint32_t query_cnt;             /*!< Queries accounted in the current one-second window. */
} qps_limit_t;
#pragma pack(pop)

typedef struct qps_limiter {
	time_t start_time;              /*!< Base time; qps_limit_t.time is stored relative to this. */
	int log_qps;                    /*!< Allowed queries per second. */
	int log_err_qps;                /*!< Allowed queries per second for the error case. */
	KNOT_ATOMIC	qps_limit_t *qps_limit; /*!< Atomically updated limit state. */
} qps_limiter_t;

/*!
 * \brief Initialize QPS Limiter object.
 *
 * \param limiter QPS Limiter object.
 *
 * \retval 0 if successfully initialized.
 * \retval Other values to indicate error.
 */
int qps_limiter_init(qps_limiter_t *limiter);

/*!
 * \brief Cleans up QPS Limiter object.
 *
 * \param limiter QPS Limiter object.
 */
void qps_limiter_cleanup(qps_limiter_t *limiter);

/*!
 * \brief Checks whether the query is allowed and meets qps limits.
 *
 * \param limiter QPS Limiter object.
 * \param time Time second value from real time clock.
 * \param is_err Is the log required for error case.
 *
 * \retval true if the query fits within the configured limit, false if it must be throttled.
 */
bool qps_limiter_is_allowed(qps_limiter_t *limiter, time_t time, bool is_err);
index 48c51e2c9a5bfb22d63982f7f99016d7fd001fa2..b9c925594cca957f3d870e70713b5e8a179db8f7 100644 (file)
@@ -25,6 +25,8 @@
 #include "knot/common/log.h"
 #include "knot/nameserver/query_module.h"
 
+#define ARRAY_SIZE(array)       (sizeof(array) / sizeof((array)[0]))
+
 struct {
        bool active_dumper;
        pthread_t dumper;
@@ -51,6 +53,26 @@ uint64_t server_zone_count(server_t *server)
        return knot_zonedb_size(server->zone_db);
 }
 
/*!
 * \brief Names of the global server counters.
 *
 * NOTE: order and count must match server_stats_counter_t in stats.h;
 * stats_reconfigure() asserts this consistency.
 */
const char *server_stat_names[] = {
	"udp_received",
	"udp_async_done",
	"udp_no_req_obj",
	"udp_req_batch_limited",
	"tcp_accept",
	"tcp_received",
	"tcp_async_done",
	"tcp_no_req_obj",
	"tcp_multiple_req",
};
+
/*! \brief Per-counter global totals, updated atomically from worker threads. */
static uint64_t server_stat_counters[ARRAY_SIZE(server_stat_names)];

/*! \brief Atomically add \a value to the selected global server counter. */
void server_stats_increment_counter(server_stats_counter_t counter, uint64_t value)
{
	assert(counter < server_stats_max);
	ATOMIC_ADD(server_stat_counters[counter], value);
}
+
 const stats_item_t server_stats[] = {
        { "zone-count", server_zone_count },
        { 0 }
@@ -181,6 +203,10 @@ static void dump_to_file(FILE *fd, server_t *server)
                DUMP_CTR(fd, 1, "%s", item->name, item->val(server));
        }
 
+       for(int i = 0; i < ARRAY_SIZE(server_stat_names); i++) {
+               DUMP_CTR(fd, 1, "%s", server_stat_names[i], ATOMIC_GET(server_stat_counters[i]));
+       }
+
        dump_ctx_t ctx = {
                .fd = fd,
                .query_modules = conf()->query_modules,
@@ -267,6 +293,7 @@ static void *dumper(void *data)
 
 void stats_reconfigure(conf_t *conf, server_t *server)
 {
+       assert(server_stats_max == ARRAY_SIZE(server_stat_names)); // Ensure enum and names are setup consistently.
        if (conf == NULL || server == NULL) {
                return;
        }
index bd6df6d2267e77f150d58652d4407ca2479cd08a..f10eab2c31e2c937b5c993c1fe61e2a2c737aea0 100644 (file)
@@ -37,6 +37,27 @@ typedef struct {
  */
 extern const stats_item_t server_stats[];
 
/*!
 * \brief Statistics metrics item.
 *
 * NOTE: order and count must match server_stat_names[] in stats.c;
 * stats_reconfigure() asserts this consistency.
 */
typedef enum {
    server_stats_udp_received,
    server_stats_udp_async_done,
    server_stats_udp_no_req_obj,
    server_stats_udp_req_batch_limited,
    server_stats_tcp_accept,
    server_stats_tcp_received,
    server_stats_tcp_async_done,
    server_stats_tcp_no_req_obj,
    server_stats_tcp_multiple_req,
    server_stats_max,  /* Sentinel: number of counters, not a counter itself. */
} server_stats_counter_t;

/*!
 * \brief Atomically increment the given server statistics counter by \a value.
 */
void server_stats_increment_counter(server_stats_counter_t counter, uint64_t value);
+
 /*!
  * \brief Read out value of single counter summed across threads.
  */
index 967317c89279476b4719389cc1aa222098689a26..26a6218869fbbe50be099a76d3015e459af30957 100644 (file)
@@ -131,6 +131,12 @@ static void init_cache(
        static size_t running_tcp_threads;
        static size_t running_xdp_threads;
        static size_t running_bg_threads;
+#ifdef ENABLE_ASYNC_QUERY_HANDLING
+       static bool numa_enabled = false;
+       static size_t udp_async_reqs;
+       static size_t tcp_async_reqs;
+       static size_t xdp_async_reqs;
+#endif
 
        if (first_init || reinit_cache) {
                running_tcp_reuseport = conf_get_bool(conf, C_SRV, C_TCP_REUSEPORT);
@@ -142,6 +148,12 @@ static void init_cache(
                running_xdp_threads = conf_xdp_threads(conf);
                running_bg_threads = conf_bg_threads(conf);
 
+#ifdef ENABLE_ASYNC_QUERY_HANDLING
+               numa_enabled = conf_is_numa_enabled(conf);
+               udp_async_reqs = conf_udp_async_req(conf);
+               tcp_async_reqs = conf_tcp_async_req(conf);
+               xdp_async_reqs = conf_xdp_async_req(conf);
+#endif
                first_init = false;
        }
 
@@ -181,6 +193,13 @@ static void init_cache(
 
        conf->cache.srv_bg_threads = running_bg_threads;
 
+#ifdef ENABLE_ASYNC_QUERY_HANDLING
+       conf->cache.numa_enabled = numa_enabled;
+       conf->cache.udp_srv_async_reqs = udp_async_reqs;
+       conf->cache.tcp_srv_async_reqs = tcp_async_reqs;
+       conf->cache.xdp_srv_async_reqs = xdp_async_reqs;
+#endif
+
        conf->cache.srv_tcp_max_clients = conf_tcp_max_clients(conf);
 
        val = conf_get(conf, C_XDP, C_TCP_MAX_CLIENTS);
@@ -210,6 +229,9 @@ static void init_cache(
 
        val = conf_get(conf, C_SRV, C_ANS_ROTATION);
        conf->cache.srv_ans_rotate = conf_bool(&val);
+
+       val = conf_get(conf, C_SRV, C_DISABLE_ANY);
+       conf->cache.srv_disable_any = conf_bool(&val);
 }
 
 int conf_new(
index 62b6ee22b893e919f56acd2638434c67c4775f1c..81b3a497d26add37249fb212e646f8e673ed8768 100644 (file)
@@ -137,10 +137,17 @@ typedef struct {
                uint32_t xdp_tcp_idle_reset;
                bool xdp_tcp;
                bool xdp_route_check;
+#ifdef ENABLE_ASYNC_QUERY_HANDLING
+               bool numa_enabled;
+               size_t udp_srv_async_reqs;
+               size_t tcp_srv_async_reqs;
+               size_t xdp_srv_async_reqs;
+#endif
                int ctl_timeout;
                conf_val_t srv_nsid;
                bool srv_ecs;
                bool srv_ans_rotate;
+               bool srv_disable_any;
        } cache;
 
        /*! List of dynamically loaded modules. */
index 5c0f6c63aa59d71786c1d991566f7ba185125a09..2c5560af9bd0478f741ebf3e14760c0b18744c14 100644 (file)
@@ -1345,6 +1345,65 @@ conf_remote_t conf_remote_txn(
        return out;
 }
 
+#ifdef ENABLE_ASYNC_QUERY_HANDLING
+#define UDP_ASYNC_DEFAULT_REQ 1024
+#define TCP_ASYNC_DEFAULT_REQ 128
+#define XDP_ASYNC_DEFAULT_REQ 0
+
+bool conf_is_numa_enabled_txn(
+       conf_t *conf,
+       knot_db_txn_t *txn)
+{
+       conf_val_t val = conf_get_txn(conf, txn, C_SRV, C_ENABLE_NUMA);
+       return conf_bool(&val);
+}
+
+size_t conf_udp_async_req_txn(
+       conf_t *conf,
+       knot_db_txn_t *txn)
+{
+       conf_val_t val = conf_get_txn(conf, txn, C_SRV, C_UDP_ASYNC_REQS);
+       int64_t reqs = conf_int(&val);
+       if (reqs == YP_NIL) {
+               return UDP_ASYNC_DEFAULT_REQ;
+       }
+
+       return reqs;
+}
+
+size_t conf_tcp_async_req_txn(
+       conf_t *conf,
+       knot_db_txn_t *txn)
+{
+       conf_val_t val = conf_get_txn(conf, txn, C_SRV, C_TCP_ASYNC_REQS);
+       int64_t reqs = conf_int(&val);
+       if (reqs == YP_NIL) {
+               return TCP_ASYNC_DEFAULT_REQ;
+       }
+
+       return reqs;
+}
+
+size_t conf_xdp_async_req_txn(
+       conf_t *conf,
+       knot_db_txn_t *txn)
+{
+       conf_val_t lisxdp_val = conf_get(conf, C_SRV, C_LISTEN_XDP);
+       if (lisxdp_val.code == KNOT_EOK) {
+               conf_val_t val = conf_get_txn(conf, txn, C_SRV, C_XDP_ASYNC_REQS);
+               int64_t reqs = conf_int(&val);
+               if (reqs == YP_NIL) {
+                       return XDP_ASYNC_DEFAULT_REQ;
+               }
+
+               return reqs;
+       } else {
+               return 0;
+       }
+}
+#endif
+
+
 int conf_xdp_iface(
        struct sockaddr_storage *addr,
        conf_xdp_iface_t *iface)
index 68463a10c6a1343fa7e7699706bd13643d078849..471ab6328febaa95a57f395a0e1afc86e0ed808f 100644 (file)
@@ -829,6 +829,82 @@ conf_remote_t conf_remote_txn(
        conf_val_t *id,
        size_t index
 );
+
#ifdef ENABLE_ASYNC_QUERY_HANDLING
/*!
 * Gets the value of numa enabled.
 *
 * \param[in] conf  Configuration.
 * \param[in] txn   Configuration DB transaction.
 *
 * \return true if numa is enabled.
 */
bool conf_is_numa_enabled_txn(
	conf_t *conf,
	knot_db_txn_t *txn
);

/*! Convenience wrapper over conf_is_numa_enabled_txn() using the read transaction. */
static inline bool conf_is_numa_enabled(
	conf_t *conf)
{
	return conf_is_numa_enabled_txn(conf, &conf->read_txn);
}

/*!
 * Gets the configured number of UDP async requests.
 *
 * \param[in] conf  Configuration.
 * \param[in] txn   Configuration DB transaction.
 *
 * \return Number of UDP async requests.
 */
size_t conf_udp_async_req_txn(
	conf_t *conf,
	knot_db_txn_t *txn
);
/*! Convenience wrapper over conf_udp_async_req_txn() using the read transaction. */
static inline size_t conf_udp_async_req(
	conf_t *conf)
{
	return conf_udp_async_req_txn(conf, &conf->read_txn);
}

/*!
 * Gets the configured number of TCP async requests.
 *
 * \param[in] conf  Configuration.
 * \param[in] txn   Configuration DB transaction.
 *
 * \return Number of TCP async requests.
 */
size_t conf_tcp_async_req_txn(
	conf_t *conf,
	knot_db_txn_t *txn
);
/*! Convenience wrapper over conf_tcp_async_req_txn() using the read transaction. */
static inline size_t conf_tcp_async_req(
	conf_t *conf)
{
	return conf_tcp_async_req_txn(conf, &conf->read_txn);
}

/*!
 * Gets the configured number of XDP async requests.
 *
 * \param[in] conf  Configuration.
 * \param[in] txn   Configuration DB transaction.
 *
 * \return Number of XDP async requests, 0 if XDP listening is not configured.
 */
size_t conf_xdp_async_req_txn(
	conf_t *conf,
	knot_db_txn_t *txn
);
/*! Convenience wrapper over conf_xdp_async_req_txn() using the read transaction. */
static inline size_t conf_xdp_async_req(
	conf_t *conf)
{
	return conf_xdp_async_req_txn(conf, &conf->read_txn);
}
#endif
+
 static inline conf_remote_t conf_remote(
        conf_t *conf,
        conf_val_t *id,
index f1c1f248f98d6e0725300e3117fbaec9a2965b40..56446ab054ab9aa69de528e0167d85557ba6aee1 100644 (file)
@@ -343,12 +343,13 @@ void conf_mod_unload_shared(
        else \
                log_##level(LOG_ARGS(mod_id, msg), ##__VA_ARGS__);
 
-void conf_activate_modules(
+void conf_activate_given_module_conf(
        conf_t *conf,
        struct server *server,
        const knot_dname_t *zone_name,
        list_t *query_modules,
-       struct query_plan **query_plan)
+       struct query_plan **query_plan,
+       conf_val_t val)
 {
        int ret = KNOT_EOK;
 
@@ -357,15 +358,6 @@ void conf_activate_modules(
                goto activate_error;
        }
 
-       conf_val_t val;
-
-       // Get list of associated modules.
-       if (zone_name != NULL) {
-               val = conf_zone_get(conf, C_MODULE, zone_name);
-       } else {
-               val = conf_default_get(conf, C_GLOBAL_MODULE);
-       }
-
        switch (val.code) {
        case KNOT_EOK:
                break;
@@ -439,11 +431,42 @@ activate_error:
        CONF_LOG(LOG_ERR, "failed to activate modules (%s)", knot_strerror(ret));
 }
 
+void conf_activate_modules(
+       conf_t *conf,
+       struct server *server,
+       const knot_dname_t *zone_name,
+       list_t *query_modules,
+       struct query_plan **query_plan)
+{
+
+       int ret = KNOT_EOK;
+
+       if (conf == NULL || query_modules == NULL || query_plan == NULL) {
+               ret = KNOT_EINVAL;
+               goto activate_error;
+       }
+
+       conf_val_t val;
+
+       // Get list of associated modules.
+       if (zone_name != NULL) {
+               val = conf_zone_get(conf, C_MODULE, zone_name);
+       } else {
+               val = conf_default_get(conf, C_GLOBAL_MODULE);
+       }
+
+       conf_activate_given_module_conf(conf, server, zone_name, query_modules, query_plan, val);
+
+       return;
+activate_error:
+       CONF_LOG(LOG_ERR, "failed to activate modules (%s)", knot_strerror(ret));
+}
+
 void conf_deactivate_modules(
        list_t *query_modules,
        struct query_plan **query_plan)
 {
-       if (query_modules == NULL || query_plan == NULL) {
+       if (query_modules == NULL || query_plan == NULL || *query_plan == NULL) {
                return;
        }
 
index 9a6accbb592e4e11516eb0cf51f4c0131b379514..4c62be8c843ef5282c8f9ea44e1f55cf7694602d 100644 (file)
@@ -101,6 +101,24 @@ void conf_activate_modules(
        struct query_plan **query_plan
 );
 
+/*!
+ * Activates configured query modules for the specified zone or for all zones.
+ *
+ * \param[in] conf           Configuration.
+ * \param[in] zone_name      Zone name, NULL for all zones.
+ * \param[in] query_modules  Destination query modules list.
+ * \param[in] query_plan     Destination query plan.
+ * \param[in] val            conf val to activate.
+ */
+void conf_activate_given_module_conf(
+       conf_t *conf,
+       struct server *server,
+       const knot_dname_t *zone_name,
+       list_t *query_modules,
+       struct query_plan **query_plan,
+       conf_val_t val
+);
+
 /*!
  * Deactivates query modules list.
  *
index e2c3ddba1ac2a48c454a1003a857e64534a2a3a9..b23056779869b56a4f36fd5df1015071db213332 100644 (file)
@@ -222,6 +222,13 @@ static const yp_item_t desc_server[] = {
        { C_MAX_IPV6_UDP_PAYLOAD, YP_TINT,  YP_VINT = { KNOT_EDNS_MIN_DNSSEC_PAYLOAD,
                                                        KNOT_EDNS_MAX_UDP_PAYLOAD,
                                                        1232, YP_SSIZE } },
+#ifdef ENABLE_ASYNC_QUERY_HANDLING
+       { C_ENABLE_NUMA,          YP_TBOOL, YP_VBOOL = {false}},
+       { C_UDP_ASYNC_REQS,       YP_TINT,  YP_VINT = { 0, INT32_MAX, 4 * 1024 } },
+       { C_TCP_ASYNC_REQS,       YP_TINT,  YP_VINT = { 0, INT32_MAX,  1 * 1024 } },
+       { C_XDP_ASYNC_REQS,       YP_TINT,  YP_VINT = { 0, INT32_MAX, 2 * 1024 } },
+#endif
+       { C_DISABLE_ANY,         YP_TBOOL, YP_VNONE },
        { NULL }
 };
 
@@ -429,6 +436,8 @@ static const yp_item_t desc_template[] = {
        { C_ID, YP_TSTR, YP_VNONE, CONF_IO_FREF },
        { C_GLOBAL_MODULE,       YP_TDATA, YP_VDATA = { 0, NULL, mod_id_to_bin, mod_id_to_txt },
                                           YP_FMULTI | CONF_IO_FRLD_MOD, { check_modref } },
+       { C_AZURE_MODULE,       YP_TDATA, YP_VDATA = { 0, NULL, mod_id_to_bin, mod_id_to_txt },
+                                          YP_FMULTI | CONF_IO_FRLD_MOD, { check_modref } },
        ZONE_ITEMS(CONF_IO_FRLD_ZONES)
        // Legacy items.
        { C_TIMER_DB,            YP_TSTR,  YP_VSTR = { "timers" }, CONF_IO_FRLD_SRV },
index ac590655a9a72d247479cdb1ae2bffbded1dce40..cd2e40ea0ae95d598f9906e4c0bd75cba633cb40 100644 (file)
@@ -55,6 +55,7 @@
 #define C_ECS                  "\x12""edns-client-subnet"
 #define C_FILE                 "\x04""file"
 #define C_GLOBAL_MODULE                "\x0D""global-module"
+#define C_AZURE_MODULE         "\x0C""azure-module"
 #define C_ID                   "\x02""id"
 #define C_IDENT                        "\x08""identity"
 #define C_INCL                 "\x07""include"
 #define C_MAX_JOURNAL_DEPTH    "\x11""max-journal-depth"
 #define C_MAX_JOURNAL_USAGE    "\x11""max-journal-usage"
 
#ifdef ENABLE_ASYNC_QUERY_HANDLING
/* Server-section option names for asynchronous query handling
 * (length-prefixed YP key strings). */
#define C_ENABLE_NUMA       "\x0b""enable-numa"
#define C_UDP_ASYNC_REQS    "\x0d""udp-async-req"
#define C_TCP_ASYNC_REQS    "\x0d""tcp-async-req"
#define C_XDP_ASYNC_REQS    "\x0d""xdp-async-req"
#endif
+
 enum {
        KEYSTORE_BACKEND_PEM    = 1,
        KEYSTORE_BACKEND_PKCS11 = 2,
index ccaa23c37a6b265fda36411a416f40ef4c3cefa1..0301c64094afc1b72907383fe9900e1ee430ab3a 100644 (file)
@@ -207,7 +207,7 @@ int knot_sign_ctx_add_data(dnssec_sign_ctx_t *ctx,
  *
  * \return Error code, KNOT_EOK if successful.
  */
-static int rrsigs_create_rdata(knot_rrset_t *rrsigs, dnssec_sign_ctx_t *ctx,
+int rrsigs_create_rdata(knot_rrset_t *rrsigs, dnssec_sign_ctx_t *ctx,
                                const knot_rrset_t *covered,
                                const dnssec_key_t *key,
                                uint32_t sig_incepted, uint32_t sig_expires,
index 7114cb184cd021e98ae8975ccd1a270caaeb1009..9d8c850b1533393888314f9440b7e29c091de160 100644 (file)
@@ -58,6 +58,27 @@ int knot_sign_rrset2(knot_rrset_t *rrsigs,
                      zone_sign_ctx_t *sign_ctx,
                      knot_mm_t *mm);
 
/*!
 * \brief Create RRSIG RDATA.
 *
 * \param[in,out] rrsigs     RR set with RRSIGS; the created RDATA is added here.
 * \param[in]  ctx           DNSSEC signing context.
 * \param[in]  covered       RR covered by the signature.
 * \param[in]  key           Key used for signing.
 * \param[in]  sig_incepted  Timestamp of signature inception.
 * \param[in]  sig_expires   Timestamp of signature expiration.
 * \param[in]  sign_flags    Signing flags.
 * \param[in]  mm            Memory context.
 *
 * \return Error code, KNOT_EOK if successful.
 */
int rrsigs_create_rdata(knot_rrset_t *rrsigs, dnssec_sign_ctx_t *ctx,
				const knot_rrset_t *covered,
				const dnssec_key_t *key,
				uint32_t sig_incepted, uint32_t sig_expires,
				dnssec_sign_flags_t sign_flags,
				knot_mm_t *mm);
+
 /*!
  * \brief Add all data covered by signature into signing context.
  *
index 80b200a6ab838d7cdebd8d1918b90f04d599f147..15bc25e91fa53a5bd125095c493d072ad26aa24d 100644 (file)
@@ -44,6 +44,9 @@ typedef struct {
        bool is_ksk_active_plus;
        bool is_pub_only;
        bool is_revoked;
+
+       int signature_validity_offset;
+       int signature_validity_period;
 } zone_key_t;
 
 knot_dynarray_declare(keyptr, zone_key_t *, DYNARRAY_VISIBILITY_NORMAL, 1)
@@ -61,6 +64,7 @@ typedef struct {
        zone_key_t *keys;                 // keys in keyset
        dnssec_sign_ctx_t **sign_ctxs;    // signing buffers for keys in keyset
        const kdnssec_ctx_t *dnssec_ctx;  // dnssec context
+       zone_node_t *rrsig; //rrsig
 } zone_sign_ctx_t;
 
 /*!
diff --git a/src/knot/include/atomic.h b/src/knot/include/atomic.h
new file mode 100644 (file)
index 0000000..d42c26c
--- /dev/null
@@ -0,0 +1,40 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <time.h>
+#ifdef HAVE_STDATOMIC
+#include <stdatomic.h>
+#endif
+#ifdef HAVE_STDALIGN
+#include <stdalign.h>
+#define KNOT_ALIGN(align) alignas(align)
+#else
+#define KNOT_ALIGN(align)
+#endif
+#include <stdlib.h>
+#include <errno.h>
+#include <string.h>
+
+#ifdef HAVE_STDATOMIC
+#define KNOT_ATOMIC _Atomic
+#define KNOT_ATOMIC_INIT(dst, src) atomic_init(&(dst), src)
+#define KNOT_ATOMIC_GET(src, dst) (dst) = atomic_load(src)
+#define KNOT_ATOMIC_GET_RELAXED(src, dst) (dst) = atomic_load_explicit(src, memory_order_relaxed)
+#define KNOT_ATOMIC_COMPARE_EXCHANGE_WEAK(src, cmp, val) atomic_compare_exchange_weak(src, &(cmp), val)
+#define KNOT_ATOMIC_COMPARE_EXCHANGE_STRONG(src, cmp, val) atomic_compare_exchange_strong(src, &(cmp), val)
+#define KNOT_ATOMIC_GET_SUB(src, val) atomic_fetch_sub(src, val)
+#define KNOT_ATOMIC_GET_ADD(src, val) atomic_fetch_add(src, val)
+#else
+#ifdef HAVE_ATOMIC
+#define KNOT_ATOMIC volatile
+#define KNOT_ATOMIC_INIT(dst, src) __atomic_store(&(dst), &(src), __ATOMIC_SEQ_CST)
+#define KNOT_ATOMIC_GET(src, dst) __atomic_load(src, &(dst), __ATOMIC_CONSUME)
+#define KNOT_ATOMIC_GET_RELAXED(src, dst) __atomic_load(src, &(dst), __ATOMIC_RELAXED)
+#define KNOT_ATOMIC_COMPARE_EXCHANGE_WEAK(src, cmp, val) __atomic_compare_exchange(src, &(cmp), &(val), true, __ATOMIC_ACQ_REL, __ATOMIC_CONSUME)
+#define KNOT_ATOMIC_COMPARE_EXCHANGE_STRONG KNOT_ATOMIC_COMPARE_EXCHANGE_WEAK
+#define KNOT_ATOMIC_GET_SUB(src, val) __atomic_fetch_sub(src, val, __ATOMIC_ACQ_REL)
+#define KNOT_ATOMIC_GET_ADD(src, val) __atomic_fetch_add(src, val, __ATOMIC_ACQ_REL)
+#else
+#error Need atomic or stdatomic support
+#endif
+#endif
+
diff --git a/src/knot/include/lqueue.h b/src/knot/include/lqueue.h
new file mode 100644 (file)
index 0000000..afff170
--- /dev/null
@@ -0,0 +1,101 @@
#pragma once
#include "knot/include/atomic.h"

#pragma pack(push, 1)
/*!
 * \brief To maintain performance, size is setup to uint16_t to keep atomic operation to 8 byte.
 * This allows the size of the queue to be restricted to 2^16 - 2. If larger size is needed, the type needs to be changed.
 */
#define KNOTD_LOCKLESS_QUEUE_COUNT_TYPE uint16_t

/*!
 * \brief Queue state.
 */
typedef struct knotd_lockless_queue_state {
	KNOT_ALIGN(sizeof(KNOTD_LOCKLESS_QUEUE_COUNT_TYPE) * 4)
    KNOTD_LOCKLESS_QUEUE_COUNT_TYPE head;               /*!< Head where insertion can be performed. */
    KNOTD_LOCKLESS_QUEUE_COUNT_TYPE tail;               /*!< Tail where removal can be performed. */
    KNOTD_LOCKLESS_QUEUE_COUNT_TYPE head_reserved;      /*!< Head reservation to insert. */
    KNOTD_LOCKLESS_QUEUE_COUNT_TYPE unused;             /*!< To ensure queue state size is in power of 2 for atomic operation. */
} knotd_lockless_queue_state_t;

/*!
 * \brief The lockless queue structure. Allocate the queue by calling knotd_lockless_queue_create.
 *
 * The queue is implemented using circular queue. The only variation is insertion cannot be performed atomically for following reason.
 * If an item need to be inserted at head+1 position and update the head to head+1, these two operation have to performed in two location.
 * The object insertion needs to be performed on item[head+1], but update to head need to be performed in state.
 * These two memory are located too far apart to be used in single atomic memory operation.
 * Performing these operation independently will cause race condition.
 * If we write to array before incrementing head, two threads can write to items array in head+1, say thread1 followed by thread2, but head could be incremented by thread1.
 * But what is left in item[head+1] is from thread2. So thread1's object is lost when it completes, but thread2 object may be duplicated as it retries and inserts again.
 * if we write to array after incrementing head, a pop operation might see the head != tail, and hence assume head has the data and consume it before it is initialized.
 *
 * To eliminate the race condition, create a reservation first by incrementing head_reserved.
 * This guarantees that no one will use the insertion position other than the thread that reserved it. Also pop will not consider reserved areas, but only committed areas (i.e. head).
 * After reserving and setting the memory, the head needs to be moved. But the current thread can not move the head to its reserved position for following reason.
 *
 * Lets say initial state is (head=1, tail=1, head_reserved=1).
 * Thread 1 reserves position 2. (head=1, tail=1, head_reserved=2).
 * Thread 2 reserves position 3. (head=1, tail=1, head_reserved=3).
 * Thread 1 has not yet finished assigning value to array item at 2. If thread 2 completes assigning value to 3.
 * At this stage, if the head was set to 3. The pop operation will assume that memory in array index 2 is valid, which is still not assigned.
 * To overcome this issue, the threads are allowed to move the head to head_reserved only if head is one before reserved.
 * With this logic, thread 2 will spin until head = 2. Only thread that can make head to 2 is thread 1. Thread 1 will increment head to 2 only after setting array object at index 2.
 * This ensures the thread safety with added atomic operation and potential spin by a thread for another thread.
 */
typedef struct {
    KNOT_ATOMIC knotd_lockless_queue_state_t state;
    KNOTD_LOCKLESS_QUEUE_COUNT_TYPE size;
    void* items[];
} knotd_lockless_queue_t;
#pragma pack(pop)

/*!
 * \brief Create lockless queue structure.
 *
 * \param queue Queue to be initialized.
 * \param size Size of the max number of objects in queue to be supported. This is limited to 2^16 - 2.
 *
 * \retval 0 if successful.
 */
int knotd_lockless_queue_create(knotd_lockless_queue_t **queue, KNOTD_LOCKLESS_QUEUE_COUNT_TYPE size);

/*!
 * \brief Frees lockless queue structure.
 *
 * \param queue Queue previously created using call to knotd_lockless_queue_create.
 */
void knotd_lockless_queue_delete(knotd_lockless_queue_t *queue);

/*!
 * \brief Enqueue an object into a queue.
 *
 * \param queue Queue previously created using call to knotd_lockless_queue_create.
 * \param item Item to be inserted.
 * \param first On return, if it is true, the object inserted is the first item in the queue currently.
 *
 * \retval 0 if successful.
 */
int knotd_lockless_queue_enqueue(knotd_lockless_queue_t *queue, void *item, bool *first);

/*!
 * \brief Dequeues an object from queue.
 *
 * \param queue Queue previously created using call to knotd_lockless_queue_create.
 *
 * \retval Item retrieved from queue, NULL if no object found.
 */
void* knotd_lockless_queue_dequeue(knotd_lockless_queue_t *queue);

/*!
 * \brief Get the number of objects in the queue.
 *
 * \param queue Queue previously created using call to knotd_lockless_queue_create.
 *
 * \retval Number of objects in the queue.
 */
KNOTD_LOCKLESS_QUEUE_COUNT_TYPE knotd_lockless_queue_count(knotd_lockless_queue_t *queue);
diff --git a/src/knot/include/lstack.h b/src/knot/include/lstack.h
new file mode 100644 (file)
index 0000000..a85f766
--- /dev/null
@@ -0,0 +1,74 @@
#pragma once
#include "knot/include/atomic.h"

#pragma pack(push, 1)
/*!
 * \brief A node object that can be inserted into stack.
 */
typedef struct knotd_lockless_stack_node {
    struct knotd_lockless_stack_node *next;      /*!< Pointer to next node in the stack. */
} knotd_lockless_stack_node_t;

/*!
 * \brief The head of the stack linked list.
 */
typedef struct {
	KNOT_ALIGN(16)
    knotd_lockless_stack_node_t *next;       /*!< Pointer to top/first node in the stack. */
    uint32_t count;                          /*!< Keeps track of number of elements in the stack. */
    uint32_t aba_cookie;                     /*!< Value used to determine if the stack was updated to ensure atomicity.*/
                                             /*!< To detect pop(item1), pop(item2), push(item1) causing issues with pop(item1) executing in parallel. */
} knotd_lockless_stack_head_t;

/*!
 * \brief Lockless stack structure. Call knotd_lockless_stack_init to initialize before using this structure.
 * The stack is implemented using linked list of nodes. The node is preallocated as part of the items that are allocated.
 * So there is no size limit on number of objects that can be added in the stack.
 * Also, push and pop operations won't fail as the required memory for list are already pre-allocated as part of the object being pushed.
 */
typedef struct {
    KNOT_ATOMIC knotd_lockless_stack_head_t *head;
} knotd_lockless_stack_t;
#pragma pack(pop)

/*!
 * \brief Initialize lockless structure.
 *
 * \param stack Stack to be initialized.
 *
 * \retval 0 if successful.
 */
int knotd_lockless_stack_init(knotd_lockless_stack_t *stack);

/*!
 * \brief Cleanup lockless structure. The members in the stack are not altered.
 *
 * \param stack Stack initialized using knotd_lockless_stack_init.
 */
void knotd_lockless_stack_cleanup(knotd_lockless_stack_t *stack);

/*!
 * \brief Push the node into the lockless stack.
 *
 * \param stack Stack initialized using knotd_lockless_stack_init.
 * \param node Node to be inserted into stack.
 */
void knotd_lockless_stack_push(knotd_lockless_stack_t *stack, knotd_lockless_stack_node_t *node);

/*!
 * \brief Pop the node from the stack.
 *
 * \param stack Stack initialized using knotd_lockless_stack_init.
 *
 * \retval Node that is popped from stack, NULL if no nodes present.
 */
knotd_lockless_stack_node_t *knotd_lockless_stack_pop(knotd_lockless_stack_t *stack);

/*!
 * \brief Get the number of elements in the stack.
 *
 * \param stack Stack initialized using knotd_lockless_stack_init.
 *
 * \retval Count of objects in the stack.
 */
uint32_t knotd_lockless_stack_count(knotd_lockless_stack_t *stack);
diff --git a/src/knot/include/modcounter.h b/src/knot/include/modcounter.h
new file mode 100644 (file)
index 0000000..1d51db2
--- /dev/null
@@ -0,0 +1,50 @@
+// <copyright file="modcounter.h" company="Microsoft">
+//  Copyright (c) Microsoft Corporation. All rights reserved.
+// </copyright>
+
+#pragma once
+#include <stdio.h>
+#include "knot/include/module.h"
+
+//# Start : DONOT MAKE CHANGES HERE TO ADD COUNTERS
+#define COMBINE_NAME(p1, p2) p1##_##p2
+#define COMMA_SEPARATED_P2(p1, p2) p2,
+#define STRING_P2(p1, p2) #p2,
+#define ARRAY_SIZE_P2_COMMA(p1, p2) ARRAY_SIZE(p2),
+#define NO_CHANGE_P2(p1, p2) p2
+
+#define CREATE_ENUM(ename, foreach) \
+typedef enum { \
+    foreach(COMBINE_NAME, ename, COMMA_SEPARATED_P2, _) \
+} ename##_enum_t;
+
+#define CREATE_STR_ARR(ename, foreach) \
+static const char *str_map_##ename[] = { \
+    foreach(STRING_P2, _, NO_CHANGE_P2, _) \
+};
+
+#define CREATE_SUB_ENUM(ename, foreach) CREATE_ENUM(ename, foreach)
+
+#define ARRAY_SIZE(array)       (sizeof(array) / sizeof((array)[0]))
+
+#ifdef CREATE_COUNTER_DEFINITIONS
+#define CREATE_COUNTERS(ename, foreach) \
+    CREATE_ENUM(ename, foreach) \
+    CREATE_STR_ARR(ename, foreach)
+#define CREATE_DIMENSIONS(ename, foreach) \
+    CREATE_ENUM(ename, foreach) \
+    CREATE_STR_ARR(ename, foreach) \
+static char *to_str_function_##ename(uint32_t idx, uint32_t count) { assert(idx < ARRAY_SIZE(str_map_##ename)); return strdup(str_map_##ename[idx]); }
+#define CREATE_NAME_MAP(name, foreach) \
+static const knotd_mod_idx_to_str_f name##_map_to_str[] = { \
+    foreach(COMBINE_NAME, to_str_function, COMMA_SEPARATED_P2, _) \
+}; \
+static const int name##_dim_size[] = { \
+    foreach(COMBINE_NAME, str_map, ARRAY_SIZE_P2_COMMA, _) \
+};
+#else
+#define CREATE_COUNTERS(ename, foreach) CREATE_ENUM(ename, foreach)
+#define CREATE_DIMENSIONS(ename, foreach) CREATE_ENUM(ename, foreach)
+#define CREATE_NAME_MAP(name, foreach)
+#endif
+//# End of DONOT MAKE CHANGES HERE TO ADD COUNTERS
\ No newline at end of file
index ed5a06cd6204a359578cf27b59e7b9d9ae803d93..af9ba821d9385f6a0d12563fddc0f813887c1946 100644 (file)
@@ -396,18 +396,29 @@ typedef enum {
        KNOTD_QUERY_FLAG_COOKIE     = 1 << 3, /*!< Valid DNS Cookie indication. */
 } knotd_query_flag_t;
 
+typedef struct knotd_qdata_params knotd_qdata_params_t;
+typedef int (*async_operation_completion_callback)(knotd_qdata_params_t *params);
+
 /*! Query processing data context parameters. */
-typedef struct {
+struct knotd_qdata_params {
        knotd_query_flag_t flags;              /*!< Current query flgas. */
        const struct sockaddr_storage *remote; /*!< Current remote address. */
+       const struct sockaddr_storage *local;  /*!< Current local address. */
        int socket;                            /*!< Current network socket. */
        unsigned thread_id;                    /*!< Current thread id. */
        void *server;                          /*!< Server object private item. */
        const struct knot_xdp_msg *xdp_msg;    /*!< Possible XDP message context. */
-} knotd_qdata_params_t;
+       void *dns_req;                         /*!< Request this param belongs to. */
+#ifdef ENABLE_ASYNC_QUERY_HANDLING
+       async_operation_completion_callback async_completed_callback; /*!< handler for async operation completion at layer */
+#endif
+};
+
+typedef struct knotd_qdata knotd_qdata_t;
+typedef int (*module_async_operation_completed)(knotd_qdata_t *query, int state);
 
 /*! Query processing data context. */
-typedef struct {
+struct knotd_qdata {
        knot_pkt_t *query;              /*!< Query to be solved. */
        knotd_query_type_t type;        /*!< Query packet type. */
        const knot_dname_t *name;       /*!< Currently processed name. */
@@ -424,7 +435,14 @@ typedef struct {
        knotd_qdata_params_t *params; /*!< Low-level processing parameters. */
 
        struct knotd_qdata_extra *extra; /*!< Private items (process_query.h). */
-} knotd_qdata_t;
+
+       struct timespec query_time;      /*!< Time when the query was received. */
+       void *state;                     /*!< State of the query processor. */
+#ifdef ENABLE_ASYNC_QUERY_HANDLING
+       module_async_operation_completed async_completed;    /*!< Handler for completing the query asynchronously in a module. */
+       module_async_operation_completed async_in_completed; /*!< Handler for completing the query asynchronously from an in-state module hook. */
+#endif
+};
 
 /*!
  * Gets the local (destination) address of the query.
@@ -434,8 +452,7 @@ typedef struct {
  *
  * \return Local address or NULL if error.
  */
-const struct sockaddr_storage *knotd_qdata_local_addr(knotd_qdata_t *qdata,
-                                                      struct sockaddr_storage *buff);
+const struct sockaddr_storage *knotd_qdata_local_addr(knotd_qdata_t *qdata);
 
 /*!
  * Gets the remote (source) address of the query.
@@ -471,11 +488,16 @@ typedef enum {
        KNOTD_STATE_DONE  = 4, /*!< Finished. */
        KNOTD_STATE_FAIL  = 5, /*!< Error. */
        KNOTD_STATE_FINAL = 6, /*!< Finished and finalized (QNAME, EDNS, TSIG). */
+       KNOTD_STATE_ZONE_LOOKUPDONE = 7, /*!< Positive result for zone fetch */
+#ifdef ENABLE_ASYNC_QUERY_HANDLING
+       KNOT_STATE_ASYNC  = 100,    //!< The request needs to be async handled.  Value should match KNOT_LAYER_STATE_ASYNC.
+#endif
 } knotd_state_t;
 
 /*! brief Internet query processing states. */
 typedef enum {
        KNOTD_IN_STATE_BEGIN,  /*!< Begin name resolution. */
+       KNOTD_IN_STATE_LOOKUPDONE, /*!< Name lookup completed for the qname */
        KNOTD_IN_STATE_NODATA, /*!< Positive result with NO data. */
        KNOTD_IN_STATE_HIT,    /*!< Positive result. */
        KNOTD_IN_STATE_MISS,   /*!< Negative result. */
@@ -483,12 +505,17 @@ typedef enum {
        KNOTD_IN_STATE_FOLLOW, /*!< Resolution not complete (CNAME/DNAME chain). */
        KNOTD_IN_STATE_TRUNC,  /*!< Finished, packet size limit encountered. */
        KNOTD_IN_STATE_ERROR,  /*!< Resolution failed. */
+#ifdef ENABLE_ASYNC_QUERY_HANDLING
+       KNOTD_IN_STATE_ASYNC = 100,  /*!< The request needs to be async handled. */
+#endif
 } knotd_in_state_t;
 
 /*! Query module processing stages. */
 typedef enum {
        KNOTD_STAGE_BEGIN = 0,  /*!< Before query processing. */
+       KNOTD_STAGE_ZONE_LOOKUP,/*!< Before zone lookup is done. */
        KNOTD_STAGE_PREANSWER,  /*!< Before section processing. */
+       KNOTD_STAGE_NAME_LOOKUP,/*!< Before name lookup is done */
        KNOTD_STAGE_ANSWER,     /*!< Answer section processing. */
        KNOTD_STAGE_AUTHORITY,  /*!< Authority section processing. */
        KNOTD_STAGE_ADDITIONAL, /*!< Additional section processing. */
diff --git a/src/knot/modules/delay/Makefile.inc b/src/knot/modules/delay/Makefile.inc
new file mode 100644 (file)
index 0000000..6f1ff2e
--- /dev/null
@@ -0,0 +1,13 @@
+knot_modules_delay_la_SOURCES = knot/modules/delay/delay.c
+EXTRA_DIST +=                    knot/modules/delay/delay.rst
+
+if STATIC_MODULE_delay
+libknotd_la_SOURCES += $(knot_modules_delay_la_SOURCES)
+endif
+
+if SHARED_MODULE_delay
+knot_modules_delay_la_LDFLAGS = $(KNOTD_MOD_LDFLAGS)
+knot_modules_delay_la_CPPFLAGS = $(KNOTD_MOD_CPPFLAGS) $(DELAY_CFLAGS)
+knot_modules_delay_la_LIBADD = $(DELAY_LIBS)
+pkglib_LTLIBRARIES += knot/modules/delay.la
+endif
\ No newline at end of file
diff --git a/src/knot/modules/delay/delay.c b/src/knot/modules/delay/delay.c
new file mode 100644 (file)
index 0000000..29a41f8
--- /dev/null
@@ -0,0 +1,240 @@
+#include <netinet/in.h>
+#include "contrib/time.h"
+#include "knot/include/module.h"
+#include "contrib/mempattern.h"
+#include "knot/query/layer.h"
+#include <pthread.h>
+#include "knot/include/lqueue.h"
+#include <time.h>
+
+/* This module is for demonstration of async mode */
+
+#define MOD_DELAY      "\x05""delay"
+#define MOD_THREADS "\x07""threads"
+#define MOD_ALL        "\x03""all"
+#define MOD_ID         "\x08""identity"
+
+/*! Module configuration schema: delay in milliseconds, dispatcher thread
+ *  count, whether to delay every hook stage, and an instance identity. */
+const yp_item_t delay_conf[] = {
+       { MOD_DELAY,     YP_TINT,  YP_VINT = { 1, INT32_MAX, 10} }, // Delay in ms (default 10).
+       { MOD_THREADS,   YP_TINT,  YP_VINT = { 1, INT8_MAX, 4} },   // Dispatcher threads (default 4).
+       { MOD_ALL,  YP_TBOOL, YP_VBOOL = { false } },               // Delay all stages, not just the final response.
+       { MOD_ID, YP_TSTR,   YP_VNONE },                            // Optional identity string.
+    { NULL }
+};
+
+/*! One delayed query waiting in the lockless queue. */
+typedef struct queue_node {
+       struct timespec wake_time; /*!< Absolute CLOCK_MONOTONIC time to resume the query. */
+       knotd_qdata_t *qdata;      /*!< Query whose processing is being delayed. */
+       bool is_in_state;          /*!< True if delayed from an in-state (knotd_in_state_t) hook. */
+       int return_state;          /*!< State to hand back to the async completion callback. */
+} queue_node_t;
+
+/*! Module instance context. */
+typedef struct {
+       char id[64];                   /*!< Configured identity string (kept NUL-terminated). */
+       int delay_ms;                  /*!< Configured delay in milliseconds. */
+       bool exit;                     /*!< Set at unload to stop dispatcher threads.
+                                       *   NOTE(review): plain bool shared across threads, not atomic — confirm intended. */
+       int thread_count;              /*!< Number of successfully started dispatcher threads. */
+       knotd_lockless_queue_t *queue; /*!< Queue of pending queue_node_t entries. */
+       pthread_t dispatch_thread[];   /*!< Dispatcher thread handles (flexible array member). */
+} delay_ctx_t;
+
+/*! \brief Configuration check callback — value ranges are already enforced
+ *         by the schema, so there is nothing extra to validate. */
+int delay_conf_check(knotd_conf_check_args_t *args)
+{
+       return KNOT_EOK;
+}
+
+/*!
+ * \brief Schedule a query for delayed asynchronous completion.
+ *
+ * Computes the wake time (now + configured delay on CLOCK_MONOTONIC),
+ * allocates a queue node from the query's memory pool, and enqueues it for
+ * a dispatcher thread to complete later.
+ *
+ * \param qdata         Query to delay.
+ * \param return_state  State to pass back when the delay elapses.
+ * \param is_in_state   True when called from an in-state hook (selects
+ *                      async_in_completed over async_completed on resume).
+ * \param mod           Module handle (provides the delay context).
+ *
+ * \return KNOT_EOK on success; KNOT_ENOMEM or KNOT_ESYSTEM on failure, in
+ *         which case the caller falls back to synchronous completion.
+ */
+static int delay_query(knotd_qdata_t *qdata, int return_state, bool is_in_state, knotd_mod_t *mod)
+{
+       struct timespec curr_time;
+       int ret = KNOT_EOK;
+       if (clock_gettime(CLOCK_MONOTONIC, &curr_time) == 0) {
+               delay_ctx_t *ctx = knotd_mod_ctx(mod);
+               queue_node_t *node = mm_alloc(qdata->mm, sizeof(*node));
+               if (node == NULL) {
+                       ret = KNOT_ENOMEM;
+               }
+               else {
+                       // Add the delay to the nanosecond part and normalize into tv_sec/tv_nsec.
+                       long val = curr_time.tv_nsec + ctx->delay_ms * 1000 * 1000;
+                       node->wake_time.tv_nsec = val % (1000 * 1000 * 1000);
+                       node->wake_time.tv_sec = curr_time.tv_sec + val / (1000 * 1000 * 1000);
+                       node->qdata = qdata;
+                       node->is_in_state = is_in_state;
+                       node->return_state = return_state;
+
+                       bool first;
+                       if ((ret = knotd_lockless_queue_enqueue(ctx->queue, node, &first)) != KNOT_EOK) {
+                               // Node came from the query's pool; release it before reporting failure.
+                               mm_free(qdata->mm, node);
+                               ret = KNOT_ESYSTEM;
+                       }
+               }
+       } else {
+               ret = KNOT_ESYSTEM;
+       }
+
+       return ret;
+}
+
+/*! \brief knotd_state_t-stage hook: delay the query, or pass the state
+ *         through unchanged if the delay could not be scheduled. */
+static knotd_state_t delay_message(knotd_state_t state, knot_pkt_t *pkt,
+                                 knotd_qdata_t *qdata, knotd_mod_t *mod)
+{
+       return delay_query(qdata, state, false, mod) == KNOT_EOK ? KNOT_STATE_ASYNC : state;
+}
+
+/*! \brief knotd_in_state_t-stage hook: delay the query, or pass the state
+ *         through unchanged if the delay could not be scheduled. */
+static knotd_in_state_t delay_message_in(knotd_in_state_t state, knot_pkt_t *pkt,
+                                 knotd_qdata_t *qdata, knotd_mod_t *mod)
+{
+       return delay_query(qdata, state, true, mod) == KNOT_EOK ? KNOTD_IN_STATE_ASYNC : state;
+}
+
+/*!
+ * \brief Check whether a node's wake time has been reached.
+ *
+ * \param node        Queued query carrying its absolute wake time.
+ * \param delay_time  Out: remaining time to sleep when not yet due
+ *                    (defaults to 1 ms if the clock cannot be read).
+ *
+ * \return True when the wake time has passed (or equals now), false otherwise.
+ */
+static bool check_time(queue_node_t *node, struct timespec *delay_time) {
+       delay_time->tv_sec = 0;
+       delay_time->tv_nsec = 1000 * 1000;
+       struct timespec curr_time;
+       if (clock_gettime(CLOCK_MONOTONIC, &curr_time) == 0) {
+               if (curr_time.tv_sec > node->wake_time.tv_sec
+                       || (curr_time.tv_sec == node->wake_time.tv_sec
+                               && curr_time.tv_nsec >= node->wake_time.tv_nsec)) {
+                       return true;
+               }
+
+               // Remaining = wake - now, borrowing from tv_sec when nanoseconds underflow.
+               delay_time->tv_nsec = node->wake_time.tv_nsec - curr_time.tv_nsec;
+               delay_time->tv_sec = node->wake_time.tv_sec - curr_time.tv_sec;
+               if (node->wake_time.tv_nsec < curr_time.tv_nsec) {
+                       delay_time->tv_nsec += 1000 * 1000 * 1000;
+                       delay_time->tv_sec -= 1;
+               }
+       }
+
+       return false;
+}
+
+/*!
+ * \brief Drain the delay queue, completing every pending query immediately.
+ *
+ * NOTE(review): the \a all parameter and per-node wake times are not
+ * consulted here — every dequeued node is completed right away. It is only
+ * called from delay_unload() to flush the queue; confirm that is the
+ * intended scope.
+ *
+ * \param ctx  Module context owning the queue.
+ * \param all  Currently unused.
+ *
+ * \return Suggested sleep time for the caller (fixed 1 ms).
+ */
+static struct timespec dispatch_queue(delay_ctx_t *ctx, bool all)
+{
+       queue_node_t *node;
+       struct timespec sleep_time = {0, 1000 * 1000};
+       while (true) {
+               node = knotd_lockless_queue_dequeue(ctx->queue);
+
+               if (!node) {
+                       break;
+               }
+
+               /* Should be the last call on the object or memory.
+                * Further access to node, qdata or mm would be a race condition,
+                * as the network thread can process and clean them up. */
+               if (node->is_in_state) {
+                       node->qdata->async_in_completed(node->qdata, node->return_state);
+               } else {
+                       node->qdata->async_completed(node->qdata, node->return_state);
+               }
+       }
+
+       return sleep_time;
+}
+
+/*!
+ * \brief Dispatcher thread main loop.
+ *
+ * Repeatedly dequeues delayed queries: sleeps 0.1 ms when the queue is
+ * empty, otherwise waits until the node's wake time and then invokes the
+ * query's async completion callback. Exits when ctx->exit is set.
+ */
+static void *dispatch_thread(void *d)
+{
+       delay_ctx_t *ctx = d;
+       while (!ctx->exit) {
+               queue_node_t *node = knotd_lockless_queue_dequeue(ctx->queue);
+
+               if (node == NULL) {
+            struct timespec tenth_ms = { 0, 100000};
+            nanosleep(&tenth_ms, &tenth_ms);
+                       continue;
+               } else {
+                       struct timespec delay_time;
+                       if (!check_time(node, &delay_time)) {
+                               // Not due yet: sleep out the remaining delay.
+                               nanosleep(&delay_time, &delay_time);
+                       }
+               }
+
+               /* Should be the last call on the object or memory.
+                * Further access to node, qdata or mm would be a race condition,
+                * as the network thread can process and clean them up. */
+               if (node->is_in_state) {
+                       node->qdata->async_in_completed(node->qdata, node->return_state);
+               } else {
+                       node->qdata->async_completed(node->qdata, node->return_state);
+               }
+       }
+
+       return d;
+}
+
+/*!
+ * \brief Module load callback: create the context, queue and dispatcher
+ *        threads, read the configuration and install the delaying hooks.
+ *
+ * \return KNOT_EOK, KNOT_ENOMEM, or KNOT_ESYSTEM / queue-create error.
+ */
+int delay_load(knotd_mod_t *mod)
+{
+       knotd_conf_t threads = knotd_conf_mod(mod, MOD_THREADS);
+       /* Create delay context (flexible array sized for the thread handles). */
+       delay_ctx_t *ctx = calloc(1, sizeof(*ctx) + sizeof(pthread_t) * threads.single.integer);
+       if (ctx == NULL) {
+               return KNOT_ENOMEM;
+       }
+
+       int rc;
+       if ((rc = knotd_lockless_queue_create(&ctx->queue, 16 * 1024))) {
+               free(ctx);
+               return rc;
+       }
+
+       for (int i = 0; i < threads.single.integer; i++) {
+               if(pthread_create(&ctx->dispatch_thread[i], NULL, dispatch_thread, ctx) != 0)
+               {
+                       /* Roll back: stop and join the threads started so far. */
+                       ctx->exit = 1;
+                       for (int j = 0; j < i; j++) {
+                               void *retval;
+                               pthread_join(ctx->dispatch_thread[j], &retval);
+                       }
+                       knotd_lockless_queue_delete(ctx->queue);
+                       free(ctx);
+                       return KNOT_ESYSTEM;
+               }
+               ctx->thread_count++;
+       }
+
+       /* Set delay. */
+       knotd_conf_t conf = knotd_conf_mod(mod, MOD_DELAY);
+       ctx->delay_ms = conf.single.integer;
+
+       /* Set id (kept NUL-terminated even when the value is truncated). */
+       conf = knotd_conf_mod(mod, MOD_ID);
+       if (conf.single.string) {
+               strncpy(ctx->id, conf.single.string, sizeof(ctx->id));
+               ctx->id[sizeof(ctx->id) - 1] = '\0';
+       }
+
+       /* Set scope. */
+       conf = knotd_conf_mod(mod, MOD_ALL);
+       bool all = conf.single.boolean;
+
+       knotd_mod_ctx_set(mod, ctx);
+
+       /* Always delay the final response; with 'all' also delay every stage. */
+       knotd_mod_hook(mod, KNOTD_STAGE_END, delay_message);
+       if (all) {
+               knotd_mod_hook(mod, KNOTD_STAGE_BEGIN, delay_message);
+        knotd_mod_in_hook(mod, KNOTD_STAGE_NAME_LOOKUP, delay_message_in);
+               knotd_mod_in_hook(mod, KNOTD_STAGE_PREANSWER, delay_message_in);
+               knotd_mod_in_hook(mod, KNOTD_STAGE_ANSWER, delay_message_in);
+               knotd_mod_in_hook(mod, KNOTD_STAGE_AUTHORITY, delay_message_in);
+               knotd_mod_in_hook(mod, KNOTD_STAGE_ADDITIONAL, delay_message_in);
+       }
+
+       return KNOT_EOK;
+}
+
+/*! \brief Module unload callback: stop and join dispatcher threads, flush
+ *         any still-queued queries, and free all module resources. */
+void delay_unload(knotd_mod_t *mod)
+{
+       delay_ctx_t *ctx = knotd_mod_ctx(mod);
+       if (ctx) {
+               ctx->exit = true;
+               void *retval;
+               for (int i = 0; i < ctx->thread_count; i++) {
+                       pthread_join(ctx->dispatch_thread[i], &retval);
+               }
+               knotd_mod_ctx_set(mod, NULL);
+               /* Complete whatever is still queued before dropping the queue. */
+               dispatch_queue(ctx, true);
+               knotd_lockless_queue_delete(ctx->queue);
+               free(ctx);
+       }
+}
+
+KNOTD_MOD_API(delay, KNOTD_MOD_FLAG_SCOPE_ANY,
+              delay_load, delay_unload, delay_conf, delay_conf_check);
diff --git a/src/knot/modules/delay/delay.rst b/src/knot/modules/delay/delay.rst
new file mode 100644 (file)
index 0000000..39954b5
--- /dev/null
@@ -0,0 +1,50 @@
+.. _mod-delay:
+
+``delay`` – delay the query response
+====================================
+
+A module for delaying response to a query.
+
+Example
+-------
+
+   mod-delay:
+     - id: delay_10ms
+       delay: 10ms
+
+   template:
+     - id: default
+       global-module: mod-delay/delay_10ms
+
+The above configuration delays the response by 10ms.
+
+Module reference
+----------------
+
+For delaying query response, use this module.
+
+::
+
+ mod-delay:
+   - id: STR
+     delay: INT
+     all: BOOL
+
+id
+..
+
+A module identifier.
+
+.. _mod-delay_delay:
+
+delay
+....
+
+Number of ms to delay the module call.
+*Required*
+
+all
+........
+
+The value indicates whether every module hook call is delayed, or only the final response is delayed.
+If all module calls are delayed, the total time the query is delayed will be the number of available module hooks multiplied by the delay time.
\ No newline at end of file
index e69b56c7b98b8f95b80c42afab4b2df9568f02fb..d1536ae1e8702d048c58dd6dc8c2ca3e1cbf7e1c 100644 (file)
@@ -1,4 +1,7 @@
-knot_modules_dnstap_la_SOURCES = knot/modules/dnstap/dnstap.c
+knot_modules_dnstap_la_SOURCES = knot/modules/dnstap/dnstap.c \
+                                 knot/modules/dnstap/dnstapcounter.c \
+                                 knot/modules/dnstap/dnstapcounter.h
+
 EXTRA_DIST +=                    knot/modules/dnstap/dnstap.rst
 
 if STATIC_MODULE_dnstap
index b74cc5274afa5511385d0ef268aa5c16cc351052..2bf62664dc870de22d285de34a620ea8e4cad00f 100644 (file)
 #include <netinet/in.h>
 #include <sys/socket.h>
 
+#ifdef ENABLE_THROTTLE_DNSTAP_LOGS
+#include "knot/common/qps_limiter.h"
+#endif
 #include "contrib/dnstap/dnstap.h"
 #include "contrib/dnstap/dnstap.pb-c.h"
 #include "contrib/dnstap/message.h"
 #include "contrib/dnstap/writer.h"
 #include "contrib/time.h"
 #include "knot/include/module.h"
+#include "dnstapcounter.h"
 
 #define MOD_SINK       "\x04""sink"
 #define MOD_IDENTITY   "\x08""identity"
 #define MOD_VERSION    "\x07""version"
 #define MOD_QUERIES    "\x0B""log-queries"
 #define MOD_RESPONSES  "\x0D""log-responses"
+#define MOD_COMBINED  "\x0F""query-with-resp"
+#ifdef ENABLE_THROTTLE_DNSTAP_LOGS
+#define MOD_QPS_LIMIT "\x09""qps-limit"
+#define MOD_ERR_LIMIT "\x09""err-limit"
+#endif
 
 const yp_item_t dnstap_conf[] = {
        { MOD_SINK,      YP_TSTR,  YP_VNONE },
@@ -36,6 +45,11 @@ const yp_item_t dnstap_conf[] = {
        { MOD_VERSION,   YP_TSTR,  YP_VNONE },
        { MOD_QUERIES,   YP_TBOOL, YP_VBOOL = { true } },
        { MOD_RESPONSES, YP_TBOOL, YP_VBOOL = { true } },
+       { MOD_COMBINED,  YP_TBOOL, YP_VBOOL = { false } },
+#ifdef ENABLE_THROTTLE_DNSTAP_LOGS
+       { MOD_QPS_LIMIT, YP_TINT,  YP_VINT = { 0, INT32_MAX, 0 } },
+       { MOD_ERR_LIMIT, YP_TINT,  YP_VINT = { 0, INT32_MAX, 0 } },
+#endif
        { NULL }
 };
 
@@ -56,10 +70,14 @@ typedef struct {
        size_t identity_len;
        char *version;
        size_t version_len;
+       bool log_query_with_resp;
+#ifdef ENABLE_THROTTLE_DNSTAP_LOGS
+       qps_limiter_t qps_limiter;
+#endif
 } dnstap_ctx_t;
 
 static knotd_state_t log_message(knotd_state_t state, const knot_pkt_t *pkt,
-                                 knotd_qdata_t *qdata, knotd_mod_t *mod)
+                                 knotd_qdata_t *qdata, knotd_mod_t *mod, struct timespec *tv)
 {
        assert(pkt && qdata && mod);
 
@@ -73,14 +91,17 @@ static knotd_state_t log_message(knotd_state_t state, const knot_pkt_t *pkt,
        struct fstrm_iothr_queue *ioq =
                fstrm_iothr_get_input_queue_idx(ctx->iothread, qdata->params->thread_id);
 
-       /* Unless we want to measure the time it takes to process each query,
-        * we can treat Q/R times the same. */
-       struct timespec tv = { .tv_sec = time(NULL) };
-
+       void *wire2 = NULL;
+       size_t len_wire2 = 0;
        /* Determine query / response. */
        Dnstap__Message__Type msgtype = DNSTAP__MESSAGE__TYPE__AUTH_QUERY;
        if (knot_wire_get_qr(pkt->wire)) {
                msgtype = DNSTAP__MESSAGE__TYPE__AUTH_RESPONSE;
+
+               if (ctx->log_query_with_resp) {
+                       wire2 = qdata->query->wire;
+                       len_wire2 = qdata->query->size;
+               }
        }
 
        /* Determine whether we run on UDP/TCP. */
@@ -90,12 +111,11 @@ static knotd_state_t log_message(knotd_state_t state, const knot_pkt_t *pkt,
        }
 
        /* Create a dnstap message. */
-       struct sockaddr_storage buff;
        Dnstap__Message msg;
        int ret = dt_message_fill(&msg, msgtype,
                                  (const struct sockaddr *)knotd_qdata_remote_addr(qdata),
-                                 (const struct sockaddr *)knotd_qdata_local_addr(qdata, &buff),
-                                 protocol, pkt->wire, pkt->size, &tv);
+                                 (const struct sockaddr *)knotd_qdata_local_addr(qdata),
+                                 protocol, pkt->wire, pkt->size, tv, wire2, len_wire2, &qdata->query_time);
        if (ret != KNOT_EOK) {
                return state;
        }
@@ -140,16 +160,98 @@ static knotd_state_t dnstap_message_log_query(knotd_state_t state, knot_pkt_t *p
                                               knotd_qdata_t *qdata, knotd_mod_t *mod)
 {
        assert(qdata);
+       struct timespec tv;
+       clock_gettime(CLOCK_REALTIME_COARSE, &tv);
+
+       knotd_mod_stats_incr(
+        mod,
+        qdata->params->thread_id,
+        dnstap_counter_log_emitted,
+        log_emitted_QUERY,
+        1);
 
-       return log_message(state, qdata->query, qdata, mod);
+       return log_message(state, qdata->query, qdata, mod, &tv);
 }
 
 /*! \brief Submit message - response. */
 static knotd_state_t dnstap_message_log_response(knotd_state_t state, knot_pkt_t *pkt,
                                                  knotd_qdata_t *qdata, knotd_mod_t *mod)
 {
-       return log_message(state, pkt, qdata, mod);
+       struct timespec tv;
+       clock_gettime(CLOCK_REALTIME_COARSE, &tv);
+
+       knotd_mod_stats_incr(
+        mod,
+        qdata->params->thread_id,
+        dnstap_counter_log_emitted,
+        log_emitted_RESPONSE,
+        1);
+
+       return log_message(state, pkt, qdata, mod, &tv);
+}
+
+
+#ifdef ENABLE_THROTTLE_DNSTAP_LOGS
+/*! \brief Submit message - query. */
+static knotd_state_t dnstap_message_log_query_limit(knotd_state_t state, knot_pkt_t *pkt,
+                                              knotd_qdata_t *qdata, knotd_mod_t *mod)
+{
+       struct timespec tv;
+       clock_gettime(CLOCK_REALTIME_COARSE, &tv);
+
+       dnstap_ctx_t *ctx = knotd_mod_ctx(mod);
+       if (qps_limiter_is_allowed(&ctx->qps_limiter, tv.tv_sec, false)) {
+               knotd_mod_stats_incr(
+                       mod,
+                       qdata->params->thread_id,
+                       dnstap_counter_log_emitted,
+                       log_emitted_QUERY,
+                       1);
+
+               return log_message(state, qdata->query, qdata, mod, &tv);
+       } else {
+               knotd_mod_stats_incr(
+                       mod,
+                       qdata->params->thread_id,
+                       dnstap_counter_log_dropped,
+                       log_dropped_QUERY,
+                       1);
+
+               return state;
+       }
+}
+
+/*! \brief Submit message - response. */
+static knotd_state_t dnstap_message_log_response_limit(knotd_state_t state, knot_pkt_t *pkt,
+                                                 knotd_qdata_t *qdata, knotd_mod_t *mod)
+{
+       struct timespec tv;
+       clock_gettime(CLOCK_REALTIME_COARSE, &tv);
+
+       bool err = KNOT_RCODE_SERVFAIL == knot_wire_get_rcode(pkt->wire);
+
+       dnstap_ctx_t *ctx = knotd_mod_ctx(mod);
+       if (qps_limiter_is_allowed(&ctx->qps_limiter, tv.tv_sec, err)) {
+               knotd_mod_stats_incr(
+                       mod,
+                       qdata->params->thread_id,
+                       dnstap_counter_log_emitted,
+                       log_emitted_RESPONSE,
+                       1);
+
+               return log_message(state, pkt, qdata, mod, &tv);
+       } else {
+               knotd_mod_stats_incr(
+                       mod,
+                       qdata->params->thread_id,
+                       dnstap_counter_log_dropped,
+                       log_dropped_RESPONSE,
+                       1);
+
+               return state;
+       }
 }
+#endif
 
 /*! \brief Create a UNIX socket sink. */
 static struct fstrm_writer* dnstap_unix_writer(const char *path)
@@ -261,6 +363,28 @@ int dnstap_load(knotd_mod_t *mod)
        conf = knotd_conf_mod(mod, MOD_RESPONSES);
        const bool log_responses = conf.single.boolean;
 
+#ifdef ENABLE_THROTTLE_DNSTAP_LOGS
+       /* Get QPS_limit. */
+       conf = knotd_conf_mod(mod, MOD_QPS_LIMIT);
+       ctx->qps_limiter.log_qps = conf.single.integer;
+
+       /* Get Err QPS_limit. */
+       conf = knotd_conf_mod(mod, MOD_ERR_LIMIT);
+       ctx->qps_limiter.log_err_qps = conf.single.integer;
+
+       /* Get Log query with resp. */
+       conf = knotd_conf_mod(mod, MOD_COMBINED);
+       ctx->log_query_with_resp = conf.single.boolean;
+
+       bool limit_by_qps = ctx->qps_limiter.log_qps || ctx->qps_limiter.log_err_qps;
+
+       if (limit_by_qps) {
+               if (qps_limiter_init(&ctx->qps_limiter) != KNOT_EOK) {
+                       goto fail;
+               }
+       }
+#endif
+
        /* Initialize the writer and the options. */
        struct fstrm_writer *writer = dnstap_writer(sink);
        if (writer == NULL) {
@@ -288,16 +412,31 @@ int dnstap_load(knotd_mod_t *mod)
 
        /* Hook to the query plan. */
        if (log_queries) {
-               knotd_mod_hook(mod, KNOTD_STAGE_BEGIN, dnstap_message_log_query);
+               knotd_mod_hook(mod, KNOTD_STAGE_BEGIN,
+#ifdef ENABLE_THROTTLE_DNSTAP_LOGS
+                       limit_by_qps ? dnstap_message_log_query_limit :
+#endif
+                       dnstap_message_log_query);
        }
        if (log_responses) {
-               knotd_mod_hook(mod, KNOTD_STAGE_END, dnstap_message_log_response);
+               knotd_mod_hook(mod, KNOTD_STAGE_END,
+#ifdef ENABLE_THROTTLE_DNSTAP_LOGS
+                       limit_by_qps ? dnstap_message_log_response_limit :
+#endif
+                       dnstap_message_log_response);
        }
 
+       if (KNOT_EOK != dnstap_create_counters(mod)) {
+        goto fail;
+    }
+
        return KNOT_EOK;
 fail:
        knotd_mod_log(mod, LOG_ERR, "failed to init sink '%s'", sink);
 
+#ifdef ENABLE_THROTTLE_DNSTAP_LOGS
+       qps_limiter_cleanup(&ctx->qps_limiter);
+#endif
        free(ctx->identity);
        free(ctx->version);
        free(ctx);
@@ -308,8 +447,12 @@ fail:
 void dnstap_unload(knotd_mod_t *mod)
 {
        dnstap_ctx_t *ctx = knotd_mod_ctx(mod);
+       dnstap_delete_counters(mod);
 
        fstrm_iothr_destroy(&ctx->iothread);
+#ifdef ENABLE_THROTTLE_DNSTAP_LOGS
+       qps_limiter_cleanup(&ctx->qps_limiter);
+#endif
        free(ctx->identity);
        free(ctx->version);
        free(ctx);
index 02071dc156451ee64b25aef8a27d6bee7474c2d7..5a2668cea46f47399179f0fa14243f48fdddff00 100644 (file)
@@ -102,3 +102,32 @@ log-responses
 If enabled, response messages will be logged.
 
 *Default:* on
+
+qps-limit
+.........
+If set to non-zero, the server will log maximum of qps-limit queries per second and drop other logs.
+This is a token bucket approach of how many QPS will be logged every second. Any unused tokens will expire at the end of the second.
+
+*Default:* 0
+
+err-limit
+.........
+If set to non-zero, the server will log errors up to the err-limit. The qps-limit applies to the error logs as well.
+However, error queries are allowed to consume additional tokens from future seconds, up to err-limit.
+If more error queries are logged, that reduces the number of tokens available for regular queries in future without changing logging qps.
+In the worst case, if there are too many errors, every second releases qps-limit tokens and only consumed by error queries.
+
+For example, if qps-limit is 10 and err-limit is 100, then after the first 10 successful queries are logged, further success queries in that second are ignored.
+During that second, if failures happen, up to 90 more failure queries are logged. At the beginning of the next second, the token balance available to normal queries becomes -90 + 10 = -80.
+In this case, errors have consumed the qps tokens for the next 9 seconds, so no success query logs will be added for the next 9 seconds.
+But if there were errors during those seconds, there are still 10 tokens per second to consume on the error side, so up to 10 errors can be logged during each of those seconds.
+If nothing is logged for next 9 seconds, at the end of 9 seconds, the system resets to default limit of regular queries with 10 tokens, and errors with 100 tokens.
+In the long run, errors and success can consume only qps-limit. But errors are prioritized and allowed to consume more tokens at the expense of success.
+
+*Default:* 0
+
+query-with-resp
+...............
+If set to on, logs query packet along with response packet to reduce round trip and also to make analysis easier.
+
+*Default:* off
\ No newline at end of file
diff --git a/src/knot/modules/dnstap/dnstapcounter.c b/src/knot/modules/dnstap/dnstapcounter.c
new file mode 100644 (file)
index 0000000..3d6743d
--- /dev/null
@@ -0,0 +1,20 @@
+
+#define CREATE_COUNTER_DEFINITIONS
+#include "dnstapcounter.h"
+
+/*!
+ * \brief Register all dnstap counters (with their dimensions) on the module.
+ *
+ * \param mod  Module handle for the counters.
+ *
+ * \return 0 (KNOT_EOK) on success, or the first non-zero error returned by
+ *         knotd_mod_stats_add().
+ */
+int dnstap_create_counters(knotd_mod_t *mod) {
+    int rc = 0;
+    for(int i = 0; i < dnstap_counter_max; i++) {
+        rc = knotd_mod_stats_add(mod, str_map_dnstap_counter[i], dnstap_counter_dim_size[i], dnstap_counter_map_to_str[i]);
+        if (rc) {
+            break;
+        }
+    }
+
+    return rc;
+}
+
+/*! \brief Counterpart of dnstap_create_counters(); currently a no-op. */
+void dnstap_delete_counters(knotd_mod_t *mod) {
+    // This API is not exposed in knot. So nothing to free at this stage.
+    // knotd_mod_stats_free(mod);
+}
\ No newline at end of file
diff --git a/src/knot/modules/dnstap/dnstapcounter.h b/src/knot/modules/dnstap/dnstapcounter.h
new file mode 100644 (file)
index 0000000..44807f2
--- /dev/null
@@ -0,0 +1,42 @@
+// <copyright file="dnstapcounter.h" company="Microsoft">
+//  Copyright (c) Microsoft Corporation. All rights reserved.
+// </copyright>
+
+#pragma once
+#include "knot/include/modcounter.h"
+
+#define FOREACH_DNSTAP_COUNTER(OPS1, param1, OPS2, param2) \
+    OPS2(param2, OPS1(param1, log_emitted)) \
+    OPS2(param2, OPS1(param1, log_dropped)) \
+    OPS2(param2, OPS1(param1, max))
+
+#define FOREACH_LOG_EMITTED(OPS1, param1, OPS2, param2) \
+    OPS2(param2, OPS1(param1, QUERY)) \
+    OPS2(param2, OPS1(param1, RESPONSE))
+
+#define FOREACH_LOG_DROPPED(OPS1, param1, OPS2, param2) \
+    OPS2(param2, OPS1(param1, QUERY)) \
+    OPS2(param2, OPS1(param1, RESPONSE))
+
+#define FOREACH_MAX(OPS1, param1, OPS2, param2) OPS2(param2, OPS1(param1, max))
+
+CREATE_COUNTERS(dnstap_counter, FOREACH_DNSTAP_COUNTER)
+CREATE_DIMENSIONS(log_emitted, FOREACH_LOG_EMITTED)
+CREATE_DIMENSIONS(log_dropped, FOREACH_LOG_DROPPED)
+CREATE_DIMENSIONS(max, FOREACH_MAX)
+CREATE_NAME_MAP(dnstap_counter, FOREACH_DNSTAP_COUNTER)
+
+/*!
+ * \brief Creates all counters for module.
+ *
+ * \param mod Module handle for the counters.
+ *
+ */
+int dnstap_create_counters(knotd_mod_t *mod);
+
+/*!
+ * \brief cleans up the counters for the module.
+ *
+ * \param mod Module handle for the counters.
+ */
+void dnstap_delete_counters(knotd_mod_t *mod);
\ No newline at end of file
index 963fc7c21e8da3c5251b64772878c61cc5e0768e..af92a06c66a54681a05b672c85a8ee0e8ee76e13 100644 (file)
@@ -80,8 +80,7 @@ static knotd_state_t export(knotd_state_t state, knot_pkt_t *pkt,
        ATOMIC_SET(ctx->last_times[idx], now_ns);
 
        // Prepare data sources.
-       struct sockaddr_storage buff;
-       const struct sockaddr_storage *local = knotd_qdata_local_addr(qdata, &buff);
+       const struct sockaddr_storage *local = knotd_qdata_local_addr(qdata);
        const struct sockaddr_storage *remote = knotd_qdata_remote_addr(qdata);
 
        bool tcp = !(qdata->params->flags & KNOTD_QUERY_FLAG_LIMIT_SIZE);
index e787083222765008e267b127ed6ce981ed0885b0..9711769426e2bcb48d4bd1c8c7640589124cd452 100644 (file)
@@ -52,8 +52,7 @@ static knotd_state_t queryacl_process(knotd_state_t state, knot_pkt_t *pkt,
        }
 
        if (ctx->allow_iface.count > 0) {
-               struct sockaddr_storage buff;
-               const struct sockaddr_storage *addr = knotd_qdata_local_addr(qdata, &buff);
+               const struct sockaddr_storage *addr = knotd_qdata_local_addr(qdata);
                if (!knotd_conf_addr_range_match(&ctx->allow_iface, addr)) {
                        qdata->rcode = KNOT_RCODE_NOTAUTH;
                        return KNOTD_STATE_FAIL;
index cedbd144ce87043c331f84bdc4c32e802840b0fa..5024e8d0b454721f0ee834958dbefc5eb2b066a1 100644 (file)
@@ -32,6 +32,8 @@
 #define MOD_QTYPE      "\x0A""query-type"
 #define MOD_QSIZE      "\x0A""query-size"
 #define MOD_RSIZE      "\x0A""reply-size"
+#define MOD_NO_RESP "\x07""no-resp"
+#define MOD_RESP_TIME "\x09""resp-time"
 
 #define OTHER          "other"
 
@@ -49,6 +51,8 @@ const yp_item_t stats_conf[] = {
        { MOD_QTYPE,      YP_TBOOL, YP_VNONE },
        { MOD_QSIZE,      YP_TBOOL, YP_VNONE },
        { MOD_RSIZE,      YP_TBOOL, YP_VNONE },
+       { MOD_NO_RESP,    YP_TBOOL, YP_VNONE },
+       { MOD_RESP_TIME,      YP_TBOOL, YP_VBOOL = { true } },
        { NULL }
 };
 
@@ -66,6 +70,8 @@ enum {
        CTR_QTYPE,
        CTR_QSIZE,
        CTR_RSIZE,
+       CTR_NO_RESP,
+       CTR_RESP_TIME,
 };
 
 typedef struct {
@@ -82,6 +88,8 @@ typedef struct {
        bool qtype;
        bool qsize;
        bool rsize;
+       bool no_resp;
+       bool resp_time;
 } stats_t;
 
 typedef struct {
@@ -171,6 +179,21 @@ static char *resp_bytes_to_str(uint32_t idx, uint32_t count)
        }
 }
 
+enum {
+       WITH_FAILURE,
+       WITHOUT_FAILURE,
+       NO_RESP_TYPE_COUNT
+};
+
+/*! \brief Map a no-response counter dimension index to its display label. */
+static char *no_resp_to_str(uint32_t idx, uint32_t count)
+{
+       switch (idx) {
+       case WITH_FAILURE:    return strdup("failed");
+       case WITHOUT_FAILURE: return strdup("success");
+       default:                  assert(0); return NULL;
+       }
+}
+
 enum {
        EDNS_REQ = 0,
        EDNS_RESP,
@@ -334,6 +357,29 @@ static char *rsize_to_str(uint32_t idx, uint32_t count)
        return size_to_str(idx, count);
 }
 
+/*! Response-time histogram bucket bounds in milliseconds; the trailing
+ *  0xFFFFFFFF bound makes the final bucket catch everything else. */
+static uint32_t resp_time_bucket[] = { 0, 5, 10, 20, 50, 100, 150, 200, 250, 500, 750, 1000, 1300, 1500, 1800, 2000, 3000, 5000, 0xFFFFFFFF };
+/*! \brief Format bucket \a idx as a "lo-hi" (or "lo-max") millisecond range label. */
+static char *resp_time_to_str(uint32_t idx, uint32_t count)
+{
+       char str[64];
+       if (idx < count - 1) {
+               snprintf(str, sizeof(str), "%u-%u", resp_time_bucket[idx], resp_time_bucket[idx+1]);
+       } else {
+               snprintf(str, sizeof(str), "%u-max", resp_time_bucket[idx]);
+       }
+       return strdup(str);
+}
+
+/*!
+ * \brief Map a response time in milliseconds to its histogram bucket index.
+ *
+ * The last bucket bound is 0xFFFFFFFF, so the loop always matches and the
+ * trailing 'return 0' is effectively unreachable.
+ */
+static int resp_time_to_bucket_id(uint32_t resp_time) {
+       for (int i = 1; i < sizeof(resp_time_bucket)/sizeof(uint32_t); i++)
+       {
+               if (resp_time <= resp_time_bucket[i]) {
+                       return i-1;
+               }
+       }
+
+       return 0;
+}
+
 static const ctr_desc_t ctr_descs[] = {
        #define item(macro, name, count) \
                [CTR_##macro] = { MOD_##macro, offsetof(stats_t, name), (count), name##_to_str }
@@ -350,6 +396,8 @@ static const ctr_desc_t ctr_descs[] = {
        item(QTYPE,      qtype,      QTYPE__COUNT),
        item(QSIZE,      qsize,      QSIZE_MAX_IDX + 1),
        item(RSIZE,      rsize,      RSIZE_MAX_IDX + 1),
+       item(NO_RESP,    no_resp,    NO_RESP_TYPE_COUNT),
+       item(RESP_TIME,  resp_time,  ((sizeof(resp_time_bucket)/sizeof(uint32_t)) - 1)),
        { NULL }
 };
 
@@ -452,6 +500,27 @@ static knotd_state_t update_counters(knotd_state_t state, knot_pkt_t *pkt,
                                             knot_pkt_size(pkt));
                        break;
                }
+       } else if (stats->resp_bytes == 0) {
+               knotd_mod_stats_incr(mod, tid, CTR_NO_RESP, state != KNOTD_STATE_FAIL ? WITHOUT_FAILURE : WITH_FAILURE,
+                                                               1);
+       }
+
+       struct timespec now;
+       if (clock_gettime(CLOCK_REALTIME_COARSE, &now) != -1) {
+               // Calculate resp time
+               uint32_t time = (now.tv_sec - qdata->query_time.tv_sec) * 1000;
+               time += (now.tv_nsec - qdata->query_time.tv_nsec) / 1000000;
+               if (now.tv_nsec < qdata->query_time.tv_nsec) {
+                       // time -= 1000; would underflow the unsigned value. Omitting it rounds the elapsed time up instead of down.
+                       if (now.tv_sec <= qdata->query_time.tv_sec) {
+                               // now is smaller than query time
+                               time = 0;
+                       }
+               }
+
+               int id = resp_time_to_bucket_id(time);
+               knotd_mod_stats_incr(mod, tid, CTR_RESP_TIME, id,
+                                                               1);
        }
 
        // Get the extended response code.
index 52afe624147d122f166ddfa7567d9b98a3c40bcb..ace2adb21e528c748e7b5324218db25987f978f5 100644 (file)
@@ -20,7 +20,7 @@
 #include "knot/include/module.h"
 #include "knot/nameserver/process_query.h"
 
-/*! \brief Don't follow CNAME/DNAME chain beyond this depth. */
+/*! \brief Don't follow CNAME/DNAME chain beyond this depth (CNAME_CHAIN_MAX). */
 #define CNAME_CHAIN_MAX 5
 
 /*!
diff --git a/src/knot/nameserver/lqueue.c b/src/knot/nameserver/lqueue.c
new file mode 100644 (file)
index 0000000..16f83b3
--- /dev/null
@@ -0,0 +1,91 @@
+#include <stddef.h>
+#include "knot/include/lqueue.h"
+#include <assert.h>
+
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Watomic-alignment"
+
+int knotd_lockless_queue_create(knotd_lockless_queue_t **queue, KNOTD_LOCKLESS_QUEUE_COUNT_TYPE size) {
+    assert(size < 0xFFFFU);
+    size_t size_to_alloc = sizeof(knotd_lockless_queue_t) + ((size + 1) * sizeof(void*));
+       if (posix_memalign( (void**)queue, 16, size_to_alloc) != 0) {
+               return ENOMEM;
+       }
+
+       memset((void*)*queue, 0, sizeof(knotd_lockless_queue_t));
+    (*queue)->size = size + 1;
+       return 0;
+}
+
+void knotd_lockless_queue_delete(knotd_lockless_queue_t *queue)
+{
+    free(queue);
+}
+
+int knotd_lockless_queue_enqueue(knotd_lockless_queue_t *queue, void *item, bool *first)
+{
+    // Make a reservation
+    knotd_lockless_queue_state_t state, target_state;
+    KNOTD_LOCKLESS_QUEUE_COUNT_TYPE prev_head, insert_pos;
+
+    KNOT_ATOMIC_GET_RELAXED(&queue->state, state);
+    do
+    {
+        insert_pos = (state.head_reserved + 1) % queue->size;
+        if (insert_pos == state.tail)
+        {
+            return ENOMEM; // queue is full
+        }
+
+        prev_head = state.head_reserved;
+        target_state = state;
+        target_state.head_reserved = insert_pos;
+    } while (!KNOT_ATOMIC_COMPARE_EXCHANGE_WEAK(&queue->state, state, target_state));
+
+    // save the object in reserved position
+    queue->items[insert_pos] = item;
+
+    // Commit the progress, only if all previous reservations have committed
+    do
+    {
+        KNOT_ATOMIC_GET_RELAXED(&queue->state, state);
+    } while (state.head != prev_head); // Prev reservation is not yet committed
+
+    do
+    {
+        target_state = state;
+        target_state.head = insert_pos;
+    } while (!KNOT_ATOMIC_COMPARE_EXCHANGE_WEAK(&queue->state, state, target_state));
+
+    *first = state.head == state.tail;
+
+    return 0;
+}
+
+void* knotd_lockless_queue_dequeue(knotd_lockless_queue_t *queue) {
+    knotd_lockless_queue_state_t state, target_state;
+    void *item;
+
+    KNOT_ATOMIC_GET_RELAXED(&queue->state, state);
+
+    do
+    {
+        if (state.head == state.tail)
+        {
+            return NULL;
+        }
+
+        target_state = state;
+        target_state.tail = (target_state.tail + 1) % queue->size;
+        item = queue->items[target_state.tail];
+    } while (!KNOT_ATOMIC_COMPARE_EXCHANGE_WEAK(&queue->state, state, target_state));
+
+    return item;
+}
+
+KNOTD_LOCKLESS_QUEUE_COUNT_TYPE knotd_lockless_queue_count(knotd_lockless_queue_t *queue) {
+    knotd_lockless_queue_state_t state;
+    KNOT_ATOMIC_GET_RELAXED(&queue->state, state);
+
+    return (queue->size + state.head - state.tail) % queue->size;
+}
\ No newline at end of file
diff --git a/src/knot/nameserver/lstack.c b/src/knot/nameserver/lstack.c
new file mode 100644 (file)
index 0000000..a963315
--- /dev/null
@@ -0,0 +1,68 @@
+#include <stddef.h>
+#include "knot/include/lstack.h"
+#include <assert.h>
+
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Watomic-alignment"
+
+int knotd_lockless_stack_init(knotd_lockless_stack_t *stack) {
+       if (posix_memalign( (void**)&stack->head, 16, sizeof(knotd_lockless_stack_head_t)) != 0) {
+               return ENOMEM;
+       }
+
+       memset((void*)stack->head, 0, sizeof(knotd_lockless_stack_head_t));
+       knotd_lockless_stack_head_t head = {0};
+       KNOT_ATOMIC_INIT(stack->head[0], head);
+       return 0;
+}
+
+void knotd_lockless_stack_cleanup(knotd_lockless_stack_t *stack) {
+    free(stack->head);
+    stack->head = NULL;
+}
+
+void knotd_lockless_stack_push(knotd_lockless_stack_t *stack, knotd_lockless_stack_node_t *node) {
+    knotd_lockless_stack_head_t expect, new;
+    assert(node->next == NULL);
+
+    KNOT_ATOMIC_GET(stack->head, expect);
+    do
+    {
+        node->next = expect.next;
+        new.next = node;
+        new.count = expect.count + 1;
+        new.aba_cookie = expect.aba_cookie + 1;
+    }
+    while(!KNOT_ATOMIC_COMPARE_EXCHANGE_WEAK(stack->head, expect, new));
+}
+
+knotd_lockless_stack_node_t *knotd_lockless_stack_pop(knotd_lockless_stack_t *stack) {
+    knotd_lockless_stack_head_t expect, new;
+
+    KNOT_ATOMIC_GET(stack->head, expect);
+    do
+    {
+        if (expect.next == NULL)
+        {
+            assert(expect.count == 0);
+            return NULL;
+        }
+
+        new.next = expect.next->next;       // Do NOT free stack nodes after pop; it can cause invalid memory access here.
+        new.count = expect.count - 1;
+        new.aba_cookie = expect.aba_cookie + 1;
+    }
+    while (!KNOT_ATOMIC_COMPARE_EXCHANGE_WEAK(stack->head, expect, new));
+
+    expect.next->next = NULL;
+    return expect.next;
+}
+
+uint32_t knotd_lockless_stack_count(knotd_lockless_stack_t *stack)
+{
+    knotd_lockless_stack_head_t expect;
+    KNOT_ATOMIC_GET_RELAXED(stack->head, expect);
+    return expect.count;
+}
+
+#pragma GCC diagnostic pop
index 93e47ffc2121ebac87e7f4becb45895453f22652..49872efd339daed833b9d5c831ccd07cd26946ac 100644 (file)
@@ -637,7 +637,12 @@ int nsec_append_rrsigs(knot_pkt_t *pkt, knotd_qdata_t *qdata, bool optional)
 {
        int ret = KNOT_EOK;
        uint16_t flags = optional ? KNOT_PF_NOTRUNC : KNOT_PF_NULL;
-       flags |= KNOT_PF_FREE; // Free all RRSIGs, they are synthesized
+       if (qdata->extra->zone->sign_ctx == NULL)
+       {
+                // Free all RRSIGs, they are synthesized
+                // For the online sign module, it will be freed as part of the azuredb module.
+               flags |= KNOT_PF_FREE;
+       }
        flags |= KNOT_PF_ORIGTTL;
 
        /* Append RRSIGs for section. */
index 1489dc01d6b410a7a2411631b6a26e747f7d4797..8ad684c784ba6bd482357cf23c357eeea7475f78 100644 (file)
@@ -32,6 +32,7 @@
 #include "libknot/libknot.h"
 #include "contrib/macros.h"
 #include "contrib/mempattern.h"
+#include "knot/nameserver/query_state.h"
 
 /*! \brief Accessor to query-specific data. */
 #define QUERY_DATA(ctx) ((knotd_qdata_t *)(ctx)->data)
@@ -64,6 +65,8 @@ static void query_data_init(knot_layer_t *ctx, knotd_qdata_params_t *params,
        data->extra = extra;
        data->rcode_ede = KNOT_EDNS_EDE_NONE;
 
+       clock_gettime(CLOCK_REALTIME_COARSE, &data->query_time);
+
        /* Initialize lists. */
        memset(extra, 0, sizeof(*extra));
        init_list(&extra->wildcards);
@@ -181,8 +184,42 @@ static int query_chaos(knot_pkt_t *pkt, knot_layer_t *ctx)
        return KNOT_STATE_DONE;
 }
 
+/*!
+ * \brief Lookup zone from plan if available, else fallback to regular DB lookup.
+ */
+static zone_t *lookup_zone(struct query_plan *plan, knotd_qdata_t *qdata, knot_pkt_t *query,
+                                                  knot_zonedb_t *db, const knot_dname_t *zone_name, bool is_suffix_match)
+{
+       struct query_step *step;
+       int next_state = KNOT_STATE_PRODUCE;
+       qdata->extra->zone_lookup_params.is_suffix_match = is_suffix_match;
+       qdata->extra->zone_lookup_params.zone_name = zone_name;
+
+       zone_t *static_zone_response = is_suffix_match ? knot_zonedb_find_suffix(db, zone_name) : knot_zonedb_find(db, zone_name);
+       if (static_zone_response != NULL) {
+               return static_zone_response;
+       }
+
+       if (plan != NULL) {
+               WALK_LIST(step, plan->stage[KNOTD_STAGE_ZONE_LOOKUP]) {
+                       next_state = step->process(next_state, query, qdata, step->ctx);
+                       if (next_state == KNOT_STATE_FAIL) {
+                               break;
+                       }
+               }
+       }
+
+       if (next_state == KNOTD_STATE_ZONE_LOOKUPDONE)
+       {
+               return (zone_t*) qdata->extra->zone;
+       }
+
+       return NULL;
+}
+
 /*! \brief Find zone for given question. */
-static const zone_t *answer_zone_find(const knot_pkt_t *query, knot_zonedb_t *zonedb)
+static const zone_t *answer_zone_find(struct query_plan *plan, knotd_qdata_t *qdata,
+                                      knot_pkt_t *query, knot_zonedb_t *zonedb)
 {
        uint16_t qtype = knot_pkt_qtype(query);
        uint16_t qclass = knot_pkt_qclass(query);
@@ -200,7 +237,7 @@ static const zone_t *answer_zone_find(const knot_pkt_t *query, knot_zonedb_t *zo
         */
        if (qtype == KNOT_RRTYPE_DS) {
                const knot_dname_t *parent = knot_wire_next_label(qname, NULL);
-               zone = knot_zonedb_find_suffix(zonedb, parent);
+               zone = lookup_zone(plan, qdata, query, zonedb, parent, true);
                /* If zone does not exist, search for its parent zone,
                   this will later result to NODATA answer. */
                /*! \note This is not 100% right, it may lead to DS name for example
@@ -211,10 +248,10 @@ static const zone_t *answer_zone_find(const knot_pkt_t *query, knot_zonedb_t *zo
 
        if (zone == NULL) {
                if (query_type(query) == KNOTD_QUERY_TYPE_NORMAL) {
-                       zone = knot_zonedb_find_suffix(zonedb, qname);
+                       zone = lookup_zone(plan, qdata, query, zonedb, qname, true);
                } else {
                        // Direct match required.
-                       zone = knot_zonedb_find(zonedb, qname);
+                       zone = lookup_zone(plan, qdata, query, zonedb, qname, false);
                }
        }
 
@@ -266,6 +303,32 @@ static int answer_edns_init(const knot_pkt_t *query, knot_pkt_t *resp,
                knot_edns_set_do(&qdata->opt_rr);
        }
 
+       /* Append Microsoft origin scope if requested. */
+       uint8_t *ms_origin_scope_opt = knot_pkt_edns_option(query, KNOT_EDNS_MICROSOFT_ORIGIN_SCOPE_CODE);
+       if (ms_origin_scope_opt) {
+               const uint8_t *ms_origin_scope_data = knot_edns_opt_get_data(ms_origin_scope_opt);
+               uint16_t ms_origin_scope_len = knot_edns_opt_get_length(ms_origin_scope_opt);
+               ret = knot_edns_add_option(&qdata->opt_rr,
+                                                       KNOT_EDNS_MICROSOFT_ORIGIN_SCOPE_CODE,
+                                                       ms_origin_scope_len,
+                                                       ms_origin_scope_data,
+                                                       qdata->mm);
+               if (ret != KNOT_EOK) {
+                       return ret;
+               }
+
+               /* Append billing id information */
+               const uint8_t data_tag_val[MAX_DATA_TAG_LEN] = {0};
+               ret = knot_edns_add_option(&qdata->opt_rr,
+                                                       KNOT_EDNS_OPTION_DATA_TAG_CODE,
+                                                       sizeof(MAX_DATA_TAG_LEN)-1,
+                                                       data_tag_val,
+                                                       qdata->mm);
+               if (ret != KNOT_EOK) {
+                       return ret;
+               }
+       }
+
        /* Append NSID if requested and available. */
        if (knot_pkt_edns_option(query, KNOT_EDNS_OPTION_NSID) != NULL) {
                conf_val_t *nsid = &conf()->cache.srv_nsid;
@@ -383,9 +446,9 @@ static int answer_edns_put(knot_pkt_t *resp, knotd_qdata_t *qdata)
 }
 
 /*! \brief Initialize response, sizes and find zone from which we're going to answer. */
-static int prepare_answer(knot_pkt_t *query, knot_pkt_t *resp, knot_layer_t *ctx)
+static int prepare_answer(struct query_plan *plan, knotd_qdata_t *qdata, knot_pkt_t *query,
+                          knot_pkt_t *resp, knot_layer_t *ctx)
 {
-       knotd_qdata_t *qdata = QUERY_DATA(ctx);
        server_t *server = qdata->params->server;
 
        /* Initialize response. */
@@ -403,8 +466,9 @@ static int prepare_answer(knot_pkt_t *query, knot_pkt_t *resp, knot_layer_t *ctx
 
        /* Update maximal answer size. */
        bool has_limit = qdata->params->flags & KNOTD_QUERY_FLAG_LIMIT_SIZE;
+       uint16_t resp_size = KNOT_WIRE_MAX_PKTSIZE;
        if (has_limit) {
-               resp->max_size = KNOT_WIRE_MIN_PKTSIZE;
+               resp_size = KNOT_WIRE_MIN_PKTSIZE;
                if (knot_pkt_has_edns(query)) {
                        uint16_t server_size;
                        switch (knotd_qdata_remote_addr(qdata)->ss_family) {
@@ -419,11 +483,10 @@ static int prepare_answer(knot_pkt_t *query, knot_pkt_t *resp, knot_layer_t *ctx
                        }
                        uint16_t client_size = knot_edns_get_payload(query->opt_rr);
                        uint16_t transfer = MIN(client_size, server_size);
-                       resp->max_size = MAX(resp->max_size, transfer);
+                       resp_size = MAX(resp_size, transfer);
                }
-       } else {
-               resp->max_size = KNOT_WIRE_MAX_PKTSIZE;
        }
+       resp->max_size = MIN(resp->max_size, resp_size);
 
        /* All supported OPCODEs require a question. */
        const knot_dname_t *qname = knot_pkt_qname(query);
@@ -447,7 +510,7 @@ static int prepare_answer(knot_pkt_t *query, knot_pkt_t *resp, knot_layer_t *ctx
        process_query_qname_case_lower(query);
 
        /* Find zone for QNAME. */
-       qdata->extra->zone = answer_zone_find(query, server->zone_db);
+       qdata->extra->zone = answer_zone_find(plan, qdata, query, server->zone_db);
        if (qdata->extra->zone != NULL && qdata->extra->contents == NULL) {
                qdata->extra->contents = qdata->extra->zone->contents;
        }
@@ -528,11 +591,19 @@ static int process_query_err(knot_layer_t *ctx, knot_pkt_t *pkt)
        return KNOT_STATE_DONE;
 }
 
+#ifdef ENABLE_ASYNC_QUERY_HANDLING
+#define NON_CONTINUABLE_STATE(next_state) ((next_state) == KNOT_STATE_FAIL || (next_state) == KNOT_STATE_ASYNC)
+#define BREAK_IF_ASYNC(next_state) if ((next_state) == KNOT_STATE_ASYNC) { break; }
+#else
+#define NON_CONTINUABLE_STATE(next_state) ((next_state) == KNOT_STATE_FAIL)
+#define BREAK_IF_ASYNC(next_state)
+#endif
+
 #define PROCESS_BEGIN(plan, step, next_state, qdata) \
        if (plan != NULL) { \
-               WALK_LIST(step, plan->stage[KNOTD_STAGE_BEGIN]) { \
-                       next_state = step->process(next_state, pkt, qdata, step->ctx); \
-                       if (next_state == KNOT_STATE_FAIL) { \
+               WALK_LIST_RESUME((step), plan->stage[KNOTD_STAGE_BEGIN]) { \
+                       next_state = (step)->process(next_state, pkt, qdata, (step)->ctx); \
+                       if (NON_CONTINUABLE_STATE(next_state)) { \
                                goto finish; \
                        } \
                } \
@@ -540,14 +611,44 @@ static int process_query_err(knot_layer_t *ctx, knot_pkt_t *pkt)
 
 #define PROCESS_END(plan, step, next_state, qdata) \
        if (plan != NULL) { \
-               WALK_LIST(step, plan->stage[KNOTD_STAGE_END]) { \
-                       next_state = step->process(next_state, pkt, qdata, step->ctx); \
+               WALK_LIST_RESUME((step), plan->stage[KNOTD_STAGE_END]) { \
+                       next_state = (step)->process(next_state, pkt, qdata, (step)->ctx); \
                        if (next_state == KNOT_STATE_FAIL) { \
                                next_state = process_query_err(ctx, pkt); \
                        } \
+                       BREAK_IF_ASYNC(next_state); \
                } \
        }
 
+static void init_state_machine(state_machine_t *state) {
+    memset(state, 0, sizeof(*state));
+       state->process_query_next_state = KNOT_STATE_PRODUCE;
+}
+
+#ifdef ENABLE_ASYNC_QUERY_HANDLING
+static int complete_async_call(knotd_qdata_t *qdata)
+{
+       return qdata->params->async_completed_callback(qdata->params);
+}
+
+static int async_operation_in_completed_callback(knotd_qdata_t *qdata, int state)
+{
+       assert(qdata->state);
+       state_machine_t *state_machine = qdata->state;
+       state_machine->process_query_next_state_in = state;
+       return complete_async_call(qdata);
+}
+
+static int async_operation_completed_callback(knotd_qdata_t *qdata, int state)
+{
+       assert(qdata->state);
+       state_machine_t *state_machine = qdata->state;
+       state_machine->process_query_next_state = state;
+
+       return complete_async_call(qdata);
+}
+#endif
+
 static int process_query_out(knot_layer_t *ctx, knot_pkt_t *pkt)
 {
        assert(pkt && ctx);
@@ -557,9 +658,10 @@ static int process_query_out(knot_layer_t *ctx, knot_pkt_t *pkt)
        knotd_qdata_t *qdata = QUERY_DATA(ctx);
        struct query_plan *plan = conf()->query_plan;
        struct query_plan *zone_plan = NULL;
-       struct query_step *step;
-
-       int next_state = KNOT_STATE_PRODUCE;
+       state_machine_t *state = NULL;
+       struct query_step *step = NULL;
+       struct query_step **step_to_use = &step;
+       int next_state;
 
        /* Check parse state. */
        knot_pkt_t *query = qdata->query;
@@ -569,10 +671,48 @@ static int process_query_out(knot_layer_t *ctx, knot_pkt_t *pkt)
                goto finish;
        }
 
-       /* Preprocessing. */
-       if (prepare_answer(query, pkt, ctx) != KNOT_EOK) {
-               next_state = KNOT_STATE_FAIL;
-               goto finish;
+       state = qdata->state;
+       if (state == NULL) {
+               state = mm_alloc(ctx->mm, sizeof(*state));
+               if (state == NULL) {
+                       qdata->rcode = KNOT_RCODE_SERVFAIL;
+                       next_state = KNOT_STATE_FAIL;
+                       goto finish;
+               }
+               init_state_machine(state);
+               qdata->state = state;
+#ifdef ENABLE_ASYNC_QUERY_HANDLING
+               qdata->async_completed = async_operation_completed_callback;
+               qdata->async_in_completed = async_operation_in_completed_callback;
+#endif
+       }
+       step_to_use = &state->step;
+
+       next_state = state->process_query_next_state;
+#ifdef ENABLE_ASYNC_QUERY_HANDLING
+       assert(next_state != KNOT_STATE_ASYNC); /* at the beginning or resuming cant be in async */
+#endif
+       if (NON_CONTINUABLE_STATE(next_state)) {
+               if (state->process_query_state == PROCESS_QUERY_STATE_DONE_ZONE_PLAN_BEGIN)
+               {
+                       /* Async state and failures are the result of query_* methods.
+                        * Go to query_* and recover execution from there. */
+                       goto run_query;
+               }
+               else
+               {
+                       goto finish;
+               }
+       }
+
+       STATE_MACHINE_RUN_STATE(state, next_state, KNOT_STATE_ASYNC, process_query_state, PROCESS_QUERY_STATE_DONE_PREPARE_ANSWER) {
+               /* Preprocessing. */
+               if (prepare_answer(plan, qdata, query, pkt, ctx) != KNOT_EOK) {
+                       next_state = KNOT_STATE_FAIL;
+                       goto finish;
+               }
+
+               STATE_MACHINE_COMPLETED_STATE(state, next_state, KNOT_STATE_ASYNC, process_query_state, PROCESS_QUERY_STATE_DONE_PREPARE_ANSWER);
        }
 
        if (qdata->extra->zone != NULL && qdata->extra->zone->query_plan != NULL) {
@@ -580,75 +720,129 @@ static int process_query_out(knot_layer_t *ctx, knot_pkt_t *pkt)
        }
 
        /* Before query processing code. */
-       PROCESS_BEGIN(plan, step, next_state, qdata);
-       PROCESS_BEGIN(zone_plan, step, next_state, qdata);
-
-       /* Answer based on qclass. */
-       if (next_state == KNOT_STATE_PRODUCE) {
-               switch (knot_pkt_qclass(pkt)) {
-               case KNOT_CLASS_CH:
-                       next_state = query_chaos(pkt, ctx);
-                       break;
-               case KNOT_CLASS_ANY:
-               case KNOT_CLASS_IN:
-                       next_state = query_internet(pkt, ctx);
-                       break;
-               default:
-                       qdata->rcode = KNOT_RCODE_REFUSED;
-                       next_state = KNOT_STATE_FAIL;
-                       break;
-               }
+       STATE_MACHINE_RUN_STATE(state, next_state, KNOT_STATE_ASYNC, process_query_state, PROCESS_QUERY_STATE_DONE_PLAN_BEGIN) {
+               PROCESS_BEGIN(plan, *step_to_use, next_state, qdata);
+               STATE_MACHINE_COMPLETED_STATE(state, next_state, KNOT_STATE_ASYNC, process_query_state, PROCESS_QUERY_STATE_DONE_PLAN_BEGIN);
        }
 
-       /* Postprocessing. */
-       if (next_state == KNOT_STATE_DONE || next_state == KNOT_STATE_PRODUCE) {
-               /* Restore original QNAME. */
-               process_query_qname_case_restore(pkt, qdata);
+       STATE_MACHINE_RUN_STATE(state, next_state, KNOT_STATE_ASYNC, process_query_state, PROCESS_QUERY_STATE_DONE_ZONE_PLAN_BEGIN) {
+               PROCESS_BEGIN(zone_plan, *step_to_use, next_state, qdata);
+               STATE_MACHINE_COMPLETED_STATE(state, next_state, KNOT_STATE_ASYNC, process_query_state, PROCESS_QUERY_STATE_DONE_ZONE_PLAN_BEGIN);
+       }
 
-               /* Move to Additionals to add OPT and TSIG. */
-               if (pkt->current != KNOT_ADDITIONAL) {
-                       (void)knot_pkt_begin(pkt, KNOT_ADDITIONAL);
+run_query:
+       STATE_MACHINE_RUN_STATE(state, next_state, KNOT_STATE_ASYNC, process_query_state, PROCESS_QUERY_STATE_DONE_QUERY) {
+               /* Answer based on qclass. */
+               if (next_state == KNOT_STATE_PRODUCE) {
+                       switch (knot_pkt_qclass(pkt)) {
+                       case KNOT_CLASS_CH:
+                               next_state = query_chaos(pkt, ctx);
+                               break;
+                       case KNOT_CLASS_ANY:
+                       case KNOT_CLASS_IN:
+                               next_state = query_internet(pkt, ctx);
+                               break;
+                       default:
+                               qdata->rcode = KNOT_RCODE_REFUSED;
+                               next_state = KNOT_STATE_FAIL;
+                               break;
+                       }
                }
+               STATE_MACHINE_COMPLETED_STATE(state, next_state, KNOT_STATE_ASYNC, process_query_state, PROCESS_QUERY_STATE_DONE_QUERY);
+       }
 
-               /* Put OPT RR to the additional section. */
-               if (answer_edns_put(pkt, qdata) != KNOT_EOK) {
-                       qdata->rcode = KNOT_RCODE_FORMERR;
-                       next_state = KNOT_STATE_FAIL;
-                       goto finish;
-               }
+       STATE_MACHINE_RUN_STATE(state, next_state, KNOT_STATE_ASYNC, process_query_state, PROCESS_QUERY_STATE_DONE_POST_QUERY) {
+               /* Postprocessing. */
+               if (next_state == KNOT_STATE_DONE || next_state == KNOT_STATE_PRODUCE) {
+                       /* Restore original QNAME. */
+                       process_query_qname_case_restore(pkt, qdata);
 
-               /* Transaction security (if applicable). */
-               if (process_query_sign_response(pkt, qdata) != KNOT_EOK) {
-                       next_state = KNOT_STATE_FAIL;
-                       goto finish;
+                       /* Move to Additionals to add OPT and TSIG. */
+                       if (pkt->current != KNOT_ADDITIONAL) {
+                               (void)knot_pkt_begin(pkt, KNOT_ADDITIONAL);
+                       }
+
+                       /* Put OPT RR to the additional section. */
+                       if (answer_edns_put(pkt, qdata) != KNOT_EOK) {
+                               qdata->rcode = KNOT_RCODE_FORMERR;
+                               next_state = KNOT_STATE_FAIL;
+                               goto finish;
+                       }
+
+                       /* Transaction security (if applicable). */
+                       if (process_query_sign_response(pkt, qdata) != KNOT_EOK) {
+                               next_state = KNOT_STATE_FAIL;
+                               goto finish;
+                       }
                }
+               STATE_MACHINE_COMPLETED_STATE(state, next_state, KNOT_STATE_ASYNC, process_query_state, PROCESS_QUERY_STATE_DONE_POST_QUERY);
        }
 
 finish:
-       switch (next_state) {
-       case KNOT_STATE_NOOP:
-               break;
-       case KNOT_STATE_FAIL:
-               /* Error processing. */
-               next_state = process_query_err(ctx, pkt);
-               break;
-       case KNOT_STATE_FINAL:
-               /* Just skipped postprocessing. */
-               next_state = KNOT_STATE_DONE;
-               break;
-       default:
-               set_rcode_to_packet(pkt, qdata);
+       STATE_MACHINE_RUN_STATE(state, next_state, KNOT_STATE_ASYNC, process_query_state, PROCESS_QUERY_STATE_DONE_HANDLE_ERROR) {
+               switch (next_state) {
+               case KNOT_STATE_NOOP:
+                       break;
+               case KNOT_STATE_FAIL:
+                       /* Error processing. */
+                       next_state = process_query_err(ctx, pkt);
+                       break;
+               case KNOT_STATE_FINAL:
+                       /* Just skipped postprocessing. */
+                       next_state = KNOT_STATE_DONE;
+                       break;
+               default:
+                       set_rcode_to_packet(pkt, qdata);
+               }
+
+               STATE_MACHINE_COMPLETED_STATE(state, next_state, KNOT_STATE_ASYNC, process_query_state, PROCESS_QUERY_STATE_DONE_HANDLE_ERROR);
        }
 
        /* After query processing code. */
-       PROCESS_END(plan, step, next_state, qdata);
-       PROCESS_END(zone_plan, step, next_state, qdata);
+       STATE_MACHINE_RUN_STATE(state, next_state, KNOT_STATE_ASYNC, process_query_state, PROCESS_QUERY_STATE_DONE_PLAN_END) {
+               PROCESS_END(plan, *step_to_use, next_state, qdata);
+               STATE_MACHINE_COMPLETED_STATE(state, next_state, KNOT_STATE_ASYNC, process_query_state, PROCESS_QUERY_STATE_DONE_PLAN_END);
+       }
+
+       STATE_MACHINE_RUN_STATE(state, next_state, KNOT_STATE_ASYNC, process_query_state, PROCESS_QUERY_STATE_DONE_ZONE_PLAN_END) {
+               PROCESS_END(zone_plan, *step_to_use, next_state, qdata);
+               STATE_MACHINE_COMPLETED_STATE(state, next_state, KNOT_STATE_ASYNC, process_query_state, PROCESS_QUERY_STATE_DONE_ZONE_PLAN_END);
+       }
 
        rcu_read_unlock();
 
+       if (knot_layer_active_state(next_state)) {
+               /*
+                * Exiting the state machine with an active state will result in more produce calls which need to resume from beginning.
+                * Reset the state machine so it will execute all steps.
+                */
+               if (state) {
+                       init_state_machine(state);
+               }
+       }
+
        return next_state;
 }
 
+#ifdef ENABLE_ASYNC_QUERY_HANDLING
+static int process_query_set_async_state(knot_layer_t *ctx, knot_pkt_t *pkt, int layer_state)
+{
+       assert(pkt && ctx);
+       knotd_qdata_t *qdata = QUERY_DATA(ctx);
+       if (qdata != NULL) {
+               state_machine_t *state = qdata->state;
+               if (state != NULL) {
+                       state->process_query_next_state = layer_state;
+                       if (layer_state == KNOT_STATE_FAIL) {
+                               state->process_query_next_state_in = KNOTD_IN_STATE_ERROR;
+                       }
+               }
+       }
+
+       return layer_state;
+}
+#endif
+
 bool process_query_acl_check(conf_t *conf, acl_action_t action,
                              knotd_qdata_t *qdata)
 {
@@ -944,6 +1138,9 @@ const knot_layer_api_t *process_query_layer(void)
                .finish  = &process_query_finish,
                .consume = &process_query_in,
                .produce = &process_query_out,
+#ifdef ENABLE_ASYNC_QUERY_HANDLING
+               .set_async_state  = &process_query_set_async_state,
+#endif
        };
        return &api;
 }
index 00e1790c03361a51191a398086b3dc6ce1df0aa8..c4bdfbc7ca8f8236d10675753d99ebdd1bdb1154 100644 (file)
@@ -24,6 +24,9 @@
 /* Query processing module implementation. */
 const knot_layer_api_t *process_query_layer(void);
 
+/* NOTE: temporary length for the EDNS data tag option. */
+#define MAX_DATA_TAG_LEN 25
+
 /*! \brief Query processing intermediate data. */
 typedef struct knotd_qdata_extra {
        const zone_t *zone;  /*!< Zone from which is answered. */
@@ -42,6 +45,16 @@ typedef struct knotd_qdata_extra {
        /* Extensions. */
        void *ext;
        void (*ext_cleanup)(knotd_qdata_t *); /*!< Extensions cleanup callback. */
+
+       union
+       {
+               struct {
+                       const knot_dname_t *zone_name;
+                       bool is_suffix_match;
+               } zone_lookup_params;
+       };
+
+       int ext_result; /*!< Additional error code from module. Modules MUST return this value for KNOTD_STAGE_ZONE_LOOKUP. */
 } knotd_qdata_extra_t;
 
 /*! \brief Visited wildcard node list. */
index 0effefe524004a462340c2aabdcd742a9fe47e9d..e7a59f78a3c7d926c02c1482b3b4d17b006a8135 100644 (file)
@@ -107,7 +107,7 @@ int query_plan_step(struct query_plan *plan, knotd_stage_t stage,
 _public_
 int knotd_mod_hook(knotd_mod_t *mod, knotd_stage_t stage, knotd_mod_hook_f hook)
 {
-       if (stage != KNOTD_STAGE_BEGIN && stage != KNOTD_STAGE_END) {
+       if (stage != KNOTD_STAGE_BEGIN && stage != KNOTD_STAGE_END && stage != KNOTD_STAGE_ZONE_LOOKUP) {
                return KNOT_EINVAL;
        }
 
@@ -117,7 +117,7 @@ int knotd_mod_hook(knotd_mod_t *mod, knotd_stage_t stage, knotd_mod_hook_f hook)
 _public_
 int knotd_mod_in_hook(knotd_mod_t *mod, knotd_stage_t stage, knotd_mod_in_hook_f hook)
 {
-       if (stage == KNOTD_STAGE_BEGIN || stage == KNOTD_STAGE_END) {
+       if (stage == KNOTD_STAGE_BEGIN || stage == KNOTD_STAGE_END || stage == KNOTD_STAGE_ZONE_LOOKUP) {
                return KNOT_EINVAL;
        }
 
@@ -618,47 +618,31 @@ void knotd_conf_free(knotd_conf_t *conf)
 }
 
 _public_
-const struct sockaddr_storage *knotd_qdata_local_addr(knotd_qdata_t *qdata,
-                                                      struct sockaddr_storage *buff)
+const struct sockaddr_storage *knotd_qdata_local_addr(knotd_qdata_t *qdata)
 {
-       if (qdata == NULL) {
+       if (qdata == NULL || qdata->params == NULL) {
                return NULL;
        }
 
-       if (qdata->params->xdp_msg != NULL) {
-#ifdef ENABLE_XDP
-               return (struct sockaddr_storage *)&qdata->params->xdp_msg->ip_to;
-#else
-               assert(0);
-               return NULL;
-#endif
-       } else {
-               socklen_t buff_len = sizeof(*buff);
-               if (getsockname(qdata->params->socket, (struct sockaddr *)buff,
-                               &buff_len) != 0) {
-                       return NULL;
-               }
-               return buff;
+       if (qdata->params->local != NULL && qdata->params->local->ss_family != AF_UNSPEC) {
+               return qdata->params->local;
        }
+
+       return NULL;
 }
 
 _public_
 const struct sockaddr_storage *knotd_qdata_remote_addr(knotd_qdata_t *qdata)
 {
-       if (qdata == NULL) {
+       if (qdata == NULL || qdata->params == NULL) {
                return NULL;
        }
 
-       if (qdata->params->xdp_msg != NULL) {
-#ifdef ENABLE_XDP
-               return (struct sockaddr_storage *)&qdata->params->xdp_msg->ip_from;
-#else
-               assert(0);
-               return NULL;
-#endif
-       } else {
+       if (qdata->params->remote != NULL && qdata->params->remote->ss_family != AF_UNSPEC) {
                return qdata->params->remote;
        }
+
+       return NULL;
 }
 
 _public_
index 5cc905b909425fd90c670d9eef9b15a4bbefd45e..c1fec8aa88969cb6df55582539ea66144b99be6b 100644 (file)
 
 #ifdef HAVE_ATOMIC
  #define ATOMIC_GET(src) __atomic_load_n(&(src), __ATOMIC_RELAXED)
+ #define ATOMIC_ADD(dst, val) __atomic_add_fetch(&(dst), (val), __ATOMIC_RELAXED)
 #else
- #define ATOMIC_GET(src) (src)
+#define ATOMIC_GET(src) (src)
+#define ATOMIC_ADD(dst, val) ((dst) += (val))
 #endif
 
 #define KNOTD_STAGES (KNOTD_STAGE_END + 1)
diff --git a/src/knot/nameserver/query_state.h b/src/knot/nameserver/query_state.h
new file mode 100644 (file)
index 0000000..8b8b972
--- /dev/null
@@ -0,0 +1,83 @@
+#pragma once
+
+typedef enum {
+       PROCESS_QUERY_STATE_BEGIN,
+       PROCESS_QUERY_STATE_DONE_PREPARE_ANSWER,
+       PROCESS_QUERY_STATE_DONE_PLAN_BEGIN,
+       PROCESS_QUERY_STATE_DONE_ZONE_PLAN_BEGIN,
+       PROCESS_QUERY_STATE_DONE_QUERY,
+       PROCESS_QUERY_STATE_DONE_POST_QUERY,
+       PROCESS_QUERY_STATE_DONE_HANDLE_ERROR,
+       PROCESS_QUERY_STATE_DONE_PLAN_END,
+       PROCESS_QUERY_STATE_DONE_ZONE_PLAN_END,
+} process_query_state_t;
+
+typedef enum {
+       INTERNET_PROCESS_QUERY_STATE_BEGIN,
+       INTERNET_PROCESS_QUERY_STATE_DONE_PREPROCESS,
+} internet_process_query_state_t;
+
+typedef enum {
+       ANSWER_QUERY_STATE_BEGIN,
+       ANSWER_QUERY_STATE_DONE_PREANSWER,
+       ANSWER_QUERY_STATE_DONE_ANSWER_BEGIN,
+       ANSWER_QUERY_STATE_DONE_SOLVE_ANSWER,
+       ANSWER_QUERY_STATE_DONE_SOLVE_ANSWER_DNSSEC,
+       ANSWER_QUERY_STATE_DONE_STAGE_ANSWER,
+       ANSWER_QUERY_STATE_DONE_AUTH_BEGIN,
+       ANSWER_QUERY_STATE_DONE_SOLVE_AUTH,
+       ANSWER_QUERY_STATE_DONE_SOLVE_AUTH_DNSSEC,
+       ANSWER_QUERY_STATE_DONE_STAGE_AUTH,
+       ANSWER_QUERY_STATE_DONE_ADDITIONAL_BEGIN,
+       ANSWER_QUERY_STATE_DONE_SOLVE_ADDITIONAL,
+       ANSWER_QUERY_STATE_DONE_SOLVE_AADDITIONAL_DNSSEC,
+       ANSWER_QUERY_STATE_DONE_STAGE_AADDITIONAL,
+       ANSWER_QUERY_STATE_DONE_SET_ERROR,
+} answer_query_state_t;
+
+typedef enum {
+       SOLVE_ANSWER_STATE_BEGIN,
+       SOLVE_ANSWER_HANDLE_INCOMING_STATE,
+       SOLVE_ANSWER_SOLVE_NAME_FIRST,
+       SOLVE_ANSWER_SOLVE_NAME_FIRST_DONE,
+       SOLVE_ANSWER_SOLVE_NAME_FOLLOW,
+} solve_answer_query_state_t;
+
+typedef enum {
+       SOLVE_NAME_STATE_BEGIN,
+       SOLVE_NAME_HANDLE_INCOMING_STATE,
+       SOLVE_NAME_STAGE_LOOKUP,
+       SOLVE_NAME_STAGE_LOOKUP_DONE,
+} solve_name_query_state_t;
+
+/*! \brief State machine state data.
+       Preserves execution state of the function like
+               1. Position within function
+               2. Local variables whose values need to be preserved between resume.
+       TBD: Unions can be used to save space for functions that can't execute in parallel. */
+typedef struct {
+       struct query_step *step;
+       process_query_state_t process_query_state;
+       internet_process_query_state_t internet_process_query_state;
+       answer_query_state_t answer_query_state;
+       solve_answer_query_state_t solve_answer_state;
+       solve_name_query_state_t solve_name_state;
+       int process_query_next_state;
+       int process_query_next_state_in;
+       int solve_answer_old_state;
+       int solve_name_incoming_state;
+       bool solve_answer_loop_in_async;
+} state_machine_t;
+
+#ifdef ENABLE_ASYNC_QUERY_HANDLING
+#define STATE_MACHINE_RUN_STATE(state, next_state, async_state, sub_state, curr_state) \
+       if (((state) == NULL) || ((next_state) != async_state && (state)->sub_state < (curr_state)))
+
+#define STATE_MACHINE_COMPLETED_STATE(state, next_state, async_state, sub_state, curr_state) \
+       if (((state) != NULL) && (next_state) != async_state) { (state)->sub_state = (curr_state); }
+#else
+#define STATE_MACHINE_RUN_STATE(state, next_state, async_state, sub_state, curr_state)
+
+#define STATE_MACHINE_COMPLETED_STATE(state, next_state, async_state, sub_state, curr_state)
+#endif
+
index 252d043ae807c05fcfd13e96d31d217db6d18317..21bc570bef0e7284450ef47d68a818cb3db0fb8f 100644 (file)
@@ -34,20 +34,34 @@ typedef enum {
        KNOT_STATE_DONE,       //!< Finished.
        KNOT_STATE_FAIL,       //!< Error.
        KNOT_STATE_FINAL,      //!< Finished and finalized.
+#ifdef ENABLE_ASYNC_QUERY_HANDLING
+       KNOT_LAYER_STATE_ASYNC  = 100,    //!< The request needs to be async handled. Value should match KNOT_STATE_ASYNC.
+#endif
 } knot_layer_state_t;
 
+#define knot_layer_active_state(state) ((state) == KNOT_STATE_PRODUCE || (state) == KNOT_STATE_FAIL)
+#define knot_layer_send_state(state)   ((state) != KNOT_STATE_FAIL && (state) != KNOT_STATE_NOOP)
+
 typedef struct knot_layer_api knot_layer_api_t;
 
 /*! \brief Packet processing context. */
 typedef struct {
        const knot_layer_api_t *api;  //!< Layer API.
-       knot_mm_t *mm;                //!< Processing memory context.
+       knot_mm_t *mm;                //!< Processing memory context. This memory is setup from the req to the layer when request is processed.
        knot_layer_state_t state;     //!< Processing state.
        void *data;                   //!< Module specific.
        tsig_ctx_t *tsig;             //!< TODO: remove
        unsigned flags;               //!< Custom flags.
 } knot_layer_t;
 
+#define knot_layer_clear_req_data(layer) { \
+       (layer).mm = NULL;                     \
+       (layer).state = KNOT_STATE_NOOP;       \
+       (layer).data = NULL;                   \
+       (layer).tsig = NULL;                   \
+       (layer).flags = 0;                     \
+}
+
 /*! \brief Packet processing module API. */
 struct knot_layer_api {
        int (*begin)(knot_layer_t *ctx, void *params);
@@ -55,6 +69,9 @@ struct knot_layer_api {
        int (*finish)(knot_layer_t *ctx);
        int (*consume)(knot_layer_t *ctx, knot_pkt_t *pkt);
        int (*produce)(knot_layer_t *ctx, knot_pkt_t *pkt);
+#ifdef ENABLE_ASYNC_QUERY_HANDLING
+       int (*set_async_state)(knot_layer_t *ctx, knot_pkt_t *pkt, int layer_state);
+#endif
 };
 
 /*! \brief Helper for conditional layer call. */
@@ -133,3 +150,17 @@ inline static void knot_layer_produce(knot_layer_t *ctx, knot_pkt_t *pkt)
 {
        LAYER_CALL(ctx, produce, pkt);
 }
+
+#ifdef ENABLE_ASYNC_QUERY_HANDLING
+/*!
+ * \brief Set the state from layer.
+ *
+ * \param ctx Layer context.
+ * \param pkt Data packet.
+ * \param state State to be set.
+ */
+inline static void knot_layer_set_async_state(knot_layer_t *ctx, knot_pkt_t *pkt, int state)
+{
+       LAYER_CALL(ctx, set_async_state, pkt, state);
+}
+#endif
diff --git a/src/knot/server/dns-handler.c b/src/knot/server/dns-handler.c
new file mode 100644 (file)
index 0000000..135dd95
--- /dev/null
@@ -0,0 +1,291 @@
+/*  Copyright (C) 2021 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz>
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ */
+
+#define __APPLE_USE_RFC_3542
+#include <assert.h>
+#include <dlfcn.h>
+#include <errno.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#include <sys/param.h>
+#ifdef HAVE_SYS_UIO_H  // struct iovec (OpenBSD)
+#include <sys/uio.h>
+#endif /* HAVE_SYS_UIO_H */
+#include <unistd.h>
+#include <urcu.h>
+#include "knot/server/dns-handler.h"
+
+#define DISPATCH_QUEUE_SIZE (8 * 1024)
+
+/*!
+ * \brief Process dns request for first time or resume processing if it was suspended due to async handling.
+ *
+ * \param dns_handler DNS Handler to process the request.
+ * \param dns_req DNS request to process for first time or after it is resumed from async delay state.
+ *
+ * \retval KNOT_EOK if succeeded.
+ */
+static int handle_dns_request_continue(dns_request_handler_context_t *dns_handler, dns_handler_request_t *dns_req) {
+       assert(dns_handler->layer.mm == dns_req->req_data.mm);
+       int ret = KNOT_EOK;
+       knot_pkt_t *ans = dns_req->handler_data.ans;
+#ifdef ENABLE_ASYNC_QUERY_HANDLING
+       if (dns_req->handler_data.flag & DNS_HANDLER_REQUEST_FLAG_IS_CANCELLED) {
+               /* force state to be in failed state and execute produce. This guarantees module cleanup are performed */
+               knot_layer_set_async_state(&dns_handler->layer, ans, KNOT_STATE_FAIL);
+       }
+#endif
+
+       /* Process answer. */
+       while (knot_layer_active_state(dns_handler->layer.state)) {
+               knot_layer_produce(&dns_handler->layer, ans);
+
+#ifdef ENABLE_ASYNC_QUERY_HANDLING
+               if (dns_req->handler_data.flag & DNS_HANDLER_REQUEST_FLAG_IS_CANCELLED) {
+                       ret = KNOT_EOF;
+                       break;
+               } else if (dns_handler->layer.state != KNOT_LAYER_STATE_ASYNC)
+#endif
+               {
+                       /* Send, if response generation passed and wasn't ignored. */
+                       if (dns_handler->send_result && ans->size > 0 && knot_layer_send_state(dns_handler->layer.state)) {
+                               int sent = dns_handler->send_result(dns_handler, dns_req, ans->size);
+                               if (sent != ans->size) {
+                                       ret = KNOT_EOF;
+                                       break;
+                               }
+                       }
+               }
+       }
+
+#ifdef ENABLE_ASYNC_QUERY_HANDLING
+       if (dns_handler->layer.state != KNOT_LAYER_STATE_ASYNC) {
+#endif
+               /* Send response only if finished successfully. */
+               if (dns_handler->layer.state == KNOT_STATE_DONE) {
+                       dns_req->req_data.tx->iov_len = ans->size;
+               } else {
+                       dns_req->req_data.tx->iov_len = 0;
+               }
+
+               /* Reset after processing. */
+               knot_layer_finish(&dns_handler->layer);
+
+               /* Flush per-query memory (including query and answer packets). */
+               mp_flush(dns_handler->layer.mm->ctx);
+#ifdef ENABLE_ASYNC_QUERY_HANDLING
+       } else {
+               dns_req->handler_data.flag |= DNS_HANDLER_REQUEST_FLAG_IS_ASYNC;
+               knot_layer_backup_to_dns_handler_request(dns_handler->layer, *dns_req);
+       }
+#endif
+
+       return ret;
+}
+
+#ifdef ENABLE_ASYNC_QUERY_HANDLING
+/*!
+ * \brief Resume the execution of request processing interrupted in produce call which is currently in async delayed state.
+ *
+ * \param dns_handler dns request handler context.
+ * \param dns_req Request to be Resumed.
+ *
+ * \retval KNOT_EOK if succeeded.
+ */
+static int handle_dns_request_resume(dns_request_handler_context_t *dns_handler, dns_handler_request_t *dns_req)
+{
+       assert(dns_handler_request_is_async(*dns_req));
+       knot_layer_backup_from_dns_handler_request(dns_handler->layer, *dns_req);
+       dns_req->handler_data.flag &= ~DNS_HANDLER_REQUEST_FLAG_IS_ASYNC;
+       dns_handler->layer.state = KNOT_STATE_PRODUCE; /* State is ignored by the produce itself. But helps handle_dns_request_continue to start the produce */
+
+       int ret = handle_dns_request_continue(dns_handler, dns_req);
+       if (!dns_handler_request_is_async(*dns_req)) {
+               // The request is completed. Notify the network layer that this is done.
+               dns_handler->async_complete(dns_handler, dns_req);
+       }
+       return ret;
+}
+
+/*!
+ * \brief Callback from knot_layer_t indicating the async request is complete.
+ * NOTE: This WILL BE called on a thread different from the one that owns the dns_request_handler_context_t.
+ * Any step that creates race condition with dns_request_handler_context_t thread has to be mutexed.
+ *
+ * \param params params for the knot_layer_t which completed async operation.
+ */
+static int dns_handler_notify_async_completed(knotd_qdata_params_t *params)
+{
+       dns_handler_request_t *dns_req = params->dns_req;
+       uint64_t value = 1;
+
+       // capture the handle. As soon as req is put in queue, ownership of the req moves to queue and req can be dispatched and cleaned up.
+       int async_notify_handle = dns_req->handler_data.dns_handler_ctx->async_notify_handle;
+
+       bool first = false;
+       int rc = knotd_lockless_queue_enqueue(dns_req->handler_data.dns_handler_ctx->async_completed_reqs, dns_req, &first);
+       assert(rc == 0);
+
+       if (first && write(async_notify_handle, &value, sizeof(value)) == -1) {
+               /* Request is queued, we just did not wake up async handler, next might be able to */
+               return KNOT_ESYSTEM;
+       }
+       else {
+               return KNOT_EOK;
+       }
+}
+
+/*!
+ * \brief Continue processing async completed requests.
+ * Network layer is expected to call this function, when dns_request_handler_context_get_async_notify_handle is signaled.
+ *
+ * \param dns_handler dns request handler context.
+ */
+void handle_dns_request_async_completed_queries(dns_request_handler_context_t *dns_handler_ctx)
+{
+    /* cleanup read ctx */
+    uint8_t buff[8];
+    /* consume the data from the async notification handle */
+    _unused_ int unused = read(dns_handler_ctx->async_notify_handle, buff, sizeof(buff));
+
+       dns_handler_request_t *dns_req;
+       while ((dns_req = knotd_lockless_queue_dequeue(dns_handler_ctx->async_completed_reqs))) {
+        handle_dns_request_resume(dns_handler_ctx, dns_req);
+       }
+}
+#endif
+
+/*!
+ * \brief Initialize dns request handler.
+ *
+ * \param dns_handler DNS handler to be initialized.
+ * \param server Server to be used in this DNS request handler.
+ * \param thread_id ID of the thread that will invoke this dns_handler.
+ * \param flags DNS request flags to be used in this handler.
+ * \param send_result Optional. Callback method to send results to network layer. If none provided, only final result will be available in tx of request.
+ * \param async_complete Notification when async query is completed.
+ *
+ * \retval KNOT_EOK if success.
+ */
+int initialize_dns_handle(
+       dns_request_handler_context_t *dns_handler,
+       server_t *server,
+       int thread_id,
+       uint8_t flags,
+       send_produced_result send_result
+#ifdef ENABLE_ASYNC_QUERY_HANDLING
+       ,async_query_completed_callback async_complete
+#endif
+) {
+       assert(flags & KNOTD_QUERY_FLAG_LIMIT_SIZE || send_result);
+       dns_handler->server = server;
+       dns_handler->thread_id = thread_id;
+       dns_handler->flags = flags;
+       dns_handler->send_result = send_result;
+       knot_layer_init(&dns_handler->layer, NULL, process_query_layer());
+
+#ifdef ENABLE_ASYNC_QUERY_HANDLING
+       dns_handler->async_complete = async_complete;
+       dns_handler->async_notify_handle = eventfd(0, EFD_CLOEXEC | EFD_NONBLOCK);
+       if (dns_handler->async_notify_handle == -1) {
+               return KNOT_ESYSTEM;
+       }
+
+       int ret;
+    if ((ret = knotd_lockless_queue_create(&dns_handler->async_completed_reqs, DISPATCH_QUEUE_SIZE))) {
+               return ret;
+       }
+#endif
+
+       return 0;
+}
+
+/*!
+ * \brief Cleanup dns request handler.
+ *
+ * \param dns_handler DNS handler to be cleaned up.
+ */
+void cleanup_dns_handle(dns_request_handler_context_t *dns_handler) {
+#ifdef ENABLE_ASYNC_QUERY_HANDLING
+    if (dns_handler->async_notify_handle != -1) {
+        close(dns_handler->async_notify_handle);
+        dns_handler->async_notify_handle = -1;
+    }
+
+       knotd_lockless_queue_delete(dns_handler->async_completed_reqs);
+#endif
+}
+
+/*!
+ * \brief handles dns request.
+ *
+ * \param dns_handler DNS handler to be used.
+ * \param dns_req DNS request to be processed.
+ *
+ * \retval KNOT_EOK if success.
+ */
+int handle_dns_request(dns_request_handler_context_t *dns_handler, dns_handler_request_t *dns_req)
+{
+       dns_handler_request_clear_handler_data(*dns_req);
+       knot_layer_clear_req_data(dns_handler->layer);
+
+       // Use the memory from req for this query processing
+       dns_handler->layer.mm = dns_req->req_data.mm;
+       dns_req->handler_data.dns_handler_ctx = dns_handler;
+
+       // allocate params from request memory
+       knotd_qdata_params_t *params = mm_alloc(dns_handler->layer.mm, sizeof(*params));
+       if (!params) {
+               // failed to allocated, just fail the call
+               dns_req->req_data.tx->iov_len = 0;
+               return KNOT_ESPACE;
+       }
+
+#ifdef ENABLE_ASYNC_QUERY_HANDLING
+       dns_req->handler_data.flag &= ~(DNS_HANDLER_REQUEST_FLAG_IS_ASYNC | DNS_HANDLER_REQUEST_FLAG_IS_CANCELLED);
+       params->async_completed_callback = dns_handler_notify_async_completed;
+#endif
+
+       /* Create query processing parameter. */
+       params->remote = &dns_req->req_data.source_addr;
+       params->local  = &dns_req->req_data.target_addr;
+       params->flags = dns_handler->flags;
+       params->socket = dns_req->req_data.fd;
+       params->server = dns_handler->server;
+       params->xdp_msg = dns_req->req_data.xdp_msg;
+       params->thread_id = dns_handler->thread_id;
+       params->dns_req = dns_req;
+       dns_req->handler_data.params = params;
+
+       /* Start query processing. */
+       knot_layer_begin(&dns_handler->layer, params);
+
+       /* Create packets. */
+       knot_pkt_t *query = knot_pkt_new(dns_req->req_data.rx->iov_base, dns_req->req_data.rx->iov_len, dns_handler->layer.mm);
+       dns_req->handler_data.ans = knot_pkt_new(dns_req->req_data.tx->iov_base, dns_req->req_data.tx->iov_len, dns_handler->layer.mm);
+
+       /* Input packet. */
+       int ret = knot_pkt_parse(query, 0);
+       if (ret != KNOT_EOK && query->parsed > 0) { // parsing failed (e.g. 2x OPT)
+               query->parsed--; // artificially decreasing "parsed" leads to FORMERR
+       }
+       knot_layer_consume(&dns_handler->layer, query);
+
+       return handle_dns_request_continue(dns_handler, dns_req);
+}
diff --git a/src/knot/server/dns-handler.h b/src/knot/server/dns-handler.h
new file mode 100644 (file)
index 0000000..8cb7f08
--- /dev/null
@@ -0,0 +1,223 @@
+/*  Copyright (C) 2021 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz>
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ */
+#pragma once
+
+#include "contrib/macros.h"
+#include "contrib/mempattern.h"
+#include "contrib/sockaddr.h"
+#include "contrib/ucw/mempool.h"
+#include "knot/common/fdset.h"
+#include "knot/nameserver/process_query.h"
+#include "knot/query/layer.h"
+#include "knot/server/server.h"
+#include "libknot/xdp/xdp.h"
+#ifdef ENABLE_ASYNC_QUERY_HANDLING
+#include "knot/include/lqueue.h"
+#include <sys/eventfd.h>
+#endif
+
+/* Buffer identifiers. */
+enum {
+       RX = 0,
+       TX = 1,
+       NBUFS = 2
+};
+
+typedef struct dns_request_handler_context dns_request_handler_context_t;
+typedef struct dns_handler_request dns_handler_request_t;
+typedef int (*send_produced_result)(dns_request_handler_context_t *net, dns_handler_request_t *req, size_t size);
+
+#ifdef ENABLE_ASYNC_QUERY_HANDLING
+typedef void (*async_query_completed_callback)(dns_request_handler_context_t *net, dns_handler_request_t *req);
+
+/*! \brief DNS request handler flags. */
+typedef enum dns_handler_request_flag {
+       DNS_HANDLER_REQUEST_FLAG_IS_ASYNC = (1 << 0),     /*!< Whether the request is currently handled asynchronously. */
+       DNS_HANDLER_REQUEST_FLAG_IS_CANCELLED = (1 << 1),  /*!< Whether the request has been cancelled. */
+} dns_handler_request_flag_t;
+#endif
+
+/*! \brief DNS request handler context data. */
+struct dns_request_handler_context {
+#ifdef ENABLE_ASYNC_QUERY_HANDLING
+       knotd_lockless_queue_t *async_completed_reqs;   /*!< Requests which were asynchronously completed by modules, but whose processing has not yet resumed. */
+       async_query_completed_callback async_complete;  /*!< Callback to network layer to indicate that the query in async state is completed. */
+       int async_notify_handle;                                                /*!< Handle used by dns request handling base layer to notify that there are requests pending async handling. */
+#endif
+       knot_layer_t layer;                                     /*!< Query processing layer. */
+       server_t *server;                                       /*!< Name server structure. */
+       send_produced_result send_result;       /*!< Sends the results produced.
+                                                                               If this is null, the sender handles the response after completion of dns request handling
+                                                                               and sends only single result. */
+       unsigned thread_id;                                     /*!< Thread identifier. */
+       uint8_t flags;                                          /*!< Flags for dns request handler for how to handle request. */
+};
+
+/*! \brief Network request data from network layer. */
+typedef struct dns_handler_network_layer_request {
+       struct sockaddr_storage source_addr;    /*!< Source address. */
+       struct sockaddr_storage target_addr;    /*!< Target address. */
+       struct iovec *rx;                                               /*!< Received iovec. */
+       struct iovec *tx;                                               /*!< Send iovec. */
+       struct knot_xdp_msg *xdp_msg;                   /*!< XDP message. */
+       knot_mm_t *mm;                                                  /*!< Processing memory context. */
+       int fd;                                                                 /*!< handle for the network request. */
+} dns_handler_network_layer_request_t;
+
+/*! \brief Network handler data to process the request. */
+typedef struct dns_handler_request_data {
+#ifdef ENABLE_ASYNC_QUERY_HANDLING
+       dns_handler_request_flag_t flag;                                        /*!< Flags for the req. */
+#endif
+       knot_pkt_t *ans;                                                                        /*!< Answer for the req. */
+       dns_request_handler_context_t *dns_handler_ctx;         /*!< dns request handler context for the req. */
+       knotd_qdata_params_t *params;                                           /*!< params for this req. */
+       struct {
+               knot_layer_state_t state;                       //!< Processing state.
+               void *data;                                                     //!< Module specific.
+               tsig_ctx_t *tsig;                                       //!< TODO: remove
+               unsigned flags;                                         //!< Custom flags.
+       } layer_data_backup_on_async_stop;              //!< Layer data backup when req was offline. Valid only when req is offline.
+} dns_handler_request_data_t;
+
+/*! \brief Dns handler request data. */
+struct dns_handler_request {
+       dns_handler_network_layer_request_t req_data;   /*!< Data from network layer. Only data here can be exchanged between network layer and dns handler. */
+       dns_handler_request_data_t handler_data;                /*!< Data from dns request handler. This data should be treated private for dns handler and network handler should not use it. */
+};
+
+/*!
+ * \brief Initialize dns request handler.
+ *
+ * \param dns_handler DNS handler to be initialized.
+ * \param server Server to be used in this DNS request handler.
+ * \param thread_id ID of the thread that will invoke this dns_handler.
+ * \param flags DNS request flags to be used in this handler.
+ * \param send_result Optional. Callback method to send results to network layer. If none provided, only final result will be available in tx of request.
+ * \param async_complete Notification when async query is completed.
+ *
+ * \retval KNOT_EOK if success.
+ */
+int initialize_dns_handle(
+       dns_request_handler_context_t *dns_handler,
+       server_t *server,
+       int thread_id,
+       uint8_t flags,
+       send_produced_result send_result
+#ifdef ENABLE_ASYNC_QUERY_HANDLING
+       ,async_query_completed_callback async_complete
+#endif
+);
+
+/*!
+ * \brief Cleanup dns request handler.
+ *
+ * \param dns_handler DNS handler to be cleaned up.
+ */
+void cleanup_dns_handle(dns_request_handler_context_t *dns_handler);
+
+/*!
+ * \brief handles dns request.
+ *
+ * \param dns_handler DNS handler to be used.
+ * \param dns_req DNS request to be processed.
+ *
+ * \retval KNOT_EOK if success.
+ */
+int handle_dns_request(dns_request_handler_context_t *dns_handler, dns_handler_request_t *dns_req);
+
+/*!
+ * \brief Clear the request with any previously processed query state information.
+ *
+ * \param dns_req DNS request that needs to be reset.
+ */
+#define dns_handler_request_clear_handler_data(dns_req) memset(&((dns_req).handler_data), 0, sizeof(dns_handler_request_data_t))
+
+#ifdef ENABLE_ASYNC_QUERY_HANDLING
+/*!
+ * \brief Get the async handle that needs to be used for monitoring pending async completed requests.
+ *
+ * \param dns_handler dns request handler context.
+ *
+ * \retval Poll handle for monitoring existence of queries in async queue ready to execute.
+ */
+#define dns_request_handler_context_get_async_notify_handle(dns_handler) ((dns_handler)->async_notify_handle)
+
+/*!
+ * \brief Gets if a DNS request is in async state.
+ *
+ * \param dns_req DNS request whose async state needs to be checked.
+ *
+ * \retval true if the request is in async state.
+ */
+#define dns_handler_request_is_async(dns_req) ((dns_req).handler_data.flag & DNS_HANDLER_REQUEST_FLAG_IS_ASYNC)
+
+/*!
+ * \brief Gets if a DNS request is in cancelled state.
+ *
+ * \param dns_req DNS request whose async state needs to be checked.
+ *
+ * \retval true if the request is in cancelled state.
+ */
+#define dns_handler_request_is_cancelled(dns_req) ((dns_req).handler_data.flag & DNS_HANDLER_REQUEST_FLAG_IS_CANCELLED)
+
+/*!
+ * \brief Cancels the request from being processed.
+ *
+ * \param dns_req DNS request whose async state needs to be changed.
+ */
+#define dns_handler_cancel_request(dns_req) ((dns_req).handler_data.flag |= DNS_HANDLER_REQUEST_FLAG_IS_CANCELLED)
+
+/*!
+ * \brief Handle DNS async completed queries in this dns handler.
+ *
+ * \param dns_handler dns request handler context.
+ */
+void handle_dns_request_async_completed_queries(dns_request_handler_context_t *dns_handler);
+
+#endif
+
+/*!
+ * \brief Backup the layer state into request itself,
+ * \brief to allow layer to process other requests while current request is delayed by async processing.
+ *
+ * \param layer Layer whose states need to be preserved.
+ * \param dns_req DNS request that needs to be used to preserve layer state.
+ */
+#define knot_layer_backup_to_dns_handler_request(layer, dns_req) {                                             \
+       assert((layer).mm == (dns_req).req_data.mm);                                                                            \
+       (dns_req).handler_data.layer_data_backup_on_async_stop.state = (layer).state;           \
+       (dns_req).handler_data.layer_data_backup_on_async_stop.data = (layer).data;                     \
+       (dns_req).handler_data.layer_data_backup_on_async_stop.tsig = (layer).tsig;                     \
+       (dns_req).handler_data.layer_data_backup_on_async_stop.flags = (layer).flags;           \
+}
+
+/*!
+ * \brief Restore the layer state from request,
+ * \brief to resume processing request that was previously delayed due to async
+ *
+ * \param layer Layer whose states need to be restored.
+ * \param dns_req DNS request that needs to be used to restore layer state.
+ */
+#define knot_layer_backup_from_dns_handler_request(layer, dns_req) {                                   \
+       (layer).mm = (dns_req).req_data.mm;                                                                                                     \
+       (layer).state = (dns_req).handler_data.layer_data_backup_on_async_stop.state;           \
+       (layer).data = (dns_req).handler_data.layer_data_backup_on_async_stop.data;                     \
+       (layer).tsig = (dns_req).handler_data.layer_data_backup_on_async_stop.tsig;                     \
+       (layer).flags = (dns_req).handler_data.layer_data_backup_on_async_stop.flags;           \
+}
+
+
diff --git a/src/knot/server/network_req_manager.c b/src/knot/server/network_req_manager.c
new file mode 100644 (file)
index 0000000..d357ac5
--- /dev/null
@@ -0,0 +1,539 @@
+#include "knot/server/network_req_manager.h"
+#include "contrib/memcheck.h"
+
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Watomic-alignment"
+
+/*! \brief Basic network request manager data. */
+typedef struct network_dns_request_manager_basic {
+       network_dns_request_manager_t base;         //!< Base network request manager function pointers.
+
+       size_t buffer_size;                         //!< Buffer size used in rx/tx while allocating the DNS request.
+       knot_mm_t query_processing_mm;              //!< Query processing mm for the requests allocated. There is only one per network manager and hence can't support async requests.
+} network_dns_request_manager_basic_t;
+
+/*!
+ * \brief Free network request allocated using free.
+ *
+ * \param mgr Network request manager used for allocating req.
+ * \param req Request to be freed.
+ */
+static void network_dns_request_manager_basic_free_req(network_dns_request_manager_t *mgr, network_dns_request_t *req) {
+	if (req == NULL) {
+		return;
+	}
+	/* Release both RX and TX buffers before the request structure itself. */
+	for (unsigned buf = 0; buf < NBUFS; ++buf) {
+		free(req->iov[buf].iov_base);
+	}
+	free(req);
+}
+
+/*!
+ * \brief Allocate memory for data associated with request.
+ *
+ * \param mgr Network request manager to be used.
+ * \param size size of memory to allocate.
+ *
+ * \retval Memory allocated. NULL if failed.
+ */
+static void* network_dns_request_manager_basic_allocate_mem(network_dns_request_manager_t *mgr, size_t size) {
+	// Plain heap allocation; mgr is intentionally unused in the basic manager.
+	return malloc(size);
+}
+
+/*!
+ * \brief Free memory for data associated with request previously allocated using allocate_mem_func.
+ *
+ * \param mgr Network request manager to be used.
+ * \param mem Memory previously allocated using allocate_mem_func.
+ */
+void network_dns_request_manager_basic_free_mem(struct network_dns_request_manager *mgr, void *mem) {
+	// free() returns void; "return free(mem);" is a constraint violation in
+	// standard C (C99 6.8.6.4) — call it as a plain statement instead.
+	free(mem);
+}
+
+/*!
+ * \brief Allocate request.
+ *
+ * \param mgr Network request manager to be used.
+ *
+ * \retval Request allocated, NULL if failed.
+ */
+static network_dns_request_t* network_dns_request_manager_basic_allocate_req(network_dns_request_manager_t *mgr) {
+	network_dns_request_manager_basic_t *this = caa_container_of(mgr, network_dns_request_manager_basic_t, base);
+
+	network_dns_request_t *req = calloc(1, sizeof(*req));
+	if (req == NULL) {
+		return NULL;
+	}
+
+	/* Give the request its RX and TX buffers; bail out on the first failure. */
+	for (unsigned buf = 0; buf < NBUFS; ++buf) {
+		void *base = malloc(this->buffer_size);
+		if (base == NULL) {
+			network_dns_request_manager_basic_free_req(&this->base, req);
+			return NULL;
+		}
+		req->iov[buf].iov_base = base;
+		req->iov[buf].iov_len = this->buffer_size;
+	}
+
+	/* Wire the handler-facing view of the buffers and the shared mempool. */
+	req->dns_req.req_data.rx = &req->iov[RX];
+	req->dns_req.req_data.tx = &req->iov[TX];
+	req->dns_req.req_data.mm = &this->query_processing_mm;
+	req->dns_req.req_data.xdp_msg = NULL;
+
+	return req;
+}
+
+/*!
+ * \brief Reset request to handle new request.
+ *
+ * \param mgr Network request manager to be used.
+ * \param req Request to be reset.
+ */
+static void network_dns_request_manager_basic_reset_request(network_dns_request_manager_t *mgr, network_dns_request_t *req) {
+	network_dns_request_manager_basic_t *this = caa_container_of(mgr, network_dns_request_manager_basic_t, base);
+	// Restore full capacity; previous processing may have shrunk the iov lengths.
+	req->iov[RX].iov_len = this->buffer_size;
+	req->iov[TX].iov_len = this->buffer_size;
+
+	// Reusing buffer, make buffer not initialized to avoid previous request data considered valid for new request.
+	VALGRIND_MAKE_MEM_UNDEFINED(req->iov[RX].iov_base, this->buffer_size);
+	VALGRIND_MAKE_MEM_UNDEFINED(req->iov[TX].iov_base, this->buffer_size);
+
+	dns_handler_request_clear_handler_data(req->dns_req);
+}
+
+/*!
+ * \brief Delete the request manager.
+ *
+ * \param mgr Network request manager to be deleted.
+ */
+static void network_dns_request_manager_basic_delete(network_dns_request_manager_t *mgr) {
+	network_dns_request_manager_basic_t *this = caa_container_of(mgr, network_dns_request_manager_basic_t, base);
+	// Drop the shared query-processing mempool, then the manager itself.
+	// NOTE(review): requests allocated via malloc in allocate_req are NOT
+	// tracked here; callers must free them before deleting the manager.
+	mp_delete(this->query_processing_mm.ctx);
+	// Poison the struct to make use-after-delete easier to spot.
+	memset(this, 0, sizeof(*this));
+	free(this);
+}
+
+/*! \brief Knot_mm based network request manager data. */
+typedef struct network_dns_request_manager_knot_mm {
+       network_dns_request_manager_t base;         //!< Base network request manager function pointers.
+
+       size_t buffer_size;                         //!< Buffer size used in rx/tx while allocating the DNS request.
+       knot_mm_t req_allocation_mm;                //!< mm used for request allocation only. All memory for request are freed when request manager is destroyed.
+       knot_mm_t query_processing_mm;              //!< Query processing mm for the requests allocated. There is only one per network manager and hence can't support async requests.
+} network_dns_request_manager_knot_mm_t;
+
+/*!
+ * \brief Free network request allocated using knot_mm.
+ *
+ * \param mgr Network request manager used for allocating req.
+ * \param req Request to be freed.
+ */
+static void network_dns_request_manager_knot_mm_free_req(network_dns_request_manager_t *mgr, network_dns_request_t *req) {
+	// "this" is only read by the VALGRIND_* macros below, which expand to
+	// nothing in non-valgrind builds — hence the _unused_ annotation.
+	_unused_ network_dns_request_manager_knot_mm_t *this = caa_container_of(mgr, network_dns_request_manager_knot_mm_t, base);
+
+	// individual request can't be freed, but mark as not accessible
+	// (the backing mempool memory is reclaimed only when the manager is deleted)
+	VALGRIND_MAKE_MEM_NOACCESS(req->iov[RX].iov_base, this->buffer_size);
+	VALGRIND_MAKE_MEM_NOACCESS(req->iov[TX].iov_base, this->buffer_size);
+	VALGRIND_MAKE_MEM_NOACCESS(req, sizeof(network_dns_request_t));
+}
+
+/*!
+ * \brief Allocate request.
+ *
+ * \param mgr Network request manager to be used.
+ *
+ * \retval Request allocated, NULL if failed.
+ */
+static network_dns_request_t* network_dns_request_manager_knot_mm_allocate_req(network_dns_request_manager_t *mgr) {
+	network_dns_request_manager_knot_mm_t *this = caa_container_of(mgr, network_dns_request_manager_knot_mm_t, base);
+	network_dns_request_t *req = NULL;
+	req = mm_calloc(&this->req_allocation_mm, 1, sizeof(network_dns_request_t));
+	if (req == NULL) {
+		return NULL;
+	}
+
+	for (unsigned i = 0; i < NBUFS; ++i) {
+		req->iov[i].iov_base = mm_alloc(&this->req_allocation_mm, this->buffer_size);
+		if (req->iov[i].iov_base == NULL) {
+			// free_req cannot actually return mempool memory; it only marks
+			// the partial request inaccessible. The memory is reclaimed when
+			// the manager is deleted.
+			network_dns_request_manager_knot_mm_free_req(&this->base, req);
+			return NULL;
+		}
+		req->iov[i].iov_len = this->buffer_size;
+	}
+
+	// Wire the handler-facing view of the buffers and the shared mempool.
+	req->dns_req.req_data.rx = &req->iov[RX];
+	req->dns_req.req_data.tx = &req->iov[TX];
+	req->dns_req.req_data.mm = &this->query_processing_mm;
+	req->dns_req.req_data.xdp_msg = NULL;
+
+	return req;
+}
+
+/*!
+ * \brief Allocate memory for data associated with request.
+ *
+ * \param mgr Network request manager to be used.
+ * \param size size of memory to allocate.
+ *
+ * \retval Memory allocated. NULL if failed.
+ */
+static void* network_dns_request_manager_knot_mm_allocate_mem(network_dns_request_manager_t *mgr, size_t size) {
+	network_dns_request_manager_knot_mm_t *this = caa_container_of(mgr, network_dns_request_manager_knot_mm_t, base);
+	// Allocations live until the manager is deleted (mempool has no free()).
+	return mm_alloc(&this->req_allocation_mm, size);
+}
+
+/*!
+ * \brief Free memory for data associated with request previously allocated using allocate_mem_func.
+ *
+ * \param mgr Network request manager to be used.
+ * \param mem Memory previously allocated using allocate_mem_func.
+ */
+void network_dns_request_manager_knot_mm_free_mem(struct network_dns_request_manager *mgr, void *mem) {
+	// Individual allocation cannot be freed when done from knot_mm.
+	// Intentional no-op: the whole mempool is released in delete_req_manager.
+}
+
+/*!
+ * \brief Reset request to handle new request.
+ *
+ * \param mgr Network request manager to be used.
+ * \param req Request to be reset.
+ */
+static void network_dns_request_manager_knot_mm_reset_request(network_dns_request_manager_t *mgr, network_dns_request_t *req) {
+	network_dns_request_manager_knot_mm_t *this = caa_container_of(mgr, network_dns_request_manager_knot_mm_t, base);
+	// Restore full capacity; previous processing may have shrunk the iov lengths.
+	req->iov[RX].iov_len = this->buffer_size;
+	req->iov[TX].iov_len = this->buffer_size;
+
+	// Reusing buffer, make buffer not initialized to avoid previous request data considered valid for new request.
+	VALGRIND_MAKE_MEM_UNDEFINED(req->iov[RX].iov_base, this->buffer_size);
+	VALGRIND_MAKE_MEM_UNDEFINED(req->iov[TX].iov_base, this->buffer_size);
+
+	dns_handler_request_clear_handler_data(req->dns_req);
+}
+
+/*!
+ * \brief Delete the request manager.
+ *
+ * \param mgr Network request manager to be deleted.
+ */
+static void network_dns_request_manager_knot_mm_delete(network_dns_request_manager_t *mgr) {
+	network_dns_request_manager_knot_mm_t *this = caa_container_of(mgr, network_dns_request_manager_knot_mm_t, base);
+	// Releasing the allocation mempool frees every request ever handed out.
+	mp_delete(this->req_allocation_mm.ctx);
+	mp_delete(this->query_processing_mm.ctx);
+	// Poison the struct to make use-after-delete easier to spot.
+	memset(this, 0, sizeof(*this));
+	free(this);
+}
+
+#ifdef ENABLE_ASYNC_QUERY_HANDLING
+#include "knot/include/lstack.h"
+
+/*! \brief Pooled network request manager data. */
+typedef struct network_dns_request_pool_manager {
+       network_dns_request_manager_t base;         //!< Base network request manager function pointers.
+
+       size_t buffer_size;                         //!< Buffer size used in rx/tx while allocating the DNS request.
+       size_t memory_size;                         //!< memory size to use for processing DNS request.
+       knot_mm_t req_allocation_mm;                //!< mm used for request allocation only. All memory for request are freed when request manager is destroyed.
+       knotd_lockless_stack_t free_pool;           //!< Stack of freed request pool.
+       atomic_shared_dns_request_manager_t *shared_req_mgr; //!< Shared resource manager.
+} network_dns_request_pool_manager_t;
+
+/*!
+ * \brief Free network request allocated by adding to free queue
+ *
+ * \param mgr Network request manager used for allocating req.
+ * \param req Request to be freed.
+ */
+static void network_dns_request_pool_manager_free_req(network_dns_request_manager_t *mgr, network_dns_request_t *req) {
+	network_dns_request_pool_manager_t *this = caa_container_of(mgr, network_dns_request_pool_manager_t, base);
+	// A request must not already be linked into the free stack (double free).
+	assert(req->free_list_node.next == NULL);
+
+	// Reset request for reuse and put it in the pool
+	mgr->restore_network_request_func(mgr, req);
+
+	// Make the request memory inaccessible
+	VALGRIND_MAKE_MEM_NOACCESS(req->iov[RX].iov_base, this->buffer_size);
+	VALGRIND_MAKE_MEM_NOACCESS(req->iov[TX].iov_base, this->buffer_size);
+	VALGRIND_MAKE_MEM_NOACCESS(req, sizeof(network_dns_request_t));
+	// except free_list_node which is needed for maintaining free list.
+	VALGRIND_MAKE_MEM_UNDEFINED(&req->free_list_node, sizeof(req->free_list_node));
+
+	// Put the request in free pool
+	knotd_lockless_stack_push(&this->free_pool, &req->free_list_node);
+}
+
+/*!
+ * \brief Allocate request from pool.
+ *
+ * \param mgr Network request manager to be used.
+ *
+ * \retval Request allocated, NULL if failed (pool exhausted).
+ */
+static network_dns_request_t* network_dns_request_pool_manager_allocate_req(network_dns_request_manager_t *mgr) {
+	network_dns_request_pool_manager_t *this = caa_container_of(mgr, network_dns_request_pool_manager_t, base);
+	knotd_lockless_stack_node_t *free_node = knotd_lockless_stack_pop(&this->free_pool);
+	if (free_node == NULL) {
+		// Pool exhausted — the pool is sized at creation and never grows here.
+		return NULL;
+	}
+
+	network_dns_request_t* req = caa_container_of(free_node, network_dns_request_t, free_list_node);
+	// Make everything in req available to read. Buffers available but not initialized.
+	// Request was initialized as part of creation, but we intentionally made it inaccessible when it is in free list.
+	VALGRIND_MAKE_MEM_DEFINED(req, sizeof(network_dns_request_t));
+	VALGRIND_MAKE_MEM_UNDEFINED(req->iov[RX].iov_base, this->buffer_size);
+	VALGRIND_MAKE_MEM_UNDEFINED(req->iov[TX].iov_base, this->buffer_size);
+	return req;
+}
+
+/*!
+ * \brief Allocate request.
+ *
+ * \param this Network request manager to be used.
+ *
+ * \retval Request allocated, NULL if failed.
+ */
+static network_dns_request_t* network_dns_request_pool_manager_real_allocate(network_dns_request_pool_manager_t *this) {
+	network_dns_request_t *req = mm_calloc(&this->req_allocation_mm, 1, sizeof(network_dns_request_t));
+	if (req == NULL) {
+		return NULL;
+	}
+
+	for (unsigned i = 0; i < NBUFS; ++i) {
+		req->iov[i].iov_base = mm_alloc(&this->req_allocation_mm, this->buffer_size);
+		if (req->iov[i].iov_base == NULL) {
+			// BUGFIX: do NOT hand the half-built request to free_req here.
+			// free_req pushes it onto the free pool, so a later allocate_req
+			// would return a request with a NULL iov_base. The mempool memory
+			// is reclaimed when the manager is deleted, so just fail.
+			return NULL;
+		}
+		req->iov[i].iov_len = this->buffer_size;
+	}
+
+	// Wire the handler-facing view of the buffers; each pooled request gets
+	// its own query-processing mempool so requests can be processed async.
+	req->dns_req.req_data.rx = &req->iov[RX];
+	req->dns_req.req_data.tx = &req->iov[TX];
+	req->dns_req.req_data.mm = mm_calloc(&this->req_allocation_mm, 1, sizeof(knot_mm_t));
+	if (req->dns_req.req_data.mm == NULL) {
+		return NULL;
+	}
+	mm_ctx_mempool(req->dns_req.req_data.mm, this->memory_size);
+	req->dns_req.req_data.xdp_msg = NULL;
+
+	return req;
+}
+
+/*!
+ * \brief Allocate memory for data associated with request.
+ *
+ * \param mgr Network request manager to be used.
+ * \param size size of memory to allocate.
+ *
+ * \retval Memory allocated. NULL if failed.
+ */
+static void* network_dns_request_pool_manager_allocate_mem(network_dns_request_manager_t *mgr, size_t size) {
+	// Don't allocate from knot_mm. It will lead to race condition as pool manager is shared by threads.
+	// Only DNS requests are preallocated to avoid race condition. Individual memory should be allocated using malloc,
+	// and released via free_mem_func (plain free).
+	return malloc(size);
+}
+
+/*!
+ * \brief Free memory for data associated with request previously allocated using allocate_mem_func.
+ *
+ * \param mgr Network request manager to be used.
+ * \param mem Memory previously allocated using allocate_mem_func.
+ */
+void network_dns_request_pool_manager_free_mem(struct network_dns_request_manager *mgr, void *mem) {
+	// Counterpart of allocate_mem above, which uses plain malloc.
+	free(mem);
+}
+
+/*!
+ * \brief Reset request to handle new request.
+ *
+ * \param mgr Network request manager to be used.
+ * \param req Request to be reset.
+ */
+static void network_dns_request_pool_manager_reset_request(network_dns_request_manager_t *mgr, network_dns_request_t *req) {
+	network_dns_request_pool_manager_t *this = caa_container_of(mgr, network_dns_request_pool_manager_t, base);
+	// Restore full capacity; previous processing may have shrunk the iov lengths.
+	req->iov[RX].iov_len = this->buffer_size;
+	req->iov[TX].iov_len = this->buffer_size;
+
+	// Reusing buffer, make buffer not initialized to avoid previous request data considered valid for new request.
+	VALGRIND_MAKE_MEM_UNDEFINED(req->iov[RX].iov_base, this->buffer_size);
+	VALGRIND_MAKE_MEM_UNDEFINED(req->iov[TX].iov_base, this->buffer_size);
+
+	dns_handler_request_clear_handler_data(req->dns_req);
+}
+
+/*!
+ * \brief Delete the request manager.
+ *
+ * \param mgr Network request manager to be deleted.
+ */
+static void network_dns_request_pool_manager_delete(network_dns_request_manager_t *mgr) {
+	network_dns_request_pool_manager_t *this = caa_container_of(mgr, network_dns_request_pool_manager_t, base);
+	struct shared_dns_request_manager expect, new_value = {0};
+	// Atomically drop one reference; the thread that takes the count to zero
+	// also clears the shared pointer and tears the manager down.
+	do {
+		KNOT_ATOMIC_GET(this->shared_req_mgr, expect);
+		new_value.ref_count = expect.ref_count - 1;
+		if (expect.ref_count == 1) {
+			// last reference
+			new_value.req_mgr = NULL;
+		} else {
+			new_value.req_mgr = expect.req_mgr;
+		}
+	} while (!KNOT_ATOMIC_COMPARE_EXCHANGE_WEAK(this->shared_req_mgr, expect, new_value));
+
+	if ( new_value.ref_count == 0) {
+		// free the request manager
+		mp_delete(this->req_allocation_mm.ctx);
+		knotd_lockless_stack_cleanup(&this->free_pool);
+		memset(this, 0, sizeof(*this));
+		free(this);
+	}
+}
+#endif
+
+/*!
+ * \brief Creates the network manager which allocates the DNS requests using malloc/free.
+ * \brief This request manager uses a single knot_mm for all requests allocated. So can't be used for async.
+ *
+ * \param buffer_size Buffer size to be used for dns request/response.
+ * \param memory_size Memory size to be used when handling the DNS request.
+ *
+ * \retval DNS request manager on success. NULL otherwise.
+ */
+network_dns_request_manager_t *network_dns_request_manager_basic_create(size_t buffer_size, size_t memory_size) {
+	network_dns_request_manager_basic_t *this = calloc(1, sizeof(*this));
+	if (this == NULL) {
+		return NULL;
+	}
+
+	// Fill in the vtable with the malloc/free-based implementations.
+	this->base.allocate_network_request_func = network_dns_request_manager_basic_allocate_req;
+	this->base.allocate_mem_func = network_dns_request_manager_basic_allocate_mem;
+	this->base.restore_network_request_func = network_dns_request_manager_basic_reset_request;
+	this->base.free_network_request_func = network_dns_request_manager_basic_free_req;
+	this->base.free_mem_func = network_dns_request_manager_basic_free_mem;
+	this->base.delete_req_manager = network_dns_request_manager_basic_delete;
+
+	this->buffer_size = buffer_size;
+	// Single shared query-processing mempool — the reason this manager
+	// cannot serve async requests.
+	mm_ctx_mempool(&this->query_processing_mm, memory_size);
+
+	return &this->base;
+}
+
+/*!
+ * \brief Creates the network manager which allocates the DNS requests using knot_mm_t.
+ * \brief This request manager uses a single knot_mm for all requests allocated. So can't be used for async.
+ * \brief Since knot_mm_t does not support free, any request allocated using this manager will be freed when this manager is destroyed.
+ *
+ * \param buffer_size Buffer size to be used for dns request/response.
+ * \param memory_size Memory size to be used when handling the DNS request.
+ *
+ * \retval DNS request manager on success. NULL otherwise.
+ */
+network_dns_request_manager_t *network_dns_request_manager_knot_mm_create(size_t buffer_size, size_t memory_size) {
+	network_dns_request_manager_knot_mm_t *this = calloc(1, sizeof(*this));
+	if (this == NULL) {
+		return NULL;
+	}
+
+	// Fill in the vtable with the mempool-based implementations.
+	this->base.allocate_network_request_func = network_dns_request_manager_knot_mm_allocate_req;
+	this->base.allocate_mem_func = network_dns_request_manager_knot_mm_allocate_mem;
+	this->base.restore_network_request_func = network_dns_request_manager_knot_mm_reset_request;
+	this->base.free_network_request_func = network_dns_request_manager_knot_mm_free_req;
+	this->base.free_mem_func = network_dns_request_manager_knot_mm_free_mem;
+	this->base.delete_req_manager = network_dns_request_manager_knot_mm_delete;
+
+	this->buffer_size = buffer_size;
+	// buffer_size doubles as the allocation-mempool chunk size here.
+	mm_ctx_mempool(&this->req_allocation_mm, buffer_size);
+	mm_ctx_mempool(&this->query_processing_mm, memory_size);
+
+	return &this->base;
+}
+
+#ifdef ENABLE_ASYNC_QUERY_HANDLING
+/*!
+ * \brief Creates the network manager which allocates the DNS requests and manages the pool.
+ * \brief Any freed request will be added to free pool, and hence memory is not released.
+ * \brief Deleting the request manager frees the memory.
+ *
+ * \param shared_req_mgr Shared request pool manager. Should be initialized with init_shared_req_mgr.
+ * \param buffer_size Buffer size to be used for dns request/response.
+ * \param memory_size Memory size to be used when handling the DNS request.
+ * \param pool_size Number of requests to maintain in the pool initially.
+ *
+ * \retval DNS request manager on success. NULL otherwise.
+ */
+network_dns_request_manager_t* network_dns_request_pool_manager_create(atomic_shared_dns_request_manager_t *shared_req_mgr, size_t buffer_size, size_t memory_size, size_t pool_size) {
+	struct shared_dns_request_manager expect, new_value = {0};
+	// Sentinel: while req_mgr points at the shared slot itself, another
+	// thread is in the middle of creating the manager.
+	void *is_being_initialized_value = (void*)shared_req_mgr;
+
+	// Atomically either claim the right to create the shared manager,
+	// wait for a concurrent creator to finish, or take a reference on an
+	// already-created manager.
+	do {
+		KNOT_ATOMIC_GET(shared_req_mgr, expect);
+		if (expect.req_mgr == NULL) {
+			// not initialized. Try to take a lock.
+			new_value.req_mgr = is_being_initialized_value;
+			new_value.ref_count = 0;
+			if (KNOT_ATOMIC_COMPARE_EXCHANGE_WEAK(shared_req_mgr, expect, new_value)) {
+				// we got the lock to create it.
+				break;
+			}
+		}
+		else if (expect.req_mgr == is_being_initialized_value) {
+			// still being initialized; back off ~10ms and re-check.
+			struct timespec ten_ms = { 0, 10000000};
+			nanosleep(&ten_ms, &ten_ms);
+		}
+		else {
+			new_value.req_mgr = expect.req_mgr;
+			new_value.ref_count = expect.ref_count + 1;
+			if (KNOT_ATOMIC_COMPARE_EXCHANGE_WEAK(shared_req_mgr, expect, new_value)) {
+				// we got the reference to the mgr
+				return expect.req_mgr;
+			}
+		}
+	}
+	while (true);
+
+	// We own creation from here; on any failure reset the shared slot to
+	// "uninitialized" so other threads can retry instead of spinning forever.
+	new_value.req_mgr = NULL;
+	new_value.ref_count = 0;
+	network_dns_request_pool_manager_t *this = calloc(1, sizeof(*this));
+	if (this == NULL) {
+		KNOT_ATOMIC_INIT(*shared_req_mgr, new_value);
+		return NULL;
+	}
+
+	this->base.allocate_network_request_func = network_dns_request_pool_manager_allocate_req;
+	this->base.allocate_mem_func = network_dns_request_pool_manager_allocate_mem;
+	this->base.restore_network_request_func = network_dns_request_pool_manager_reset_request;
+	this->base.free_network_request_func = network_dns_request_pool_manager_free_req;
+	this->base.free_mem_func = network_dns_request_pool_manager_free_mem;
+	this->base.delete_req_manager = network_dns_request_pool_manager_delete;
+
+	this->shared_req_mgr = shared_req_mgr;
+	this->buffer_size = buffer_size;
+	this->memory_size = memory_size;
+
+	if (knotd_lockless_stack_init(&this->free_pool) != 0) {
+		free(this);
+		KNOT_ATOMIC_INIT(*shared_req_mgr, new_value);
+		return NULL;
+	}
+
+	mm_ctx_mempool(&this->req_allocation_mm, buffer_size);
+
+	// Pre-populate the free pool with pool_size requests.
+	for (size_t i = 0; i < pool_size; i++) {
+		network_dns_request_t* req = network_dns_request_pool_manager_real_allocate(this);
+		if (req == NULL) {
+			mp_delete(this->req_allocation_mm.ctx);
+			// BUGFIX: release the lockless-stack resources on this failure
+			// path; they were previously leaked.
+			knotd_lockless_stack_cleanup(&this->free_pool);
+			free(this);
+			KNOT_ATOMIC_INIT(*shared_req_mgr, new_value);
+			return NULL;
+		}
+		knotd_lockless_stack_push(&this->free_pool, &req->free_list_node);
+	}
+
+	// Publish the finished manager with an initial reference count of 1.
+	new_value.req_mgr = &this->base;
+	new_value.ref_count = 1;
+	KNOT_ATOMIC_INIT(*shared_req_mgr, new_value);
+	return new_value.req_mgr;
+}
+#endif
diff --git a/src/knot/server/network_req_manager.h b/src/knot/server/network_req_manager.h
new file mode 100644 (file)
index 0000000..71c8b33
--- /dev/null
@@ -0,0 +1,90 @@
+#pragma once
+#include <urcu.h>
+#include "knot/server/dns-handler.h"
+#ifdef ENABLE_ASYNC_QUERY_HANDLING
+#include "knot/include/lstack.h"
+#endif
+#ifdef KNOT_ENABLE_NUMA
+#include <numa.h>
+#define KNOT_MAX_NUMA 16
+#else
+#define KNOT_MAX_NUMA 1
+#endif
+#include "knot/common/log.h"
+
+/*! \brief Control message to fit IP_PKTINFO or IPv6_RECVPKTINFO. */
+typedef union {
+	struct cmsghdr cmsg;
+	// Sized to hold the larger IPv6 pktinfo; also covers IPv4 IP_PKTINFO.
+	uint8_t buf[CMSG_SPACE(sizeof(struct in6_pktinfo))];
+} cmsg_pktinfo_t;
+
+/*! \brief DNS request structure allocated for networking layer. */
+typedef struct network_dns_request {
+#ifdef ENABLE_ASYNC_QUERY_HANDLING
+	knotd_lockless_stack_node_t free_list_node; //!< Lockless stack node.
+#endif
+	dns_handler_request_t dns_req;              //!< dns request part for the handler.
+	struct iovec iov[NBUFS];                    //!< IOV used in network API for this DNS request.
+	size_t msg_namelen_received;                //!< Message name length received.
+	size_t msg_controllen_received;             //!< Message control length received.
+	cmsg_pktinfo_t pktinfo;                     //!< Request's DNS cmsg info.
+} network_dns_request_t;
+
+/*! \brief Network request manager that handles allocation/deallocation/reset. */
+/* Vtable-style interface: each concrete manager (basic / knot_mm / pool)
+ * fills these pointers in its *_create function. */
+typedef struct network_dns_request_manager {
+	network_dns_request_t* (*allocate_network_request_func)(struct network_dns_request_manager *);        //!< allocate request call.
+	void* (*allocate_mem_func)(struct network_dns_request_manager *, size_t);                     //!< allocate memory for request call.
+	void (*restore_network_request_func)(struct network_dns_request_manager *, network_dns_request_t *);  //!< Restore request state after a query is executed to prepare for next request.
+	void (*free_network_request_func)(struct network_dns_request_manager *, network_dns_request_t *);     //!< Free previously allocated request.
+	void (*free_mem_func)(struct network_dns_request_manager *, void *);                     //!< Free memory allocated in allocate_mem_func.
+	void (*delete_req_manager)(struct network_dns_request_manager *);                             //!< Delete the dns request manager.
+} network_dns_request_manager_t;
+
+/*!
+ * \brief Creates the network manager which allocates the DNS requests using malloc/free.
+ * \brief This request manager uses a single knot_mm for all requests allocated. So can't be used for async.
+ *
+ * \param buffer_size Buffer size to be used for dns request/response.
+ * \param memory_size Memory size to be used when handling the DNS request.
+ *
+ * \retval DNS request manager on success. NULL otherwise.
+ */
+network_dns_request_manager_t *network_dns_request_manager_basic_create(size_t buffer_size, size_t memory_size);
+
+/*!
+ * \brief Creates the network manager which allocates the DNS requests using knot_mm_t.
+ * \brief This request manager uses a single knot_mm for all requests allocated. So can't be used for async.
+ * \brief Since knot_mm_t does not support free, any request allocated using this manager will be freed when this manager is destroyed.
+ *
+ * \param buffer_size Buffer size to be used for dns request/response.
+ * \param memory_size Memory size to be used when handling the DNS request.
+ *
+ * \retval DNS request manager on success. NULL otherwise.
+ */
+network_dns_request_manager_t *network_dns_request_manager_knot_mm_create(size_t buffer_size, size_t memory_size);
+
+#ifdef ENABLE_ASYNC_QUERY_HANDLING
+struct shared_dns_request_manager {
+       KNOT_ALIGN(16)
+    network_dns_request_manager_t *req_mgr;
+    int ref_count;
+};
+
+typedef KNOT_ATOMIC struct shared_dns_request_manager atomic_shared_dns_request_manager_t;
+
+#define init_shared_req_mgr(shared_req_mgr) { struct shared_dns_request_manager __t = {0}; KNOT_ATOMIC_INIT(shared_req_mgr, __t); }
+
+/*!
+ * \brief Creates the network manager which allocates the DNS requests and manages the pool.
+ * \brief Any freed request will be added to free pool, and hence memory is not released.
+ * \brief Deleting the request manager frees the memory.
+ *
+ * \param shared_req_mgr Shared request pool manager. Should be initialized with init_shared_req_mgr.
+ * \param buffer_size Buffer size to be used for dns request/response.
+ * \param memory_size Memory size to be used when handling the DNS request.
+ * \param pool_size Number of requests to maintain in the pool initially.
+ *
+ * \retval DNS request manager on success. NULL otherwise.
+ */
+network_dns_request_manager_t *network_dns_request_pool_manager_create(atomic_shared_dns_request_manager_t *shared_req_mgr, size_t buffer_size, size_t memory_size, size_t pool_size);
+#endif
index bac00a9dbe8ce2b133a537f2c0de2ed8a2a0b293..396133df0731d95f614d3ffdb9f6d740e19f72dc 100644 (file)
@@ -43,6 +43,9 @@
 #include "contrib/os.h"
 #include "contrib/sockaddr.h"
 #include "contrib/trim.h"
+#ifdef KNOT_ENABLE_NUMA
+#include <numa.h>
+#endif
 
 #ifdef ENABLE_XDP
 #include <net/if.h>
@@ -1085,7 +1088,25 @@ static int set_handler(server_t *server, int index, unsigned size, runnable_t ru
 
 static int configure_threads(conf_t *conf, server_t *server)
 {
-       int ret = set_handler(server, IO_UDP, conf->cache.srv_udp_threads, udp_master);
+       int ret;
+#ifdef ENABLE_ASYNC_QUERY_HANDLING
+       bool use_numa = conf->cache.numa_enabled;
+#ifdef KNOT_ENABLE_NUMA
+       use_numa = use_numa && (-1 != numa_available());
+#endif
+
+       ret = init_udp_async(conf->cache.udp_srv_async_reqs, use_numa);
+       if (ret != KNOT_EOK) {
+               return ret;
+       }
+
+       ret = init_tcp_async(conf->cache.tcp_srv_async_reqs, use_numa);
+       if (ret != KNOT_EOK) {
+               return ret;
+       }
+#endif
+
+       ret = set_handler(server, IO_UDP, conf->cache.srv_udp_threads, udp_master);
        if (ret != KNOT_EOK) {
                return ret;
        }
index 978a82c0223ede9005886351b2dbec6a8c06162a..524e060441e3ea6b332cec80cdeac4be50844a5c 100644 (file)
 #include "contrib/sockaddr.h"
 #include "contrib/time.h"
 #include "contrib/ucw/mempool.h"
+#include "knot/server/dns-handler.h"
+#include "knot/server/network_req_manager.h"
+#include "knot/common/stats.h"
 
 /*! \brief TCP context data. */
 typedef struct tcp_context {
-       knot_layer_t layer;              /*!< Query processing layer. */
-       server_t *server;                /*!< Name server structure. */
-       struct iovec iov[2];             /*!< TX/RX buffers. */
+       network_dns_request_manager_t *req_mgr;          /*!< DNS request manager. */
+       dns_request_handler_context_t dns_handler;       /*!< DNS request handler context. */
+       network_dns_request_t *tcp_req;       /*!< DNS request. */
        unsigned client_threshold;       /*!< Index of first TCP client. */
        struct timespec last_poll_time;  /*!< Time of the last socket poll. */
        bool is_throttled;               /*!< TCP connections throttling switch. */
        fdset_t set;                     /*!< Set of server/client sockets. */
-       unsigned thread_id;              /*!< Thread identifier. */
        unsigned max_worker_fds;         /*!< Max TCP clients per worker configuration + no. of ifaces. */
        int idle_timeout;                /*!< [s] TCP idle timeout configuration. */
        int io_timeout;                  /*!< [ms] TCP send/recv timeout configuration. */
+#ifdef ENABLE_ASYNC_QUERY_HANDLING
+       int async_fd;                    /*!< Async notification file descriptor. */
+#endif
 } tcp_context_t;
 
 #define TCP_SWEEP_INTERVAL 2 /*!< [secs] granularity of connection sweeping. */
@@ -87,9 +92,17 @@ static void client_addr(const struct sockaddr_storage *ss, char *out, size_t out
 }
 
 /*! \brief Sweep TCP connection. */
-static fdset_sweep_state_t tcp_sweep(fdset_t *set, int fd, _unused_ void *data)
+static fdset_sweep_state_t tcp_sweep(fdset_t *set, int fd, _unused_ void *ctx, _unused_ void *data)
 {
-       assert(set && fd >= 0);
+       assert(set && fd >= 0 && data != NULL);
+
+       _unused_ tcp_context_t *tcp = (tcp_context_t*)data;
+#ifdef ENABLE_ASYNC_QUERY_HANDLING
+       network_dns_request_t *req = (network_dns_request_t *) ctx;
+       if (req != NULL) {
+               dns_handler_cancel_request(req->dns_req);
+       }
+#endif
 
        /* Best-effort, name and shame. */
        struct sockaddr_storage ss = { 0 };
@@ -103,16 +116,6 @@ static fdset_sweep_state_t tcp_sweep(fdset_t *set, int fd, _unused_ void *data)
        return FDSET_SWEEP;
 }
 
-static bool tcp_active_state(int state)
-{
-       return (state == KNOT_STATE_PRODUCE || state == KNOT_STATE_FAIL);
-}
-
-static bool tcp_send_state(int state)
-{
-       return (state != KNOT_STATE_FAIL && state != KNOT_STATE_NOOP);
-}
-
 static void tcp_log_error(struct sockaddr_storage *ss, const char *operation, int ret)
 {
        /* Don't log ECONN as it usually means client closed the connection. */
@@ -156,70 +159,41 @@ static unsigned tcp_set_ifaces(const iface_t *ifaces, size_t n_ifaces,
        return fdset_get_length(fds);
 }
 
-static int tcp_handle(tcp_context_t *tcp, int fd, struct iovec *rx, struct iovec *tx)
+static int tcp_handle(tcp_context_t *tcp, _unused_ unsigned idx)
 {
+       network_dns_request_t *tcp_req = tcp->tcp_req;
        /* Get peer name. */
-       struct sockaddr_storage ss;
        socklen_t addrlen = sizeof(struct sockaddr_storage);
-       if (getpeername(fd, (struct sockaddr *)&ss, &addrlen) != 0) {
+       if (getpeername(tcp_req->dns_req.req_data.fd, (struct sockaddr *)&tcp_req->dns_req.req_data.source_addr, &addrlen) != 0) {
                return KNOT_EADDRNOTAVAIL;
        }
 
-       /* Create query processing parameter. */
-       knotd_qdata_params_t params = {
-               .remote = &ss,
-               .socket = fd,
-               .server = tcp->server,
-               .thread_id = tcp->thread_id
-       };
+       addrlen = sizeof(struct sockaddr_storage);
+       if (getsockname(tcp_req->dns_req.req_data.fd, (struct sockaddr *)&tcp_req->dns_req.req_data.target_addr, &addrlen) != 0) {
+               return KNOT_EADDRNOTAVAIL;
+       }
 
-       rx->iov_len = KNOT_WIRE_MAX_PKTSIZE;
-       tx->iov_len = KNOT_WIRE_MAX_PKTSIZE;
+       tcp->req_mgr->restore_network_request_func(tcp->req_mgr, tcp_req);
 
        /* Receive data. */
-       int recv = net_dns_tcp_recv(fd, rx->iov_base, rx->iov_len, tcp->io_timeout);
+       int recv = net_dns_tcp_recv(tcp_req->dns_req.req_data.fd, tcp_req->dns_req.req_data.rx->iov_base, tcp_req->dns_req.req_data.rx->iov_len, tcp->io_timeout);
        if (recv > 0) {
-               rx->iov_len = recv;
+               tcp_req->dns_req.req_data.rx->iov_len = recv;
        } else {
-               tcp_log_error(&ss, "receive", recv);
+               tcp_log_error(&tcp_req->dns_req.req_data.source_addr, "receive", recv);
                return KNOT_EOF;
        }
 
-       /* Initialize processing layer. */
-       knot_layer_begin(&tcp->layer, &params);
-
-       /* Create packets. */
-       knot_pkt_t *ans = knot_pkt_new(tx->iov_base, tx->iov_len, tcp->layer.mm);
-       knot_pkt_t *query = knot_pkt_new(rx->iov_base, rx->iov_len, tcp->layer.mm);
+       int ret = handle_dns_request(&tcp->dns_handler, &tcp_req->dns_req);
+#ifdef ENABLE_ASYNC_QUERY_HANDLING
+       if (dns_handler_request_is_async(tcp_req->dns_req)) {
+               // Save the request on tcp connection context
+               fdset_set_ctx(&tcp->set, idx, tcp->tcp_req);
 
-       /* Input packet. */
-       int ret = knot_pkt_parse(query, 0);
-       if (ret != KNOT_EOK && query->parsed > 0) { // parsing failed (e.g. 2x OPT)
-               query->parsed--; // artificially decreasing "parsed" leads to FORMERR
-       }
-       knot_layer_consume(&tcp->layer, query);
-
-       /* Resolve until NOOP or finished. */
-       while (tcp_active_state(tcp->layer.state)) {
-               knot_layer_produce(&tcp->layer, ans);
-               /* Send, if response generation passed and wasn't ignored. */
-               if (ans->size > 0 && tcp_send_state(tcp->layer.state)) {
-                       int sent = net_dns_tcp_send(fd, ans->wire, ans->size,
-                                                   tcp->io_timeout, NULL);
-                       if (sent != ans->size) {
-                               tcp_log_error(&ss, "send", sent);
-                               ret = KNOT_EOF;
-                               break;
-                       }
-               }
+               // Release it
+               tcp->tcp_req = NULL;
        }
-
-       /* Reset after processing. */
-       knot_layer_finish(&tcp->layer);
-
-       /* Flush per-query memory (including query and answer packets). */
-       mp_flush(tcp->layer.mm->ctx);
-
+#endif
        return ret;
 }
 
@@ -243,8 +217,18 @@ static void tcp_event_accept(tcp_context_t *tcp, unsigned i)
 
 static int tcp_event_serve(tcp_context_t *tcp, unsigned i)
 {
-       int ret = tcp_handle(tcp, fdset_get_fd(&tcp->set, i),
-                            &tcp->iov[0], &tcp->iov[1]);
+       if (tcp->tcp_req == NULL) {
+               // The previous TCP request went asynchronous; allocate a new request structure for this query.
+               tcp->tcp_req = tcp->req_mgr->allocate_network_request_func(tcp->req_mgr);
+
+               if (tcp->tcp_req == NULL) {
+                       server_stats_increment_counter(server_stats_tcp_no_req_obj, 1);
+                       return KNOT_EOK; // ignore processing now
+               }
+       }
+
+       tcp->tcp_req->dns_req.req_data.fd = fdset_get_fd(&tcp->set, i);
+       int ret = tcp_handle(tcp, i);
        if (ret == KNOT_EOK) {
                /* Update socket activity timer. */
                (void)fdset_set_watchdog(&tcp->set, i, tcp->idle_timeout);
@@ -277,28 +261,118 @@ static void tcp_wait_for_events(tcp_context_t *tcp)
                unsigned int idx = fdset_it_get_idx(&it);
                if (fdset_it_is_error(&it)) {
                        should_close = (idx >= tcp->client_threshold);
+#ifdef ENABLE_ASYNC_QUERY_HANDLING
+               } else if (idx == tcp->client_threshold - 1) {
+                       // Async completion notification
+                       server_stats_increment_counter(server_stats_tcp_async_done, 1);
+                       handle_dns_request_async_completed_queries(&tcp->dns_handler);
+#endif
                } else if (fdset_it_is_pollin(&it)) {
                        /* Master sockets - new connection to accept. */
                        if (idx < tcp->client_threshold) {
                                /* Don't accept more clients than configured. */
                                if (fdset_get_length(set) < tcp->max_worker_fds) {
+                                       server_stats_increment_counter(server_stats_tcp_accept, 1);
                                        tcp_event_accept(tcp, idx);
                                }
                        /* Client sockets - already accepted connection or
                           closed connection :-( */
-                       } else if (tcp_event_serve(tcp, idx) != KNOT_EOK) {
-                               should_close = true;
+#ifdef ENABLE_ASYNC_QUERY_HANDLING
+                       } else if (fdset_get_ctx(&tcp->set, idx) != NULL) {
+                               // Received another request before completing the current one, ignore for now
+                               // TODO: implement full pipelining support for concurrent requests on one connection.
+                               server_stats_increment_counter(server_stats_tcp_multiple_req, 1);
+#endif
+                       } else {
+                               server_stats_increment_counter(server_stats_tcp_received, 1);
+                               if (tcp_event_serve(tcp, idx) != KNOT_EOK) {
+                                       should_close = true;
+                               }
                        }
                }
 
                /* Evaluate. */
                if (should_close) {
+#ifdef ENABLE_ASYNC_QUERY_HANDLING
+                       network_dns_request_t *req = (network_dns_request_t *) fdset_it_get_ctx(&it);
+                       if (req != NULL) {
+                               dns_handler_cancel_request(req->dns_req);
+                       }
+#endif
                        fdset_it_remove(&it);
                }
        }
        fdset_it_commit(&it);
 }
 
+static int tcp_send_produced_result(dns_request_handler_context_t *dns_handler, dns_handler_request_t *req, size_t size) {
+       tcp_context_t *tcp = caa_container_of(dns_handler, tcp_context_t, dns_handler);
+       int sent = net_dns_tcp_send(req->req_data.fd, req->req_data.tx->iov_base, size,
+                                                               tcp->io_timeout, NULL);
+       if (sent != size) {
+               tcp_log_error(&req->req_data.source_addr, "send", sent);
+       }
+
+       return sent;
+}
+
+#ifdef ENABLE_ASYNC_QUERY_HANDLING
+static bool use_numa = false;
+static bool tcp_use_async = false;
+static atomic_shared_dns_request_manager_t tcp_shared_req_mgr[KNOT_MAX_NUMA];
+static size_t tcp_req_pool_size;
+
+/*!
+ * \brief Initialize tcp async.
+ *
+ * \param pool_size Request pool size.
+ * \param numa_enabled Indicates if numa available.
+ *
+ * \retval KNOT_EOK on success.
+ */
+int init_tcp_async(size_t pool_size, bool numa_enabled) {
+       for (int i = 0; i < KNOT_MAX_NUMA; i++) {
+               init_shared_req_mgr(tcp_shared_req_mgr[i]);
+       }
+       tcp_req_pool_size = pool_size;
+       tcp_use_async = true;
+       use_numa = numa_enabled;
+       return KNOT_EOK;
+}
+
+static void tcp_async_query_completed_callback(dns_request_handler_context_t *net, dns_handler_request_t *req) {
+       tcp_context_t *tcp = caa_container_of(net, tcp_context_t, dns_handler);
+       network_dns_request_t *tcp_req = caa_container_of(req, network_dns_request_t, dns_req);
+
+       if (!dns_handler_request_is_cancelled(tcp_req->dns_req)) {
+               bool err = false;
+               // Send the response
+               if (req->req_data.tx->iov_len > 0) {
+                       int size = req->req_data.tx->iov_len;
+                       int sent = net_dns_tcp_send(req->req_data.fd, req->req_data.tx->iov_base, size,
+                                                                               tcp->io_timeout, NULL);
+                       if (sent != size) {
+                               tcp_log_error(&req->req_data.source_addr, "send", sent);
+                               err = true;
+                       }
+               }
+
+               // Cleanup async req from fd to allow fd receive more request
+               int idx = fdset_get_index_for_fd(&tcp->set, req->req_data.fd);
+               fdset_set_ctx(&tcp->set, idx, NULL);
+
+               if (!err) {
+                       fdset_set_watchdog(&tcp->set, idx, tcp->idle_timeout);
+               }
+       }
+
+       // Free the request
+       tcp->req_mgr->free_network_request_func(tcp->req_mgr, tcp_req);
+}
+#endif
+
+
+
 int tcp_master(dthread_t *thread)
 {
        if (thread == NULL || thread->data == NULL) {
@@ -319,28 +393,52 @@ int tcp_master(dthread_t *thread)
        }
 #endif
 
-       int ret = KNOT_EOK;
+       _unused_ int numa_node = 0;
+#ifdef KNOT_ENABLE_NUMA
+       if (use_numa)
+       {
+               unsigned cpu = dt_online_cpus();
+               if (cpu > 1) {
+                       unsigned cpu_mask = (dt_get_id(thread) % cpu);
+                       dt_setaffinity(thread, &cpu_mask, 1);
+                       int cpu_numa_node = numa_node_of_cpu(cpu_mask);
+                       numa_node =  cpu_numa_node % KNOT_MAX_NUMA;
+                       log_info("TCP thread %d using numa %d, original %d", thread_id, numa_node, cpu_numa_node);
+               }
+       }
+#endif
 
-       /* Create big enough memory cushion. */
-       knot_mm_t mm;
-       mm_ctx_mempool(&mm, 16 * MM_DEFAULT_BLKSIZE);
+       int ret = KNOT_EOK;
 
        /* Create TCP answering context. */
-       tcp_context_t tcp = {
-               .server = handler->server,
-               .is_throttled = false,
-               .thread_id = thread_id,
-       };
-       knot_layer_init(&tcp.layer, &mm, process_query_layer());
-
-       /* Create iovec abstraction. */
-       for (unsigned i = 0; i < 2; ++i) {
-               tcp.iov[i].iov_len = KNOT_WIRE_MAX_PKTSIZE;
-               tcp.iov[i].iov_base = malloc(tcp.iov[i].iov_len);
-               if (tcp.iov[i].iov_base == NULL) {
-                       ret = KNOT_ENOMEM;
-                       goto finish;
-               }
+       tcp_context_t tcp = {0};
+       tcp.req_mgr =
+#ifdef ENABLE_ASYNC_QUERY_HANDLING
+               tcp_use_async ?
+                       network_dns_request_pool_manager_create(&tcp_shared_req_mgr[numa_node], KNOT_WIRE_MAX_PKTSIZE, 16 * MM_DEFAULT_BLKSIZE, tcp_req_pool_size) :
+#endif
+                       network_dns_request_manager_basic_create(KNOT_WIRE_MAX_PKTSIZE, 16 * MM_DEFAULT_BLKSIZE);
+       if (tcp.req_mgr == NULL) {
+               goto finish;
+       }
+
+       ret = initialize_dns_handle(
+               &tcp.dns_handler,
+               handler->server,
+               thread_id,
+               0,
+               tcp_send_produced_result
+#ifdef ENABLE_ASYNC_QUERY_HANDLING
+               ,tcp_async_query_completed_callback
+#endif
+               );
+       if (ret != KNOT_EOK) {
+               goto finish;
+       }
+
+       tcp.tcp_req = tcp.req_mgr->allocate_network_request_func(tcp.req_mgr);
+       if (tcp.tcp_req == NULL) {
+               goto finish;
        }
 
        /* Initialize sweep interval and TCP configuration. */
@@ -361,6 +459,15 @@ int tcp_master(dthread_t *thread)
                goto finish; /* Terminate on zero interfaces. */
        }
 
+#ifdef ENABLE_ASYNC_QUERY_HANDLING
+       tcp.async_fd = dns_request_handler_context_get_async_notify_handle(&tcp.dns_handler);
+       if (fdset_add(&tcp.set, tcp.async_fd, FDSET_POLLIN, NULL) < 0) {
+               goto finish;
+       }
+
+       tcp.client_threshold++;
+#endif
+
        for (;;) {
                /* Check for cancellation. */
                if (dt_is_cancelled(thread)) {
@@ -372,16 +479,27 @@ int tcp_master(dthread_t *thread)
 
                /* Sweep inactive clients and refresh TCP configuration. */
                if (tcp.last_poll_time.tv_sec >= next_sweep.tv_sec) {
-                       fdset_sweep(&tcp.set, &tcp_sweep, NULL);
+                       fdset_sweep(&tcp.set, &tcp_sweep, &tcp);
                        update_sweep_timer(&next_sweep);
                        update_tcp_conf(&tcp);
                }
        }
 
 finish:
-       free(tcp.iov[0].iov_base);
-       free(tcp.iov[1].iov_base);
-       mp_delete(mm.ctx);
+#ifdef ENABLE_ASYNC_QUERY_HANDLING
+       {
+               struct timespec five_sec = { 5, 0 };
+               nanosleep(&five_sec, &five_sec);
+       }
+#endif
+
+       if (tcp.tcp_req != NULL) {
+               tcp.req_mgr->free_network_request_func(tcp.req_mgr, tcp.tcp_req);
+       }
+       if (tcp.req_mgr != NULL) {
+               tcp.req_mgr->delete_req_manager(tcp.req_mgr);
+       }
+       cleanup_dns_handle(&tcp.dns_handler);
        fdset_clear(&tcp.set);
 
        return ret;
index b60ce8f1f73e47f8e02aedb62799b4746c435ced..ceecc0c573473936d1d435875a20faa13879bfca 100644 (file)
 
 #define TCP_BACKLOG_SIZE  10 /*!< TCP listen backlog size. */
 
+#ifdef ENABLE_ASYNC_QUERY_HANDLING
+/*!
+ * \brief Initialize tcp async.
+ *
+ * \param pool_size Request pool size.
+ * \param numa_enabled Indicates if numa available.
+ *
+ * \retval KNOT_EOK on success.
+ */
+int init_tcp_async(size_t pool_size, bool numa_enabled);
+#endif
+
 /*!
  * \brief TCP handler thread runnable.
  *
index 60b771c2eba99b5ad105421e951e02eab9cf38ba..a417a4352d28dfee2b9a4c262e39deb901f2774f 100644 (file)
 #include "knot/nameserver/process_query.h"
 #include "knot/query/layer.h"
 #include "knot/server/server.h"
+#include "knot/server/dns-handler.h"
 #include "knot/server/udp-handler.h"
 #include "knot/server/xdp-handler.h"
-
-/* Buffer identifiers. */
-enum {
-       RX = 0,
-       TX = 1,
-       NBUFS = 2
-};
+#include <urcu.h>
+#include "knot/server/network_req_manager.h"
+#include "knot/common/stats.h"
 
 /*! \brief UDP context data. */
 typedef struct {
-       knot_layer_t layer; /*!< Query processing layer. */
-       server_t *server;   /*!< Name server structure. */
-       unsigned thread_id; /*!< Thread identifier. */
+       dns_request_handler_context_t dns_handler; /*!< DNS Request handler context. */
+       network_dns_request_manager_t *req_mgr;
 } udp_context_t;
 
-static bool udp_state_active(int state)
-{
-       return (state == KNOT_STATE_PRODUCE || state == KNOT_STATE_FAIL);
-}
-
-static void udp_handle(udp_context_t *udp, int fd, struct sockaddr_storage *ss,
-                       struct iovec *rx, struct iovec *tx, struct knot_xdp_msg *xdp_msg)
-{
-       /* Create query processing parameter. */
-       knotd_qdata_params_t params = {
-               .remote = ss,
-               .flags = KNOTD_QUERY_FLAG_NO_AXFR | KNOTD_QUERY_FLAG_NO_IXFR | /* No transfers. */
-                        KNOTD_QUERY_FLAG_LIMIT_SIZE, /* Enforce UDP packet size limit. */
-               .socket = fd,
-               .server = udp->server,
-               .xdp_msg = xdp_msg,
-               .thread_id = udp->thread_id
-       };
-
-       /* Start query processing. */
-       knot_layer_begin(&udp->layer, &params);
-
-       /* Create packets. */
-       knot_pkt_t *query = knot_pkt_new(rx->iov_base, rx->iov_len, udp->layer.mm);
-       knot_pkt_t *ans = knot_pkt_new(tx->iov_base, tx->iov_len, udp->layer.mm);
-
-       /* Input packet. */
-       int ret = knot_pkt_parse(query, 0);
-       if (ret != KNOT_EOK && query->parsed > 0) { // parsing failed (e.g. 2x OPT)
-               query->parsed--; // artificially decreasing "parsed" leads to FORMERR
-       }
-       knot_layer_consume(&udp->layer, query);
-
-       /* Process answer. */
-       while (udp_state_active(udp->layer.state)) {
-               knot_layer_produce(&udp->layer, ans);
-       }
-
-       /* Send response only if finished successfully. */
-       if (udp->layer.state == KNOT_STATE_DONE) {
-               tx->iov_len = ans->size;
-       } else {
-               tx->iov_len = 0;
-       }
-
-       /* Reset after processing. */
-       knot_layer_finish(&udp->layer);
-
-       /* Flush per-query memory (including query and answer packets). */
-       mp_flush(udp->layer.mm->ctx);
-}
-
-typedef struct {
-       void* (*udp_init)(void *);
-       void (*udp_deinit)(void *);
-       int (*udp_recv)(int, void *);
-       void (*udp_handle)(udp_context_t *, void *);
-       void (*udp_send)(void *);
-       void (*udp_sweep)(void *); // Optional
-} udp_api_t;
-
-/*! \brief Control message to fit IP_PKTINFO or IPv6_RECVPKTINFO. */
-typedef union {
-       struct cmsghdr cmsg;
-       uint8_t buf[CMSG_SPACE(sizeof(struct in6_pktinfo))];
-} cmsg_pktinfo_t;
-
-static void udp_pktinfo_handle(const struct msghdr *rx, struct msghdr *tx)
+static void udp_pktinfo_handle(const struct msghdr *rx, struct msghdr *tx, int fd, struct sockaddr_storage *target_addr)
 {
        tx->msg_controllen = rx->msg_controllen;
        if (tx->msg_controllen > 0) {
@@ -135,6 +64,13 @@ static void udp_pktinfo_handle(const struct msghdr *rx, struct msghdr *tx)
 #if defined(__linux__) || defined(__APPLE__)
        struct cmsghdr *cmsg = CMSG_FIRSTHDR(tx);
        if (cmsg == NULL) {
+               /* The socket is not bound to ANY addr. Get the socket ip. */
+               socklen_t sock_len = sizeof(*target_addr);
+               if (getsockname(fd, (struct sockaddr *)target_addr,
+                                               &sock_len) != 0) {
+                       /* Socket get failed. Cleanup the IP */
+                       memset(target_addr, 0, sizeof(*target_addr));
+               }
                return;
        }
 
@@ -143,47 +79,84 @@ static void udp_pktinfo_handle(const struct msghdr *rx, struct msghdr *tx)
                struct in_pktinfo *info = (struct in_pktinfo *)CMSG_DATA(cmsg);
                info->ipi_spec_dst = info->ipi_addr;
                info->ipi_ifindex = 0;
+               struct sockaddr_in * target_socket_v4 = (struct sockaddr_in *)target_addr;
+               target_socket_v4->sin_family = AF_INET;
+               target_socket_v4->sin_port   = -1; // TBD, if we need port later
+               target_socket_v4->sin_addr = info->ipi_addr;
        } else if (cmsg->cmsg_level == IPPROTO_IPV6 && cmsg->cmsg_type == IPV6_PKTINFO) {
                struct in6_pktinfo *info = (struct in6_pktinfo *)CMSG_DATA(cmsg);
                info->ipi6_ifindex = 0;
+               struct sockaddr_in6 * target_socket_v6 = (struct sockaddr_in6 *)target_addr;
+               target_socket_v6->sin6_family = AF_INET6;
+               target_socket_v6->sin6_port   = -1; // TBD, if we need port later
+               target_socket_v6->sin6_addr = info->ipi6_addr;
        }
 #endif
 }
 
 /* UDP recvfrom() request struct. */
 struct udp_recvfrom {
-       int fd;
-       struct sockaddr_storage addr;
+       network_dns_request_t *udp_req;
        struct msghdr msg[NBUFS];
-       struct iovec iov[NBUFS];
-       uint8_t buf[NBUFS][KNOT_WIRE_MAX_PKTSIZE];
-       cmsg_pktinfo_t pktinfo;
+       network_dns_request_manager_t *req_mgr;
 };
 
-static void *udp_recvfrom_init(_unused_ void *xdp_sock)
+static inline void udp_set_msghdr_from_req(struct msghdr *msg, network_dns_request_t *req, int rxtx) {
+       msg->msg_name = &req->dns_req.req_data.source_addr;
+       msg->msg_namelen = sizeof(struct sockaddr_storage);
+       msg->msg_iov = &req->iov[rxtx];
+       msg->msg_iovlen = 1;
+       msg->msg_control = &req->pktinfo.cmsg;
+       msg->msg_controllen = sizeof(cmsg_pktinfo_t);
+}
+
+/*!
+ * \brief Sets the DNS request for msghdr.
+ *
+ * \param udp_recv udp_recvfrom context which needs to be updated.
+ * \param req Request to be setup on msghdr.
+ */
+static void udp_recvfrom_set_request(struct udp_recvfrom *udp_recv, network_dns_request_t *req) {
+       udp_recv->udp_req = req;
+       for (unsigned i = 0; i < NBUFS; ++i) {
+               udp_set_msghdr_from_req(&udp_recv->msg[i], req, i);
+       }
+}
+
+typedef struct {
+       void* (*udp_init)(void *, network_dns_request_manager_t *req_mgr);
+       void (*udp_deinit)(void *);
+       int (*udp_recv)(int, void *);
+       void (*udp_handle)(udp_context_t *, void *);
+       void (*udp_send)(void *);
+       void (*udp_sweep)(void *); // Optional
+} udp_api_t;
+
+
+static void *udp_recvfrom_init(_unused_ void *xdp_sock, network_dns_request_manager_t *req_mgr)
 {
        struct udp_recvfrom *rq = malloc(sizeof(struct udp_recvfrom));
        if (rq == NULL) {
                return NULL;
        }
        memset(rq, 0, sizeof(struct udp_recvfrom));
+       rq->req_mgr = req_mgr;
 
-       for (unsigned i = 0; i < NBUFS; ++i) {
-               rq->iov[i].iov_base = rq->buf + i;
-               rq->iov[i].iov_len = KNOT_WIRE_MAX_PKTSIZE;
-               rq->msg[i].msg_name = &rq->addr;
-               rq->msg[i].msg_namelen = sizeof(rq->addr);
-               rq->msg[i].msg_iov = &rq->iov[i];
-               rq->msg[i].msg_iovlen = 1;
-               rq->msg[i].msg_control = &rq->pktinfo.cmsg;
-               rq->msg[i].msg_controllen = sizeof(rq->pktinfo);
+       network_dns_request_t *udp_req = req_mgr->allocate_network_request_func(req_mgr);
+       if (udp_req == NULL) {
+               free(rq);
+               return NULL;
        }
+
+       udp_recvfrom_set_request(rq, udp_req);
+
        return rq;
 }
 
 static void udp_recvfrom_deinit(void *d)
 {
        struct udp_recvfrom *rq = d;
+       rq->req_mgr->free_network_request_func(rq->req_mgr, rq->udp_req);
        free(rq);
 }
 
@@ -191,14 +164,25 @@ static int udp_recvfrom_recv(int fd, void *d)
 {
        /* Reset max lengths. */
        struct udp_recvfrom *rq = (struct udp_recvfrom *)d;
-       rq->iov[RX].iov_len = KNOT_WIRE_MAX_PKTSIZE;
+       if (rq->udp_req) {
+               // we are reusing the request, reset it
+               rq->req_mgr->restore_network_request_func(rq->req_mgr, rq->udp_req);
+       }
+       else {
+               rq->udp_req = rq->req_mgr->allocate_network_request_func(rq->req_mgr);
+               if (rq->udp_req == NULL) {
+                       // We could not allocate a request
+                       server_stats_increment_counter(server_stats_udp_no_req_obj, 1);
+                       return 0; // Don't process incoming, let the async handler free a request.
+               }
+       }
        rq->msg[RX].msg_namelen = sizeof(struct sockaddr_storage);
-       rq->msg[RX].msg_controllen = sizeof(rq->pktinfo);
+       rq->msg[RX].msg_controllen = sizeof(cmsg_pktinfo_t);
 
        int ret = recvmsg(fd, &rq->msg[RX], MSG_DONTWAIT);
        if (ret > 0) {
-               rq->fd = fd;
-               rq->iov[RX].iov_len = ret;
+               rq->udp_req->dns_req.req_data.fd = fd;
+               rq->udp_req->iov[RX].iov_len = ret;
                return 1;
        }
 
@@ -211,20 +195,33 @@ static void udp_recvfrom_handle(udp_context_t *ctx, void *d)
 
        /* Prepare TX address. */
        rq->msg[TX].msg_namelen = rq->msg[RX].msg_namelen;
-       rq->iov[TX].iov_len = KNOT_WIRE_MAX_PKTSIZE;
 
-       udp_pktinfo_handle(&rq->msg[RX], &rq->msg[TX]);
+       udp_pktinfo_handle(&rq->msg[RX], &rq->msg[TX], rq->udp_req->dns_req.req_data.fd, &rq->udp_req->dns_req.req_data.target_addr);
 
        /* Process received pkt. */
-       udp_handle(ctx, rq->fd, &rq->addr, &rq->iov[RX], &rq->iov[TX], NULL);
+       handle_dns_request(&ctx->dns_handler, &rq->udp_req->dns_req);
+#ifdef ENABLE_ASYNC_QUERY_HANDLING
+       if (dns_handler_request_is_async(rq->udp_req->dns_req)) {
+               // Save udp source state
+               rq->udp_req->msg_namelen_received = rq->msg[RX].msg_namelen;
+               rq->udp_req->msg_controllen_received = rq->msg[RX].msg_controllen;
+               // release the request
+               rq->udp_req = NULL;
+       }
+#endif
+
+}
+
+static void udp_send_single_response(network_dns_request_t *udp_req, struct msghdr *msghdr_tx) {
+       if (udp_req->iov[TX].iov_len > 0) {
+               (void)sendmsg(udp_req->dns_req.req_data.fd, msghdr_tx, 0);
+       }
 }
 
 static void udp_recvfrom_send(void *d)
 {
        struct udp_recvfrom *rq = d;
-       if (rq->iov[TX].iov_len > 0) {
-               (void)sendmsg(rq->fd, &rq->msg[TX], 0);
-       }
+       udp_send_single_response(rq->udp_req, &rq->msg[TX]);
 }
 
 _unused_
@@ -239,43 +236,48 @@ static udp_api_t udp_recvfrom_api = {
 #ifdef ENABLE_RECVMMSG
 /* UDP recvmmsg() request struct. */
 struct udp_recvmmsg {
-       int fd;
-       struct sockaddr_storage addrs[RECVMMSG_BATCHLEN];
-       char *iobuf[NBUFS];
-       struct iovec *iov[NBUFS];
+       network_dns_request_t *udp_reqs[RECVMMSG_BATCHLEN];
        struct mmsghdr *msgs[NBUFS];
        unsigned rcvd;
-       knot_mm_t mm;
-       cmsg_pktinfo_t pktinfo[RECVMMSG_BATCHLEN];
+       network_dns_request_manager_t *req_mgr;
+       size_t udp_reqs_available;
+       bool udp_reqs_fully_allocated;
+       int in_progress_fd;
 };
 
-static void *udp_recvmmsg_init(_unused_ void *xdp_sock)
-{
-       knot_mm_t mm;
-       mm_ctx_mempool(&mm, sizeof(struct udp_recvmmsg));
+/*!
+ * \brief Sets the DNS request as req_index'th request in mmsghdr.
+ *
+ * \param rq udp_recvmmsg context which needs to be updated.
+ * \param req_index index where req needs to be set in rq.
+ * \param req Request to be setup on mmsghdr.
+ */
+static void udp_recvmmsg_set_request(struct udp_recvmmsg *rq, unsigned req_index, network_dns_request_t *req) {
+       rq->udp_reqs[req_index] = req;
+       for (unsigned i = 0; i < NBUFS; ++i) {
+               udp_set_msghdr_from_req(&rq->msgs[i][req_index].msg_hdr, req, i);
+       }
+}
 
-       struct udp_recvmmsg *rq = mm_alloc(&mm, sizeof(struct udp_recvmmsg));
+
+static void *udp_recvmmsg_init(_unused_ void *xdp_sock, _unused_ network_dns_request_manager_t *req_mgr)
+{
+       struct udp_recvmmsg *rq = (struct udp_recvmmsg *) req_mgr->allocate_mem_func(req_mgr, sizeof(struct udp_recvmmsg));
        memset(rq, 0, sizeof(*rq));
-       memcpy(&rq->mm, &mm, sizeof(knot_mm_t));
+       rq->req_mgr = req_mgr;
 
        /* Initialize buffers. */
        for (unsigned i = 0; i < NBUFS; ++i) {
-               rq->iobuf[i] = mm_alloc(&mm, KNOT_WIRE_MAX_PKTSIZE * RECVMMSG_BATCHLEN);
-               rq->iov[i] = mm_alloc(&mm, sizeof(struct iovec) * RECVMMSG_BATCHLEN);
-               rq->msgs[i] = mm_alloc(&mm, sizeof(struct mmsghdr) * RECVMMSG_BATCHLEN);
+               rq->msgs[i] =  req_mgr->allocate_mem_func(req_mgr, sizeof(struct mmsghdr) * RECVMMSG_BATCHLEN);
                memset(rq->msgs[i], 0, sizeof(struct mmsghdr) * RECVMMSG_BATCHLEN);
-               for (unsigned k = 0; k < RECVMMSG_BATCHLEN; ++k) {
-                       rq->iov[i][k].iov_base = rq->iobuf[i] + k * KNOT_WIRE_MAX_PKTSIZE;
-                       rq->iov[i][k].iov_len = KNOT_WIRE_MAX_PKTSIZE;
-                       rq->msgs[i][k].msg_hdr.msg_iov = rq->iov[i] + k;
-                       rq->msgs[i][k].msg_hdr.msg_iovlen = 1;
-                       rq->msgs[i][k].msg_hdr.msg_name = rq->addrs + k;
-                       rq->msgs[i][k].msg_hdr.msg_namelen = sizeof(struct sockaddr_storage);
-                       rq->msgs[i][k].msg_hdr.msg_control = &rq->pktinfo[k].cmsg;
-                       rq->msgs[i][k].msg_hdr.msg_controllen = sizeof(cmsg_pktinfo_t);
-               }
        }
 
+       for (unsigned k = 0; k < RECVMMSG_BATCHLEN; ++k) {
+               udp_recvmmsg_set_request(rq, k, req_mgr->allocate_network_request_func(req_mgr));
+       }
+
+       rq->udp_reqs_available = RECVMMSG_BATCHLEN;
+       rq->udp_reqs_fully_allocated = true;
        return rq;
 }
 
@@ -283,7 +285,70 @@ static void udp_recvmmsg_deinit(void *d)
 {
        struct udp_recvmmsg *rq = d;
        if (rq != NULL) {
-               mp_delete(rq->mm.ctx);
+               for (unsigned k = 0; k < RECVMMSG_BATCHLEN; ++k) {
+                       if (rq->udp_reqs[k] != NULL) {
+                               rq->req_mgr->free_network_request_func(rq->req_mgr, rq->udp_reqs[k]);
+                       }
+               }
+
+               if (rq->req_mgr != NULL)
+               {
+                       for (unsigned i = 0; i < NBUFS; ++i) {
+                               rq->req_mgr->free_mem_func(rq->req_mgr, rq->msgs[i]);
+                       }
+
+                       rq->req_mgr->free_mem_func(rq->req_mgr, rq);
+               }
+       }
+}
+
+/*!
+ * \brief If any request in mmsg is null, this function tries to allocate the req for NULL.
+ * If allocation fails, it packs the request to have the first N allocated and updates udp_reqs_available.
+ *
+ * \param rq udp_recvmmsg context which needs to be updated.
+ */
+static void allocate_or_pack_udp_req(struct udp_recvmmsg *rq) {
+       int last_non_null_index = RECVMMSG_BATCHLEN - 1;
+       for (unsigned k = 0; k < RECVMMSG_BATCHLEN; ++k) {
+               if (rq->udp_reqs[k] == NULL) {
+                       // try to allocate
+                       network_dns_request_t *new_req = rq->req_mgr->allocate_network_request_func(rq->req_mgr);
+                       if (new_req != NULL) {
+                               udp_recvmmsg_set_request(rq, k, new_req);
+                       } else {
+                               // allocation failed. Move something from end to here.
+
+                               while (last_non_null_index > k && rq->udp_reqs[last_non_null_index] == NULL) {
+                                       last_non_null_index--;
+                               }
+
+                               if (last_non_null_index > k) {
+                                       // found non-null after current, move it to current
+                                       udp_recvmmsg_set_request(rq, k, rq->udp_reqs[last_non_null_index]);
+                                       rq->udp_reqs[last_non_null_index] = NULL;
+                               } else {
+                                       break;
+                               }
+                       }
+               }
+       }
+
+       // At this point, the requests are packed or fully allocated
+       int reqs_allocated = RECVMMSG_BATCHLEN;
+       while (reqs_allocated > 0 && rq->udp_reqs[reqs_allocated - 1] == NULL) {
+               reqs_allocated--;
+       }
+
+       rq->udp_reqs_available = reqs_allocated;
+       rq->udp_reqs_fully_allocated = (reqs_allocated == RECVMMSG_BATCHLEN);
+
+       if (!rq->udp_reqs_fully_allocated) {
+               server_stats_increment_counter(server_stats_udp_req_batch_limited, 1);
+       }
+
+       if (reqs_allocated == 0) {
+               server_stats_increment_counter(server_stats_udp_no_req_obj, 1);
        }
 }
 
@@ -291,10 +356,17 @@ static int udp_recvmmsg_recv(int fd, void *d)
 {
        struct udp_recvmmsg *rq = d;
 
-       int n = recvmmsg(fd, rq->msgs[RX], RECVMMSG_BATCHLEN, MSG_DONTWAIT, NULL);
+       if (!rq->udp_reqs_fully_allocated) {
+               allocate_or_pack_udp_req(rq);
+       }
+
+       int n = recvmmsg(fd, rq->msgs[RX], rq->udp_reqs_available, MSG_DONTWAIT, NULL);
        if (n > 0) {
-               rq->fd = fd;
+               for (int i = 0; i < n; i++) {
+                       rq->udp_reqs[i]->dns_req.req_data.fd = fd;
+               }
                rq->rcvd = n;
+               rq->in_progress_fd = fd;
        }
        return n;
 }
@@ -306,12 +378,27 @@ static void udp_recvmmsg_handle(udp_context_t *ctx, void *d)
        /* Handle each received msg. */
        for (unsigned i = 0; i < rq->rcvd; ++i) {
                struct iovec *rx = rq->msgs[RX][i].msg_hdr.msg_iov;
-               struct iovec *tx = rq->msgs[TX][i].msg_hdr.msg_iov;
                rx->iov_len = rq->msgs[RX][i].msg_len; /* Received bytes. */
 
-               udp_pktinfo_handle(&rq->msgs[RX][i].msg_hdr, &rq->msgs[TX][i].msg_hdr);
+               udp_pktinfo_handle(&rq->msgs[RX][i].msg_hdr, &rq->msgs[TX][i].msg_hdr, rq->udp_reqs[i]->dns_req.req_data.fd, &rq->udp_reqs[i]->dns_req.req_data.target_addr);
+
+               handle_dns_request(&ctx->dns_handler, &rq->udp_reqs[i]->dns_req);
+       }
 
-               udp_handle(ctx, rq->fd, rq->addrs + i, rx, tx, NULL);
+       /* Setup response for each received msg. */
+       for (unsigned i = 0; i < rq->rcvd; ++i) {
+               struct iovec *tx = rq->msgs[TX][i].msg_hdr.msg_iov;
+#ifdef ENABLE_ASYNC_QUERY_HANDLING
+               if (dns_handler_request_is_async(rq->udp_reqs[i]->dns_req)) {
+                       // Save udp source state
+                       rq->udp_reqs[i]->msg_namelen_received = rq->msgs[RX][i].msg_hdr.msg_namelen;
+                       rq->udp_reqs[i]->msg_controllen_received = rq->msgs[RX][i].msg_hdr.msg_controllen;
+
+                       tx->iov_len = 0; // async requests have nothing to send
+                       rq->udp_reqs[i] = NULL;
+                       rq->udp_reqs_fully_allocated = false;
+               }
+#endif
                rq->msgs[TX][i].msg_len = tx->iov_len;
                rq->msgs[TX][i].msg_hdr.msg_namelen = 0;
                if (tx->iov_len > 0) {
@@ -324,18 +411,17 @@ static void udp_recvmmsg_handle(udp_context_t *ctx, void *d)
 static void udp_recvmmsg_send(void *d)
 {
        struct udp_recvmmsg *rq = d;
-       (void)sendmmsg(rq->fd, rq->msgs[TX], rq->rcvd, 0);
+       (void)sendmmsg(rq->in_progress_fd, rq->msgs[TX], rq->rcvd, 0);
        for (unsigned i = 0; i < rq->rcvd; ++i) {
-               /* Reset buffer size and address len. */
-               struct iovec *rx = rq->msgs[RX][i].msg_hdr.msg_iov;
-               struct iovec *tx = rq->msgs[TX][i].msg_hdr.msg_iov;
-               rx->iov_len = KNOT_WIRE_MAX_PKTSIZE; /* Reset RX buflen */
-               tx->iov_len = KNOT_WIRE_MAX_PKTSIZE;
-
-               memset(rq->addrs + i, 0, sizeof(struct sockaddr_storage));
-               rq->msgs[RX][i].msg_hdr.msg_namelen = sizeof(struct sockaddr_storage);
-               rq->msgs[TX][i].msg_hdr.msg_namelen = sizeof(struct sockaddr_storage);
-               rq->msgs[RX][i].msg_hdr.msg_controllen = sizeof(cmsg_pktinfo_t);
+               if (rq->udp_reqs[i] != NULL) {
+                       /* Reset buffer size and address len. */
+                       rq->req_mgr->restore_network_request_func(rq->req_mgr, rq->udp_reqs[i]);
+
+                       memset(&rq->udp_reqs[i]->dns_req.req_data.source_addr, 0, sizeof(struct sockaddr_storage));
+                       rq->msgs[RX][i].msg_hdr.msg_namelen = sizeof(struct sockaddr_storage);
+                       rq->msgs[TX][i].msg_hdr.msg_namelen = sizeof(struct sockaddr_storage);
+                       rq->msgs[RX][i].msg_hdr.msg_controllen = sizeof(cmsg_pktinfo_t);
+               }
        }
 }
 
@@ -350,7 +436,7 @@ static udp_api_t udp_recvmmsg_api = {
 
 #ifdef ENABLE_XDP
 
-static void *xdp_recvmmsg_init(void *xdp_sock)
+static void *xdp_recvmmsg_init(void *xdp_sock, _unused_ network_dns_request_manager_t *req_mgr)
 {
        return xdp_handle_init(xdp_sock);
 }
@@ -367,7 +453,7 @@ static int xdp_recvmmsg_recv(_unused_ int fd, void *d)
 
 static void xdp_recvmmsg_handle(udp_context_t *ctx, void *d)
 {
-       xdp_handle_msgs(d, &ctx->layer, ctx->server, ctx->thread_id);
+       xdp_handle_msgs(d, &ctx->dns_handler.layer, ctx->dns_handler.server, ctx->dns_handler.thread_id);
 }
 
 static void xdp_recvmmsg_send(void *d)
@@ -453,6 +539,47 @@ static unsigned udp_set_ifaces(const server_t *server, size_t n_ifaces, fdset_t
        return fdset_get_length(fds);
 }
 
+#ifdef ENABLE_ASYNC_QUERY_HANDLING
+static bool use_numa = false;
+static bool udp_use_async = false;
+static atomic_shared_dns_request_manager_t udp_shared_req_mgr[KNOT_MAX_NUMA];
+static size_t udp_req_pool_size;
+
+/*!
+ * \brief Initialize udp async.
+ *
+ * \param pool_size Request pool size.
+ * \param numa_enabled Indicates if numa available.
+ *
+ * \retval KNOT_EOK on success.
+ */
+int init_udp_async(size_t pool_size, bool numa_enabled) {
+       for (int i = 0; i < KNOT_MAX_NUMA; i++) {
+               init_shared_req_mgr(udp_shared_req_mgr[i]);
+       }
+       udp_req_pool_size = pool_size;
+       udp_use_async = true;
+       use_numa = numa_enabled;
+       return KNOT_EOK;
+}
+
+static void udp_async_query_completed_callback(dns_request_handler_context_t *net, dns_handler_request_t *req) {
+       udp_context_t *udp = caa_container_of(net, udp_context_t, dns_handler);
+       network_dns_request_t *udp_req = caa_container_of(req, network_dns_request_t, dns_req);
+
+       // Prepare response and send it
+       struct msghdr txmsg;
+       udp_set_msghdr_from_req(&txmsg, udp_req, TX);
+       txmsg.msg_namelen = udp_req->msg_namelen_received;
+       txmsg.msg_controllen = udp_req->msg_controllen_received;
+       txmsg.msg_control = (txmsg.msg_controllen != 0) ? &udp_req->pktinfo.cmsg : NULL;
+       udp_send_single_response(udp_req, &txmsg);
+
+       // Free the request
+       udp->req_mgr->free_network_request_func(udp->req_mgr, udp_req);
+}
+#endif
+
 int udp_master(dthread_t *thread)
 {
        if (thread == NULL || thread->data == NULL) {
@@ -466,13 +593,25 @@ int udp_master(dthread_t *thread)
                return KNOT_EOK;
        }
 
+       _unused_ int numa_node = 0;
        /* Set thread affinity to CPU core (same for UDP and XDP). */
        unsigned cpu = dt_online_cpus();
        if (cpu > 1) {
                unsigned cpu_mask = (dt_get_id(thread) % cpu);
                dt_setaffinity(thread, &cpu_mask, 1);
+#ifdef KNOT_ENABLE_NUMA
+               if (use_numa)
+               {
+                       int cpu_numa_node = numa_node_of_cpu(cpu_mask);
+                       numa_node =  cpu_numa_node % KNOT_MAX_NUMA;
+                       log_info("UDP thread %d using numa %d, original %d", thread_id, numa_node, cpu_numa_node);
+               }
+#endif
        }
 
+       /* Create UDP answering context. */
+       udp_context_t udp = {0};
+
        /* Choose processing API. */
        udp_api_t *api = NULL;
        if (is_xdp_thread(handler->server, thread_id)) {
@@ -484,28 +623,51 @@ int udp_master(dthread_t *thread)
        } else {
 #ifdef ENABLE_RECVMMSG
                api = &udp_recvmmsg_api;
+               udp.req_mgr =
+#ifdef ENABLE_ASYNC_QUERY_HANDLING
+                       udp_use_async ?
+                               network_dns_request_pool_manager_create(&udp_shared_req_mgr[numa_node], KNOT_WIRE_MAX_UDP_PKTSIZE, 16 * MM_DEFAULT_BLKSIZE, udp_req_pool_size) :
+#endif
+                               network_dns_request_manager_knot_mm_create(KNOT_WIRE_MAX_UDP_PKTSIZE, 16 * MM_DEFAULT_BLKSIZE);
 #else
                api = &udp_recvfrom_api;
+               udp.req_mgr =
+#ifdef ENABLE_ASYNC_QUERY_HANDLING
+                       udp_use_async ?
+                               network_dns_request_pool_manager_create(&udp_shared_req_mgr[numa_node], KNOT_WIRE_MAX_UDP_PKTSIZE, 16 * MM_DEFAULT_BLKSIZE, udp_req_pool_size):
+#endif
+                               network_dns_request_manager_basic_create(KNOT_WIRE_MAX_UDP_PKTSIZE, 16 * MM_DEFAULT_BLKSIZE);
 #endif
        }
+       assert(udp.req_mgr != NULL);
        void *api_ctx = NULL;
 
-       /* Create big enough memory cushion. */
-       knot_mm_t mm;
-       mm_ctx_mempool(&mm, 16 * MM_DEFAULT_BLKSIZE);
-
-       /* Create UDP answering context. */
-       udp_context_t udp = {
-               .server = handler->server,
-               .thread_id = thread_id,
-       };
-       knot_layer_init(&udp.layer, &mm, process_query_layer());
+       /* Initialize UDP answering context. */
+       if ( initialize_dns_handle(
+                       &udp.dns_handler,
+                       handler->server,
+                       thread_id,
+                       KNOTD_QUERY_FLAG_NO_AXFR | KNOTD_QUERY_FLAG_NO_IXFR | /* No transfers. */
+                               KNOTD_QUERY_FLAG_LIMIT_SIZE, /* Enforce UDP packet size limit. */
+                       NULL
+#ifdef ENABLE_ASYNC_QUERY_HANDLING
+                       ,udp_async_query_completed_callback
+#endif
+                       ) != KNOT_EOK) {
+               goto finish;
+       }
 
        /* Allocate descriptors for the configured interfaces. */
        void *xdp_socket = NULL;
        size_t nifs = handler->server->n_ifaces;
+       size_t fds_size = handler->server->n_ifaces;
+#ifdef ENABLE_ASYNC_QUERY_HANDLING
+       if (udp_use_async) {
+               fds_size++;
+       }
+#endif
        fdset_t fds;
-       if (fdset_init(&fds, nifs) != KNOT_EOK) {
+       if (fdset_init(&fds, fds_size) != KNOT_EOK) {
                goto finish;
        }
        unsigned nfds = udp_set_ifaces(handler->server, nifs, &fds,
@@ -514,8 +676,15 @@ int udp_master(dthread_t *thread)
                goto finish;
        }
 
+#ifdef ENABLE_ASYNC_QUERY_HANDLING
+       int async_completed_notification = dns_request_handler_context_get_async_notify_handle(&udp.dns_handler);
+       if (fdset_add(&fds, async_completed_notification, FDSET_POLLIN, NULL) < 0) {
+               goto finish;
+       }
+#endif
+
        /* Initialize the networking API. */
-       api_ctx = api->udp_init(xdp_socket);
+       api_ctx = api->udp_init(xdp_socket, udp.req_mgr);
        if (api_ctx == NULL) {
                goto finish;
        }
@@ -536,9 +705,20 @@ int udp_master(dthread_t *thread)
                        if (!fdset_it_is_pollin(&it)) {
                                continue;
                        }
-                       if (api->udp_recv(fdset_it_get_fd(&it), api_ctx) > 0) {
-                               api->udp_handle(&udp, api_ctx);
-                               api->udp_send(api_ctx);
+                       int ready_handle = fdset_it_get_fd(&it);
+#ifdef ENABLE_ASYNC_QUERY_HANDLING
+                       if (ready_handle == async_completed_notification) {
+                               server_stats_increment_counter(server_stats_udp_async_done, 1);
+                               handle_dns_request_async_completed_queries(&udp.dns_handler);
+                       }
+                       else
+#endif
+                       {
+                               server_stats_increment_counter(server_stats_udp_received, 1);
+                               if (api->udp_recv(ready_handle, api_ctx) > 0) {
+                                       api->udp_handle(&udp, api_ctx);
+                                       api->udp_send(api_ctx);
+                               }
                        }
                }
 
@@ -549,8 +729,18 @@ int udp_master(dthread_t *thread)
        }
 
 finish:
+#ifdef ENABLE_ASYNC_QUERY_HANDLING
+       {
+               struct timespec five_sec = { 5, 0 };
+               nanosleep(&five_sec, &five_sec);
+       }
+#endif
+
+       cleanup_dns_handle(&udp.dns_handler);
        api->udp_deinit(api_ctx);
-       mp_delete(mm.ctx);
+       if (udp.req_mgr) {
+               udp.req_mgr->delete_req_manager(udp.req_mgr);
+       }
        fdset_clear(&fds);
 
        return KNOT_EOK;
index b09e43e14f3a848af6ed35cfd529ba03749eee3c..4d4ff3129fd410e97cddb3a3243457a9b70b45e7 100644 (file)
 
 #define RECVMMSG_BATCHLEN 10 /*!< Default recvmmsg() batch size. */
 
+#ifdef ENABLE_ASYNC_QUERY_HANDLING
+/*!
+ * \brief Initialize udp async.
+ *
+ * \param pool_size Request pool size.
+ * \param numa_enabled Indicates if numa available.
+ *
+ * \retval KNOT_EOK on success.
+ */
+int init_udp_async(size_t pool_size, bool numa_enabled);
+#endif
+
 /*!
  * \brief UDP handler thread runnable.
  *
index 291454bd7693e2d27c6deb4c41f6147ab0917de1..f149a644e13d8d7c1797d530fdd47fe46a34b69d 100644 (file)
 #include "knot/zone/node.h"
 #include "libknot/libknot.h"
 
-void additional_clear(additional_t *additional)
+void additional_clear_mm(additional_t *additional, knot_mm_t *mm)
 {
        if (additional == NULL) {
                return;
        }
 
-       free(additional->glues);
-       free(additional);
+       mm_free(mm, additional->glues);
+       mm_free(mm, additional);
+}
+
+void additional_clear(additional_t *additional)
+{
+    additional_clear_mm(additional, NULL);
 }
 
 bool additional_equal(additional_t *a, additional_t *b)
@@ -265,14 +270,14 @@ bool binode_additionals_unchanged(zone_node_t *node, zone_node_t *counterpart)
        return true;
 }
 
-void node_free_rrsets(zone_node_t *node, knot_mm_t *mm)
+void node_free_rrsets_mm(zone_node_t *node, knot_mm_t *mm, bool use_mm_for_additional)
 {
        if (node == NULL) {
                return;
        }
 
        for (uint16_t i = 0; i < node->rrset_count; ++i) {
-               additional_clear(node->rrs[i].additional);
+               additional_clear_mm(node->rrs[i].additional, use_mm_for_additional ? mm : NULL);
                rr_data_clear(&node->rrs[i], mm);
        }
 
@@ -281,6 +286,11 @@ void node_free_rrsets(zone_node_t *node, knot_mm_t *mm)
        node->rrset_count = 0;
 }
 
+void node_free_rrsets(zone_node_t *node, knot_mm_t *mm)
+{
+    node_free_rrsets_mm(node, mm, false);
+}
+
 void node_free(zone_node_t *node, knot_mm_t *mm)
 {
        if (node == NULL) {
@@ -335,7 +345,7 @@ int node_add_rrset(zone_node_t *node, const knot_rrset_t *rrset, knot_mm_t *mm)
        return add_rrset_no_merge(node, rrset, mm);
 }
 
-void node_remove_rdataset(zone_node_t *node, uint16_t type)
+void node_remove_rdataset_mm(zone_node_t *node, uint16_t type, knot_mm_t *mm)
 {
        if (node == NULL) {
                return;
@@ -346,10 +356,10 @@ void node_remove_rdataset(zone_node_t *node, uint16_t type)
        for (int i = 0; i < node->rrset_count; ++i) {
                if (node->rrs[i].type == type) {
                        if (!binode_additional_shared(node, type)) {
-                               additional_clear(node->rrs[i].additional);
+                               additional_clear_mm(node->rrs[i].additional, mm);
                        }
                        if (!binode_rdata_shared(node, type)) {
-                               rr_data_clear(&node->rrs[i], NULL);
+                               rr_data_clear(&node->rrs[i], mm);
                        }
                        memmove(node->rrs + i, node->rrs + i + 1,
                                (node->rrset_count - i - 1) * sizeof(struct rr_data));
@@ -359,6 +369,27 @@ void node_remove_rdataset(zone_node_t *node, uint16_t type)
        }
 }
 
+void node_remove_rdataset(zone_node_t *node, uint16_t type)
+{
+       node_remove_rdataset_mm(node, type, NULL);
+}
+
+int node_add_rrset_additional(zone_node_t *node, uint16_t type, additional_t *additional)
+{
+       if (node == NULL) {
+               return KNOT_EINVAL;
+       }
+
+       for (uint16_t i = 0; i < node->rrset_count; ++i) {
+               if (node->rrs[i].type == type) {
+            node->rrs[i].additional = additional;
+            return KNOT_EOK;
+               }
+       }
+
+       return KNOT_EINVAL;
+}
+
 int node_remove_rrset(zone_node_t *node, const knot_rrset_t *rrset, knot_mm_t *mm)
 {
        if (node == NULL || rrset == NULL) {
index d30cc6e1c401d731b75ca5805662efebea7ef8c8..fe1934e865f9c6dfd74d8cd6b3249ed40edaf1e7 100644 (file)
@@ -109,6 +109,14 @@ enum node_flags {
 typedef void (*node_addrem_cb)(zone_node_t *, void *);
 typedef zone_node_t *(*node_new_cb)(const knot_dname_t *, void *);
 
+/*!
+ * \brief Clears additional structure.
+ *
+ * \param additional  Additional to clear.
+ * \param mm          Memory context to use.
+ */
+void additional_clear_mm(additional_t *additional, knot_mm_t *mm);
+
 /*!
  * \brief Clears additional structure.
  *
@@ -199,6 +207,16 @@ bool binode_additional_shared(zone_node_t *node, uint16_t type);
  */
 bool binode_additionals_unchanged(zone_node_t *node, zone_node_t *counterpart);
 
+/*!
+ * \brief Destroys allocated data within the node
+ *        structure, but not the node itself.
+ *
+ * \param node  Node that contains data to be destroyed.
+ * \param mm    Memory context to use.
+ * \param use_mm_for_additional Use mm to free the additional data
+ */
+void node_free_rrsets_mm(zone_node_t *node, knot_mm_t *mm, bool use_mm_for_additional);
+
 /*!
  * \brief Destroys allocated data within the node
  *        structure, but not the node itself.
@@ -231,6 +249,26 @@ void node_free(zone_node_t *node, knot_mm_t *mm);
  */
 int node_add_rrset(zone_node_t *node, const knot_rrset_t *rrset, knot_mm_t *mm);
 
+/*!
+ * \brief Adds an additional to the node. Just pointer is saved
+ *
+ * \param node     Node to add the RRSet to.
+ * \param type     type of the RR to which the additional has to be added.
+ * \param additional Additional data to be added.
+ *
+ * \return KNOT_E*
+ */
+int node_add_rrset_additional(zone_node_t *node, uint16_t type, additional_t *additional);
+
+/*!
+ * \brief Removes data for given RR type from node.
+ *
+ * \param node  Node we want to delete from.
+ * \param type  RR type to delete.
+ * \param mm       Memory context to use.
+ */
+void node_remove_rdataset_mm(zone_node_t *node, uint16_t type, knot_mm_t *mm);
+
 /*!
  * \brief Removes data for given RR type from node.
  *
index a827eb0e34800756b2484a8c6be660adb1669b77..09be1b36a3bf03cc4df37dc46bfa0204b0698225 100644 (file)
@@ -155,15 +155,15 @@ flush_journal_replan:
        return ret;
 }
 
-zone_t* zone_new(const knot_dname_t *name)
+zone_t* zone_new_mm(const knot_dname_t *name, knot_mm_t *mm)
 {
-       zone_t *zone = malloc(sizeof(zone_t));
+       zone_t *zone = mm_alloc(mm, sizeof(zone_t));
        if (zone == NULL) {
                return NULL;
        }
        memset(zone, 0, sizeof(zone_t));
 
-       zone->name = knot_dname_copy(name, NULL);
+       zone->name = knot_dname_copy(name, mm);
        if (zone->name == NULL) {
                free(zone);
                return NULL;
@@ -188,6 +188,11 @@ zone_t* zone_new(const knot_dname_t *name)
        return zone;
 }
 
+zone_t* zone_new(const knot_dname_t *name)
+{
+       return zone_new_mm(name, NULL);
+}
+
 void zone_control_clear(zone_t *zone)
 {
        if (zone == NULL) {
@@ -199,7 +204,7 @@ void zone_control_clear(zone_t *zone)
        zone->control_update = NULL;
 }
 
-void zone_free(zone_t **zone_ptr)
+void zone_free_mm(zone_t **zone_ptr, knot_mm_t *mm)
 {
        if (zone_ptr == NULL || *zone_ptr == NULL) {
                return;
@@ -209,7 +214,7 @@ void zone_free(zone_t **zone_ptr)
 
        zone_events_deinit(zone);
 
-       knot_dname_free(zone->name, NULL);
+       knot_dname_free(zone->name, mm);
 
        free_ddns_queue(zone);
        pthread_mutex_destroy(&zone->ddns_lock);
@@ -231,10 +236,15 @@ void zone_free(zone_t **zone_ptr)
 
        conf_deactivate_modules(&zone->query_modules, &zone->query_plan);
 
-       free(zone);
+       mm_free(mm, zone);
        *zone_ptr = NULL;
 }
 
+void zone_free(zone_t **zone_ptr)
+{
+       zone_free_mm(zone_ptr, NULL);
+}
+
 void zone_reset(conf_t *conf, zone_t *zone)
 {
        if (zone == NULL) {
index 6696efa471747a5d8487136beac6d2fc0cfd09b0..bca86af5f3333188dad90f6febb46dbfbfadf092 100644 (file)
@@ -25,6 +25,7 @@
 #include "knot/updates/changesets.h"
 #include "knot/zone/contents.h"
 #include "knot/zone/timers.h"
+#include "knot/dnssec/zone-keys.h"
 #include "libknot/dname.h"
 #include "libknot/packet/pkt.h"
 
@@ -42,6 +43,8 @@ typedef enum {
        ZONE_FORCE_ZSK_ROLL = 1 << 4, /*!< Force ZSK rollover. */
        ZONE_IS_CATALOG     = 1 << 5, /*!< This is a catalog. */
        ZONE_IS_CAT_MEMBER  = 1 << 6, /*!< This zone exists according to a catalog. */
+       ZONE_DNSSEC_ENABLED = 1 << 14, /*!< Dnssec is enabled for this zone. */
+       ZONE_EPHEMERAL      = 1 << 15,/*!< Ephemeral zone which is not persisted after query processing */
 } zone_flag_t;
 
 /*!
@@ -104,11 +107,23 @@ typedef struct zone
        /*! \brief Preferred master for remote operation. */
        struct sockaddr_storage *preferred_master;
 
+       /*! \brief Zone signing context and keys. (for DNSSEC onlinesign) */
+       zone_sign_ctx_t *sign_ctx;
+
        /*! \brief Query modules. */
        list_t query_modules;
        struct query_plan *query_plan;
 } zone_t;
 
+/*!
+ * \brief Creates new zone with empty zone content and marks it as ephemeral. If mm is passed, allocates the zone using mm.
+ *
+ * \param name  Zone name.
+ *
+ * \return The initialized zone structure or NULL if an error occurred.
+ */
+zone_t* zone_new_mm(const knot_dname_t *name, knot_mm_t *mm);
+
 /*!
  * \brief Creates new zone with emtpy zone content.
  *
@@ -118,6 +133,15 @@ typedef struct zone
  */
 zone_t* zone_new(const knot_dname_t *name);
 
+/*!
+ * \brief Deallocates the zone structure created with zone_new_mm.
+ *
+ * \note The function also deallocates all bound structures (contents, etc.).
+ *
+ * \param zone_ptr Zone to be freed.
+ */
+void zone_free_mm(zone_t **zone_ptr, knot_mm_t *mm);
+
 /*!
  * \brief Deallocates the zone structure.
  *
@@ -217,4 +241,4 @@ int zone_set_lastsigned_serial(zone_t *zone, uint32_t serial);
 
 int zone_get_lastsigned_serial(zone_t *zone, uint32_t *serial);
 
-int slave_zone_serial(zone_t *zone, conf_t *conf, uint32_t *serial);
+int slave_zone_serial(zone_t *zone, conf_t *conf, uint32_t *serial);
\ No newline at end of file
index e113028c138c67458e044d6d58dac584e452b3c6..50269e77e621bc8da6b68bbfb5b432dfcb8f6836 100644 (file)
@@ -215,6 +215,17 @@ int dnssec_key_set_rdata(dnssec_key_t *key, const dnssec_binary_t *rdata);
  */
 int dnssec_key_load_pkcs8(dnssec_key_t *key, const dnssec_binary_t *pem);
 
+/*!
+ * Load PKCS #8 private key in the DER format.
+ *
+ * At least an algorithm must be set prior to calling this function.
+ *
+ * The function will create public key, unless it was already set (using
+ * \ref dnssec_key_set_pubkey or \ref dnssec_key_set_rdata). If the public key
+ * was set, the function will prevent loading of non-matching private key.
+ */
+int dnssec_key_load_pkcs8_der(dnssec_key_t *key, const dnssec_binary_t *der);
+
 /*!
  * Check if the key can be used for signing.
  */
index 10126ccf3689731405e26580008ebf5cce7fa6e0..e2695208b87296a34a39120b051a4ad1f586a004 100644 (file)
@@ -53,3 +53,31 @@ int dnssec_key_load_pkcs8(dnssec_key_t *key, const dnssec_binary_t *pem)
 
        return DNSSEC_EOK;
 }
+
+/* -- public API ----------------------------------------------------------- */
+
+_public_
+int dnssec_key_load_pkcs8_der(dnssec_key_t *key, const dnssec_binary_t *der)
+{
+       if (!key || !der || !der->data) {
+               return DNSSEC_EINVAL;
+       }
+
+       if (dnssec_key_get_algorithm(key) == 0) {
+               return DNSSEC_INVALID_KEY_ALGORITHM;
+       }
+
+       gnutls_privkey_t privkey = NULL;
+       int r = dnssec_der_to_privkey(der, &privkey);
+       if (r != DNSSEC_EOK) {
+               return r;
+       }
+
+       r = key_set_private_key(key, privkey);
+       if (r != DNSSEC_EOK) {
+               gnutls_privkey_deinit(privkey);
+               return r;
+       }
+
+       return DNSSEC_EOK;
+}
index fa463f69d87a3af869fdb107fdfb27da1841dcac..c2f8a68116267f030d33e6462b33d7b645e37081 100644 (file)
@@ -54,6 +54,49 @@ int dnssec_pem_to_x509(const dnssec_binary_t *pem, gnutls_x509_privkey_t *key)
        return DNSSEC_EOK;
 }
 
+
+static int dnssec_der_to_x509(const dnssec_binary_t *der, gnutls_x509_privkey_t *key)
+{
+       if (!der || !key) {
+               return DNSSEC_EINVAL;
+       }
+
+       gnutls_datum_t data = binary_to_datum(der);
+
+       gnutls_datum_t result;
+
+       int r = gnutls_base64_decode2(&data, &result);
+
+       // handle r
+       if (r != GNUTLS_E_SUCCESS) {
+               return DNSSEC_ENOMEM;
+       }
+
+
+       gnutls_x509_privkey_t _key = NULL;
+       r = gnutls_x509_privkey_init(&_key);
+       if (r != GNUTLS_E_SUCCESS) {
+               gnutls_free(result.data);
+               return DNSSEC_ENOMEM;
+       }
+
+       int format = GNUTLS_X509_FMT_DER;
+       char *password = NULL;
+       int flags = GNUTLS_PKCS_PLAIN;
+       r = gnutls_x509_privkey_import_pkcs8(_key, &result, format, password, flags);
+       if (r != GNUTLS_E_SUCCESS) {
+               gnutls_x509_privkey_deinit(_key);
+               gnutls_free(result.data);
+               return DNSSEC_PKCS8_IMPORT_ERROR;
+       }
+
+       gnutls_free(result.data);
+
+       *key = _key;
+
+       return DNSSEC_EOK;
+}
+
 _public_
 int dnssec_pem_to_privkey(const dnssec_binary_t *pem, gnutls_privkey_t *key)
 {
@@ -87,6 +130,39 @@ int dnssec_pem_to_privkey(const dnssec_binary_t *pem, gnutls_privkey_t *key)
        return DNSSEC_EOK;
 }
 
+_public_
+int dnssec_der_to_privkey(const dnssec_binary_t *pem, gnutls_privkey_t *key)
+{
+       if (!pem || !key) {
+               return DNSSEC_EINVAL;
+       }
+
+       gnutls_x509_privkey_t key_x509 = NULL;
+       int r = dnssec_der_to_x509(pem, &key_x509);
+       if (r != DNSSEC_EOK) {
+               return r;
+       }
+
+       gnutls_privkey_t key_abs = NULL;
+       r = gnutls_privkey_init(&key_abs);
+       if (r != GNUTLS_E_SUCCESS) {
+               gnutls_x509_privkey_deinit(key_x509);
+               return DNSSEC_ENOMEM;
+       }
+
+       int flags = GNUTLS_PRIVKEY_IMPORT_AUTO_RELEASE;
+       r = gnutls_privkey_import_x509(key_abs, key_x509, flags);
+       if (r != GNUTLS_E_SUCCESS) {
+               gnutls_x509_privkey_deinit(key_x509);
+               gnutls_privkey_deinit(key_abs);
+               return DNSSEC_ENOMEM;
+       }
+
+       *key = key_abs;
+
+       return DNSSEC_EOK;
+}
+
 static int try_export_pem(gnutls_x509_privkey_t key, dnssec_binary_t *pem)
 {
        assert(key);
index c84d87dba4cafbfbefe1fb94383f534d9933a4ac..2c478dd3a5184b6c1ca887b994c8c90bd80f57ec 100644 (file)
@@ -50,6 +50,16 @@ int dnssec_pem_to_x509(const dnssec_binary_t *pem, gnutls_x509_privkey_t *key);
  */
 int dnssec_pem_to_privkey(const dnssec_binary_t *pem, gnutls_privkey_t *key);
 
+/*!
+ * Create GnuTLS private key from unencrypted DER-encoded PKCS #8 data.
+ *
+ * \param[in]  der  DER binary data.
+ * \param[out] key  Resulting private key.
+ *
+ * \return Error code, DNSSEC_EOK if successful.
+ */
+int dnssec_der_to_privkey(const dnssec_binary_t *der, gnutls_privkey_t *key);
+
 /*!
  * Export GnuTLS X.509 private key to PEM binary.
  *
index 381a8dee6ea5eba4cd4adb510b01ca5e0ba686bf..f4133c192958a6e8bb2ce66da992d7d390903656 100644 (file)
@@ -745,7 +745,17 @@ static int parse_payload(knot_pkt_t *pkt, unsigned flags)
 
        /* Check for trailing garbage. */
        if (pkt->parsed < pkt->size) {
+#ifdef ENABLE_TRAILING_BYTES
+               for (size_t trail = pkt->parsed; trail < pkt->size; trail++) {
+                       if (pkt->wire[trail] != 0) {
+                               return KNOT_ETRAIL;
+                       }
+               }
+
+               pkt->size = pkt->parsed;
+#else
                return KNOT_ETRAIL;
+#endif
        }
 
        return KNOT_EOK;
index 698ac3dbfee98681b5c3b6fe6b995e982f5f9367..09cea85445acc35478ae6c3f448d4405be282a44 100644 (file)
@@ -48,6 +48,7 @@ enum knot_wire_sizes {
        KNOT_WIRE_QUESTION_MIN_SIZE = 5,
        KNOT_WIRE_RR_MIN_SIZE = 11,
        KNOT_WIRE_MIN_PKTSIZE = 512,
+       KNOT_WIRE_MAX_UDP_PKTSIZE = KNOT_MAX_UDP_REQRESP_SIZE_BYTES,
        KNOT_WIRE_MAX_PKTSIZE = 65535,
        KNOT_WIRE_MAX_PAYLOAD = KNOT_WIRE_MAX_PKTSIZE
                                - KNOT_WIRE_HEADER_SIZE
index 110422f3341b314259f8cb08cddab606849b12f8..c7b0d2789c67caf1789fdcc1483c79e571b78be1 100644 (file)
@@ -146,18 +146,28 @@ int knot_rrset_rr_to_canonical(knot_rrset_t *rrset)
        /* Convert owner for all RRSets. */
        knot_dname_to_lower(rrset->owner);
 
+       return knot_rdata_to_canonical(rrset->rrs.rdata->data, rrset->rrs.rdata->len, rrset->type);
+}
+
+_public_
+int knot_rdata_to_canonical(uint8_t *data, uint16_t len, uint16_t type)
+{
+       if (data == NULL) {
+               return KNOT_EINVAL;
+       }
+
        /* Convert DNAMEs in RDATA only for RFC4034 types. */
-       if (!knot_rrtype_should_be_lowercased(rrset->type)) {
+       if (!knot_rrtype_should_be_lowercased(type)) {
                return KNOT_EOK;
        }
 
-       const knot_rdata_descriptor_t *desc = knot_get_rdata_descriptor(rrset->type);
+       const knot_rdata_descriptor_t *desc = knot_get_rdata_descriptor(type);
        if (desc->type_name == NULL) {
-               desc = knot_get_obsolete_rdata_descriptor(rrset->type);
+               desc = knot_get_obsolete_rdata_descriptor(type);
        }
 
-       uint16_t rdlen = rrset->rrs.rdata->len;
-       uint8_t *pos = rrset->rrs.rdata->data;
+       uint16_t rdlen = len;
+       uint8_t *pos = data;
        uint8_t *endpos = pos + rdlen;
 
        /* No RDATA */
@@ -167,8 +177,8 @@ int knot_rrset_rr_to_canonical(knot_rrset_t *rrset)
 
        /* Otherwise, whole and not malformed RDATA are expected. */
        for (int i = 0; desc->block_types[i] != KNOT_RDATA_WF_END; ++i) {
-               int type = desc->block_types[i];
-               switch (type) {
+               int block_type = desc->block_types[i];
+               switch (block_type) {
                case KNOT_RDATA_WF_COMPRESSIBLE_DNAME:
                case KNOT_RDATA_WF_DECOMPRESSIBLE_DNAME:
                case KNOT_RDATA_WF_FIXED_DNAME:
@@ -187,8 +197,8 @@ int knot_rrset_rr_to_canonical(knot_rrset_t *rrset)
                        break;
                default:
                        /* Fixed size block */
-                       assert(type > 0);
-                       pos += type;
+                       assert(block_type > 0);
+                       pos += block_type;
                }
        }
 
index fdc57196a299a06909f1c5d75925911053965072..5b0d0297dd005ec450032ca73f87bcab9f24d08a 100644 (file)
@@ -184,6 +184,22 @@ bool knot_rrset_is_nsec3rel(const knot_rrset_t *rr);
  */
 int knot_rrset_rr_to_canonical(knot_rrset_t *rrset);
 
+/*!
+ * \brief Convert one Rdata into canonical format.
+ *
+ * RDATA domain names are converted only
+ * for types listed in RFC 4034, Section 6.2, except for NSEC (updated by
+ * RFC 6840, Section 5.1) and A6 (not supported).
+ *
+ * \warning This function expects either empty RDATA or full, not malformed
+ *          RDATA. If malformed RRSet is passed to this function, memory errors
+ *          may occur.
+ *
+ * \param data  Rdata to convert.
+ * \param len   Length of Rdata
+ * \param type  Type of Rdata.
+ */
+int knot_rdata_to_canonical(uint8_t *data, uint16_t len, uint16_t type);
 /*!
  * \brief Size of rrset in wire format.
  *
index db45ce0bd5430e142eef9e704a42232a6ac63562..b82abe318cd246fde12432c9559ccbe5fc23697c 100644 (file)
@@ -6,8 +6,8 @@ noinst_LTLIBRARIES += libknotus.la
 
 libknotus_la_CPPFLAGS = $(AM_CPPFLAGS) $(CFLAG_VISIBILITY) $(gnutls_CFLAGS) \
                         $(libedit_CFLAGS) $(libidn2_CFLAGS) $(libidn_CFLAGS) \
-                        $(libkqueue_CFLAGS) $(libnghttp2_CFLAGS) $(lmdb_CFLAGS)
-libknotus_la_LDFLAGS  = $(AM_LDFLAGS) $(LDFLAG_EXCLUDE_LIBS)
+                        $(libkqueue_CFLAGS) $(libnghttp2_CFLAGS) $(lmdb_CFLAGS) ${fuzzer_CFLAGS}
+libknotus_la_LDFLAGS  = $(AM_LDFLAGS) $(LDFLAG_EXCLUDE_LIBS) ${fuzzer_CFLAGS}
 libknotus_la_LIBADD   = $(libidn2_LIBS) $(libidn_LIBS) $(libnghttp2_LIBS)
 libknotus_LIBS        = libknotus.la libknot.la libdnssec.la $(libcontrib_LIBS) \
                         $(gnutls_LIBS) $(libedit_LIBS)