]> git.ipfire.org Git - thirdparty/knot-resolver.git/commitdiff
daemon/defer: add hard-timeout for interrupting expensive computations
authorLukáš Ondráček <lukas.ondracek@nic.cz>
Thu, 9 Jan 2025 00:04:09 +0000 (01:04 +0100)
committerVladimír Čunát <vladimir.cunat@nic.cz>
Wed, 20 Aug 2025 10:56:22 +0000 (12:56 +0200)
daemon/defer.c
daemon/defer.h
daemon/lua/kres-gen-33.lua
daemon/main.c
doc/_static/config.schema.json
doc/user/config-defer.rst
python/knot_resolver/datamodel/defer_schema.py
python/knot_resolver/datamodel/templates/defer.lua.j2

index 5d0c2a43f113ffab9a619eb1917dbcb64a7a5d0a..bb9f2daba94146e9d4ed0d80ae4f00168ad532b7 100644 (file)
@@ -4,6 +4,7 @@
 
 #include <math.h>
 #include <stdatomic.h>
+#include <unistd.h>
 #include "daemon/defer.h"
 #include "daemon/session2.h"
 #include "daemon/udp_queue.h"
@@ -65,6 +66,7 @@ struct defer {
        size_t capacity;
        kru_price_t max_decay;
        uint32_t log_period;
+       uint32_t hard_timeout;
        int cpus;
        bool using_avx2;
        _Atomic uint32_t log_time;
@@ -646,9 +648,36 @@ static void defer_queues_idle(uv_idle_t *handle)
        VERBOSE_LOG("POLL\n");
 }
 
+/// SIGALRM handler enforcing the hard timeout; also called directly with signum == 0
+/// from defer_init() to schedule the first alarm.
+///
+/// Does nothing when defer is not set up or hard_timeout is 0 (disabled).
+/// While a measurement is in progress (defer_sample_state.is_accounting), the time elapsed
+/// since defer_sample_state.stamp (ns) is compared with defer->hard_timeout (ms).
+/// Once the limit is reached, defer_charge() is called with the elapsed time,
+/// a critical message is logged and the process is aborted.
+/// Otherwise the next alarm is scheduled for the remaining time rounded up to whole seconds.
+static void defer_alarm(int signum)
+{
+       if (!defer || (defer->hard_timeout == 0)) return;
+
+       uint64_t elapsed = 0; // ns; stays 0 when nothing is being measured
+       if (defer_sample_state.is_accounting) {
+               elapsed = defer_get_stamp() - defer_sample_state.stamp;
+               VERBOSE_LOG("SIGALRM %s, host %s used %.3f s of cpu time on ongoing operation\n",
+                               signum ? "received" : "initialized",
+                               kr_straddr(&defer_sample_state.addr.ip), elapsed / 1000000000.0); // XXX
+       } else {
+               VERBOSE_LOG("SIGALRM %s, no measuring in progress\n",
+                               signum ? "received" : "initialized");
+       }
+
+       // Subtract in signed 64-bit arithmetic (ms - ns converted to ms),
+       // so that an overrun yields a negative value without relying on unsigned wrap-around.
+       int64_t rest_to_timeout_ms = (int64_t)defer->hard_timeout - (int64_t)(elapsed / 1000000);
+       if (rest_to_timeout_ms <= 0) {
+               uv_update_time(uv_default_loop()); // TODO more conceptual solution?
+               defer_charge(elapsed, &defer_sample_state.addr, defer_sample_state.stream);
+               kr_log_crit(DEFER, "Host %s used %0.3f s of cpu time continuously, interrupting kresd.\n",
+                       kr_straddr(&defer_sample_state.addr.ip), elapsed / 1000000000.0);
+               classify(&defer_sample_state.addr, defer_sample_state.stream); // XXX
+               __sync_synchronize();
+               abort();
+       }
+
+       // alarm() has one-second granularity; round up so the next check is never before the limit.
+       alarm((unsigned int)((rest_to_timeout_ms + 999) / 1000));
+}
 
 /// Initialize shared memory, queues. To be called from Lua.
-int defer_init(const char *mmap_file, uint32_t log_period, int cpus)  // TODO possibly remove cpus; not needed
+int defer_init(const char *mmap_file, uint32_t log_period, uint32_t hard_timeout, int cpus)
+       // TODO possibly remove cpus; not needed
 {
        defer_initialized = true;
        if (mmap_file == NULL) {
@@ -666,6 +695,7 @@ int defer_init(const char *mmap_file, uint32_t log_period, int cpus)  // TODO po
                .capacity = KRU_CAPACITY,
                .max_decay = MAX_DECAY,
                .log_period = log_period,
+               .hard_timeout = hard_timeout,
                .cpus = cpus,
                .using_avx2 = using_avx2(),
        };
@@ -680,6 +710,7 @@ int defer_init(const char *mmap_file, uint32_t log_period, int cpus)  // TODO po
                        sizeof(header.capacity) +
                        sizeof(header.max_decay) +
                        sizeof(header.log_period) +
+                       sizeof(header.hard_timeout) +
                        sizeof(header.cpus),
                "detected padding with undefined data inside mmapped header");
 
@@ -717,6 +748,9 @@ int defer_init(const char *mmap_file, uint32_t log_period, int cpus)  // TODO po
        for (size_t i = 0; i < QUEUES_CNT; i++)
                queue_init(queues[i]);
 
+       signal(SIGALRM, defer_alarm);
+       defer_alarm(0);
+
        return 0;
 
 fail:
index e6ade87aad9d2cf2094c0ef3bda53755e15eae09..71769c6f44b313aeb602d78f5a9495ecdc599471 100644 (file)
@@ -9,9 +9,9 @@
 
 /// Initialize defer, incl. shared memory with KRU, excl. idle.
 KR_EXPORT
-int defer_init(const char *mmap_file, uint32_t log_period, int cpus);
+int defer_init(const char *mmap_file, uint32_t log_period, uint32_t hard_timeout, int cpus);
 
-/// Initialize idle.
+/// Initialize idle and SIGALRM handler.
 int defer_init_idle(uv_loop_t *loop);
 
 /// Deinitialize shared memory.
@@ -92,9 +92,10 @@ static inline void defer_sample_start_stamp(uint64_t stamp)
 {
        if (!defer) return;
        kr_assert(!defer_sample_state.is_accounting);
-       defer_sample_state.is_accounting = true;
        defer_sample_state.stamp = stamp;
        defer_sample_state.addr.ip.sa_family = AF_UNSPEC;
+       __sync_synchronize();
+       defer_sample_state.is_accounting = true;
 }
 
 /// Internal; stop accounting work at specified timestamp and charge the source if applicable.
@@ -103,6 +104,7 @@ static inline void defer_sample_stop_stamp(uint64_t stamp)
        if (!defer) return;
        kr_assert(defer_sample_state.is_accounting);
        defer_sample_state.is_accounting = false;
+       __sync_synchronize();
 
        if (defer_sample_state.addr.ip.sa_family == AF_UNSPEC) return;
 
@@ -159,7 +161,10 @@ static inline void defer_sample_stop(defer_sample_state_t *prev_state, bool reus
 
        // resume
        if (prev_state) {
-               defer_sample_state = *prev_state;
+               defer_sample_state.addr = prev_state->addr;
+               defer_sample_state.stream = prev_state->stream;
                defer_sample_state.stamp = stamp;
+               __sync_synchronize();
+               defer_sample_state.is_accounting = prev_state->is_accounting;
        }
 }
index 83269222ca51b42c72421d9a52f45d008bbac38d..6db038c788afd0f5a4f21349104bd883622f771a 100644 (file)
@@ -635,7 +635,7 @@ struct qr_task *worker_resolve_start(knot_pkt_t *, struct kr_qflags);
 int zi_zone_import(const zi_config_t);
 _Bool ratelimiting_request_begin(struct kr_request *);
 int ratelimiting_init(const char *, size_t, uint32_t, uint32_t, uint16_t, uint32_t, _Bool);
-int defer_init(const char *, uint32_t, int);
+int defer_init(const char *, uint32_t, uint32_t, int);
 void defer_set_price_factor16(struct kr_request *, uint32_t);
 struct engine {
        char _stub[];
index 3925a15c18c80b82b0c319d7951a418d6038f40b..801484184aea92aa4a76afbc337e0e3eba284e7c 100644 (file)
@@ -553,7 +553,7 @@ int main(int argc, char **argv)
 
        uv_loop_t *loop = uv_default_loop();
        /* Catch some signals. */
-       uv_signal_t sigint, sigterm, sigchld;
+       uv_signal_t sigint, sigterm, sigchld; // +SIGALRM handled by defer
        if (true) ret = uv_signal_init(loop, &sigint);
        if (!ret) ret = uv_signal_init(loop, &sigterm);
        if (!ret) ret = uv_signal_init(loop, &sigchld);
@@ -618,7 +618,7 @@ int main(int argc, char **argv)
 
        if (!defer_initialized) {
                kr_log_warning(SYSTEM, "Prioritization not initialized from Lua, using hardcoded default.\n");
-               ret = defer_init("defer", 1, 1);
+               ret = defer_init("defer", 1, 0, 1);
                if (ret) {
                        ret = EXIT_FAILURE;
                        goto cleanup;
index 88e11901d818a2a2a999cb15a8cf32294196da20..47e9fd1b69331d986ef008bece5693cd3aec0d16 100644 (file)
                         }
                     },
                     "default": {
-                        "files_watchdog": true,
+                        "files_watchdog": false,
                         "cert_file": null,
                         "key_file": null,
                         "sticket_secret": null,
                 },
                 "address_renumbering": null,
                 "tls": {
-                    "files_watchdog": true,
+                    "files_watchdog": false,
                     "cert_file": null,
                     "key_file": null,
                     "sticket_secret": null,
                     "pattern": "^(\\d+)(us|ms|s|m|h|d)$",
                     "description": "Minimal time between two log messages, or '0s' to disable.",
                     "default": "0s"
+                },
+                "hard-timeout": {
+                    "type": "string",
+                    "pattern": "^(\\d+)(us|ms|s|m|h|d)$",
+                    "description": "If a measured operation lasts longer, kresd is interrupted; use '0s' to disable.",
+                    "default": "0s"
                 }
             },
             "default": {
                 "enabled": false,
-                "log_period": "0s"
+                "log_period": "0s",
+                "hard_timeout": "0s"
             }
         },
         "lua": {
index 4d26ee403c21f2637bde85de7c76cd545789d6d7..2baafa958a283cfe6cc9d5b667d9f09384b12769 100644 (file)
@@ -8,6 +8,7 @@ Request prioritization (defer)
 Defer tries to mitigate DoS attacks by measuring cpu time consumption of different hosts and networks
 and deferring future requests from the same origin.
 If there is not enough time to process all the requests, the lowest priority ones are dropped.
+It also allows setting a hard timeout on a continuous computation on a single request.
 
 The time measurements are taken into account only for TCP-based queries (including DoT and DoH),
 as the source address of plain UDP can be forged.
@@ -46,6 +47,30 @@ The limits can be adjusted for different packet origins using :option:`price-fac
     and sources with more dropped queries have greater probability to be chosen.
 
 
+.. option:: defer/hard-timeout: <time ms|s|m|h|d>
+
+    :default: 0s
+
+    Limit on the CPU time consumed continuously by a single request, or ``0s`` to disable.
+    Exceeding it crashes kresd; use with care.
+
+    This is intended as a last-resort defence against yet unknown bugs
+    that would allow an attacker to trigger very expensive computations with a single request,
+    freezing the kresd process for several seconds or minutes.
+
+    It is based on scheduling a SIGALRM to be delivered after the timeout (or up to 1s later),
+    which then interrupts the computation.
+    After the interrupt, the priority of the request's origin is decreased according to the duration,
+    and the kresd process is terminated (dropping all pending, but probably already timed-out, requests)
+    and started again by the manager.
+    To keep the data with measurements and priorities alive during restart,
+    it is crucial to use :ref:`multiple workers <config-multiple-workers>`
+    as those data are shared between them and disappear with the last one.
+
+    A continuous work on a single request usually takes under 1 ms. (TODO check)
+    Set the timeout at least to several seconds to avoid random crashes. (TODO or more?)
+
+
 Implementation details
 ----------------------
 
@@ -64,4 +89,3 @@ Further ordering is according to the time of arrival.
 If a request is deferred for too long, it gets dropped.
 This can happen also for UDP requests,
 which are stored in a single queue ordered by the time of their arrival.
-
index 81546a0756841e5a2671e7b2e47281763a3f8f83..8ec7cf8080a4de37c06653c3937ec19c9e5ff1dd 100644 (file)
@@ -9,7 +9,9 @@ class DeferSchema(ConfigSchema):
     ---
     enabled: Use request prioritization.
     log_period: Minimal time between two log messages, or '0s' to disable.
+    hard_timeout: If a measured operation lasts longer, kresd is interrupted; use '0s' to disable.
     """
 
     enabled: bool = False
     log_period: TimeUnit = TimeUnit("0s")
+    hard_timeout: TimeUnit = TimeUnit("0s")
index 5dacff75fbc60f76c8d901a01a8c90dcbdd6122d..e158f5e486895e3999fcaabeade05c3a0f45b9d8 100644 (file)
@@ -4,7 +4,8 @@
 assert(C.defer_init(
        '{{ cfg.rundir }}/defer',
        {{ cfg.defer.log_period.millis() }},
+       {{ cfg.defer.hard_timeout.millis() }},
        {{ cfg.workers }}) == 0)
 {% else %}
-assert(C.defer_init(nil, 0, 0) == 0)
+assert(C.defer_init(nil, 0, 0, 0) == 0)
 {%- endif %}