daemon/defer: add hard-timeout for interrupting expensive computations

author Lukáš Ondráček <lukas.ondracek@nic.cz>

Thu, 9 Jan 2025 00:04:09 +0000 (01:04 +0100)

committer Lukáš Ondráček <lukas.ondracek@nic.cz>

Wed, 5 Mar 2025 14:51:41 +0000 (15:51 +0100)
author Lukáš Ondráček <lukas.ondracek@nic.cz>
Thu, 9 Jan 2025 00:04:09 +0000 (01:04 +0100)
committer Lukáš Ondráček <lukas.ondracek@nic.cz>
Wed, 5 Mar 2025 14:51:41 +0000 (15:51 +0100)
diff --git a/daemon/defer.c b/daemon/defer.c

index aeaded206d0f2ead4f2035667b5803ad7cafc579..42bd5c11dea581525ca53b1e09377787ed0bf47a 100644 (file)
--- a/daemon/defer.c
+++ b/daemon/defer.c
@@ -4,6 +4,7 @@
  
  #include <math.h>
  #include <stdatomic.h>
+#include <unistd.h>
  #include "daemon/defer.h"
  #include "daemon/session2.h"
  #include "daemon/udp_queue.h"
@@ -63,6 +64,7 @@ struct defer {
         size_t capacity;
         kru_price_t max_decay;
         uint32_t log_period;
+       uint32_t hard_timeout;
         int cpus;
         bool using_avx2;
         _Atomic uint32_t log_time;
@@ -642,9 +644,36 @@ static void defer_queues_idle(uv_idle_t *handle)
         VERBOSE_LOG("POLL\n");
  }
  
+/// SIGALRM handler enforcing hard_timeout; defer_init() also calls it with signum == 0 to arm the first alarm.
+static void defer_alarm(int signum)
+{
+       if (!defer || (defer->hard_timeout == 0)) return; // defer not set up, or hard timeout disabled
+       uint64_t elapsed = 0; // ns spent so far on the measured operation; stays 0 if nothing is measured
+       if (defer_sample_state.is_accounting) {
+               elapsed = defer_get_stamp() - defer_sample_state.stamp;
+               VERBOSE_LOG("SIGALRM %s, host %s used %.3f s of cpu time on ongoing operation\n",
+                               signum ? "received" : "initialized",
+                               kr_straddr(&defer_sample_state.addr.ip), elapsed / 1000000000.0); // XXX
+       } else {
+               VERBOSE_LOG("SIGALRM %s, no measuring in progress\n",
+                               signum ? "received" : "initialized");
+       }
+       int64_t rest_to_timeout_ms = (int64_t)defer->hard_timeout - (int64_t)(elapsed / 1000000); // ms - ns; signed, no unsigned wraparound
+       if (rest_to_timeout_ms <= 0) { // timeout exceeded: charge the origin, log, and crash on purpose
+               uv_update_time(uv_default_loop()); // TODO more conceptual solution?
+               defer_charge(elapsed, &defer_sample_state.addr, defer_sample_state.stream);
+               kr_log_crit(DEFER, "Host %s used %0.3f s of cpu time continuously, interrupting kresd.\n",
+                       kr_straddr(&defer_sample_state.addr.ip), elapsed / 1000000000.0);
+               classify(&defer_sample_state.addr, defer_sample_state.stream); // XXX
+               __sync_synchronize(); // full barrier before aborting; presumably makes the charge visible to other workers - TODO confirm
+               abort();
+       }
+       alarm((rest_to_timeout_ms + 999) / 1000); // alarm() takes whole seconds: round the rest up and re-arm
+}
  
  /// Initialize shared memory, queues. To be called from Lua.
-int defer_init(const char *mmap_file, uint32_t log_period, int cpus)  // TODO possibly remove cpus; not needed
+int defer_init(const char *mmap_file, uint32_t log_period, uint32_t hard_timeout, int cpus)
+       // TODO possibly remove cpus; not needed
  {
         defer_initialized = true;
         if (mmap_file == NULL) {
@@ -662,6 +691,7 @@ int defer_init(const char *mmap_file, uint32_t log_period, int cpus)  // TODO po
                 .capacity = KRU_CAPACITY,
                 .max_decay = MAX_DECAY,
                 .log_period = log_period,
+               .hard_timeout = hard_timeout,
                 .cpus = cpus,
                 .using_avx2 = using_avx2(),
         };
@@ -676,6 +706,7 @@ int defer_init(const char *mmap_file, uint32_t log_period, int cpus)  // TODO po
                         sizeof(header.capacity) +
                         sizeof(header.max_decay) +
                         sizeof(header.log_period) +
+                       sizeof(header.hard_timeout) +
                         sizeof(header.cpus),
                 "detected padding with undefined data inside mmapped header");
  
@@ -713,6 +744,9 @@ int defer_init(const char *mmap_file, uint32_t log_period, int cpus)  // TODO po
         for (size_t i = 0; i < QUEUES_CNT; i++)
                 queue_init(queues[i]);
  
+       signal(SIGALRM, defer_alarm);
+       defer_alarm(0);
+
         return 0;
  
  fail:
diff --git a/daemon/defer.h b/daemon/defer.h

index e6ade87aad9d2cf2094c0ef3bda53755e15eae09..71769c6f44b313aeb602d78f5a9495ecdc599471 100644 (file)
--- a/daemon/defer.h
+++ b/daemon/defer.h
@@ -9,9 +9,9 @@
  
  /// Initialize defer, incl. shared memory with KRU, excl. idle.
  KR_EXPORT
-int defer_init(const char *mmap_file, uint32_t log_period, int cpus);
+int defer_init(const char *mmap_file, uint32_t log_period, uint32_t hard_timeout, int cpus);
  
-/// Initialize idle.
+/// Initialize idle and SIGALRM handler.
  int defer_init_idle(uv_loop_t *loop);
  
  /// Deinitialize shared memory.
@@ -92,9 +92,10 @@ static inline void defer_sample_start_stamp(uint64_t stamp)
  {
         if (!defer) return;
         kr_assert(!defer_sample_state.is_accounting);
-       defer_sample_state.is_accounting = true;
         defer_sample_state.stamp = stamp;
         defer_sample_state.addr.ip.sa_family = AF_UNSPEC;
+       __sync_synchronize();
+       defer_sample_state.is_accounting = true;
  }
  
  /// Internal; stop accounting work at specified timestamp and charge the source if applicable.
@@ -103,6 +104,7 @@ static inline void defer_sample_stop_stamp(uint64_t stamp)
         if (!defer) return;
         kr_assert(defer_sample_state.is_accounting);
         defer_sample_state.is_accounting = false;
+       __sync_synchronize();
  
         if (defer_sample_state.addr.ip.sa_family == AF_UNSPEC) return;
  
@@ -159,7 +161,10 @@ static inline void defer_sample_stop(defer_sample_state_t *prev_state, bool reus
  
         // resume
         if (prev_state) {
-               defer_sample_state = *prev_state;
+               defer_sample_state.addr = prev_state->addr;
+               defer_sample_state.stream = prev_state->stream;
                 defer_sample_state.stamp = stamp;
+               __sync_synchronize();
+               defer_sample_state.is_accounting = prev_state->is_accounting;
         }
  }
diff --git a/daemon/lua/kres-gen-33.lua b/daemon/lua/kres-gen-33.lua

index 8147af88f8c1228a696e347aec8f37a90fc37a96..f779002798fa985e5b316fa5ef6a4aba8d95fb61 100644 (file)
--- a/daemon/lua/kres-gen-33.lua
+++ b/daemon/lua/kres-gen-33.lua
@@ -620,7 +620,7 @@ struct qr_task *worker_resolve_start(knot_pkt_t *, struct kr_qflags);
  int zi_zone_import(const zi_config_t);
  _Bool ratelimiting_request_begin(struct kr_request *);
  int ratelimiting_init(const char *, size_t, uint32_t, uint32_t, uint16_t, uint32_t, _Bool);
-int defer_init(const char *, uint32_t, int);
+int defer_init(const char *, uint32_t, uint32_t, int);
  void defer_set_price_factor16(struct kr_request *, uint32_t);
  struct engine {
         char _stub[];
diff --git a/daemon/main.c b/daemon/main.c

index a7b9c92b6d9c304c462f44d297c3fd783700ec42..5f0717d112a68eb37ab82221b5d5a697b2fe23ef 100644 (file)
--- a/daemon/main.c
+++ b/daemon/main.c
@@ -553,7 +553,7 @@ int main(int argc, char **argv)
  
         uv_loop_t *loop = uv_default_loop();
         /* Catch some signals. */
-       uv_signal_t sigint, sigterm, sigchld;
+       uv_signal_t sigint, sigterm, sigchld; // +SIGALRM handled by defer
         if (true) ret = uv_signal_init(loop, &sigint);
         if (!ret) ret = uv_signal_init(loop, &sigterm);
         if (!ret) ret = uv_signal_init(loop, &sigchld);
@@ -618,7 +618,7 @@ int main(int argc, char **argv)
  
         if (!defer_initialized) {
                 kr_log_warning(SYSTEM, "Prioritization not initialized from Lua, using hardcoded default.\n");
-               ret = defer_init("defer", 1, 1);
+               ret = defer_init("defer", 1, 0, 1);
                 if (ret) {
                         ret = EXIT_FAILURE;
                         goto cleanup;
diff --git a/doc/_static/config.schema.json b/doc/_static/config.schema.json

index 0bedbbc4ed1b314e73bd6539b33142f0148b0452..b9063fd4b9c6945a48336703ce5adc71a946cc00 100644 (file)
--- a/doc/_static/config.schema.json
+++ b/doc/_static/config.schema.json
@@ -374,7 +374,7 @@
                          }
                      },
                      "default": {
-                        "files_watchdog": true,
+                        "files_watchdog": false,
                          "cert_file": null,
                          "key_file": null,
                          "sticket_secret": null,
@@ -533,7 +533,7 @@
                  },
                  "address_renumbering": null,
                  "tls": {
-                    "files_watchdog": true,
+                    "files_watchdog": false,
                      "cert_file": null,
                      "key_file": null,
                      "sticket_secret": null,
@@ -1751,11 +1751,18 @@
                      "pattern": "^(\\d+)(us|ms|s|m|h|d)$",
                      "description": "Minimal time between two log messages, or '0s' to disable.",
                      "default": "0s"
+                },
+                "hard-timeout": {
+                    "type": "string",
+                    "pattern": "^(\\d+)(us|ms|s|m|h|d)$",
+                    "description": "If a measured operation lasts longer, kresd is interrupted; use '0s' to disable.",
+                    "default": "0s"
                  }
              },
              "default": {
                  "enabled": false,
-                "log_period": "0s"
+                "log_period": "0s",
+                "hard_timeout": "0s"
              }
          },
          "lua": {
diff --git a/doc/user/config-defer.rst b/doc/user/config-defer.rst

index 4d26ee403c21f2637bde85de7c76cd545789d6d7..2baafa958a283cfe6cc9d5b667d9f09384b12769 100644 (file)
--- a/doc/user/config-defer.rst
+++ b/doc/user/config-defer.rst
@@ -8,6 +8,7 @@ Request prioritization (defer)
  Defer tries to mitigate DoS attacks by measuring cpu time consumption of different hosts and networks
  and deferring future requests from the same origin.
  If there is not enough time to process all the requests, the lowest priority ones are dropped.
+It also allows setting a hard timeout on a continuous computation on a single request.
  
  The time measurements are taken into account only for TCP-based queries (including DoT and DoH),
  as the source address of plain UDP can be forged.
@@ -46,6 +47,30 @@ The limits can be adjusted for different packet origins using :option:`price-fac
      and sources with more dropped queries have greater probability to be chosen.
  
  
+.. option:: defer/hard-timeout: <time ms|s|m|h|d>
+
+    :default: 0s
+
+    Limit on the CPU time consumed continuously by a single request, or ``0s`` to disable.
+    Exceeding it causes kresd to crash; use with care.
+
+    This is intended as a last resort defence against yet unknown bugs
+    allowing an attacker to initiate very expensive computations by a single request
+    resulting in freezing kresd process for several seconds or minutes.
+
+    It is based on scheduling a SIGALRM to be delivered after the timeout (or up to 1s later),
+    which then interrupts the computation.
+    After the interrupt, the priority of the request's origin is decreased according to the duration,
+    the kresd process is terminated (dropping all pending, but probably already timed-out, requests)
+    and started again by the manager.
+    To keep the data with measurements and priorities alive during restart,
+    it is crucial to use :ref:`multiple workers <config-multiple-workers>`
+    as those data are shared between them and disappear with the last one.
+
+    A continuous work on a single request usually takes under 1 ms. (TODO check)
+    Set the timeout at least to several seconds to avoid random crashes. (TODO or more?)
+
+
  Implementation details
  ----------------------
  
@@ -64,4 +89,3 @@ Further ordering is according to the time of arrival.
  If a request is deferred for too long, it gets dropped.
  This can happen also for UDP requests,
  which are stored in a single queue ordered by the time of their arrival.
-
diff --git a/python/knot_resolver/datamodel/defer_schema.py b/python/knot_resolver/datamodel/defer_schema.py

index 81546a0756841e5a2671e7b2e47281763a3f8f83..8ec7cf8080a4de37c06653c3937ec19c9e5ff1dd 100644 (file)
--- a/python/knot_resolver/datamodel/defer_schema.py
+++ b/python/knot_resolver/datamodel/defer_schema.py
@@ -9,7 +9,9 @@ class DeferSchema(ConfigSchema):
      ---
      enabled: Use request prioritization.
      log_period: Minimal time between two log messages, or '0s' to disable.
+    hard_timeout: If a measured operation lasts longer, kresd is interrupted; use '0s' to disable.
      """
  
      enabled: bool = False
      log_period: TimeUnit = TimeUnit("0s")
+    hard_timeout: TimeUnit = TimeUnit("0s")
diff --git a/python/knot_resolver/datamodel/templates/defer.lua.j2 b/python/knot_resolver/datamodel/templates/defer.lua.j2

index 131b71c4760943e000e8328e75e4900659efe933..18549aaf90b92ddb9dfdbb57a90008de6b018e3d 100644 (file)
--- a/python/knot_resolver/datamodel/templates/defer.lua.j2
+++ b/python/knot_resolver/datamodel/templates/defer.lua.j2
@@ -4,7 +4,8 @@
  assert(C.defer_init(
         '{{ cfg.rundir }}/defer',
         {{ cfg.defer.log_period.millis() }},
+       {{ cfg.defer.hard_timeout.millis() }},
         {{ cfg.workers }}) == 0)
  {% else %}
-assert(C.defer_init(nil, 0, 0) == 0)
+assert(C.defer_init(nil, 0, 0, 0) == 0)
  {%- endif %}
author	Lukáš Ondráček <lukas.ondracek@nic.cz>
	Thu, 9 Jan 2025 00:04:09 +0000 (01:04 +0100)
committer	Lukáš Ondráček <lukas.ondracek@nic.cz>
	Wed, 5 Mar 2025 14:51:41 +0000 (15:51 +0100)
daemon/defer.c		patch \| blob \| blame \| history
daemon/defer.h		patch \| blob \| blame \| history
daemon/lua/kres-gen-33.lua		patch \| blob \| blame \| history
daemon/main.c		patch \| blob \| blame \| history
doc/_static/config.schema.json		patch \| blob \| blame \| history
doc/user/config-defer.rst		patch \| blob \| blame \| history
python/knot_resolver/datamodel/defer_schema.py		patch \| blob \| blame \| history
python/knot_resolver/datamodel/templates/defer.lua.j2		patch \| blob \| blame \| history