]> git.ipfire.org Git - thirdparty/knot-resolver.git/commitdiff
rrl: tidying docs-develop-rrl-8r8r8r/deployments/4209
authorLukáš Ondráček <lukas.ondracek@nic.cz>
Wed, 29 May 2024 12:42:51 +0000 (14:42 +0200)
committerLukáš Ondráček <lukas.ondracek@nic.cz>
Wed, 29 May 2024 12:42:51 +0000 (14:42 +0200)
daemon/ratelimiting.test/tests-parallel.c
daemon/ratelimiting.test/tests.c
lib/kru-avx2.c
lib/kru-decay.inc.c [deleted file]
lib/kru-generic.c
lib/kru.inc.c

index 8efeb2fa7951d0f957f2631b757904e52fcb69f9..a329595595aeafbb20d265df22a7eb2fb2253cd5 100644 (file)
@@ -16,7 +16,8 @@
 
 static void the_tests(void **state);
 
-#include "./tests.inc.c"
+#include "./tests.inc.c"  // NOLINT(bugprone-suspicious-include)
+
 
 #define THREADS 4
 
index ad111dd5e8f86ea49d8761771f9609d185338a6d..04e445ce67ab7c4d1884f772087ebf809890734f 100644 (file)
@@ -16,7 +16,7 @@
 
 static void the_tests(void **state);
 
-#include "./tests.inc.c"
+#include "./tests.inc.c"  // NOLINT(bugprone-suspicious-include)
 
 // defining count_test as macro to let it print usable line number on failure
 #define count_test(DESC, EXPECTED_PASSING, MARGIN_FRACT, ...) { \
index 453d1336d1c31f8ec394d1142c63cc539877e1bc..e6875f75f852dc0bfae3e38ff877afa3b6077250 100644 (file)
@@ -41,7 +41,7 @@
 #define USE_SSE41 1
 
 #include "lib/defines.h"
-#include "./kru.inc.c"
+#include "./kru.inc.c"  // NOLINT(bugprone-suspicious-include)
 KR_EXPORT
 const struct kru_api KRU_AVX2 = KRU_API_INITIALIZER;
 
diff --git a/lib/kru-decay.inc.c b/lib/kru-decay.inc.c
deleted file mode 100644 (file)
index 002007b..0000000
+++ /dev/null
@@ -1,82 +0,0 @@
-/*  Copyright (C) 2024 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz>
-
-    This program is free software: you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation, either version 3 of the License, or
-    (at your option) any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License
-    along with this program.  If not, see <https://www.gnu.org/licenses/>.
- */
-
-#include <math.h>
-
-/// Parametrization for speed of decay.
-struct decay_config {
-       /// Bit shift per tick, fractional
-       double shift_bits;
-
-       /// Ticks to get zero loads
-       uint32_t max_ticks;
-
-       uint32_t mult_cache[32];
-};
-
-static inline void decay_initialize(struct decay_config *decay, kru_price_t max_decay)
-{
-       decay->shift_bits = log2(KRU_LIMIT - 1) - log2(KRU_LIMIT - 1 - max_decay);
-       decay->max_ticks = 18 / decay->shift_bits;
-
-       decay->mult_cache[0] = 0;  // not used
-       for (size_t ticks = 1; ticks < sizeof(decay->mult_cache) / sizeof(*decay->mult_cache); ticks++) {
-               decay->mult_cache[ticks] = exp2(32 - decay->shift_bits * ticks) + 0.5;
-       }
-}
-
-/// Catch up the time drift with configurably slower decay.
-static inline void update_time(struct load_cl *l, const uint32_t time_now,
-                       const struct decay_config *decay)
-{
-       uint32_t ticks;
-       uint32_t time_last = atomic_load_explicit(&l->time, memory_order_relaxed);
-       do {
-               ticks = time_now - time_last;
-               if (__builtin_expect(!ticks, true)) // we optimize for time not advancing
-                       return;
-               // We accept some desynchronization of time_now (e.g. from different threads).
-               if (ticks > (uint32_t)-1024)
-                       return;
-       } while (!atomic_compare_exchange_weak_explicit(&l->time, &time_last, time_now, memory_order_relaxed, memory_order_relaxed));
-
-       // If we passed here, we have acquired a time difference we are responsible for.
-
-       // Don't bother with complex computations if lots of ticks have passed. (little to no speed-up)
-       if (ticks > decay->max_ticks) {
-               memset(l->loads, 0, sizeof(l->loads));
-               return;
-       }
-
-       uint32_t mult;
-       if (__builtin_expect(ticks < sizeof(decay->mult_cache) / sizeof(*decay->mult_cache), 1)) {
-               mult = decay->mult_cache[ticks];
-       } else {
-               mult = exp2(32 - decay->shift_bits * ticks) + 0.5;
-       }
-
-       for (int i = 0; i < LOADS_LEN; ++i) {
-               // We perform decay for the acquired time difference; decays from different threads are commutative.
-               _Atomic uint16_t *load_at = (_Atomic uint16_t *)&l->loads[i];
-               uint16_t l1, load_orig = atomic_load_explicit(load_at, memory_order_relaxed);
-               const uint16_t rnd = rand_bits(16);
-               do {
-                       uint64_t m = (((uint64_t)load_orig << 16)) * mult;
-                       m = (m >> 32) + ((m >> 31) & 1);
-                       l1 = (m >> 16) + (rnd < (uint16_t)m);
-               } while (!atomic_compare_exchange_weak_explicit(load_at, &load_orig, l1, memory_order_relaxed, memory_order_relaxed));
-       }
-}
index 8d122fd834c8532d69a5415e4ccecef3616e42ee..a6e4f3196514c595fddc467e28ac15639731099b 100644 (file)
@@ -15,7 +15,7 @@
  */
 
 #include "lib/defines.h"
-#include "./kru.inc.c"
+#include "./kru.inc.c"  // NOLINT(bugprone-suspicious-include)
 
 KR_EXPORT
 const struct kru_api KRU_GENERIC = KRU_API_INITIALIZER;
index 5f630fb78fff9ac242242877cde2a91070e2e0ba..6d75fd7e9aaa197a649b00c98da3ec70fd861082 100644 (file)
@@ -46,30 +46,9 @@ Size (`loads_bits` = log2 length):
 #include <stdbool.h>
 #include <stddef.h>
 #include <string.h>
+#include <math.h>
 
 #include "./kru.h"
-
-/// Block of loads sharing the same time, so that we're more space-efficient.
-/// It's exactly a single cache line.
-struct load_cl {
-       _Atomic uint32_t time;
-       #define LOADS_LEN 15
-       uint16_t ids[LOADS_LEN];
-       uint16_t loads[LOADS_LEN];
-} ALIGNED_CPU_CACHE;
-static_assert(64 == sizeof(struct load_cl), "bad size of struct load_cl");
-
-inline static uint64_t rand_bits(unsigned int bits) {
-       static _Thread_local uint64_t state = 3723796604792068981ull;
-       const uint64_t prime1 = 11737314301796036329ull;
-       const uint64_t prime2 = 3107264277052274849ull;
-       state = prime1 * state + prime2;
-       //return state & ((1 << bits) - 1);
-       return state >> (64 - bits);
-}
-
-#include "./kru-decay.inc.c"
-
 #include "contrib/ucw/lib.h"
 #include "libdnssec/error.h"
 #include "libdnssec/random.h"
@@ -92,6 +71,27 @@ typedef uint64_t hash_t;
        #include <x86intrin.h>
 #endif
 
+/// Block of loads sharing the same time, so that we're more space-efficient.
+/// It's exactly a single cache line.
+struct load_cl {
+       _Atomic uint32_t time;
+       #define LOADS_LEN 15
+       uint16_t ids[LOADS_LEN];
+       uint16_t loads[LOADS_LEN];
+} ALIGNED_CPU_CACHE;
+static_assert(64 == sizeof(struct load_cl), "bad size of struct load_cl");
+
+/// Parametrization for speed of decay.
+struct decay_config {
+       /// Bit shift per tick, fractional
+       double shift_bits;
+
+       /// Ticks to get zero loads
+       uint32_t max_ticks;
+
+       uint32_t mult_cache[32];
+};
+
 struct kru {
 #if USE_AES
        /// Hashing secret.  Random but shared by all users of the table.
@@ -111,6 +111,69 @@ struct kru {
        struct load_cl load_cls[][TABLE_COUNT];
 };
 
+inline static uint64_t rand_bits(unsigned int bits) {
+       static _Thread_local uint64_t state = 3723796604792068981ull;
+       const uint64_t prime1 = 11737314301796036329ull;
+       const uint64_t prime2 = 3107264277052274849ull;
+       state = prime1 * state + prime2;
+       //return state & ((1 << bits) - 1);
+       return state >> (64 - bits);
+}
+
+static inline void decay_initialize(struct decay_config *decay, kru_price_t max_decay)
+{
+       decay->shift_bits = log2(KRU_LIMIT - 1) - log2(KRU_LIMIT - 1 - max_decay);
+       decay->max_ticks = 18 / decay->shift_bits;
+
+       decay->mult_cache[0] = 0;  // not used
+       for (size_t ticks = 1; ticks < sizeof(decay->mult_cache) / sizeof(*decay->mult_cache); ticks++) {
+               decay->mult_cache[ticks] = exp2(32 - decay->shift_bits * ticks) + 0.5;
+       }
+}
+
+/// Catch up the time drift with configurably slower decay.
+static inline void update_time(struct load_cl *l, const uint32_t time_now,
+                       const struct decay_config *decay)
+{
+       uint32_t ticks;
+       uint32_t time_last = atomic_load_explicit(&l->time,memory_order_relaxed);
+       do {
+               ticks = time_now - time_last;
+               if (__builtin_expect(!ticks, true)) // we optimize for time not advancing
+                       return;
+               // We accept some desynchronization of time_now (e.g. from different threads).
+               if (ticks > (uint32_t)-1024)
+                       return;
+       } while (!atomic_compare_exchange_weak_explicit(&l->time, &time_last, time_now, memory_order_relaxed, memory_order_relaxed));
+
+       // If we passed here, we have acquired a time difference we are responsible for.
+
+       // Don't bother with complex computations if lots of ticks have passed. (little to no speed-up)
+       if (ticks > decay->max_ticks) {
+               memset(l->loads, 0, sizeof(l->loads));
+               return;
+       }
+
+       uint32_t mult;
+       if (__builtin_expect(ticks < sizeof(decay->mult_cache) / sizeof(*decay->mult_cache), 1)) {
+               mult = decay->mult_cache[ticks];
+       } else {
+               mult = exp2(32 - decay->shift_bits * ticks) + 0.5;
+       }
+
+       for (int i = 0; i < LOADS_LEN; ++i) {
+               // We perform decay for the acquired time difference; decays from different threads are commutative.
+               _Atomic uint16_t *load_at = (_Atomic uint16_t *)&l->loads[i];
+               uint16_t l1, load_orig = atomic_load_explicit(load_at, memory_order_relaxed);
+               const uint16_t rnd = rand_bits(16);
+               do {
+                       uint64_t m = (((uint64_t)load_orig << 16)) * mult;
+                       m = (m >> 32) + ((m >> 31) & 1);
+                       l1 = (m >> 16) + (rnd < (uint16_t)m);
+               } while (!atomic_compare_exchange_weak_explicit(load_at, &load_orig, l1, memory_order_relaxed, memory_order_relaxed));
+       }
+}
+
 /// Convert capacity_log to loads_bits
 static inline int32_t capacity2loads(int capacity_log)
 {