// classify
_Alignas(16) uint8_t key[16] = {0, };
uint8_t limited_prefix;
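+ // assumed shape of the result type (defined elsewhere, not shown in this excerpt):
+ //   struct limited_info { uint8_t score; /* 0..255, 255 = limited */ uint8_t prefix; };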
+ struct limited_info combined_prefix;
if (req->qsource.addr->sa_family == AF_INET6) {
struct sockaddr_in6 *ipv6 = (struct sockaddr_in6 *)req->qsource.addr;
memcpy(key, &ipv6->sin6_addr, 16);
prices[i] = (req->qsource.price_factor16
* (uint64_t)ratelimiting->v6_prices[i] + (1<<15)) >> 16;
}
- limited_prefix = KRU.limited_multi_prefix_or((struct kru *)ratelimiting->kru, time_now,
+ combined_prefix = KRU.limited_multi_prefix_or((struct kru *)ratelimiting->kru, time_now,
1, key, V6_PREFIXES, prices, V6_PREFIXES_CNT, NULL);
} else {
struct sockaddr_in *ipv4 = (struct sockaddr_in *)req->qsource.addr;
prices[i] = (req->qsource.price_factor16
* (uint64_t)ratelimiting->v4_prices[i] + (1<<15)) >> 16;
}
- limited_prefix = KRU.limited_multi_prefix_or((struct kru *)ratelimiting->kru, time_now,
+ combined_prefix = KRU.limited_multi_prefix_or((struct kru *)ratelimiting->kru, time_now,
0, key, V4_PREFIXES, prices, V4_PREFIXES_CNT, NULL);
}
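+ // combined_prefix.score is 0..255: 255 means some prefix reached its limit,
+ // lower values scale with the highest per-prefix load relative to its limit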
+ if (combined_prefix.score < 255) return false;
+ limited_prefix = combined_prefix.prefix;
if (!limited_prefix) return false; // not limited
// slip: truncating vs dropping
struct load_cl {
ALIGNED_CPU_CACHE
_Atomic uint32_t time;
- #define LOADS_LEN 15
+ #define LOADS_LEN 10
uint16_t ids[LOADS_LEN];
+ uint16_t cdnls[LOADS_LEN];
uint16_t loads[LOADS_LEN];
};
static_assert(64 == sizeof(struct load_cl), "bad size of struct load_cl");
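+ // With LOADS_LEN == 10, the 32-bit time plus three arrays of 10 uint16_t
+ // fill exactly 64 bytes (4 + 3*20), so the assert above still holds.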
/// Convert capacity_log to loads_bits
static inline int32_t capacity2loads(int capacity_log)
{
- static_assert(LOADS_LEN == 15 && TABLE_COUNT == 2, "");
- // So, the pair of cache lines hold up to 2*15 elements.
+ static_assert(LOADS_LEN == 10 && TABLE_COUNT == 2, "");
+ // So, the pair of cache lines hold up to 2*10 elements.
// Let's say that we can reliably store 16 = 1 << (1+3).
// (probably more but certainly not 1 << 5)
const int shift = 1 + 3;
uint32_t limit16; // 2^16 has to be representable
uint16_t id;
uint16_t final_load_value; // set by kru_limited_update if not blocked
+ uint16_t cdnl;
uint16_t *load;
};
}
-/// Phase 2/3 of a query -- returns answer with no state modification (except update_time).
+/// Phase 2/3 of a query -- returns the 0..255 load score with no state modification (except update_time).
-static inline bool kru_limited_fetch(struct kru *kru, struct query_ctx *ctx)
+static inline uint8_t kru_limited_fetch(struct kru *kru, struct query_ctx *ctx)
{
// Compute 16-bit limit and price.
// For 32-bit prices we assume that a 16-bit load value corresponds
#else
const __m256i id_v = _mm256_set1_epi16(id);
for (int li = 0; li < TABLE_COUNT; ++li) {
- static_assert(LOADS_LEN == 15 && sizeof(ctx->l[li]->ids[0]) == 2, "");
+ static_assert(LOADS_LEN == 10 && sizeof(ctx->l[li]->ids[0]) == 2, "");
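+ // .ids now starts 4 bytes into the 64-byte struct, so loading 32 bytes from (ids - 6)
+ // places the 10 ids in the upper 10 lanes; the lower 6 lanes cover .time and the tail
+ // of the preceding cache line, and any spurious match there yields a negative index
+ // which is rejected below (bit_scan_reverse picks the highest matching lane anyway).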
// unfortunately we can't use aligned load here
- __m256i ids_v = _mm256_loadu_si256((__m256i *)((uint16_t *)ctx->l[li]->ids - 1));
+ __m256i ids_v = _mm256_loadu_si256((__m256i *)((uint16_t *)ctx->l[li]->ids - 6));
__m256i match_mask = _mm256_cmpeq_epi16(ids_v, id_v);
if (_mm256_testz_si256(match_mask, match_mask))
continue; // no match of id
- int index = _bit_scan_reverse(_mm256_movemask_epi8(match_mask)) / 2 - 1;
+ int index = _bit_scan_reverse(_mm256_movemask_epi8(match_mask)) / 2 - 6;
- // there's a small possibility that we hit equality only on the -1 index
+ // there's a small possibility that we hit equality only on a negative index
if (index >= 0) {
ctx->load = &ctx->l[li]->loads[index];
load_found:;
ctx->final_load_value = *ctx->load;
- return (ctx->final_load_value >= ctx->limit16);
+ // scale the current load to a 0..255 score relative to the limit; 255 means the limit was reached
+ if (ctx->final_load_value >= ctx->limit16) {
+ return 255;
+ } else {
+ return (uint8_t)(((uint32_t)ctx->final_load_value * 255) / ctx->limit16);
+ }
}
/// Phase 3/3 of a query -- state update, return value overrides previous answer in case of race.
for (int li = 0; li < TABLE_COUNT; ++li) {
// BEWARE: we're relying on the exact memory layout of struct load_cl,
- // where the .loads array take 15 16-bit values at the very end.
+ // where the .loads array takes 10 16-bit values at the very end.
- static_assert((offsetof(struct load_cl, loads) - 2) % 16 == 0,
+ static_assert((offsetof(struct load_cl, loads) - 12) % 16 == 0,
"bad alignment of struct load_cl::loads");
- static_assert(LOADS_LEN == 15 && sizeof(ctx->l[li]->loads[0]) == 2, "");
- __m128i *l_v = (__m128i *)((uint16_t *)ctx->l[li]->loads - 1);
+ static_assert(LOADS_LEN == 10 && sizeof(ctx->l[li]->loads[0]) == 2, "");
+ __m128i *l_v = (__m128i *)((uint16_t *)ctx->l[li]->loads - 6);
__m128i l0 = _mm_load_si128(l_v);
__m128i l1 = _mm_load_si128(l_v + 1);
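+ // l0 now covers cdnls[4..9] and loads[0..1]; l1 covers loads[2..9]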
- // We want to avoid the first item in l0, so we maximize it.
- // (but this function takes a signed integer, so -1 is the maximum)
- l0 = _mm_insert_epi16(l0, -1, 0);
+ // We want to avoid the first six items in l0 (they belong to .cdnls), so we maximize them.
+ // (-1 is all-ones per 16-bit lane, i.e. the maximum for the unsigned minimum search)
+ l0 = _mm_or_si128(l0, _mm_setr_epi16(-1, -1, -1, -1, -1, -1, 0, 0));
// Only one instruction can find minimum and its position,
// and it works on 8x uint16_t.
}
}
// now, min_i (and min_ix) is offset by one due to alignment of .loads
- if (min_i != 0) // zero is very unlikely
- --min_i;
+ if (min_i > 5)
+ min_i -= 6;
#endif
ctx->l[min_li]->ids[min_i] = ctx->id;
return ret;
}
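+/// Combined variant: returns the highest 0..255 score across the queried prefixes,
+/// together with the prefix it came from; score 255 means the request is limited.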
-static uint8_t kru_limited_multi_prefix_or(struct kru *kru, uint32_t time_now, uint8_t namespace,
+static struct limited_info kru_limited_multi_prefix_or(struct kru *kru, uint32_t time_now, uint8_t namespace,
uint8_t key[static 16], uint8_t *prefixes, kru_price_t *prices, size_t queries_cnt, uint16_t *max_load_out)
{
struct query_ctx ctx[queries_cnt];
+ struct limited_info lim = { .score = 0, .prefix = 0 };
for (size_t i = 0; i < queries_cnt; i++) {
kru_limited_prefetch_prefix(kru, time_now, namespace, key, prefixes[i], prices[i], ctx + i);
}
for (size_t i = 0; i < queries_cnt; i++) {
- if (kru_limited_fetch(kru, ctx + i))
- return prefixes[i];
+ uint8_t ret = kru_limited_fetch(kru, ctx + i);
+ if (ret >= lim.score) { // keep the most loaded prefix seen so far
+ lim.score = ret;
+ lim.prefix = prefixes[i];
+ }
+ if (lim.score == 255)
+ return lim;
}
for (int i = queries_cnt - 1; i >= 0; i--) {
- if (kru_limited_update(kru, ctx + i, false))
- return prefixes[i];
+ if (kru_limited_update(kru, ctx + i, false)) {
+ lim.score = 255;
+ lim.prefix = prefixes[i];
+ return lim;
+ }
}
if (max_load_out) {
}
}
- return 0;
+ return lim;
}
static void kru_load_multi_prefix(struct kru *kru, uint32_t time_now, uint8_t namespace,