static inline bool first_access_ro(struct kr_cache_top_context *ctx, kru_hash_t hash) {
- // struct kr_cache_top_context { uint64_t bloom[4]; }
+ // struct kr_cache_top_context { uint32_t bloom[16]; }
- static_assert(sizeof(((struct kr_cache_top_context *)0)->bloom[0]) * 8 == 64);
- static_assert(sizeof(((struct kr_cache_top_context *)0)->bloom) * 8 == 64 * 4);
- // expected around 16 unique cache accesses per request context;
- // prob. of collision of the 16th unique access with the preceeding ones: 1/510;
- // 32nd access: 1/45; 64th access: 1/6
+ static_assert(sizeof(((struct kr_cache_top_context *)0)->bloom[0]) * 8 == 32);
+ static_assert(sizeof(((struct kr_cache_top_context *)0)->bloom) * 8 == 32 * 16);
+ // expected around 40 unique cache accesses per request context, up to ~100;
+ // prob. of collision of the 40th unique access with the preceding ones: ~0.5 %;
+ // 60th: ~1.9 %; 80th: 4.5 %; 100th: 8.4 %; 150th: 22 %; 200th: 39 %
+ // (the standalone sketch after this function shows how these estimates are computed)
+ // -> a collision means a cache access that should be counted in KRU is not counted
uint8_t *h = (uint8_t *)&hash;
- static_assert(sizeof(kru_hash_t) >= 4);
+ static_assert(sizeof(kru_hash_t) >= 8);
- bool accessed = 1ull &
- (ctx->bloom[0] >> (h[0] % 64)) &
- (ctx->bloom[1] >> (h[1] % 64)) &
- (ctx->bloom[2] >> (h[2] % 64)) &
- (ctx->bloom[3] >> (h[3] % 64));
+ bool accessed = 1u &
+ (ctx->bloom[h[0] % 16] >> (h[1] % 32)) &
+ (ctx->bloom[h[2] % 16] >> (h[3] % 32)) &
+ (ctx->bloom[h[4] % 16] >> (h[5] % 32)) &
+ (ctx->bloom[h[6] % 16] >> (h[7] % 32));
return !accessed;
}
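
The percentages quoted in the comment above follow from the standard Bloom-filter false-positive estimate with m = 512 bits and k = 4 probed bits per access. A standalone sketch (not part of the patch, standard library only) that reproduces them:

#include <math.h>
#include <stdio.h>

int main(void)
{
	const double m = 512.0; // bits in ctx->bloom (16 * 32)
	const double k = 4.0;   // bits set and probed per access
	const int nth[] = { 40, 60, 80, 100, 150, 200 };
	for (size_t i = 0; i < sizeof(nth) / sizeof(nth[0]); i++) {
		double n = nth[i] - 1; // unique accesses preceding the nth one
		// expected fraction of set bits after n unique accesses, k bits each
		double ones = 1.0 - pow(1.0 - 1.0 / m, k * n);
		// the nth access collides iff all k of its probed bits are already set
		double p = pow(ones, k);
		printf("%3dth access: %4.1f %%\n", nth[i], 100.0 * p);
	}
	return 0;
}

Compile with -lm; the output reproduces the figures quoted in the comment, up to rounding.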
if (!first_access_ro(ctx, hash)) return false;
uint8_t *h = (uint8_t *)&hash;
- static_assert(sizeof(kru_hash_t) >= 4);
+ static_assert(sizeof(kru_hash_t) >= 8);
- ctx->bloom[0] |= 1ull << (h[0] % 64);
- ctx->bloom[1] |= 1ull << (h[1] % 64);
- ctx->bloom[2] |= 1ull << (h[2] % 64);
- ctx->bloom[3] |= 1ull << (h[3] % 64);
-
- kr_assert(!first_access_ro(ctx, hash));
+ { // temporary statistics, TODO: remove
+ int ones = 0;
+ for (int i = 0; i < 16; i++) {
+ ones += __builtin_popcount(ctx->bloom[i]);
+ }
+ double collision_prob = ones / 512.0; // prob. of collision on 1 probed bit
+ collision_prob *= collision_prob; // on 2 probed bits
+ collision_prob *= collision_prob; // on all 4 probed bits -> false positive
- if (++ctx->cnt > 16) {
- VERBOSE_LOG("BLOOM overfull (%d unique accesses)\n", ctx->cnt);
+ if (collision_prob > 0.1) {
+ VERBOSE_LOG("BLOOM %d unique accesses, collision prob. %5.3f %% (%d/512 ones)\n", ctx->cnt, 100.0 * collision_prob, ones);
+ }
+ ctx->cnt++;
}
+ ctx->bloom[h[0] % 16] |= 1u << (h[1] % 32);
+ ctx->bloom[h[2] % 16] |= 1u << (h[3] % 32);
+ ctx->bloom[h[4] % 16] |= 1u << (h[5] % 32);
+ ctx->bloom[h[6] % 16] |= 1u << (h[7] % 32);
+
+ kr_assert(!first_access_ro(ctx, hash));
+
return true;
}
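
For reference, a minimal standalone sketch (not part of the patch; the example hash value is arbitrary) of the probe-selection scheme shared by first_access_ro() and the update above: each pair of hash bytes selects one of the 16 uint32_t words (mod 16) and one of its 32 bits (mod 32), consuming all 8 bytes required by the static_assert.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

int main(void)
{
	uint64_t hash = 0x0123456789abcdefULL; // stand-in for a kru_hash_t value
	uint8_t h[8];
	memcpy(h, &hash, sizeof h);
	for (int i = 0; i < 4; i++)
		printf("probe %d: word %2d, bit %2d\n", i, h[2*i] % 16, h[2*i + 1] % 32);
	return 0;
}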
#if USE_AES
/// 4-8 rounds should be an OK choice, most likely.
#define AES_ROUNDS 4
-#else
+#endif //#else
+// Use SipHash for variable-length keys even in the otherwise optimized (AES) variant.
#include "contrib/openbsd/siphash.h"
/// 1,3 should be an OK choice, probably.
SIPHASH_RC = 1,
SIPHASH_RF = 3,
};
-#endif
+//#endif
#if USE_AVX2 || USE_SSE41 || USE_AES
#include <immintrin.h>
kru_hash_t hash;
static_assert(sizeof(kru_hash_t) * 8 <= 64);
-#if !USE_AES
- hash = SipHash(&kru->hash_key, SIPHASH_RC, SIPHASH_RF, key, key_size);
-#else
- // TODO
- hash = 3;
- for (size_t i = 0; i < key_size; i++) {
- hash = hash * 257 + key[i];
- }
-#endif
+ // We use SipHash even for the otherwise optimized KRU variant, which has a different type of hash_key.
+ static_assert(sizeof(kru->hash_key) >= sizeof(SIPHASH_KEY));
+ SIPHASH_KEY *hash_key = (void *)&kru->hash_key;
+
+ hash = SipHash(hash_key, SIPHASH_RC, SIPHASH_RF, key, key_size);
return hash;
}
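
A hedged usage sketch of the SipHash call used above, with SIPHASH_KEY and SipHash() taken from the included contrib/openbsd/siphash.h; the key initialization here is illustrative only (in KRU the key lives in kru->hash_key and should be random):

#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include "contrib/openbsd/siphash.h"

int main(void)
{
	SIPHASH_KEY k;
	memset(&k, 0x2a, sizeof k); // illustrative only; use random bytes in real use
	const char key[] = "example.com."; // a variable-length cache key
	uint64_t h = SipHash(&k, 1, 3, key, strlen(key)); // 1,3 rounds as configured above
	printf("%016llx\n", (unsigned long long)h);
	return 0;
}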