return ret
end
elseif type(opt) == 'table' then
- local cache_key = lua_util.table_digest(opt)
+ local cache_key = lua_util.unordered_table_digest(opt)
if not callback and maps_cache[cache_key] then
rspamd_logger.infox(rspamd_config, 'reuse url for complex map definition %s: %s',
cache_key:sub(1, 8), description)
exports.add_composition_map = function(cfg, map_obj)
local hash_key = map_obj
if type(map_obj) == 'table' then
- hash_key = lua_util.table_digest(map_obj)
+ hash_key = lua_util.unordered_table_digest(map_obj)
end
local map = maps_cache[hash_key]
local hash_key = rules
local rspamd_text = require "rspamd_text"
if type(rules) == 'table' then
- hash_key = lua_util.table_digest(rules)
+ hash_key = lua_util.unordered_table_digest(rules)
end
local map = maps_cache[hash_key]
exports.table_digest = table_digest
+---[[[
+-- @function lua_util.unordered_table_digest(t)
+-- Returns a hash of table contents that is independent of iteration order.
+-- Every key/value pair is hashed on its own with XXH3
+-- (`rspamd_cryptobox.fast_hash64`) and the per-pair hashes are XOR-ed
+-- together, so the result does not depend on the order in which `pairs`
+-- visits the entries and no key sorting is required (O(n)).
+--
+-- * Array elements are hashed together with their index, so reordering an
+--   array still changes the digest.
+-- * All entries are visited, including named keys of mixed tables and array
+--   elements located after a `nil` hole.
+-- * Keys and values are tagged with their Lua type and length, so `1` and
+--   `"1"` (or `true` and `"true"`) yield different digests and two distinct
+--   entries can never produce the same hashed string and cancel out.
+-- * Nested tables are digested recursively; cyclic tables are not supported
+--   (the recursion has no cycle guard).
+-- * Values other than string/number/boolean/table are passed through
+--   `tostring`.
+-- @param {table} t input array or map
+-- @return {string} hex representation of the 64-bit hash (16 hex characters)
+--]]]
+local function unordered_table_digest(t)
+  local cr = require "rspamd_cryptobox"
+  local bit = require "bit"
+  local bxor, tohex = bit.bxor, bit.tohex
+
+  -- Unambiguous encoding of a key or a non-table value: "<type>:<len>:<text>".
+  -- Plain concatenation is used on purpose: `string.format('%s')` may
+  -- truncate at an embedded NUL byte on Lua 5.1.
+  local function encode_scalar(v)
+    local text = tostring(v)
+    return type(v) .. ':' .. #text .. ':' .. text
+  end
+
+  -- Returns the 16 hex chars digest of `tbl` (also used for nested tables)
+  local function digest_impl(tbl)
+    local acc_hi, acc_lo = 0, 0
+
+    -- Keys are unique within a table, so every entry produces a distinct
+    -- string; XOR makes the accumulation order irrelevant
+    for k, v in pairs(tbl) do
+      local encoded_value
+      if type(v) == 'table' then
+        -- Recursively compute digest for nested table; the `table:` tag keeps
+        -- it apart from a string that merely looks like a digest
+        encoded_value = 'table:' .. digest_impl(v)
+      else
+        encoded_value = encode_scalar(v)
+      end
+
+      local hi, lo = cr.fast_hash64(encode_scalar(k) .. encoded_value)
+      acc_hi = bxor(acc_hi, hi)
+      acc_lo = bxor(acc_lo, lo)
+    end
+
+    -- bit.tohex() properly handles the signed 32-bit values produced by bxor
+    return tohex(acc_hi) .. tohex(acc_lo)
+  end
+
+  return digest_impl(t)
+end
+
+exports.unordered_table_digest = unordered_table_digest
+
---[[[
-- @function lua_util.toboolean(v)
-- Converts a string or a number to boolean
norm.providers[i] = entry
end
- return lua_util.table_digest(norm)
+ return lua_util.unordered_table_digest(norm)
end
-- If no providers configured, fallback to symbols provider unless disabled
LUA_FUNCTION_DEF(cryptobox, decrypt_cookie);
LUA_FUNCTION_DEF(cryptobox, pbkdf);
LUA_FUNCTION_DEF(cryptobox, gen_dkim_keypair);
+LUA_FUNCTION_DEF(cryptobox, fast_hash64);
/* Secretbox API: uses libsodium secretbox and blake2b for key derivation */
LUA_FUNCTION_DEF(cryptobox_secretbox, create);
LUA_INTERFACE_DEF(cryptobox, decrypt_cookie),
LUA_INTERFACE_DEF(cryptobox, pbkdf),
LUA_INTERFACE_DEF(cryptobox, gen_dkim_keypair),
+ LUA_INTERFACE_DEF(cryptobox, fast_hash64),
{NULL, NULL}};
static const struct luaL_reg cryptoboxpubkeylib_f[] = {
return 2;
}
+/***
+ * @function rspamd_cryptobox.fast_hash64(data[, seed])
+ * Hashes the input with the XXH3 fast hash and returns the 64-bit result
+ * split into two numbers, so that callers can combine hashes with 32-bit
+ * bitwise operations (e.g. XOR accumulation for order-independent hashing).
+ * Raises a Lua error if `data` is neither a string nor an rspamd_text.
+ * @param {string|rspamd_text} data input data to hash
+ * @param {number} seed optional seed value (default 0)
+ * @return {number,number} high 32 bits and low 32 bits of the 64-bit hash
+ */
+static int
+lua_cryptobox_fast_hash64(lua_State *L)
+{
+	LUA_TRACE_POINT;
+	const char *input = NULL;
+	gsize input_len = 0;
+	uint64_t seed = 0;
+	uint64_t digest;
+
+	/* First argument: either a plain Lua string or an rspamd_text userdata */
+	switch (lua_type(L, 1)) {
+	case LUA_TSTRING:
+		input = lua_tolstring(L, 1, &input_len);
+		break;
+	case LUA_TUSERDATA: {
+		struct rspamd_lua_text *txt = lua_check_text(L, 1);
+
+		if (txt == NULL) {
+			return luaL_error(L, "invalid arguments");
+		}
+
+		input = txt->start;
+		input_len = txt->len;
+		break;
+	}
+	default:
+		return luaL_error(L, "invalid arguments: string or rspamd_text expected");
+	}
+
+	/* Second argument is optional; anything that is not a number keeps seed 0 */
+	if (lua_type(L, 2) == LUA_TNUMBER) {
+		seed = lua_tointeger(L, 2);
+	}
+
+	digest = rspamd_cryptobox_fast_hash_specific(RSPAMD_CRYPTOBOX_XXHASH3,
+												 input, input_len, seed);
+
+	/* Push the two 32-bit halves separately: high part first, then low part */
+	lua_pushinteger(L, (lua_Integer) (digest >> 32));
+	lua_pushinteger(L, (lua_Integer) (digest & 0xFFFFFFFF));
+
+	return 2;
+}
+
/*
* Secretbox API
*/
--- /dev/null
+local cr = require 'rspamd_cryptobox'
+
+-- Unit tests for rspamd_cryptobox.fast_hash64, which returns a hash as two
+-- numbers (high half, low half)
+context("Cryptobox - fast_hash64", function()
+  local fast_hash64 = cr.fast_hash64
+
+  -- True when two (hi, lo) results differ in at least one half
+  local function halves_differ(hi_a, lo_a, hi_b, lo_b)
+    return hi_a ~= hi_b or lo_a ~= lo_b
+  end
+
+  test('Returns two numbers', function()
+    local high, low = fast_hash64("test")
+    assert_not_nil(high)
+    assert_not_nil(low)
+    assert_equal(type(high), "number")
+    assert_equal(type(low), "number")
+  end)
+
+  test('Same input produces same output', function()
+    local input = "hello world"
+    local first_hi, first_lo = fast_hash64(input)
+    local second_hi, second_lo = fast_hash64(input)
+    assert_equal(first_hi, second_hi)
+    assert_equal(first_lo, second_lo)
+  end)
+
+  test('Different input produces different output', function()
+    local hello_hi, hello_lo = fast_hash64("hello")
+    local world_hi, world_lo = fast_hash64("world")
+    -- It is enough for one of the halves to differ
+    assert_true(halves_differ(hello_hi, hello_lo, world_hi, world_lo),
+        "Different inputs should produce different hashes")
+  end)
+
+  test('Seed affects output', function()
+    local plain_hi, plain_lo = fast_hash64("test", 0)
+    local seeded_hi, seeded_lo = fast_hash64("test", 12345)
+    assert_true(halves_differ(plain_hi, plain_lo, seeded_hi, seeded_lo),
+        "Different seeds should produce different hashes")
+  end)
+
+  test('Empty string is valid input', function()
+    local high, low = fast_hash64("")
+    assert_not_nil(high)
+    assert_not_nil(low)
+  end)
+
+  test('Long string is valid input', function()
+    local high, low = fast_hash64(string.rep("x", 10000))
+    assert_not_nil(high)
+    assert_not_nil(low)
+  end)
+
+  test('XOR accumulation produces order-independent result', function()
+    local bit = require "bit"
+
+    -- XORs the hashes of all listed strings, half by half
+    local function xor_all(items)
+      local sum_hi, sum_lo = 0, 0
+      for _, item in ipairs(items) do
+        local item_hi, item_lo = fast_hash64(item)
+        sum_hi = bit.bxor(sum_hi, item_hi)
+        sum_lo = bit.bxor(sum_lo, item_lo)
+      end
+      return sum_hi, sum_lo
+    end
+
+    -- The same three strings in three different orders
+    local ref_hi, ref_lo = xor_all({ "alpha", "beta", "gamma" })
+    local rot1_hi, rot1_lo = xor_all({ "gamma", "alpha", "beta" })
+    local rot2_hi, rot2_lo = xor_all({ "beta", "gamma", "alpha" })
+
+    assert_equal(ref_hi, rot1_hi)
+    assert_equal(ref_lo, rot1_lo)
+    assert_equal(ref_hi, rot2_hi)
+    assert_equal(ref_lo, rot2_lo)
+  end)
+
+end)
--- /dev/null
+local util = require 'lua_util'
+
+-- Unit tests for lua_util.unordered_table_digest
+context("Lua util - unordered_table_digest", function()
+  local digest = util.unordered_table_digest
+
+  -- Builds a table by assigning the listed {key, value} pairs one by one,
+  -- in exactly the given order
+  local function insert_in_order(entries)
+    local tbl = {}
+    for _, entry in ipairs(entries) do
+      tbl[entry[1]] = entry[2]
+    end
+    return tbl
+  end
+
+  test('Simple map produces consistent digest', function()
+    local input = { a = 1, b = 2, c = 3 }
+    local first = digest(input)
+    local second = digest(input)
+    assert_equal(first, second)
+    assert_equal(#first, 16) -- 64 bits = 16 hex chars
+  end)
+
+  test('Order independence for maps', function()
+    -- Same content, different insertion order (and thus possibly a
+    -- different iteration order)
+    local forward = insert_in_order({
+      { 'alpha', "first" },
+      { 'beta', "second" },
+      { 'gamma', "third" },
+    })
+    local shuffled = insert_in_order({
+      { 'gamma', "third" },
+      { 'alpha', "first" },
+      { 'beta', "second" },
+    })
+
+    assert_equal(digest(forward), digest(shuffled),
+        "Digests should be equal regardless of insertion order")
+  end)
+
+  test('Different values produce different digests', function()
+    local base = digest({ a = 1, b = 2 })
+    local changed = digest({ a = 1, b = 3 })
+    assert_not_equal(base, changed)
+  end)
+
+  test('Different keys produce different digests', function()
+    local base = digest({ a = 1, b = 2 })
+    local renamed = digest({ a = 1, c = 2 })
+    assert_not_equal(base, renamed)
+  end)
+
+  test('Numeric values are included in digest', function()
+    local light = digest({ weight = 1.0 })
+    local heavy = digest({ weight = 2.0 })
+    assert_not_equal(light, heavy, "Different numeric values should produce different digests")
+  end)
+
+  test('Boolean values are included in digest', function()
+    local on = digest({ enabled = true })
+    local off = digest({ enabled = false })
+    assert_not_equal(on, off, "Different boolean values should produce different digests")
+  end)
+
+  test('Nested tables are handled correctly', function()
+    local original = digest({ outer = { inner = "value" } })
+    local copy = digest({ outer = { inner = "value" } })
+    local modified = digest({ outer = { inner = "other" } })
+
+    assert_equal(original, copy, "Same nested structure should produce same digest")
+    assert_not_equal(original, modified, "Different nested values should produce different digest")
+  end)
+
+  test('Arrays preserve order', function()
+    local ascending = digest({ "a", "b", "c" })
+    local descending = digest({ "c", "b", "a" })
+    assert_not_equal(ascending, descending, "Arrays with different order should have different digests")
+  end)
+
+  test('Empty table produces consistent digest', function()
+    local first = digest({})
+    local second = digest({})
+    assert_equal(first, second)
+  end)
+
+  test('Complex nested structure with mixed types', function()
+    -- Two structurally equal configs whose keys are listed in different order
+    local config_a = {
+      providers = {
+        { type = "llm", model = "gpt-4", weight = 1.0 },
+        { type = "symbols", weight = 0.5 },
+      },
+      fusion = {
+        normalization = "none",
+        include_meta = true,
+        meta_weight = 1.0,
+      },
+      max_inputs = 100,
+    }
+    local config_b = {
+      max_inputs = 100,
+      fusion = {
+        meta_weight = 1.0,
+        include_meta = true,
+        normalization = "none",
+      },
+      providers = {
+        { type = "llm", model = "gpt-4", weight = 1.0 },
+        { type = "symbols", weight = 0.5 },
+      },
+    }
+
+    assert_equal(digest(config_a), digest(config_b),
+        "Same structure with different key order should produce same digest")
+  end)
+
+end)