From: Vsevolod Stakhov Date: Tue, 24 Feb 2026 13:38:31 +0000 (+0000) Subject: [Fix] Move binary msgpack data from KEYS to ARGV in Bayes Redis scripts X-Git-Tag: 4.0.0~70 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=30bd75dbcbf56b73010ebfd9f2f341a9e16d7be2;p=thirdparty%2Frspamd.git [Fix] Move binary msgpack data from KEYS to ARGV in Bayes Redis scripts When expand_keys is enabled, lutil.template() is applied to all KEYS arguments of EVALSHA commands. This corrupts binary msgpack blobs by stripping 0x24 ('$') bytes, breaking str8 headers where the length byte equals 36. Move non-key arguments (msgpack tokens, config, labels) to ARGV which is not subject to key expansion. Also fix msgpack_str_len off-by-one for str32 (4+len -> 5+len). --- diff --git a/.luacheckrc b/.luacheckrc index 7e48b8ee05..053e4c62f4 100644 --- a/.luacheckrc +++ b/.luacheckrc @@ -63,6 +63,7 @@ files['/**/lualib/lua_redis.lua'].globals = { files['/**/lualib/redis_scripts/**'].globals = { 'redis', 'KEYS', + 'ARGV', 'cjson', 'cmsgpack', } diff --git a/lualib/lua_bayes_redis.lua b/lualib/lua_bayes_redis.lua index 150a51fce0..0612d6ec2e 100644 --- a/lualib/lua_bayes_redis.lua +++ b/lualib/lua_bayes_redis.lua @@ -55,7 +55,7 @@ local function gen_classify_functor(redis_params, classify_script_id) lua_redis.exec_redis_script(classify_script_id, { task = task, is_write = false, key = expanded_key }, - classify_redis_cb, { expanded_key, script_class_labels, stat_tokens }) + classify_redis_cb, { expanded_key }, { script_class_labels, stat_tokens }) end end @@ -82,11 +82,11 @@ local function gen_learn_functor(redis_params, learn_script_id) lua_redis.exec_redis_script(learn_script_id, { task = task, is_write = true, key = expanded_key }, learn_redis_cb, - { expanded_key, script_class_label, symbol, tostring(is_unlearn), stat_tokens, maybe_text_tokens }) + { expanded_key }, { script_class_label, symbol, tostring(is_unlearn), stat_tokens, maybe_text_tokens }) else lua_redis.exec_redis_script(learn_script_id, { task = task, is_write = true, key = expanded_key }, - learn_redis_cb, { expanded_key, script_class_label, symbol, tostring(is_unlearn), stat_tokens }) + learn_redis_cb, { expanded_key }, { script_class_label, symbol, tostring(is_unlearn), stat_tokens }) end end end @@ -244,7 +244,7 @@ local function gen_cache_check_functor(redis_params, check_script_id, conf) lua_util.debugm(N, task, 'checking cache: %s', cache_id) lua_redis.exec_redis_script(check_script_id, { task = task, is_write = false, key = cache_id }, - classify_redis_cb, { cache_id, packed_conf }) + classify_redis_cb, { cache_id }, { packed_conf }) end end @@ -259,7 +259,7 @@ local function gen_cache_learn_functor(redis_params, learn_script_id, conf) lua_redis.exec_redis_script(learn_script_id, { task = task, is_write = true, key = cache_id }, learn_redis_cb, - { cache_id, class_name, packed_conf }) + { cache_id }, { class_name, packed_conf }) end end diff --git a/lualib/redis_scripts/bayes_cache_check.lua b/lualib/redis_scripts/bayes_cache_check.lua index 912396e4c2..fcd5a24140 100644 --- a/lualib/redis_scripts/bayes_cache_check.lua +++ b/lualib/redis_scripts/bayes_cache_check.lua @@ -1,10 +1,10 @@ -- Lua script to perform cache checking for bayes classification -- This script accepts the following parameters: -- key1 - cache id --- key2 - configuration table in message pack +-- argv1 - configuration table in message pack local cache_id = KEYS[1] -local conf = cmsgpack.unpack(KEYS[2]) +local conf = cmsgpack.unpack(ARGV[1]) cache_id = string.sub(cache_id, 1, conf.cache_elt_len) -- Try each prefix that is in Redis diff --git a/lualib/redis_scripts/bayes_cache_learn.lua b/lualib/redis_scripts/bayes_cache_learn.lua index d3ec095a0a..08187b4c3d 100644 --- a/lualib/redis_scripts/bayes_cache_learn.lua +++ b/lualib/redis_scripts/bayes_cache_learn.lua @@ -1,8 +1,8 @@ -- Lua script to perform cache learning for bayes classification (multi-class) -- This script accepts the following parameters: -- key1 - cache id --- key2 - class name string (e.g. "spam", "ham", "transactional") --- key3 - configuration table in message pack +-- argv1 - class name string (e.g. "spam", "ham", "transactional") +-- argv2 - configuration table in message pack -- -- The cache value stored in Redis is the class name string. A numeric class_id -- hash was used previously, but uint64_t values > 2^53 lose precision when @@ -10,8 +10,8 @@ -- unreliable for arbitrary multiclass names. local cache_id = KEYS[1] -local class_name = KEYS[2] -local conf = cmsgpack.unpack(KEYS[3]) +local class_name = ARGV[1] +local conf = cmsgpack.unpack(ARGV[2]) -- Store the class name directly as the cache value local cache_value = class_name diff --git a/lualib/redis_scripts/bayes_classify.lua b/lualib/redis_scripts/bayes_classify.lua index d6132e631b..029aa980ce 100644 --- a/lualib/redis_scripts/bayes_classify.lua +++ b/lualib/redis_scripts/bayes_classify.lua @@ -1,12 +1,12 @@ -- Lua script to perform bayes classification (multi-class) -- This script accepts the following parameters: -- key1 - prefix for bayes tokens (e.g. for per-user classification) --- key2 - class labels: table of all class labels as "TABLE:label1,label2,..." --- key3 - set of tokens encoded in messagepack array of strings +-- argv1 - class labels: table of all class labels as "TABLE:label1,label2,..." +-- argv2 - set of tokens encoded in messagepack array of strings local prefix = KEYS[1] -local class_labels_arg = KEYS[2] -local input_tokens = cmsgpack.unpack(KEYS[3]) +local class_labels_arg = ARGV[1] +local input_tokens = cmsgpack.unpack(ARGV[2]) -- Parse class labels (always expect TABLE: format) local class_labels = {} diff --git a/lualib/redis_scripts/bayes_learn.lua b/lualib/redis_scripts/bayes_learn.lua index ebc798fe0d..3f8ee1cbc7 100644 --- a/lualib/redis_scripts/bayes_learn.lua +++ b/lualib/redis_scripts/bayes_learn.lua @@ -1,21 +1,21 @@ -- Lua script to perform bayes learning (multi-class) -- This script accepts the following parameters: -- key1 - prefix for bayes tokens (e.g. for per-user classification) --- key2 - class label string (e.g. "S", "H", "T") --- key3 - string symbol --- key4 - boolean is_unlearn --- key5 - set of tokens encoded in messagepack array of strings --- key6 - set of text tokens (if any) encoded in messagepack array of strings (size must be twice of `KEYS[5]`) +-- argv1 - class label string (e.g. "S", "H", "T") +-- argv2 - string symbol +-- argv3 - boolean is_unlearn +-- argv4 - set of tokens encoded in messagepack array of strings +-- argv5 - set of text tokens (if any) encoded in messagepack array of strings (size must be twice of `ARGV[4]`) local prefix = KEYS[1] -local class_label = KEYS[2] -local symbol = KEYS[3] -local is_unlearn = KEYS[4] == 'true' and true or false -local input_tokens = cmsgpack.unpack(KEYS[5]) +local class_label = ARGV[1] +local symbol = ARGV[2] +local is_unlearn = ARGV[3] == 'true' and true or false +local input_tokens = cmsgpack.unpack(ARGV[4]) local text_tokens -if KEYS[6] then - text_tokens = cmsgpack.unpack(KEYS[6]) +if ARGV[5] then + text_tokens = cmsgpack.unpack(ARGV[5]) end -- Handle backward compatibility for boolean values diff --git a/src/libstat/backends/redis_backend.cxx b/src/libstat/backends/redis_backend.cxx index 8f93b61db6..8489947e36 100644 --- a/src/libstat/backends/redis_backend.cxx +++ b/src/libstat/backends/redis_backend.cxx @@ -797,7 +797,7 @@ msgpack_str_len(std::size_t len) -> std::size_t return 3 + len; } else { - return 4 + len; + return 5 + len; } }