From: Vsevolod Stakhov Date: Sat, 6 Dec 2025 18:30:09 +0000 (+0000) Subject: [Fix] Fix Lua 5.4 compatibility issues in neural plugin X-Git-Tag: 3.14.2~10 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=c68b35272d27ceacb08fbe5f8dac5670c435ade4;p=thirdparty%2Frspamd.git [Fix] Fix Lua 5.4 compatibility issues in neural plugin This commit addresses several Lua 5.4 compatibility issues that caused the neural LLM tests to fail: 1. Redis TTL must be integer (lua_cache.lua): - Lua 5.4's tostring() produces "4.0" for floats instead of "4" - Redis SETEX/EXPIRE commands require integer TTL values - Fixed by using math.floor() before tostring() 2. Version number format in ANN keys (lualib/plugins/neural.lua): - Changed string format from %s to %d for version numbers - Ensures integer format "1" instead of potential "1.0" 3. Iterator vs table handling (src/plugins/lua/neural.lua): - fun.map() returns an iterator, not a table - In Lua 5.4, # operator on iterators returns 0 - Fixed by wrapping with fun.totable() to get a proper table 4. Nil values in table arguments (lualib/plugins/neural.lua): - Lua 5.4 handles nil values in tables differently - Tables like {a, b, nil, nil} have undefined length behavior - Fixed by using empty string defaults for optional parameters 5. Redis script nil checks (neural_save_unlock.lua): - Added empty string checks alongside nil checks - Ensures optional fields are only set when truly provided 6. Test infrastructure improvements: - Added logging to dummy_llm.py for debugging - Added proper error handling and diagnostics - Updated rspamd.robot with better dummy_llm startup logging --- diff --git a/lualib/lua_cache.lua b/lualib/lua_cache.lua index e8e79f0547..6f731193b0 100644 --- a/lualib/lua_cache.lua +++ b/lualib/lua_cache.lua @@ -335,7 +335,7 @@ local function cache_get(task, key, cache_context, timeout, callback_uncached, c lua_util.debugm(cache_context.N, task, "calling uncached handler for %s", full_key) callback_uncached(task) end, - 'SETEX', { full_key, tostring(timeout * 2), pending_marker } + 'SETEX', { full_key, tostring(math.floor(timeout * 2)), pending_marker } ) else -- Key found, check if it's a pending marker or actual data @@ -359,7 +359,7 @@ local function cache_get(task, key, cache_context, timeout, callback_uncached, c lua_util.debugm(cache_context.N, task, "successfully extended TTL for %s", full_key) end end, - 'EXPIRE', { full_key, tostring(cache_context.opts.cache_ttl) } + 'EXPIRE', { full_key, tostring(math.floor(cache_context.opts.cache_ttl)) } ) lua_util.debugm(cache_context.N, task, "returning cached data for key %s", full_key) @@ -400,7 +400,7 @@ local function cache_set(task, key, data, cache_context) full_key, os.date('%Y-%m-%d %H:%M:%S', expire_at)) end end, - 'SETEX', { full_key, tostring(ttl), encoded_data } + 'SETEX', { full_key, tostring(math.floor(ttl)), encoded_data } ) end diff --git a/lualib/plugins/neural.lua b/lualib/plugins/neural.lua index 5fcb75fcf9..ab4c24060b 100644 --- a/lualib/plugins/neural.lua +++ b/lualib/plugins/neural.lua @@ -560,8 +560,8 @@ end -- Used to generate new ANN key for specific profile local function new_ann_key(rule, set, version) - local ann_key = string.format('%s_%s_%s_%s_%s', settings.prefix, - rule.prefix, set.name, set.digest:sub(1, 8), tostring(version)) + local ann_key = string.format('%s_%s_%s_%s_%d', settings.prefix, + rule.prefix, set.name, set.digest:sub(1, 8), version) return ann_key end @@ -1016,6 +1016,8 @@ local function spawn_train(params) #(params.set.ann.pca or {}), #(pca_data or {}), params.set.ann.redis_key, params.ann_key) + -- Ensure all arguments are non-nil for Lua 5.4 compatibility + -- (nil values in tables cause length/iteration issues) lua_redis.exec_redis_script(redis_script_id.save_unlock, { ev_base = params.ev_base, is_write = true }, redis_save_cb, @@ -1026,10 +1028,10 @@ local function spawn_train(params) tostring(params.rule.ann_expire), tostring(os.time()), params.ann_key, -- old key to unlock... - roc_thresholds_serialized, - pca_data, - providers_meta_serialized, - ucl.to_format(norm_stats, 'json-compact', true), + roc_thresholds_serialized or '', + pca_data or '', + providers_meta_serialized or '', + ucl.to_format(norm_stats, 'json-compact', true) or '', }) end end diff --git a/lualib/redis_scripts/neural_save_unlock.lua b/lualib/redis_scripts/neural_save_unlock.lua index dfed2e358f..1ce31afa14 100644 --- a/lualib/redis_scripts/neural_save_unlock.lua +++ b/lualib/redis_scripts/neural_save_unlock.lua @@ -15,13 +15,13 @@ local now = tonumber(KEYS[6]) redis.call('ZADD', KEYS[2], now, KEYS[4]) redis.call('HSET', KEYS[1], 'ann', KEYS[3]) redis.call('HSET', KEYS[1], 'roc_thresholds', KEYS[8]) -if KEYS[9] then +if KEYS[9] and KEYS[9] ~= '' then redis.call('HSET', KEYS[1], 'pca', KEYS[9]) end -if KEYS[10] then +if KEYS[10] and KEYS[10] ~= '' then redis.call('HSET', KEYS[1], 'providers_meta', KEYS[10]) end -if KEYS[11] then +if KEYS[11] and KEYS[11] ~= '' then redis.call('HSET', KEYS[1], 'norm_stats', KEYS[11]) end redis.call('HDEL', KEYS[1], 'lock') diff --git a/src/plugins/lua/neural.lua b/src/plugins/lua/neural.lua index 282f49ef5d..4d61f9771f 100644 --- a/src/plugins/lua/neural.lua +++ b/src/plugins/lua/neural.lua @@ -892,7 +892,8 @@ local function check_anns(worker, cfg, ev_base, rule, process_callback, what) elseif type(data) == 'table' then lua_util.debugm(N, cfg, '%s: process element %s:%s (profiles=%s)', what, rule.prefix, set.name, #data) - process_callback(worker, ev_base, rule, set, fun.map(load_ann_profile, data)) + -- Use fun.totable to convert iterator to table for Lua 5.4 compatibility + process_callback(worker, ev_base, rule, set, fun.totable(fun.map(load_ann_profile, data))) set.can_store_vectors = true else lua_util.debugm(N, cfg, '%s: no profiles for %s:%s', what, rule.prefix, set.name) diff --git a/test/functional/lib/rspamd.robot b/test/functional/lib/rspamd.robot index d59f158f23..4fa6995202 100644 --- a/test/functional/lib/rspamd.robot +++ b/test/functional/lib/rspamd.robot @@ -526,8 +526,18 @@ Run Dummy Https Export Scoped Variables ${RSPAMD_SCOPE} DUMMY_HTTPS_PROC=${result} Run Dummy Llm - ${result} = Start Process python3 ${RSPAMD_TESTDIR}/util/dummy_llm.py 18080 - Wait Until Created /tmp/dummy_llm.pid timeout=2 second + ${result} = Start Process ${RSPAMD_TESTDIR}/util/dummy_llm.py 18080 + ... stderr=/tmp/dummy_llm.log stdout=/tmp/dummy_llm.log + ${status} ${error} = Run Keyword And Ignore Error Wait Until Created /tmp/dummy_llm.pid timeout=2 second + IF '${status}' == 'FAIL' + ${logstatus} ${log} = Run Keyword And Ignore Error Get File /tmp/dummy_llm.log + IF '${logstatus}' == 'PASS' + Log dummy_llm.py failed to start. Log output:\n${log} level=ERROR + ELSE + Log dummy_llm.py failed to start. No log file found at /tmp/dummy_llm.log level=ERROR + END + Fail dummy_llm.py did not create PID file in 2 seconds + END Export Scoped Variables ${RSPAMD_SCOPE} DUMMY_LLM_PROC=${result} Dummy Llm Teardown diff --git a/test/functional/util/dummy_llm.py b/test/functional/util/dummy_llm.py old mode 100644 new mode 100755 index 9ee0f17726..758cbdca08 --- a/test/functional/util/dummy_llm.py +++ b/test/functional/util/dummy_llm.py @@ -56,16 +56,25 @@ class EmbeddingHandler(BaseHTTPRequestHandler): if __name__ == "__main__": - alen = len(sys.argv) - if alen > 1: - port = int(sys.argv[1]) - else: - port = 18080 - server = HTTPServer(("127.0.0.1", port), EmbeddingHandler) - dummy_killer.write_pid(PID) + import traceback try: + alen = len(sys.argv) + if alen > 1: + port = int(sys.argv[1]) + else: + port = 18080 + print(f"dummy_llm.py: Starting server on 127.0.0.1:{port}", file=sys.stderr) + server = HTTPServer(("127.0.0.1", port), EmbeddingHandler) + dummy_killer.write_pid(PID) + print(f"dummy_llm.py: PID file written to {PID}", file=sys.stderr) + print(f"dummy_llm.py: Server started successfully", file=sys.stderr) server.serve_forever() except KeyboardInterrupt: pass + except Exception as e: + print(f"dummy_llm.py: FATAL ERROR: {type(e).__name__}: {e}", file=sys.stderr) + traceback.print_exc(file=sys.stderr) + sys.exit(1) finally: - server.server_close() + if 'server' in dir(): + server.server_close()