From: Vsevolod Stakhov Date: Thu, 11 Dec 2025 12:24:53 +0000 (+0000) Subject: [Minor] Clean up code style and fix Lua portability X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=061e5cbb8fd66c594ad60e8d647dcf969a7bfa68;p=thirdparty%2Frspamd.git [Minor] Clean up code style and fix Lua portability --- diff --git a/lualib/lua_content/pdf.lua b/lualib/lua_content/pdf.lua index f647dfb981..550465c1a0 100644 --- a/lualib/lua_content/pdf.lua +++ b/lualib/lua_content/pdf.lua @@ -384,7 +384,6 @@ local function gen_text_grammar() local total_odd = math.ceil(total_checked / 2) local total_even = math.floor(total_checked / 2) - -- Correction for disjoint ranges if len > 32 if len > 32 then total_odd = 16 total_even = 16 @@ -403,7 +402,6 @@ local function gen_text_grammar() if charset and rspamd_util.to_utf8 then local conv = rspamd_util.to_utf8(s, charset) if conv then - -- Check for control characters to avoid garbage local garbage_limit = 0 local clen = #conv for i = 1, clen do @@ -414,7 +412,6 @@ local function gen_text_grammar() end if garbage_limit > 0 then - -- Treat as garbage return '' end @@ -436,11 +433,8 @@ local function gen_text_grammar() for _, chunk in ipairs(t) do if type(chunk) == 'string' then table.insert(tres, chunk) - elseif type(chunk) == 'number' then - -- Heuristic: if number is < -200, append space - if chunk < -200 then - table.insert(tres, ' ') - end + elseif type(chunk) == 'number' and chunk < -200 then + table.insert(tres, ' ') end end res = table.concat(tres) @@ -458,7 +452,6 @@ local function gen_text_grammar() local function nary_op_handler(...) local args = { ... } local op = args[#args] - -- local t = args[#args - 1] -- The table of numbers if op == 'Tm' then return '\n' @@ -470,7 +463,7 @@ local function gen_text_grammar() local function ternary_op_handler(...) local args = { ... } local op = args[#args] - local a2 = args[#args - 2] -- Second to last argument (ty) + local a2 = args[#args - 2] if (op == 'Td' or op == 'TD') and type(a2) == 'number' and a2 ~= 0 then return '\n' @@ -599,7 +592,6 @@ local function maybe_apply_filter(dict, data, pdf, task) if type(filt) == 'string' then filts = { filt } elseif type(filt) == 'table' then - -- Array of filters filts = filt end @@ -608,11 +600,8 @@ local function maybe_apply_filter(dict, data, pdf, task) if type(decode_params) == 'table' then if decode_params.Predictor then - -- We can handle Predictor 1 (No prediction) or maybe others in future local predictor = tonumber(decode_params.Predictor) or 1 if predictor > 1 then - -- For now, we just log debug and fail, or maybe try to continue if it's simple PNG prediction - -- But without implementation, better to return nil to avoid garbage return nil, 'predictor exists: ' .. tostring(predictor) end end @@ -625,7 +614,6 @@ local function maybe_apply_filter(dict, data, pdf, task) if next_uncompressed then uncompressed = next_uncompressed else - -- If chain fails, we stop return nil, 'filter failed: ' .. tostring(f) end end @@ -641,8 +629,6 @@ local function maybe_extract_object_stream(obj, pdf, task) return nil end local dict = obj.dict - - -- Determine length: try /Length first, fallback to parsed length local len = obj.stream.len local decl_len = maybe_dereference_object(dict.Length, pdf, task) @@ -1343,7 +1329,6 @@ local function search_text(task, pdf, mpart) end bl.data = tobj.uncompressed:span(bl.start, bl.len) - -- Only log preview of extracted text to avoid verbose logs if bl.len <= 256 then lua_util.debugm(N, task, 'extracted text from object %s:%s: %s', tobj.major, tobj.minor, bl.data) @@ -1363,9 +1348,7 @@ local function search_text(task, pdf, mpart) for _, chunk in ipairs(obj_or_err) do text[#text + 1] = chunk end - -- Add newline after each block to separate lines text[#text + 1] = '\n' - -- text[#text + 1] = obj_or_err lua_util.debugm(N, task, 'attached %s from content object %s:%s to %s:%s', obj_or_err, tobj.major, tobj.minor, obj.major, obj.minor) else @@ -1384,18 +1367,16 @@ local function search_text(task, pdf, mpart) if type(chunk) == 'userdata' then text[i] = tostring(chunk) elseif type(chunk) == 'table' then - -- Iterative flatten to avoid stack overflow with deeply nested tables local function flatten(t) local res = {} local stack = { { tbl = t, idx = 1 } } - local max_depth = 100 -- Limit depth to prevent infinite loops + local max_depth = 100 while #stack > 0 and #stack <= max_depth do local frame = stack[#stack] local tbl, idx = frame.tbl, frame.idx if idx > #tbl then - -- Done with this table, pop frame stack[#stack] = nil else local v = tbl[idx] @@ -1404,7 +1385,6 @@ local function search_text(task, pdf, mpart) if type(v) == 'userdata' then res[#res + 1] = tostring(v) elseif type(v) == 'table' then - -- Push new frame for nested table stack[#stack + 1] = { tbl = v, idx = 1 } elseif v ~= nil then res[#res + 1] = tostring(v) @@ -1425,19 +1405,17 @@ local function search_text(task, pdf, mpart) end end end - -- Aggregate and inject once + if task.inject_part then local all_text = {} for _, obj in ipairs(pdf.objects) do if obj.text and obj.text:len() > 0 then - -- Keep as rspamd_text, don't convert to string table.insert(all_text, obj.text) end end if #all_text > 0 then - -- Pass table of rspamd_text directly - will be efficiently merged in C task:inject_part('text', all_text, mpart) end end diff --git a/src/libmime/message.c b/src/libmime/message.c index 3d9e0d0849..ad2aaf21c3 100644 --- a/src/libmime/message.c +++ b/src/libmime/message.c @@ -1119,19 +1119,14 @@ void rspamd_message_process_injected_text_part(struct rspamd_task *task, struct rspamd_mime_text_part *text_part, uint16_t *cur_url_order) { - /* Process plain text (no HTML support for injected parts yet) */ if (!rspamd_message_process_plain_text_part(task, text_part)) { return; } - /* Normalize text */ rspamd_normalize_text_part(task, text_part); - - /* Extract URLs - always use FIND_ALL for injected parts (plain text) */ rspamd_url_text_extract(task->task_pool, task, text_part, cur_url_order, RSPAMD_URL_FIND_ALL); - /* Add destructor for exceptions list (created by normalize and URL extraction) */ if (text_part->exceptions) { text_part->exceptions = g_list_sort(text_part->exceptions, exceptions_compare_func); @@ -1140,7 +1135,6 @@ void rspamd_message_process_injected_text_part(struct rspamd_task *task, text_part->exceptions); } - /* Create words for Bayes/stats */ rspamd_mime_part_create_words(task, text_part); } diff --git a/src/lua/lua_task.c b/src/lua/lua_task.c index 89ef439cfc..375e354f81 100644 --- a/src/lua/lua_task.c +++ b/src/lua/lua_task.c @@ -2802,7 +2802,7 @@ lua_task_inject_part(lua_State *L) if (lua_type(L, 3) == LUA_TTABLE) { is_table = TRUE; /* Calculate total length first and validate all entries */ - int table_len = lua_objlen(L, 3); + int table_len = rspamd_lua_table_size(L, 3); if (table_len <= 0) { return luaL_error(L, "empty table provided"); } @@ -2843,12 +2843,10 @@ lua_task_inject_part(lua_State *L) part->part_type = RSPAMD_MIME_PART_TEXT; part->flags |= RSPAMD_MIME_PART_COMPUTED; - /* Set parent part if provided */ if (original_part) { part->parent_part = original_part; } - /* Basic headers setup */ part->ct = rspamd_mempool_alloc0(task->task_pool, sizeof(*part->ct)); part->ct->type.begin = "text"; @@ -2859,12 +2857,10 @@ lua_task_inject_part(lua_State *L) part->ct->charset.begin = "utf-8"; part->ct->charset.len = 5; - /* Content setup - merge table or copy single content */ part->parsed_data.begin = rspamd_mempool_alloc(task->task_pool, content_len + 1); part->parsed_data.len = content_len; if (is_table) { - /* Efficiently merge all text chunks */ char *dst = (char *) part->parsed_data.begin; lua_pushnil(L); while (lua_next(L, 3) != 0) { @@ -2878,14 +2874,12 @@ lua_task_inject_part(lua_State *L) *dst = '\0'; } else { - /* Single content */ memcpy((char *) part->parsed_data.begin, content, content_len); ((char *) part->parsed_data.begin)[content_len] = '\0'; } part->raw_data = part->parsed_data; - /* Text part specific setup */ txt_part = rspamd_mempool_alloc0(task->task_pool, sizeof(*txt_part)); txt_part->mime_part = part; txt_part->raw.begin = part->parsed_data.begin; @@ -2895,14 +2889,11 @@ lua_task_inject_part(lua_State *L) txt_part->utf_stripped_text = (UText) UTEXT_INITIALIZER; txt_part->real_charset = "utf-8"; - /* Add to message */ part->specific.txt = txt_part; g_ptr_array_add(task->message->parts, part); g_ptr_array_add(task->message->text_parts, txt_part); - /* Process injected text part fully (URLs, words, normalization) */ if (task->cfg && task->message) { - /* Use high order number to ensure injected URLs are after original ones */ uint16_t cur_url_order = 10000; rspamd_message_process_injected_text_part(task, txt_part, &cur_url_order); } diff --git a/src/lua/lua_util.c b/src/lua/lua_util.c index 5bcdf410ff..24fcd7233d 100644 --- a/src/lua/lua_util.c +++ b/src/lua/lua_util.c @@ -1785,7 +1785,6 @@ lua_util_to_utf8(lua_State *L) return luaL_error(L, "invalid arguments"); } - /* Prevent integer overflow in buffer size calculation */ if (t->len > (G_MAXINT32 / 2 - 16)) { lua_pushnil(L); return 1; @@ -1796,7 +1795,6 @@ lua_util_to_utf8(lua_State *L) dest_len = ucnv_convert("UTF-8", charset, dest, dest_cap, t->start, t->len, &err); - /* Check for negative length (indicates error) or buffer overflow */ if (dest_len < 0) { g_free(dest); lua_pushnil(L); @@ -1810,7 +1808,6 @@ lua_util_to_utf8(lua_State *L) dest = g_malloc(dest_cap); dest_len = ucnv_convert("UTF-8", charset, dest, dest_cap, t->start, t->len, &err); - /* Check again after retry */ if (dest_len < 0) { g_free(dest); lua_pushnil(L);