From: Vsevolod Stakhov <vsevolod@rspamd.com>
Date: Thu, 11 Dec 2025 12:24:53 +0000 (+0000)
Subject: [Minor] Clean up code style and fix Lua portability
X-Git-Tag: 3.14.3~40^2~2
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=061e5cbb8fd66c594ad60e8d647dcf969a7bfa68;p=thirdparty%2Frspamd.git

[Minor] Clean up code style and fix Lua portability
---

diff --git a/lualib/lua_content/pdf.lua b/lualib/lua_content/pdf.lua
index f647dfb981..550465c1a0 100644
--- a/lualib/lua_content/pdf.lua
+++ b/lualib/lua_content/pdf.lua
@@ -384,7 +384,6 @@ local function gen_text_grammar()
     local total_odd = math.ceil(total_checked / 2)
     local total_even = math.floor(total_checked / 2)
 
-    -- Correction for disjoint ranges if len > 32
     if len > 32 then
         total_odd = 16
         total_even = 16
@@ -403,7 +402,6 @@ local function gen_text_grammar()
     if charset and rspamd_util.to_utf8 then
        local conv = rspamd_util.to_utf8(s, charset)
        if conv then
-          -- Check for control characters to avoid garbage
           local garbage_limit = 0
           local clen = #conv
           for i = 1, clen do
@@ -414,7 +412,6 @@ local function gen_text_grammar()
           end
 
           if garbage_limit > 0 then
-             -- Treat as garbage
              return ''
           end
 
@@ -436,11 +433,8 @@ local function gen_text_grammar()
       for _, chunk in ipairs(t) do
         if type(chunk) == 'string' then
           table.insert(tres, chunk)
-        elseif type(chunk) == 'number' then
-          -- Heuristic: if number is < -200, append space
-          if chunk < -200 then
-            table.insert(tres, ' ')
-          end
+        elseif type(chunk) == 'number' and chunk < -200 then
+          table.insert(tres, ' ')
         end
       end
       res = table.concat(tres)
@@ -458,7 +452,6 @@ local function gen_text_grammar()
   local function nary_op_handler(...)
     local args = { ... }
     local op = args[#args]
-    -- local t = args[#args - 1] -- The table of numbers
 
     if op == 'Tm' then
       return '\n'
@@ -470,7 +463,7 @@ local function gen_text_grammar()
   local function ternary_op_handler(...)
     local args = { ... }
     local op = args[#args]
-    local a2 = args[#args - 2] -- Second to last argument (ty)
+    local a2 = args[#args - 2]
 
     if (op == 'Td' or op == 'TD') and type(a2) == 'number' and a2 ~= 0 then
       return '\n'
@@ -599,7 +592,6 @@ local function maybe_apply_filter(dict, data, pdf, task)
     if type(filt) == 'string' then
       filts = { filt }
     elseif type(filt) == 'table' then
-      -- Array of filters
       filts = filt
     end
 
@@ -608,11 +600,8 @@ local function maybe_apply_filter(dict, data, pdf, task)
 
       if type(decode_params) == 'table' then
         if decode_params.Predictor then
-          -- We can handle Predictor 1 (No prediction) or maybe others in future
           local predictor = tonumber(decode_params.Predictor) or 1
           if predictor > 1 then
-            -- For now, we just log debug and fail, or maybe try to continue if it's simple PNG prediction
-            -- But without implementation, better to return nil to avoid garbage
             return nil, 'predictor exists: ' .. tostring(predictor)
           end
         end
@@ -625,7 +614,6 @@ local function maybe_apply_filter(dict, data, pdf, task)
       if next_uncompressed then
         uncompressed = next_uncompressed
       else
-        -- If chain fails, we stop
         return nil, 'filter failed: ' .. tostring(f)
       end
     end
@@ -641,8 +629,6 @@ local function maybe_extract_object_stream(obj, pdf, task)
     return nil
   end
   local dict = obj.dict
-
-  -- Determine length: try /Length first, fallback to parsed length
   local len = obj.stream.len
   local decl_len = maybe_dereference_object(dict.Length, pdf, task)
 
@@ -1343,7 +1329,6 @@ local function search_text(task, pdf, mpart)
             end
 
             bl.data = tobj.uncompressed:span(bl.start, bl.len)
-            -- Only log preview of extracted text to avoid verbose logs
             if bl.len <= 256 then
               lua_util.debugm(N, task, 'extracted text from object %s:%s: %s',
                   tobj.major, tobj.minor, bl.data)
@@ -1363,9 +1348,7 @@ local function search_text(task, pdf, mpart)
                 for _, chunk in ipairs(obj_or_err) do
                   text[#text + 1] = chunk
                 end
-                -- Add newline after each block to separate lines
                 text[#text + 1] = '\n'
-                -- text[#text + 1] = obj_or_err
                 lua_util.debugm(N, task, 'attached %s from content object %s:%s to %s:%s',
                     obj_or_err, tobj.major, tobj.minor, obj.major, obj.minor)
               else
@@ -1384,18 +1367,16 @@ local function search_text(task, pdf, mpart)
           if type(chunk) == 'userdata' then
             text[i] = tostring(chunk)
           elseif type(chunk) == 'table' then
-            -- Iterative flatten to avoid stack overflow with deeply nested tables
             local function flatten(t)
               local res = {}
               local stack = { { tbl = t, idx = 1 } }
-              local max_depth = 100 -- Limit depth to prevent infinite loops
+              local max_depth = 100
 
               while #stack > 0 and #stack <= max_depth do
                 local frame = stack[#stack]
                 local tbl, idx = frame.tbl, frame.idx
 
                 if idx > #tbl then
-                  -- Done with this table, pop frame
                   stack[#stack] = nil
                 else
                   local v = tbl[idx]
@@ -1404,7 +1385,6 @@ local function search_text(task, pdf, mpart)
                   if type(v) == 'userdata' then
                     res[#res + 1] = tostring(v)
                   elseif type(v) == 'table' then
-                    -- Push new frame for nested table
                     stack[#stack + 1] = { tbl = v, idx = 1 }
                   elseif v ~= nil then
                     res[#res + 1] = tostring(v)
@@ -1425,19 +1405,17 @@ local function search_text(task, pdf, mpart)
       end
     end
   end
-  -- Aggregate and inject once
+
   if task.inject_part then
     local all_text = {}
 
     for _, obj in ipairs(pdf.objects) do
       if obj.text and obj.text:len() > 0 then
-        -- Keep as rspamd_text, don't convert to string
         table.insert(all_text, obj.text)
       end
     end
 
     if #all_text > 0 then
-      -- Pass table of rspamd_text directly - will be efficiently merged in C
       task:inject_part('text', all_text, mpart)
     end
   end
diff --git a/src/libmime/message.c b/src/libmime/message.c
index 3d9e0d0849..ad2aaf21c3 100644
--- a/src/libmime/message.c
+++ b/src/libmime/message.c
@@ -1119,19 +1119,14 @@ void rspamd_message_process_injected_text_part(struct rspamd_task *task,
 											   struct rspamd_mime_text_part *text_part,
 											   uint16_t *cur_url_order)
 {
-	/* Process plain text (no HTML support for injected parts yet) */
 	if (!rspamd_message_process_plain_text_part(task, text_part)) {
 		return;
 	}
 
-	/* Normalize text */
 	rspamd_normalize_text_part(task, text_part);
-
-	/* Extract URLs - always use FIND_ALL for injected parts (plain text) */
 	rspamd_url_text_extract(task->task_pool, task, text_part, cur_url_order,
 							RSPAMD_URL_FIND_ALL);
 
-	/* Add destructor for exceptions list (created by normalize and URL extraction) */
 	if (text_part->exceptions) {
 		text_part->exceptions = g_list_sort(text_part->exceptions,
 											exceptions_compare_func);
@@ -1140,7 +1135,6 @@ void rspamd_message_process_injected_text_part(struct rspamd_task *task,
 									  text_part->exceptions);
 	}
 
-	/* Create words for Bayes/stats */
 	rspamd_mime_part_create_words(task, text_part);
 }
 
diff --git a/src/lua/lua_task.c b/src/lua/lua_task.c
index 89ef439cfc..375e354f81 100644
--- a/src/lua/lua_task.c
+++ b/src/lua/lua_task.c
@@ -2802,7 +2802,7 @@ lua_task_inject_part(lua_State *L)
 	if (lua_type(L, 3) == LUA_TTABLE) {
 		is_table = TRUE;
 		/* Calculate total length first and validate all entries */
-		int table_len = lua_objlen(L, 3);
+		int table_len = rspamd_lua_table_size(L, 3);
 		if (table_len <= 0) {
 			return luaL_error(L, "empty table provided");
 		}
@@ -2843,12 +2843,10 @@ lua_task_inject_part(lua_State *L)
 			part->part_type = RSPAMD_MIME_PART_TEXT;
 			part->flags |= RSPAMD_MIME_PART_COMPUTED;
 
-			/* Set parent part if provided */
 			if (original_part) {
 				part->parent_part = original_part;
 			}
 
-			/* Basic headers setup */
 			part->ct = rspamd_mempool_alloc0(task->task_pool, sizeof(*part->ct));
 
 			part->ct->type.begin = "text";
@@ -2859,12 +2857,10 @@ lua_task_inject_part(lua_State *L)
 			part->ct->charset.begin = "utf-8";
 			part->ct->charset.len = 5;
 
-			/* Content setup - merge table or copy single content */
 			part->parsed_data.begin = rspamd_mempool_alloc(task->task_pool, content_len + 1);
 			part->parsed_data.len = content_len;
 
 			if (is_table) {
-				/* Efficiently merge all text chunks */
 				char *dst = (char *) part->parsed_data.begin;
 				lua_pushnil(L);
 				while (lua_next(L, 3) != 0) {
@@ -2878,14 +2874,12 @@ lua_task_inject_part(lua_State *L)
 				*dst = '\0';
 			}
 			else {
-				/* Single content */
 				memcpy((char *) part->parsed_data.begin, content, content_len);
 				((char *) part->parsed_data.begin)[content_len] = '\0';
 			}
 
 			part->raw_data = part->parsed_data;
 
-			/* Text part specific setup */
 			txt_part = rspamd_mempool_alloc0(task->task_pool, sizeof(*txt_part));
 			txt_part->mime_part = part;
 			txt_part->raw.begin = part->parsed_data.begin;
@@ -2895,14 +2889,11 @@ lua_task_inject_part(lua_State *L)
 			txt_part->utf_stripped_text = (UText) UTEXT_INITIALIZER;
 			txt_part->real_charset = "utf-8";
 
-			/* Add to message */
 			part->specific.txt = txt_part;
 			g_ptr_array_add(task->message->parts, part);
 			g_ptr_array_add(task->message->text_parts, txt_part);
 
-			/* Process injected text part fully (URLs, words, normalization) */
 			if (task->cfg && task->message) {
-				/* Use high order number to ensure injected URLs are after original ones */
 				uint16_t cur_url_order = 10000;
 				rspamd_message_process_injected_text_part(task, txt_part, &cur_url_order);
 			}
diff --git a/src/lua/lua_util.c b/src/lua/lua_util.c
index 5bcdf410ff..24fcd7233d 100644
--- a/src/lua/lua_util.c
+++ b/src/lua/lua_util.c
@@ -1785,7 +1785,6 @@ lua_util_to_utf8(lua_State *L)
 		return luaL_error(L, "invalid arguments");
 	}
 
-	/* Prevent integer overflow in buffer size calculation */
 	if (t->len > (G_MAXINT32 / 2 - 16)) {
 		lua_pushnil(L);
 		return 1;
@@ -1796,7 +1795,6 @@ lua_util_to_utf8(lua_State *L)
 
 	dest_len = ucnv_convert("UTF-8", charset, dest, dest_cap, t->start, t->len, &err);
 
-	/* Check for negative length (indicates error) or buffer overflow */
 	if (dest_len < 0) {
 		g_free(dest);
 		lua_pushnil(L);
@@ -1810,7 +1808,6 @@ lua_util_to_utf8(lua_State *L)
 		dest = g_malloc(dest_cap);
 		dest_len = ucnv_convert("UTF-8", charset, dest, dest_cap, t->start, t->len, &err);
 
-		/* Check again after retry */
 		if (dest_len < 0) {
 			g_free(dest);
 			lua_pushnil(L);