[Minor] Clean up code style and fix Lua portability

author Vsevolod Stakhov <vsevolod@rspamd.com>

Thu, 11 Dec 2025 12:24:53 +0000 (12:24 +0000)

committer Vsevolod Stakhov <vsevolod@rspamd.com>

Thu, 11 Dec 2025 12:24:53 +0000 (12:24 +0000)
author Vsevolod Stakhov <vsevolod@rspamd.com>
Thu, 11 Dec 2025 12:24:53 +0000 (12:24 +0000)
committer Vsevolod Stakhov <vsevolod@rspamd.com>
Thu, 11 Dec 2025 12:24:53 +0000 (12:24 +0000)
diff --git a/lualib/lua_content/pdf.lua b/lualib/lua_content/pdf.lua

index f647dfb98195736030149c22cea94b70bb1a747b..550465c1a0009c3d6135554d919b028b51778b8f 100644 (file)
--- a/lualib/lua_content/pdf.lua
+++ b/lualib/lua_content/pdf.lua
@@ -384,7 +384,6 @@ local function gen_text_grammar()
      local total_odd = math.ceil(total_checked / 2)
      local total_even = math.floor(total_checked / 2)
  
-    -- Correction for disjoint ranges if len > 32
      if len > 32 then
          total_odd = 16
          total_even = 16
@@ -403,7 +402,6 @@ local function gen_text_grammar()
      if charset and rspamd_util.to_utf8 then
         local conv = rspamd_util.to_utf8(s, charset)
         if conv then
-          -- Check for control characters to avoid garbage
            local garbage_limit = 0
            local clen = #conv
            for i = 1, clen do
@@ -414,7 +412,6 @@ local function gen_text_grammar()
            end
  
            if garbage_limit > 0 then
-             -- Treat as garbage
               return ''
            end
  
@@ -436,11 +433,8 @@ local function gen_text_grammar()
        for _, chunk in ipairs(t) do
          if type(chunk) == 'string' then
            table.insert(tres, chunk)
-        elseif type(chunk) == 'number' then
-          -- Heuristic: if number is < -200, append space
-          if chunk < -200 then
-            table.insert(tres, ' ')
-          end
+        elseif type(chunk) == 'number' and chunk < -200 then
+          table.insert(tres, ' ')
          end
        end
        res = table.concat(tres)
@@ -458,7 +452,6 @@ local function gen_text_grammar()
    local function nary_op_handler(...)
      local args = { ... }
      local op = args[#args]
-    -- local t = args[#args - 1] -- The table of numbers
  
      if op == 'Tm' then
        return '\n'
@@ -470,7 +463,7 @@ local function gen_text_grammar()
    local function ternary_op_handler(...)
      local args = { ... }
      local op = args[#args]
-    local a2 = args[#args - 2] -- Second to last argument (ty)
+    local a2 = args[#args - 2]
  
      if (op == 'Td' or op == 'TD') and type(a2) == 'number' and a2 ~= 0 then
        return '\n'
@@ -599,7 +592,6 @@ local function maybe_apply_filter(dict, data, pdf, task)
      if type(filt) == 'string' then
        filts = { filt }
      elseif type(filt) == 'table' then
-      -- Array of filters
        filts = filt
      end
  
@@ -608,11 +600,8 @@ local function maybe_apply_filter(dict, data, pdf, task)
  
        if type(decode_params) == 'table' then
          if decode_params.Predictor then
-          -- We can handle Predictor 1 (No prediction) or maybe others in future
            local predictor = tonumber(decode_params.Predictor) or 1
            if predictor > 1 then
-            -- For now, we just log debug and fail, or maybe try to continue if it's simple PNG prediction
-            -- But without implementation, better to return nil to avoid garbage
              return nil, 'predictor exists: ' .. tostring(predictor)
            end
          end
@@ -625,7 +614,6 @@ local function maybe_apply_filter(dict, data, pdf, task)
        if next_uncompressed then
          uncompressed = next_uncompressed
        else
-        -- If chain fails, we stop
          return nil, 'filter failed: ' .. tostring(f)
        end
      end
@@ -641,8 +629,6 @@ local function maybe_extract_object_stream(obj, pdf, task)
      return nil
    end
    local dict = obj.dict
-
-  -- Determine length: try /Length first, fallback to parsed length
    local len = obj.stream.len
    local decl_len = maybe_dereference_object(dict.Length, pdf, task)
  
@@ -1343,7 +1329,6 @@ local function search_text(task, pdf, mpart)
              end
  
              bl.data = tobj.uncompressed:span(bl.start, bl.len)
-            -- Only log preview of extracted text to avoid verbose logs
              if bl.len <= 256 then
                lua_util.debugm(N, task, 'extracted text from object %s:%s: %s',
                    tobj.major, tobj.minor, bl.data)
@@ -1363,9 +1348,7 @@ local function search_text(task, pdf, mpart)
                  for _, chunk in ipairs(obj_or_err) do
                    text[#text + 1] = chunk
                  end
-                -- Add newline after each block to separate lines
                  text[#text + 1] = '\n'
-                -- text[#text + 1] = obj_or_err
                  lua_util.debugm(N, task, 'attached %s from content object %s:%s to %s:%s',
                      obj_or_err, tobj.major, tobj.minor, obj.major, obj.minor)
                else
@@ -1384,18 +1367,16 @@ local function search_text(task, pdf, mpart)
            if type(chunk) == 'userdata' then
              text[i] = tostring(chunk)
            elseif type(chunk) == 'table' then
-            -- Iterative flatten to avoid stack overflow with deeply nested tables
              local function flatten(t)
                local res = {}
                local stack = { { tbl = t, idx = 1 } }
-              local max_depth = 100 -- Limit depth to prevent infinite loops
+              local max_depth = 100
  
                while #stack > 0 and #stack <= max_depth do
                  local frame = stack[#stack]
                  local tbl, idx = frame.tbl, frame.idx
  
                  if idx > #tbl then
-                  -- Done with this table, pop frame
                    stack[#stack] = nil
                  else
                    local v = tbl[idx]
@@ -1404,7 +1385,6 @@ local function search_text(task, pdf, mpart)
                    if type(v) == 'userdata' then
                      res[#res + 1] = tostring(v)
                    elseif type(v) == 'table' then
-                    -- Push new frame for nested table
                      stack[#stack + 1] = { tbl = v, idx = 1 }
                    elseif v ~= nil then
                      res[#res + 1] = tostring(v)
@@ -1425,19 +1405,17 @@ local function search_text(task, pdf, mpart)
        end
      end
    end
-  -- Aggregate and inject once
+
    if task.inject_part then
      local all_text = {}
  
      for _, obj in ipairs(pdf.objects) do
        if obj.text and obj.text:len() > 0 then
-        -- Keep as rspamd_text, don't convert to string
          table.insert(all_text, obj.text)
        end
      end
  
      if #all_text > 0 then
-      -- Pass table of rspamd_text directly - will be efficiently merged in C
        task:inject_part('text', all_text, mpart)
      end
    end
diff --git a/src/libmime/message.c b/src/libmime/message.c

index 3d9e0d0849b264acfc6491a2019e15d6248e1b7d..ad2aaf21c326d5a5ed49dfdc9065eeffc0c3e403 100644 (file)
--- a/src/libmime/message.c
+++ b/src/libmime/message.c
@@ -1119,19 +1119,14 @@ void rspamd_message_process_injected_text_part(struct rspamd_task *task,
                                                                                            struct rspamd_mime_text_part *text_part,
                                                                                            uint16_t *cur_url_order)
  {
-       /* Process plain text (no HTML support for injected parts yet) */
         if (!rspamd_message_process_plain_text_part(task, text_part)) {
                 return;
         }
  
-       /* Normalize text */
         rspamd_normalize_text_part(task, text_part);
-
-       /* Extract URLs - always use FIND_ALL for injected parts (plain text) */
         rspamd_url_text_extract(task->task_pool, task, text_part, cur_url_order,
                                                         RSPAMD_URL_FIND_ALL);
  
-       /* Add destructor for exceptions list (created by normalize and URL extraction) */
         if (text_part->exceptions) {
                 text_part->exceptions = g_list_sort(text_part->exceptions,
                                                                                         exceptions_compare_func);
@@ -1140,7 +1135,6 @@ void rspamd_message_process_injected_text_part(struct rspamd_task *task,
                                                                           text_part->exceptions);
         }
  
-       /* Create words for Bayes/stats */
         rspamd_mime_part_create_words(task, text_part);
  }
  
diff --git a/src/lua/lua_task.c b/src/lua/lua_task.c

index 89ef439cfc957fb8744b49da0502d1db390fb650..375e354f81add87e2bf02e6d3f550cd5c7c21654 100644 (file)
--- a/src/lua/lua_task.c
+++ b/src/lua/lua_task.c
@@ -2802,7 +2802,7 @@ lua_task_inject_part(lua_State *L)
         if (lua_type(L, 3) == LUA_TTABLE) {
                 is_table = TRUE;
                 /* Calculate total length first and validate all entries */
-               int table_len = lua_objlen(L, 3);
+               int table_len = rspamd_lua_table_size(L, 3);
                 if (table_len <= 0) {
                         return luaL_error(L, "empty table provided");
                 }
@@ -2843,12 +2843,10 @@ lua_task_inject_part(lua_State *L)
                         part->part_type = RSPAMD_MIME_PART_TEXT;
                         part->flags |= RSPAMD_MIME_PART_COMPUTED;
  
-                       /* Set parent part if provided */
                         if (original_part) {
                                 part->parent_part = original_part;
                         }
  
-                       /* Basic headers setup */
                         part->ct = rspamd_mempool_alloc0(task->task_pool, sizeof(*part->ct));
  
                         part->ct->type.begin = "text";
@@ -2859,12 +2857,10 @@ lua_task_inject_part(lua_State *L)
                         part->ct->charset.begin = "utf-8";
                         part->ct->charset.len = 5;
  
-                       /* Content setup - merge table or copy single content */
                         part->parsed_data.begin = rspamd_mempool_alloc(task->task_pool, content_len + 1);
                         part->parsed_data.len = content_len;
  
                         if (is_table) {
-                               /* Efficiently merge all text chunks */
                                 char *dst = (char *) part->parsed_data.begin;
                                 lua_pushnil(L);
                                 while (lua_next(L, 3) != 0) {
@@ -2878,14 +2874,12 @@ lua_task_inject_part(lua_State *L)
                                 *dst = '\0';
                         }
                         else {
-                               /* Single content */
                                 memcpy((char *) part->parsed_data.begin, content, content_len);
                                 ((char *) part->parsed_data.begin)[content_len] = '\0';
                         }
  
                         part->raw_data = part->parsed_data;
  
-                       /* Text part specific setup */
                         txt_part = rspamd_mempool_alloc0(task->task_pool, sizeof(*txt_part));
                         txt_part->mime_part = part;
                         txt_part->raw.begin = part->parsed_data.begin;
@@ -2895,14 +2889,11 @@ lua_task_inject_part(lua_State *L)
                         txt_part->utf_stripped_text = (UText) UTEXT_INITIALIZER;
                         txt_part->real_charset = "utf-8";
  
-                       /* Add to message */
                         part->specific.txt = txt_part;
                         g_ptr_array_add(task->message->parts, part);
                         g_ptr_array_add(task->message->text_parts, txt_part);
  
-                       /* Process injected text part fully (URLs, words, normalization) */
                         if (task->cfg && task->message) {
-                               /* Use high order number to ensure injected URLs are after original ones */
                                 uint16_t cur_url_order = 10000;
                                 rspamd_message_process_injected_text_part(task, txt_part, &cur_url_order);
                         }
diff --git a/src/lua/lua_util.c b/src/lua/lua_util.c

index 5bcdf410fff1134e6bbd87e48b694514d3e61e0c..24fcd7233d83618b36cda84d86ba13b0b5fa7b37 100644 (file)
--- a/src/lua/lua_util.c
+++ b/src/lua/lua_util.c
@@ -1785,7 +1785,6 @@ lua_util_to_utf8(lua_State *L)
                 return luaL_error(L, "invalid arguments");
         }
  
-       /* Prevent integer overflow in buffer size calculation */
         if (t->len > (G_MAXINT32 / 2 - 16)) {
                 lua_pushnil(L);
                 return 1;
@@ -1796,7 +1795,6 @@ lua_util_to_utf8(lua_State *L)
  
         dest_len = ucnv_convert("UTF-8", charset, dest, dest_cap, t->start, t->len, &err);
  
-       /* Check for negative length (indicates error) or buffer overflow */
         if (dest_len < 0) {
                 g_free(dest);
                 lua_pushnil(L);
@@ -1810,7 +1808,6 @@ lua_util_to_utf8(lua_State *L)
                 dest = g_malloc(dest_cap);
                 dest_len = ucnv_convert("UTF-8", charset, dest, dest_cap, t->start, t->len, &err);
  
-               /* Check again after retry */
                 if (dest_len < 0) {
                         g_free(dest);
                         lua_pushnil(L);
author	Vsevolod Stakhov <vsevolod@rspamd.com>
	Thu, 11 Dec 2025 12:24:53 +0000 (12:24 +0000)
committer	Vsevolod Stakhov <vsevolod@rspamd.com>
	Thu, 11 Dec 2025 12:24:53 +0000 (12:24 +0000)
lualib/lua_content/pdf.lua		patch | blob | blame | history
src/libmime/message.c		patch | blob | blame | history
src/lua/lua_task.c		patch | blob | blame | history
src/lua/lua_util.c		patch | blob | blame | history