[Project] Various fixes

author Vsevolod Stakhov <vsevolod@rspamd.com>

Mon, 24 Nov 2025 11:39:44 +0000 (11:39 +0000)

committer Vsevolod Stakhov <vsevolod@rspamd.com>

Mon, 24 Nov 2025 11:39:44 +0000 (11:39 +0000)
author Vsevolod Stakhov <vsevolod@rspamd.com>
Mon, 24 Nov 2025 11:39:44 +0000 (11:39 +0000)
committer Vsevolod Stakhov <vsevolod@rspamd.com>
Mon, 24 Nov 2025 11:39:44 +0000 (11:39 +0000)
diff --git a/lualib/lua_content/pdf.lua b/lualib/lua_content/pdf.lua

index b356407ec47a91be2bee5e117875a45c1f24ecfd..d49b2e699f162e00a04a35f64668511a286848c2 100644 (file)
--- a/lualib/lua_content/pdf.lua
+++ b/lualib/lua_content/pdf.lua
@@ -335,7 +335,7 @@ local function gen_text_grammar()
    local gen = generic_grammar_elts()
  
    local function text_op_handler(...)
-    local args = {...}
+    local args = { ... }
      local op = args[#args]
      local t = args[#args - 1]
  
@@ -363,7 +363,7 @@ local function gen_text_grammar()
    end
  
    local function nary_op_handler(...)
-    local args = {...}
+    local args = { ... }
      local op = args[#args]
      -- local t = args[#args - 1] -- The table of numbers
  
@@ -375,7 +375,7 @@ local function gen_text_grammar()
    end
  
    local function ternary_op_handler(...)
-    local args = {...}
+    local args = { ... }
      local op = args[#args]
      local a2 = args[#args - 2] -- Second to last argument (ty)
  
@@ -510,9 +510,9 @@ local function maybe_apply_filter(dict, data, pdf, task)
            -- We can handle Predictor 1 (No prediction) or maybe others in future
            local predictor = tonumber(decode_params.Predictor) or 1
            if predictor > 1 then
-             -- For now, we just log debug and fail, or maybe try to continue if it's simple PNG prediction
-             -- But without implementation, better to return nil to avoid garbage
-             return nil, 'predictor exists: ' .. tostring(predictor)
+            -- For now, we just log debug and fail, or maybe try to continue if it's simple PNG prediction
+            -- But without implementation, better to return nil to avoid garbage
+            return nil, 'predictor exists: ' .. tostring(predictor)
            end
          end
        end
@@ -1139,9 +1139,6 @@ local function postprocess_pdf_objects(task, input, pdf)
  
        if now >= pdf.end_timestamp then
          pdf.timeout_processing = now - pdf.start_timestamp
-
-        io.stderr:write(string.format("DEBUG: Timeout! Start: %f, End: %f, Now: %f\n", pdf.start_timestamp, pdf.end_timestamp, now))
-
          lua_util.debugm(N, task, 'pdf: timeout processing grammars after spending %s seconds, ' ..
              '%s elements processed',
              pdf.timeout_processing, i)
@@ -1210,7 +1207,7 @@ local function offsets_to_blocks(starts, ends, out)
    end
  end
  
-local function search_text(task, pdf)
+local function search_text(task, pdf, mpart)
    for _, obj in ipairs(pdf.objects) do
      if obj.type == 'Page' and obj.contents then
        local text = {}
@@ -1254,7 +1251,7 @@ local function search_text(task, pdf)
  
                if ret then
                  if #obj_or_err == 0 then
-                   lua_util.debugm(N, task, 'empty text match from block: %s', bl.data)
+                  lua_util.debugm(N, task, 'empty text match from block: %s', bl.data)
                  end
                  for _, chunk in ipairs(obj_or_err) do
                    text[#text + 1] = chunk
@@ -1308,20 +1305,17 @@ local function search_text(task, pdf)
    -- Aggregate and inject once
    if task.inject_part then
      local all_text = {}
+
      for _, obj in ipairs(pdf.objects) do
-      if obj.text then
-        table.insert(all_text, tostring(obj.text))
+      if obj.text and obj.text:len() > 0 then
+        -- Keep as rspamd_text, don't convert to string
+        table.insert(all_text, obj.text)
        end
      end
  
      if #all_text > 0 then
-      local final_text = table.concat(all_text, "\n")
-      -- Only inject if it contains non-whitespace characters
-      if final_text:match("%S") then
-        task:inject_part('text', final_text)
-      else
-        lua_util.debugm(N, task, 'skipping injection of empty/whitespace-only text')
-      end
+      -- Pass table of rspamd_text directly - will be efficiently merged in C
+      task:inject_part('text', all_text, mpart)
      end
    end
  end
@@ -1361,8 +1355,6 @@ local function search_urls(task, pdf, mpart)
  end
  
  local function process_pdf(input, mpart, task)
-  -- io.stderr:write("DEBUG: process_pdf called, input len: " .. tostring(#input) .. "\n")
-
    if not config.enabled then
      -- Skip processing
      return {}
@@ -1371,7 +1363,6 @@ local function process_pdf(input, mpart, task)
    local matches = pdf_trie:match(input)
  
    if matches then
-    -- io.stderr:write("DEBUG: PDF matches found\n")
      local start_ts = rspamd_util.get_ticks()
      -- Temp object used to share data between pdf extraction methods
      local pdf_object = {
@@ -1423,7 +1414,7 @@ local function process_pdf(input, mpart, task)
        postprocess_pdf_objects(task, input, pdf_object)
        pdf_output.objects = pdf_object.objects
        if config.text_extraction then
-        search_text(task, pdf_object, pdf_output)
+        search_text(task, pdf_object, mpart)
        end
        if config.url_extraction then
          search_urls(task, pdf_object, mpart, pdf_output)
diff --git a/lualib/rspamadm/mime.lua b/lualib/rspamadm/mime.lua

index 9ad7238dd3cc4729d28bae191da0c9b920dd92e2..b55e4ae0368d53a81fdb274d831adecaf37000a4 100644 (file)
--- a/lualib/rspamadm/mime.lua
+++ b/lualib/rspamadm/mime.lua
@@ -35,96 +35,96 @@ local parser = argparse()
      :require_command(true)
  
  parser:option "-c --config"
-    :description "Path to config file"
-    :argname("<cfg>")
-    :default(rspamd_paths["CONFDIR"] .. "/" .. "rspamd.conf")
+      :description "Path to config file"
+      :argname("<cfg>")
+      :default(rspamd_paths["CONFDIR"] .. "/" .. "rspamd.conf")
  parser:mutex(
-  parser:flag "-j --json"
-  :description "JSON output",
-  parser:flag "-U --ucl"
-  :description "UCL output",
-  parser:flag "-M --messagepack"
-  :description "MessagePack output"
+    parser:flag "-j --json"
+          :description "JSON output",
+    parser:flag "-U --ucl"
+          :description "UCL output",
+    parser:flag "-M --messagepack"
+          :description "MessagePack output"
  )
  parser:flag "-C --compact"
-    :description "Use compact format"
+      :description "Use compact format"
  parser:flag "--no-file"
-    :description "Do not print filename"
+      :description "Do not print filename"
  
  -- Extract subcommand
  local extract = parser:command "extract ex e"
-    :description "Extracts data from MIME messages"
+                      :description "Extracts data from MIME messages"
  extract:argument "file"
-    :description "File to process"
-    :argname "<file>"
-    :args "+"
+       :description "File to process"
+       :argname "<file>"
+       :args "+"
  
  extract:flag "-t --text"
-    :description "Extracts plain text data from a message"
+       :description "Extracts plain text data from a message"
  extract:flag "-r --raw"
-    :description "Load as raw file"
+       :description "Load as raw file"
  extract:flag "-H --html"
-    :description "Extracts htm data from a message"
+       :description "Extracts htm data from a message"
  extract:option "-o --output"
-    :description "Output format ('raw', 'content', 'oneline', 'decoded', 'decoded_utf')"
-    :argname("<type>")
-    :convert {
-      raw = "raw",
-      content = "content",
-      oneline = "content_oneline",
-      decoded = "raw_parsed",
-      decoded_utf = "raw_utf"
-    }
-    :default "content"
+       :description "Output format ('raw', 'content', 'oneline', 'decoded', 'decoded_utf')"
+       :argname("<type>")
+       :convert {
+  raw = "raw",
+  content = "content",
+  oneline = "content_oneline",
+  decoded = "raw_parsed",
+  decoded_utf = "raw_utf"
+}
+       :default "content"
  extract:flag "-w --words"
-    :description "Extracts words"
+       :description "Extracts words"
  extract:flag "-p --part"
-    :description "Show part info"
+       :description "Show part info"
  extract:flag "-s --structure"
-    :description "Show structure info (e.g. HTML tags)"
+       :description "Show structure info (e.g. HTML tags)"
  extract:flag "-i --invisible"
-    :description "Show invisible content for HTML parts"
+       :description "Show invisible content for HTML parts"
  extract:option "-F --words-format"
-    :description "Words format ('stem', 'norm', 'raw', 'full')"
-    :argname("<type>")
-    :convert {
-      stem = "stem",
-      norm = "norm",
-      raw = "raw",
-      full = "full",
-    }
-    :default "stem"
+       :description "Words format ('stem', 'norm', 'raw', 'full')"
+       :argname("<type>")
+       :convert {
+  stem = "stem",
+  norm = "norm",
+  raw = "raw",
+  full = "full",
+}
+       :default "stem"
  
  local stat = parser:command "stat st s"
-    :description "Extracts statistical data from MIME messages"
+                   :description "Extracts statistical data from MIME messages"
  stat:argument "file"
      :description "File to process"
      :argname "<file>"
      :args "+"
  stat:mutex(
-  stat:flag "-m --meta"
-  :description "Lua metatokens",
-  stat:flag "-b --bayes"
-  :description "Bayes tokens",
-  stat:flag "-F --fuzzy"
-  :description "Fuzzy hashes"
+    stat:flag "-m --meta"
+        :description "Lua metatokens",
+    stat:flag "-b --bayes"
+        :description "Bayes tokens",
+    stat:flag "-F --fuzzy"
+        :description "Fuzzy hashes"
  )
  stat:flag "-s --shingles"
      :description "Show shingles for fuzzy hashes"
  
  local urls = parser:command "urls url u"
-    :description "Extracts URLs from MIME messages"
+                   :description "Extracts URLs from MIME messages"
  urls:argument "file"
      :description "File to process"
      :argname "<file>"
      :args "+"
  urls:mutex(
-  urls:flag "-t --tld"
-  :description "Get TLDs only",
-  urls:flag "-H --host"
-  :description "Get hosts only",
-  urls:flag "-f --full"
-  :description "Show piecewise urls as processed by Rspamd"
+    urls:flag "-t --tld"
+        :description "Get TLDs only",
+    urls:flag "-H --host"
+        :description "Get hosts only",
+    urls:flag "-f --full"
+        :description "Show piecewise urls as processed by Rspamd"
  )
  
  urls:flag "-u --unique"
@@ -137,75 +137,75 @@ urls:flag "-r --reverse"
      :description "Reverse sort order"
  
  local modify = parser:command "modify mod m"
-    :description "Modifies MIME message"
+                     :description "Modifies MIME message"
  modify:argument "file"
-    :description "File to process"
-    :argname "<file>"
-    :args "+"
+      :description "File to process"
+      :argname "<file>"
+      :args "+"
  
  modify:option "-a --add-header"
-    :description "Adds specific header"
-    :argname "<header=value>"
-    :count "*"
+      :description "Adds specific header"
+      :argname "<header=value>"
+      :count "*"
  modify:option "-r --remove-header"
-    :description "Removes specific header (all occurrences)"
-    :argname "<header>"
-    :count "*"
+      :description "Removes specific header (all occurrences)"
+      :argname "<header>"
+      :count "*"
  modify:option "-R --rewrite-header"
-    :description "Rewrites specific header, uses Lua string.format pattern"
-    :argname "<header=pattern>"
-    :count "*"
+      :description "Rewrites specific header, uses Lua string.format pattern"
+      :argname "<header=pattern>"
+      :count "*"
  modify:option "-t --text-footer"
-    :description "Adds footer to text/plain parts from a specific file"
-    :argname "<file>"
+      :description "Adds footer to text/plain parts from a specific file"
+      :argname "<file>"
  modify:option "-H --html-footer"
-    :description "Adds footer to text/html parts from a specific file"
-    :argname "<file>"
+      :description "Adds footer to text/html parts from a specific file"
+      :argname "<file>"
  
  local strip = parser:command "strip"
-    :description "Strip attachments from a message"
+                    :description "Strip attachments from a message"
  strip:argument "file"
-    :description "File to process"
-    :argname "<file>"
-    :args "+"
+     :description "File to process"
+     :argname "<file>"
+     :args "+"
  strip:flag "-i --keep-images"
-    :description "Keep images"
+     :description "Keep images"
  strip:option "--min-text-size"
-    :description "Minimal text size to keep"
-    :argname "<size>"
-    :convert(tonumber)
-    :default(0)
+     :description "Minimal text size to keep"
+     :argname "<size>"
+     :convert(tonumber)
+     :default(0)
  strip:option "--max-text-size"
-    :description "Max text size to keep"
-    :argname "<size>"
-    :convert(tonumber)
-    :default(math.huge)
+     :description "Max text size to keep"
+     :argname "<size>"
+     :convert(tonumber)
+     :default(math.huge)
  
  local anonymize = parser:command "anonymize"
-    :description "Try to remove sensitive information from a message"
+                        :description "Try to remove sensitive information from a message"
  anonymize:argument "file"
-    :description "File to process"
-    :argname "<file>"
-    :args "+"
+         :description "File to process"
+         :argname "<file>"
+         :args "+"
  anonymize:option "--exclude-header -X"
-    :description "Exclude specific headers from anonymization"
-    :argname "<header>"
-    :count "*"
+         :description "Exclude specific headers from anonymization"
+         :argname "<header>"
+         :count "*"
  anonymize:option "--include-header -I"
-    :description "Include specific headers from anonymization"
-    :argname "<header>"
-    :count "*"
+         :description "Include specific headers from anonymization"
+         :argname "<header>"
+         :count "*"
  anonymize:flag "--gpt"
-    :description "Use LLM model for anonymization (requires GPT plugin to be configured)"
+         :description "Use LLM model for anonymization (requires GPT plugin to be configured)"
  anonymize:option "--model"
-    :description "Model to use for anonymization"
-    :argname "<model>"
+         :description "Model to use for anonymization"
+         :argname "<model>"
  anonymize:option "--prompt"
-    :description "Prompt to use for anonymization"
-    :argname "<prompt>"
+         :description "Prompt to use for anonymization"
+         :argname "<prompt>"
  
  local sign = parser:command "sign"
-    :description "Performs DKIM signing"
+                   :description "Performs DKIM signing"
  sign:argument "file"
      :description "File to process"
      :argname "<file>"
@@ -227,33 +227,33 @@ sign:option "-t --type"
      :description "ARC or DKIM signing"
      :argname("<arc|dkim>")
      :convert {
-      ['arc'] = 'arc',
-      ['dkim'] = 'dkim',
-    }
+  ['arc'] = 'arc',
+  ['dkim'] = 'dkim',
+}
      :default 'dkim'
  sign:option "-o --output"
      :description "Output format"
      :argname("<message|signature>")
      :convert {
-      ['message'] = 'message',
-      ['signature'] = 'signature',
-    }
+  ['message'] = 'message',
+  ['signature'] = 'signature',
+}
      :default 'message'
  
  local dump = parser:command "dump"
-    :description "Dumps a raw message in different formats"
+                   :description "Dumps a raw message in different formats"
  dump:argument "file"
      :description "File to process"
      :argname "<file>"
      :args "+"
  -- Duplicate format for convenience
  dump:mutex(
-  parser:flag "-j --json"
-  :description "JSON output",
-  parser:flag "-U --ucl"
-  :description "UCL output",
-  parser:flag "-M --messagepack"
-  :description "MessagePack output"
+    parser:flag "-j --json"
+          :description "JSON output",
+    parser:flag "-U --ucl"
+          :description "UCL output",
+    parser:flag "-M --messagepack"
+          :description "MessagePack output"
  )
  dump:flag "-s --split"
      :description "Split the output file contents such that no content is embedded"
@@ -354,7 +354,7 @@ local function load_task(opts, fname)
  
    if not task:process_message() then
      parser:error(string.format('cannot read message from %s: %s', fname,
-      'failed to parse'))
+        'failed to parse'))
      return nil
    end
  
@@ -431,21 +431,21 @@ local function extract_handler(opts)
  
        if not opts.json and not opts.ucl then
          table.insert(out,
-          rspamd_logger.slog('Part: %s: %s, language: %s, size: %s (%s raw), words: %s',
-            part:get_mimepart():get_digest():sub(1, 8),
-            t,
-            part:get_language(),
-            part:get_length(), part:get_raw_length(),
-            part:get_words_count()))
+            rspamd_logger.slog('Part: %s: %s, language: %s, size: %s (%s raw), words: %s',
+                part:get_mimepart():get_digest():sub(1, 8),
+                t,
+                part:get_language(),
+                part:get_length(), part:get_raw_length(),
+                part:get_words_count()))
          table.insert(out,
-          rspamd_logger.slog('Stats: %s',
-            fun.foldl(function(acc, k, v)
-              if acc ~= '' then
-                return string.format('%s, %s:%s', acc, k, v)
-              else
-                return string.format('%s:%s', k, v)
-              end
-            end, '', part:get_stats())))
+            rspamd_logger.slog('Stats: %s',
+                fun.foldl(function(acc, k, v)
+                  if acc ~= '' then
+                    return string.format('%s, %s:%s', acc, k, v)
+                  else
+                    return string.format('%s:%s', k, v)
+                  end
+                end, '', part:get_stats())))
        end
      end
    end
@@ -456,13 +456,13 @@ local function extract_handler(opts)
          local mtype, msubtype = part:get_type()
          local det_mtype, det_msubtype = part:get_detected_type()
          table.insert(out,
-          rspamd_logger.slog('Mime Part: %s: %s/%s (%s/%s detected), filename: %s (%s detected ext), size: %s',
-            part:get_digest():sub(1, 8),
-            mtype, msubtype,
-            det_mtype, det_msubtype,
-            part:get_filename(),
-            part:get_detected_ext(),
-            part:get_length()))
+            rspamd_logger.slog('Mime Part: %s: %s/%s (%s/%s detected), filename: %s (%s detected ext), size: %s',
+                part:get_digest():sub(1, 8),
+                mtype, msubtype,
+                det_mtype, det_msubtype,
+                part:get_filename(),
+                part:get_detected_ext(),
+                part:get_length()))
        end
      end
    end
@@ -474,17 +474,17 @@ local function extract_handler(opts)
        return table.concat(words, ' ')
      else
        return table.concat(
-        fun.totable(
-          fun.map(function(w)
-            -- [1] - stemmed word
-            -- [2] - normalised word
-            -- [3] - raw word
-            -- [4] - flags (table of strings)
-            return string.format('%s|%s|%s(%s)',
-              w[3], w[2], w[1], table.concat(w[4], ','))
-          end, words)
-        ),
-        ' '
+          fun.totable(
+              fun.map(function(w)
+                -- [1] - stemmed word
+                -- [2] - normalised word
+                -- [3] - raw word
+                -- [4] - flags (table of strings)
+                return string.format('%s|%s|%s(%s)',
+                    w[3], w[2], w[1], table.concat(w[4], ','))
+              end, words)
+          ),
+          ' '
        )
      end
    end
@@ -501,15 +501,39 @@ local function extract_handler(opts)
      if opts.words then
        local how_words = opts['words_format'] or 'stem'
        table.insert(out_elts[fname], 'meta_words: ' ..
-        print_words(task:get_meta_words(how_words), how_words == 'full'))
+          print_words(task:get_meta_words(how_words), how_words == 'full'))
      end
  
      if opts.text or opts.html then
-      local mp = task:get_parts() or {}
+      local mp_all = task:get_parts(true) or {}
+
+      -- Build map: parent_part -> injected_text_part
+      local injected_map = {}
+      for _, p in ipairs(mp_all) do
+        if p:is_injected() and p:is_text() then
  
-      for _, mime_part in ipairs(mp) do
+          local parent = p:get_parent()
+          if parent then
+            injected_map[parent:get_digest()] = p:get_text()
+          end
+        end
+      end
+
+      -- Build table: {{part, injected_text or nil}, ...}
+      local parts_to_process = {}
+      for _, p in ipairs(mp_all) do
+        if not p:is_injected() then
+          table.insert(parts_to_process, { p, injected_map[p:get_digest()] })
+        end
+      end
+
+      -- Process the parts
+      for _, entry in ipairs(parts_to_process) do
+        local mime_part = entry[1]
+        local injected_part = entry[2]
          local how = opts.output
          local part
+
          if mime_part:is_text() then
            part = mime_part:get_text()
          end
@@ -524,10 +548,26 @@ local function extract_handler(opts)
            if opts.words then
              local how_words = opts['words_format'] or 'stem'
              table.insert(out_elts[fname], print_words(part:get_words(how_words),
-              how_words == 'full'))
+                how_words == 'full'))
            else
              table.insert(out_elts[fname], tostring(part:get_content(how)))
            end
+        elseif injected_part and opts.text and not injected_part:is_html() then
+          -- Show parent part info but content from injected child
+          maybe_print_mime_part_info(mime_part, out_elts[fname])
+          if not opts.json and not opts.ucl then
+            table.insert(out_elts[fname], string.format('[Extracted text from %s]',
+                mime_part:get_filename() or 'attachment'))
+            table.insert(out_elts[fname], '\n')
+          end
+
+          if opts.words then
+            local how_words = opts['words_format'] or 'stem'
+            table.insert(out_elts[fname], print_words(injected_part:get_words(how_words),
+                how_words == 'full'))
+          else
+            table.insert(out_elts[fname], tostring(injected_part:get_content(how)))
+          end
          elseif part and opts.html and part:is_html() then
            maybe_print_text_part_info(part, out_elts[fname])
            maybe_print_mime_part_info(mime_part, out_elts[fname])
@@ -538,7 +578,7 @@ local function extract_handler(opts)
            if opts.words then
              local how_words = opts['words_format'] or 'stem'
              table.insert(out_elts[fname], print_words(part:get_words(how_words),
-              how_words == 'full'))
+                how_words == 'full'))
            else
              if opts.structure then
                local hc = part:get_html()
@@ -547,11 +587,11 @@ local function extract_handler(opts)
                  local fun = require "fun"
                  if type(elt) == 'table' then
                    return table.concat(fun.totable(
-                    fun.map(
-                      function(t)
-                        return rspamd_logger.slog("%s", t)
-                      end,
-                      elt)), '\n')
+                      fun.map(
+                          function(t)
+                            return rspamd_logger.slog("%s", t)
+                          end,
+                          elt)), '\n')
                  else
                    return rspamd_logger.slog("%s", elt)
                  end
@@ -582,29 +622,9 @@ local function extract_handler(opts)
              if opts.invisible then
                local hc = part:get_html()
                table.insert(out_elts[fname], string.format('invisible content: %s',
-                tostring(hc:get_invisible())))
+                  tostring(hc:get_invisible())))
              end
            end
-        else
-          -- Not a text part, check for PDF
-          local _, msubtype = mime_part:get_type()
-          if msubtype == 'pdf' and opts.text then
-             local lua_content_pdf = require "lua_content.pdf"
-             -- Get raw content of the part
-             local content = mime_part:get_content()
-             if content then
-               local res = lua_content_pdf.process(content, mime_part, task)
-               if res and res.extract_text then
-                 local text_data = res.extract_text(res)
-                 if text_data and #text_data > 0 then
-                   maybe_print_mime_part_info(mime_part, out_elts[fname])
-                   for _, txt in ipairs(text_data) do
-                     table.insert(out_elts[fname], tostring(txt))
-                   end
-                 end
-               end
-             end
-          end
          end
  
          if not part then
@@ -654,10 +674,10 @@ local function stat_handler(opts)
        out_elts[fname] = bt
        process_func = function(e)
          return string.format('%s (%d): "%s"+"%s", [%s]', e.data, e.win, e.t1 or "",
-          e.t2 or "", table.concat(fun.totable(
-            fun.map(function(k)
-              return k
-            end, e.flags)), ","))
+            e.t2 or "", table.concat(fun.totable(
+                fun.map(function(k)
+                  return k
+                end, e.flags)), ","))
        end
      elseif opts.fuzzy then
        local parts = task:get_parts() or {}
@@ -684,16 +704,16 @@ local function stat_handler(opts)
                digest = digest,
                shingles = shingles,
                type = string.format('%s/%s',
-                ({ part:get_type() })[1],
-                ({ part:get_type() })[2])
+                  ({ part:get_type() })[1],
+                  ({ part:get_type() })[2])
              })
            else
              table.insert(out_elts[fname], {
                digest = part:get_digest(),
                file = part:get_filename(),
                type = string.format('%s/%s',
-                ({ part:get_type() })[1],
-                ({ part:get_type() })[2])
+                  ({ part:get_type() })[1],
+                  ({ part:get_type() })[2])
              })
            end
          end
@@ -890,10 +910,10 @@ local function modify_handler(opts)
          if hname == name then
            local new_value = string.format(hpattern, hdr.decoded)
            new_value = string.format('%s:%s%s',
-            name, hdr.separator,
-            rspamd_util.fold_header(name,
-              rspamd_util.mime_header_encode(new_value),
-              task:get_newlines_type()))
+              name, hdr.separator,
+              rspamd_util.fold_header(name,
+                  rspamd_util.mime_header_encode(new_value),
+                  task:get_newlines_type()))
            out[#out + 1] = new_value
            return
          end
@@ -902,12 +922,12 @@ local function modify_handler(opts)
        if rewrite.need_rewrite_ct then
          if name:lower() == 'content-type' then
            local nct = string.format('%s: %s/%s; charset=utf-8',
-            'Content-Type', rewrite.new_ct.type, rewrite.new_ct.subtype)
+              'Content-Type', rewrite.new_ct.type, rewrite.new_ct.subtype)
            out[#out + 1] = nct
            return
          elseif name:lower() == 'content-transfer-encoding' then
            out[#out + 1] = string.format('%s: %s',
-            'Content-Transfer-Encoding', rewrite.new_cte or 'quoted-printable')
+              'Content-Transfer-Encoding', rewrite.new_cte or 'quoted-printable')
            seen_cte = true
            return
          end
@@ -923,13 +943,13 @@ local function modify_handler(opts)
  
        if hname and hvalue then
          out[#out + 1] = string.format('%s: %s', hname,
-          rspamd_util.fold_header(hname, hvalue, task:get_newlines_type()))
+            rspamd_util.fold_header(hname, hvalue, task:get_newlines_type()))
        end
      end
  
      if not seen_cte and rewrite.need_rewrite_ct then
        out[#out + 1] = string.format('%s: %s',
-        'Content-Transfer-Encoding', rewrite.new_cte or 'quoted-printable')
+          'Content-Transfer-Encoding', rewrite.new_cte or 'quoted-printable')
      end
  
      -- End of headers
@@ -1013,11 +1033,11 @@ local function sign_handler(opts)
        io.flush()
      else
        local dkim_hdr = string.format('%s: %s%s',
-        'DKIM-Signature',
-        rspamd_util.fold_header('DKIM-Signature',
-          rspamd_util.mime_header_encode(sig),
-          task:get_newlines_type()),
-        newline(task))
+          'DKIM-Signature',
+          rspamd_util.fold_header('DKIM-Signature',
+              rspamd_util.mime_header_encode(sig),
+              task:get_newlines_type()),
+          newline(task))
        io.write(dkim_hdr)
        io.flush()
        task:get_content():save_in_file(1)
diff --git a/src/lua/lua_mimepart.c b/src/lua/lua_mimepart.c

index d003cffb97d59e31210820a69aee8b6c08c023cb..01fa87724c5c348033ad7d74e4e61ff444c9e41f 100644 (file)
--- a/src/lua/lua_mimepart.c
+++ b/src/lua/lua_mimepart.c
@@ -582,6 +582,13 @@ LUA_FUNCTION_DEF(mimepart, is_specific);
   */
  LUA_FUNCTION_DEF(mimepart, get_urls);
  
+/***
+ * @method mime_part:is_injected()
+ * Returns true if part was injected (computed/virtual) rather than being part of the original message
+ * @return {boolean} true if part is injected
+ */
+LUA_FUNCTION_DEF(mimepart, is_injected);
+
  static const struct luaL_reg mimepartlib_m[] = {
         LUA_INTERFACE_DEF(mimepart, get_content),
         LUA_INTERFACE_DEF(mimepart, get_raw_content),
@@ -620,6 +627,7 @@ static const struct luaL_reg mimepartlib_m[] = {
         LUA_INTERFACE_DEF(mimepart, get_specific),
         LUA_INTERFACE_DEF(mimepart, set_specific),
         LUA_INTERFACE_DEF(mimepart, is_specific),
+       LUA_INTERFACE_DEF(mimepart, is_injected),
         {"__tostring", rspamd_lua_class_tostring},
         {NULL, NULL}};
  
@@ -2463,6 +2471,21 @@ lua_mimepart_is_specific(lua_State *L)
         return 1;
  }
  
+static int
+lua_mimepart_is_injected(lua_State *L)
+{
+       LUA_TRACE_POINT;
+       struct rspamd_mime_part *part = lua_check_mimepart(L);
+
+       if (part == NULL) {
+               return luaL_error(L, "invalid arguments");
+       }
+
+       lua_pushboolean(L, part->flags & RSPAMD_MIME_PART_COMPUTED);
+
+       return 1;
+}
+
  static int
  lua_mimepart_set_specific(lua_State *L)
  {
diff --git a/src/lua/lua_task.c b/src/lua/lua_task.c

index a695177e8c3c01c1047e5e26467222edb120965d..ffd3b9bb2f12784a12be482320ad4e2d5a93f241 100644 (file)
--- a/src/lua/lua_task.c
+++ b/src/lua/lua_task.c
@@ -319,22 +319,25 @@ LUA_FUNCTION_DEF(task, get_rawbody);
   */
  LUA_FUNCTION_DEF(task, get_emails);
  /***
- * @method task:inject_part(type, content)
+ * @method task:inject_part(type, content[, original_part])
   * Injects a virtual mime part into the task structure
   * @param {string} type part type (currently only "text" is supported)
- * @param {string} content part content
+ * @param {string/text/table} content part content (accepts string, rspamd_text, or table of rspamd_text chunks - will be efficiently concatenated in C)
+ * @param {rspamd_mimepart} original_part optional original mime part that this injected part is derived from (sets parent relationship)
   * @return {boolean} true if part was injected
   */
  LUA_FUNCTION_DEF(task, inject_part);
  /***
- * @method task:get_text_parts()
- * Get all text (and HTML) parts found in a message
+ * @method task:get_text_parts([include_virtual])
+ * Get all text (and HTML) parts found in a message. By default, injected/virtual parts are excluded.
+ * @param {boolean} include_virtual if true, include injected/virtual parts (default: false)
   * @return {table rspamd_text_part} list of text parts
   */
  LUA_FUNCTION_DEF(task, get_text_parts);
  /***
- * @method task:get_parts()
- * Get all mime parts found in a message
+ * @method task:get_parts([include_virtual])
+ * Get all mime parts found in a message. By default, injected/virtual parts are excluded.
+ * @param {boolean} include_virtual if true, include injected/virtual parts (default: false)
   * @return {table rspamd_mime_part} list of mime parts
   */
  LUA_FUNCTION_DEF(task, get_parts);
@@ -2783,10 +2786,40 @@ lua_task_inject_part(lua_State *L)
         LUA_TRACE_POINT;
         struct rspamd_task *task = lua_check_task(L, 1);
         const char *type = luaL_checkstring(L, 2);
-       gsize content_len;
-       const char *content = luaL_checklstring(L, 3, &content_len);
-       struct rspamd_mime_part *part;
+       struct rspamd_lua_text *content_text;
+       const char *content = NULL;
+       gsize content_len = 0;
+       struct rspamd_mime_part *part, *original_part = NULL;
         struct rspamd_mime_text_part *txt_part;
+       gboolean is_table = FALSE;
+
+       /* Accept string, rspamd_text, or table of texts */
+       if (lua_type(L, 3) == LUA_TTABLE) {
+               is_table = TRUE;
+               /* Calculate total length first */
+               lua_pushnil(L);
+               while (lua_next(L, 3) != 0) {
+                       struct rspamd_lua_text *t = lua_check_text_or_string(L, -1);
+                       if (t) {
+                               content_len += t->len;
+                       }
+                       lua_pop(L, 1);
+               }
+       }
+       else {
+               content_text = lua_check_text_or_string(L, 3);
+               if (!content_text) {
+                       return luaL_error(L, "invalid content argument (expected string, text, or table)");
+               }
+               content = content_text->start;
+               content_len = content_text->len;
+       }
+
+       /* Check for optional original_part parameter */
+       if (lua_gettop(L) >= 4 && lua_isuserdata(L, 4)) {
+               original_part = *((struct rspamd_mime_part **)
+                                                         rspamd_lua_check_udata_maybe(L, 4, rspamd_mimepart_classname));
+       }
  
         if (task && task->message) {
                 if (g_ascii_strcasecmp(type, "text") == 0) {
@@ -2794,6 +2827,11 @@ lua_task_inject_part(lua_State *L)
                         part->part_type = RSPAMD_MIME_PART_TEXT;
                         part->flags |= RSPAMD_MIME_PART_COMPUTED;
  
+                       /* Set parent part if provided */
+                       if (original_part) {
+                               part->parent_part = original_part;
+                       }
+
                         /* Basic headers setup */
                         part->ct = rspamd_mempool_alloc0(task->task_pool, sizeof(*part->ct));
  
@@ -2805,9 +2843,30 @@ lua_task_inject_part(lua_State *L)
                         part->ct->charset.begin = "utf-8";
                         part->ct->charset.len = 5;
  
-                       /* Content setup */
-                       part->parsed_data.begin = rspamd_mempool_strdup(task->task_pool, content);
+                       /* Content setup - merge table or copy single content */
+                       part->parsed_data.begin = rspamd_mempool_alloc(task->task_pool, content_len + 1);
                         part->parsed_data.len = content_len;
+
+                       if (is_table) {
+                               /* Efficiently merge all text chunks */
+                               char *dst = (char *) part->parsed_data.begin;
+                               lua_pushnil(L);
+                               while (lua_next(L, 3) != 0) {
+                                       struct rspamd_lua_text *t = lua_check_text_or_string(L, -1);
+                                       if (t && t->len > 0) {
+                                               memcpy(dst, t->start, t->len);
+                                               dst += t->len;
+                                       }
+                                       lua_pop(L, 1);
+                               }
+                               *dst = '\0';
+                       }
+                       else {
+                               /* Single content */
+                               memcpy((char *) part->parsed_data.begin, content, content_len);
+                               ((char *) part->parsed_data.begin)[content_len] = '\0';
+                       }
+
                         part->raw_data = part->parsed_data;
  
                         /* Text part specific setup */
@@ -3100,18 +3159,28 @@ lua_task_get_parts(lua_State *L)
         unsigned int i;
         struct rspamd_task *task = lua_check_task(L, 1);
         struct rspamd_mime_part *part, **ppart;
+       gboolean include_virtual = FALSE;
+
+       if (lua_gettop(L) >= 2) {
+               include_virtual = lua_toboolean(L, 2);
+       }
  
         if (task != NULL) {
                 if (task->message) {
                         lua_createtable(L, MESSAGE_FIELD(task, parts)->len, 0);
+                       int idx = 1;
  
                         PTR_ARRAY_FOREACH(MESSAGE_FIELD(task, parts), i, part)
                         {
+                               if (!include_virtual && (part->flags & RSPAMD_MIME_PART_COMPUTED)) {
+                                       continue;
+                               }
+
                                 ppart = lua_newuserdata(L, sizeof(struct rspamd_mime_part *));
                                 *ppart = part;
                                 rspamd_lua_setclass(L, rspamd_mimepart_classname, -1);
                                 /* Make it array */
-                               lua_rawseti(L, -2, i + 1);
+                               lua_rawseti(L, -2, idx++);
                         }
                 }
                 else {
author	Vsevolod Stakhov <vsevolod@rspamd.com>
	Mon, 24 Nov 2025 11:39:44 +0000 (11:39 +0000)
committer	Vsevolod Stakhov <vsevolod@rspamd.com>
	Mon, 24 Nov 2025 11:39:44 +0000 (11:39 +0000)
lualib/lua_content/pdf.lua		patch \| blob \| blame \| history
lualib/rspamadm/mime.lua		patch \| blob \| blame \| history
src/lua/lua_mimepart.c		patch \| blob \| blame \| history
src/lua/lua_task.c		patch \| blob \| blame \| history