From: Vsevolod Stakhov Date: Mon, 24 Nov 2025 11:39:44 +0000 (+0000) Subject: [Project] Various fixes X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=e8945304821de288dec212c77fa88d43988e0cce;p=thirdparty%2Frspamd.git [Project] Various fixes --- diff --git a/lualib/lua_content/pdf.lua b/lualib/lua_content/pdf.lua index b356407ec4..d49b2e699f 100644 --- a/lualib/lua_content/pdf.lua +++ b/lualib/lua_content/pdf.lua @@ -335,7 +335,7 @@ local function gen_text_grammar() local gen = generic_grammar_elts() local function text_op_handler(...) - local args = {...} + local args = { ... } local op = args[#args] local t = args[#args - 1] @@ -363,7 +363,7 @@ local function gen_text_grammar() end local function nary_op_handler(...) - local args = {...} + local args = { ... } local op = args[#args] -- local t = args[#args - 1] -- The table of numbers @@ -375,7 +375,7 @@ local function gen_text_grammar() end local function ternary_op_handler(...) - local args = {...} + local args = { ... } local op = args[#args] local a2 = args[#args - 2] -- Second to last argument (ty) @@ -510,9 +510,9 @@ local function maybe_apply_filter(dict, data, pdf, task) -- We can handle Predictor 1 (No prediction) or maybe others in future local predictor = tonumber(decode_params.Predictor) or 1 if predictor > 1 then - -- For now, we just log debug and fail, or maybe try to continue if it's simple PNG prediction - -- But without implementation, better to return nil to avoid garbage - return nil, 'predictor exists: ' .. tostring(predictor) + -- For now, we just log debug and fail, or maybe try to continue if it's simple PNG prediction + -- But without implementation, better to return nil to avoid garbage + return nil, 'predictor exists: ' .. tostring(predictor) end end end @@ -1139,9 +1139,6 @@ local function postprocess_pdf_objects(task, input, pdf) if now >= pdf.end_timestamp then pdf.timeout_processing = now - pdf.start_timestamp - - io.stderr:write(string.format("DEBUG: Timeout! Start: %f, End: %f, Now: %f\n", pdf.start_timestamp, pdf.end_timestamp, now)) - lua_util.debugm(N, task, 'pdf: timeout processing grammars after spending %s seconds, ' .. '%s elements processed', pdf.timeout_processing, i) @@ -1210,7 +1207,7 @@ local function offsets_to_blocks(starts, ends, out) end end -local function search_text(task, pdf) +local function search_text(task, pdf, mpart) for _, obj in ipairs(pdf.objects) do if obj.type == 'Page' and obj.contents then local text = {} @@ -1254,7 +1251,7 @@ local function search_text(task, pdf) if ret then if #obj_or_err == 0 then - lua_util.debugm(N, task, 'empty text match from block: %s', bl.data) + lua_util.debugm(N, task, 'empty text match from block: %s', bl.data) end for _, chunk in ipairs(obj_or_err) do text[#text + 1] = chunk @@ -1308,20 +1305,17 @@ local function search_text(task, pdf) -- Aggregate and inject once if task.inject_part then local all_text = {} + for _, obj in ipairs(pdf.objects) do - if obj.text then - table.insert(all_text, tostring(obj.text)) + if obj.text and obj.text:len() > 0 then + -- Keep as rspamd_text, don't convert to string + table.insert(all_text, obj.text) end end if #all_text > 0 then - local final_text = table.concat(all_text, "\n") - -- Only inject if it contains non-whitespace characters - if final_text:match("%S") then - task:inject_part('text', final_text) - else - lua_util.debugm(N, task, 'skipping injection of empty/whitespace-only text') - end + -- Pass table of rspamd_text directly - will be efficiently merged in C + task:inject_part('text', all_text, mpart) end end end @@ -1361,8 +1355,6 @@ local function search_urls(task, pdf, mpart) end local function process_pdf(input, mpart, task) - -- io.stderr:write("DEBUG: process_pdf called, input len: " .. tostring(#input) .. "\n") - if not config.enabled then -- Skip processing return {} @@ -1371,7 +1363,6 @@ local function process_pdf(input, mpart, task) local matches = pdf_trie:match(input) if matches then - -- io.stderr:write("DEBUG: PDF matches found\n") local start_ts = rspamd_util.get_ticks() -- Temp object used to share data between pdf extraction methods local pdf_object = { @@ -1423,7 +1414,7 @@ local function process_pdf(input, mpart, task) postprocess_pdf_objects(task, input, pdf_object) pdf_output.objects = pdf_object.objects if config.text_extraction then - search_text(task, pdf_object, pdf_output) + search_text(task, pdf_object, mpart) end if config.url_extraction then search_urls(task, pdf_object, mpart, pdf_output) diff --git a/lualib/rspamadm/mime.lua b/lualib/rspamadm/mime.lua index 9ad7238dd3..b55e4ae036 100644 --- a/lualib/rspamadm/mime.lua +++ b/lualib/rspamadm/mime.lua @@ -35,96 +35,96 @@ local parser = argparse() :require_command(true) parser:option "-c --config" - :description "Path to config file" - :argname("") - :default(rspamd_paths["CONFDIR"] .. "/" .. "rspamd.conf") + :description "Path to config file" + :argname("") + :default(rspamd_paths["CONFDIR"] .. "/" .. "rspamd.conf") parser:mutex( - parser:flag "-j --json" - :description "JSON output", - parser:flag "-U --ucl" - :description "UCL output", - parser:flag "-M --messagepack" - :description "MessagePack output" + parser:flag "-j --json" + :description "JSON output", + parser:flag "-U --ucl" + :description "UCL output", + parser:flag "-M --messagepack" + :description "MessagePack output" ) parser:flag "-C --compact" - :description "Use compact format" + :description "Use compact format" parser:flag "--no-file" - :description "Do not print filename" + :description "Do not print filename" -- Extract subcommand local extract = parser:command "extract ex e" - :description "Extracts data from MIME messages" + :description "Extracts data from MIME messages" extract:argument "file" - :description "File to process" - :argname "" - :args "+" + :description "File to process" + :argname "" + :args "+" extract:flag "-t --text" - :description "Extracts plain text data from a message" + :description "Extracts plain text data from a message" extract:flag "-r --raw" - :description "Load as raw file" + :description "Load as raw file" extract:flag "-H --html" - :description "Extracts htm data from a message" + :description "Extracts htm data from a message" extract:option "-o --output" - :description "Output format ('raw', 'content', 'oneline', 'decoded', 'decoded_utf')" - :argname("") - :convert { - raw = "raw", - content = "content", - oneline = "content_oneline", - decoded = "raw_parsed", - decoded_utf = "raw_utf" - } - :default "content" + :description "Output format ('raw', 'content', 'oneline', 'decoded', 'decoded_utf')" + :argname("") + :convert { + raw = "raw", + content = "content", + oneline = "content_oneline", + decoded = "raw_parsed", + decoded_utf = "raw_utf" +} + :default "content" extract:flag "-w --words" - :description "Extracts words" + :description "Extracts words" extract:flag "-p --part" - :description "Show part info" + :description "Show part info" extract:flag "-s --structure" - :description "Show structure info (e.g. HTML tags)" + :description "Show structure info (e.g. HTML tags)" extract:flag "-i --invisible" - :description "Show invisible content for HTML parts" + :description "Show invisible content for HTML parts" extract:option "-F --words-format" - :description "Words format ('stem', 'norm', 'raw', 'full')" - :argname("") - :convert { - stem = "stem", - norm = "norm", - raw = "raw", - full = "full", - } - :default "stem" + :description "Words format ('stem', 'norm', 'raw', 'full')" + :argname("") + :convert { + stem = "stem", + norm = "norm", + raw = "raw", + full = "full", +} + :default "stem" local stat = parser:command "stat st s" - :description "Extracts statistical data from MIME messages" + :description "Extracts statistical data from MIME messages" stat:argument "file" :description "File to process" :argname "" :args "+" stat:mutex( - stat:flag "-m --meta" - :description "Lua metatokens", - stat:flag "-b --bayes" - :description "Bayes tokens", - stat:flag "-F --fuzzy" - :description "Fuzzy hashes" + stat:flag "-m --meta" + :description "Lua metatokens", + stat:flag "-b --bayes" + :description "Bayes tokens", + stat:flag "-F --fuzzy" + :description "Fuzzy hashes" ) stat:flag "-s --shingles" :description "Show shingles for fuzzy hashes" local urls = parser:command "urls url u" - :description "Extracts URLs from MIME messages" + :description "Extracts URLs from MIME messages" urls:argument "file" :description "File to process" :argname "" :args "+" urls:mutex( - urls:flag "-t --tld" - :description "Get TLDs only", - urls:flag "-H --host" - :description "Get hosts only", - urls:flag "-f --full" - :description "Show piecewise urls as processed by Rspamd" + urls:flag "-t --tld" + :description "Get TLDs only", + urls:flag "-H --host" + :description "Get hosts only", + urls:flag "-f --full" + :description "Show piecewise urls as processed by Rspamd" ) urls:flag "-u --unique" @@ -137,75 +137,75 @@ urls:flag "-r --reverse" :description "Reverse sort order" local modify = parser:command "modify mod m" - :description "Modifies MIME message" + :description "Modifies MIME message" modify:argument "file" - :description "File to process" - :argname "" - :args "+" + :description "File to process" + :argname "" + :args "+" modify:option "-a --add-header" - :description "Adds specific header" - :argname "" - :count "*" + :description "Adds specific header" + :argname "" + :count "*" modify:option "-r --remove-header" - :description "Removes specific header (all occurrences)" - :argname "
" - :count "*" + :description "Removes specific header (all occurrences)" + :argname "
" + :count "*" modify:option "-R --rewrite-header" - :description "Rewrites specific header, uses Lua string.format pattern" - :argname "" - :count "*" + :description "Rewrites specific header, uses Lua string.format pattern" + :argname "" + :count "*" modify:option "-t --text-footer" - :description "Adds footer to text/plain parts from a specific file" - :argname "" + :description "Adds footer to text/plain parts from a specific file" + :argname "" modify:option "-H --html-footer" - :description "Adds footer to text/html parts from a specific file" - :argname "" + :description "Adds footer to text/html parts from a specific file" + :argname "" local strip = parser:command "strip" - :description "Strip attachments from a message" + :description "Strip attachments from a message" strip:argument "file" - :description "File to process" - :argname "" - :args "+" + :description "File to process" + :argname "" + :args "+" strip:flag "-i --keep-images" - :description "Keep images" + :description "Keep images" strip:option "--min-text-size" - :description "Minimal text size to keep" - :argname "" - :convert(tonumber) - :default(0) + :description "Minimal text size to keep" + :argname "" + :convert(tonumber) + :default(0) strip:option "--max-text-size" - :description "Max text size to keep" - :argname "" - :convert(tonumber) - :default(math.huge) + :description "Max text size to keep" + :argname "" + :convert(tonumber) + :default(math.huge) local anonymize = parser:command "anonymize" - :description "Try to remove sensitive information from a message" + :description "Try to remove sensitive information from a message" anonymize:argument "file" - :description "File to process" - :argname "" - :args "+" + :description "File to process" + :argname "" + :args "+" anonymize:option "--exclude-header -X" - :description "Exclude specific headers from anonymization" - :argname "
" - :count "*" + :description "Exclude specific headers from anonymization" + :argname "
" + :count "*" anonymize:option "--include-header -I" - :description "Include specific headers from anonymization" - :argname "
" - :count "*" + :description "Include specific headers from anonymization" + :argname "
" + :count "*" anonymize:flag "--gpt" - :description "Use LLM model for anonymization (requires GPT plugin to be configured)" + :description "Use LLM model for anonymization (requires GPT plugin to be configured)" anonymize:option "--model" - :description "Model to use for anonymization" - :argname "" + :description "Model to use for anonymization" + :argname "" anonymize:option "--prompt" - :description "Prompt to use for anonymization" - :argname "" + :description "Prompt to use for anonymization" + :argname "" local sign = parser:command "sign" - :description "Performs DKIM signing" + :description "Performs DKIM signing" sign:argument "file" :description "File to process" :argname "" @@ -227,33 +227,33 @@ sign:option "-t --type" :description "ARC or DKIM signing" :argname("") :convert { - ['arc'] = 'arc', - ['dkim'] = 'dkim', - } + ['arc'] = 'arc', + ['dkim'] = 'dkim', +} :default 'dkim' sign:option "-o --output" :description "Output format" :argname("") :convert { - ['message'] = 'message', - ['signature'] = 'signature', - } + ['message'] = 'message', + ['signature'] = 'signature', +} :default 'message' local dump = parser:command "dump" - :description "Dumps a raw message in different formats" + :description "Dumps a raw message in different formats" dump:argument "file" :description "File to process" :argname "" :args "+" -- Duplicate format for convenience dump:mutex( - parser:flag "-j --json" - :description "JSON output", - parser:flag "-U --ucl" - :description "UCL output", - parser:flag "-M --messagepack" - :description "MessagePack output" + parser:flag "-j --json" + :description "JSON output", + parser:flag "-U --ucl" + :description "UCL output", + parser:flag "-M --messagepack" + :description "MessagePack output" ) dump:flag "-s --split" :description "Split the output file contents such that no content is embedded" @@ -354,7 +354,7 @@ local function load_task(opts, fname) if not task:process_message() then parser:error(string.format('cannot read message from %s: %s', fname, - 'failed to parse')) + 'failed to parse')) return nil end @@ -431,21 +431,21 @@ local function extract_handler(opts) if not opts.json and not opts.ucl then table.insert(out, - rspamd_logger.slog('Part: %s: %s, language: %s, size: %s (%s raw), words: %s', - part:get_mimepart():get_digest():sub(1, 8), - t, - part:get_language(), - part:get_length(), part:get_raw_length(), - part:get_words_count())) + rspamd_logger.slog('Part: %s: %s, language: %s, size: %s (%s raw), words: %s', + part:get_mimepart():get_digest():sub(1, 8), + t, + part:get_language(), + part:get_length(), part:get_raw_length(), + part:get_words_count())) table.insert(out, - rspamd_logger.slog('Stats: %s', - fun.foldl(function(acc, k, v) - if acc ~= '' then - return string.format('%s, %s:%s', acc, k, v) - else - return string.format('%s:%s', k, v) - end - end, '', part:get_stats()))) + rspamd_logger.slog('Stats: %s', + fun.foldl(function(acc, k, v) + if acc ~= '' then + return string.format('%s, %s:%s', acc, k, v) + else + return string.format('%s:%s', k, v) + end + end, '', part:get_stats()))) end end end @@ -456,13 +456,13 @@ local function extract_handler(opts) local mtype, msubtype = part:get_type() local det_mtype, det_msubtype = part:get_detected_type() table.insert(out, - rspamd_logger.slog('Mime Part: %s: %s/%s (%s/%s detected), filename: %s (%s detected ext), size: %s', - part:get_digest():sub(1, 8), - mtype, msubtype, - det_mtype, det_msubtype, - part:get_filename(), - part:get_detected_ext(), - part:get_length())) + rspamd_logger.slog('Mime Part: %s: %s/%s (%s/%s detected), filename: %s (%s detected ext), size: %s', + part:get_digest():sub(1, 8), + mtype, msubtype, + det_mtype, det_msubtype, + part:get_filename(), + part:get_detected_ext(), + part:get_length())) end end end @@ -474,17 +474,17 @@ local function extract_handler(opts) return table.concat(words, ' ') else return table.concat( - fun.totable( - fun.map(function(w) - -- [1] - stemmed word - -- [2] - normalised word - -- [3] - raw word - -- [4] - flags (table of strings) - return string.format('%s|%s|%s(%s)', - w[3], w[2], w[1], table.concat(w[4], ',')) - end, words) - ), - ' ' + fun.totable( + fun.map(function(w) + -- [1] - stemmed word + -- [2] - normalised word + -- [3] - raw word + -- [4] - flags (table of strings) + return string.format('%s|%s|%s(%s)', + w[3], w[2], w[1], table.concat(w[4], ',')) + end, words) + ), + ' ' ) end end @@ -501,15 +501,39 @@ local function extract_handler(opts) if opts.words then local how_words = opts['words_format'] or 'stem' table.insert(out_elts[fname], 'meta_words: ' .. - print_words(task:get_meta_words(how_words), how_words == 'full')) + print_words(task:get_meta_words(how_words), how_words == 'full')) end if opts.text or opts.html then - local mp = task:get_parts() or {} + local mp_all = task:get_parts(true) or {} + + -- Build map: parent_part -> injected_text_part + local injected_map = {} + for _, p in ipairs(mp_all) do + if p:is_injected() and p:is_text() then - for _, mime_part in ipairs(mp) do + local parent = p:get_parent() + if parent then + injected_map[parent:get_digest()] = p:get_text() + end + end + end + + -- Build table: {{part, injected_text or nil}, ...} + local parts_to_process = {} + for _, p in ipairs(mp_all) do + if not p:is_injected() then + table.insert(parts_to_process, { p, injected_map[p:get_digest()] }) + end + end + + -- Process the parts + for _, entry in ipairs(parts_to_process) do + local mime_part = entry[1] + local injected_part = entry[2] local how = opts.output local part + if mime_part:is_text() then part = mime_part:get_text() end @@ -524,10 +548,26 @@ local function extract_handler(opts) if opts.words then local how_words = opts['words_format'] or 'stem' table.insert(out_elts[fname], print_words(part:get_words(how_words), - how_words == 'full')) + how_words == 'full')) else table.insert(out_elts[fname], tostring(part:get_content(how))) end + elseif injected_part and opts.text and not injected_part:is_html() then + -- Show parent part info but content from injected child + maybe_print_mime_part_info(mime_part, out_elts[fname]) + if not opts.json and not opts.ucl then + table.insert(out_elts[fname], string.format('[Extracted text from %s]', + mime_part:get_filename() or 'attachment')) + table.insert(out_elts[fname], '\n') + end + + if opts.words then + local how_words = opts['words_format'] or 'stem' + table.insert(out_elts[fname], print_words(injected_part:get_words(how_words), + how_words == 'full')) + else + table.insert(out_elts[fname], tostring(injected_part:get_content(how))) + end elseif part and opts.html and part:is_html() then maybe_print_text_part_info(part, out_elts[fname]) maybe_print_mime_part_info(mime_part, out_elts[fname]) @@ -538,7 +578,7 @@ local function extract_handler(opts) if opts.words then local how_words = opts['words_format'] or 'stem' table.insert(out_elts[fname], print_words(part:get_words(how_words), - how_words == 'full')) + how_words == 'full')) else if opts.structure then local hc = part:get_html() @@ -547,11 +587,11 @@ local function extract_handler(opts) local fun = require "fun" if type(elt) == 'table' then return table.concat(fun.totable( - fun.map( - function(t) - return rspamd_logger.slog("%s", t) - end, - elt)), '\n') + fun.map( + function(t) + return rspamd_logger.slog("%s", t) + end, + elt)), '\n') else return rspamd_logger.slog("%s", elt) end @@ -582,29 +622,9 @@ local function extract_handler(opts) if opts.invisible then local hc = part:get_html() table.insert(out_elts[fname], string.format('invisible content: %s', - tostring(hc:get_invisible()))) + tostring(hc:get_invisible()))) end end - else - -- Not a text part, check for PDF - local _, msubtype = mime_part:get_type() - if msubtype == 'pdf' and opts.text then - local lua_content_pdf = require "lua_content.pdf" - -- Get raw content of the part - local content = mime_part:get_content() - if content then - local res = lua_content_pdf.process(content, mime_part, task) - if res and res.extract_text then - local text_data = res.extract_text(res) - if text_data and #text_data > 0 then - maybe_print_mime_part_info(mime_part, out_elts[fname]) - for _, txt in ipairs(text_data) do - table.insert(out_elts[fname], tostring(txt)) - end - end - end - end - end end if not part then @@ -654,10 +674,10 @@ local function stat_handler(opts) out_elts[fname] = bt process_func = function(e) return string.format('%s (%d): "%s"+"%s", [%s]', e.data, e.win, e.t1 or "", - e.t2 or "", table.concat(fun.totable( - fun.map(function(k) - return k - end, e.flags)), ",")) + e.t2 or "", table.concat(fun.totable( + fun.map(function(k) + return k + end, e.flags)), ",")) end elseif opts.fuzzy then local parts = task:get_parts() or {} @@ -684,16 +704,16 @@ local function stat_handler(opts) digest = digest, shingles = shingles, type = string.format('%s/%s', - ({ part:get_type() })[1], - ({ part:get_type() })[2]) + ({ part:get_type() })[1], + ({ part:get_type() })[2]) }) else table.insert(out_elts[fname], { digest = part:get_digest(), file = part:get_filename(), type = string.format('%s/%s', - ({ part:get_type() })[1], - ({ part:get_type() })[2]) + ({ part:get_type() })[1], + ({ part:get_type() })[2]) }) end end @@ -890,10 +910,10 @@ local function modify_handler(opts) if hname == name then local new_value = string.format(hpattern, hdr.decoded) new_value = string.format('%s:%s%s', - name, hdr.separator, - rspamd_util.fold_header(name, - rspamd_util.mime_header_encode(new_value), - task:get_newlines_type())) + name, hdr.separator, + rspamd_util.fold_header(name, + rspamd_util.mime_header_encode(new_value), + task:get_newlines_type())) out[#out + 1] = new_value return end @@ -902,12 +922,12 @@ local function modify_handler(opts) if rewrite.need_rewrite_ct then if name:lower() == 'content-type' then local nct = string.format('%s: %s/%s; charset=utf-8', - 'Content-Type', rewrite.new_ct.type, rewrite.new_ct.subtype) + 'Content-Type', rewrite.new_ct.type, rewrite.new_ct.subtype) out[#out + 1] = nct return elseif name:lower() == 'content-transfer-encoding' then out[#out + 1] = string.format('%s: %s', - 'Content-Transfer-Encoding', rewrite.new_cte or 'quoted-printable') + 'Content-Transfer-Encoding', rewrite.new_cte or 'quoted-printable') seen_cte = true return end @@ -923,13 +943,13 @@ local function modify_handler(opts) if hname and hvalue then out[#out + 1] = string.format('%s: %s', hname, - rspamd_util.fold_header(hname, hvalue, task:get_newlines_type())) + rspamd_util.fold_header(hname, hvalue, task:get_newlines_type())) end end if not seen_cte and rewrite.need_rewrite_ct then out[#out + 1] = string.format('%s: %s', - 'Content-Transfer-Encoding', rewrite.new_cte or 'quoted-printable') + 'Content-Transfer-Encoding', rewrite.new_cte or 'quoted-printable') end -- End of headers @@ -1013,11 +1033,11 @@ local function sign_handler(opts) io.flush() else local dkim_hdr = string.format('%s: %s%s', - 'DKIM-Signature', - rspamd_util.fold_header('DKIM-Signature', - rspamd_util.mime_header_encode(sig), - task:get_newlines_type()), - newline(task)) + 'DKIM-Signature', + rspamd_util.fold_header('DKIM-Signature', + rspamd_util.mime_header_encode(sig), + task:get_newlines_type()), + newline(task)) io.write(dkim_hdr) io.flush() task:get_content():save_in_file(1) diff --git a/src/lua/lua_mimepart.c b/src/lua/lua_mimepart.c index d003cffb97..01fa87724c 100644 --- a/src/lua/lua_mimepart.c +++ b/src/lua/lua_mimepart.c @@ -582,6 +582,13 @@ LUA_FUNCTION_DEF(mimepart, is_specific); */ LUA_FUNCTION_DEF(mimepart, get_urls); +/*** + * @method mime_part:is_injected() + * Returns true if part was injected (computed/virtual) rather than being part of the original message + * @return {boolean} true if part is injected + */ +LUA_FUNCTION_DEF(mimepart, is_injected); + static const struct luaL_reg mimepartlib_m[] = { LUA_INTERFACE_DEF(mimepart, get_content), LUA_INTERFACE_DEF(mimepart, get_raw_content), @@ -620,6 +627,7 @@ static const struct luaL_reg mimepartlib_m[] = { LUA_INTERFACE_DEF(mimepart, get_specific), LUA_INTERFACE_DEF(mimepart, set_specific), LUA_INTERFACE_DEF(mimepart, is_specific), + LUA_INTERFACE_DEF(mimepart, is_injected), {"__tostring", rspamd_lua_class_tostring}, {NULL, NULL}}; @@ -2463,6 +2471,21 @@ lua_mimepart_is_specific(lua_State *L) return 1; } +static int +lua_mimepart_is_injected(lua_State *L) +{ + LUA_TRACE_POINT; + struct rspamd_mime_part *part = lua_check_mimepart(L); + + if (part == NULL) { + return luaL_error(L, "invalid arguments"); + } + + lua_pushboolean(L, part->flags & RSPAMD_MIME_PART_COMPUTED); + + return 1; +} + static int lua_mimepart_set_specific(lua_State *L) { diff --git a/src/lua/lua_task.c b/src/lua/lua_task.c index a695177e8c..ffd3b9bb2f 100644 --- a/src/lua/lua_task.c +++ b/src/lua/lua_task.c @@ -319,22 +319,25 @@ LUA_FUNCTION_DEF(task, get_rawbody); */ LUA_FUNCTION_DEF(task, get_emails); /*** - * @method task:inject_part(type, content) + * @method task:inject_part(type, content[, original_part]) * Injects a virtual mime part into the task structure * @param {string} type part type (currently only "text" is supported) - * @param {string} content part content + * @param {string/text/table} content part content (accepts string, rspamd_text, or table of rspamd_text chunks - will be efficiently concatenated in C) + * @param {rspamd_mimepart} original_part optional original mime part that this injected part is derived from (sets parent relationship) * @return {boolean} true if part was injected */ LUA_FUNCTION_DEF(task, inject_part); /*** - * @method task:get_text_parts() - * Get all text (and HTML) parts found in a message + * @method task:get_text_parts([include_virtual]) + * Get all text (and HTML) parts found in a message. By default, injected/virtual parts are excluded. + * @param {boolean} include_virtual if true, include injected/virtual parts (default: false) * @return {table rspamd_text_part} list of text parts */ LUA_FUNCTION_DEF(task, get_text_parts); /*** - * @method task:get_parts() - * Get all mime parts found in a message + * @method task:get_parts([include_virtual]) + * Get all mime parts found in a message. By default, injected/virtual parts are excluded. + * @param {boolean} include_virtual if true, include injected/virtual parts (default: false) * @return {table rspamd_mime_part} list of mime parts */ LUA_FUNCTION_DEF(task, get_parts); @@ -2783,10 +2786,40 @@ lua_task_inject_part(lua_State *L) LUA_TRACE_POINT; struct rspamd_task *task = lua_check_task(L, 1); const char *type = luaL_checkstring(L, 2); - gsize content_len; - const char *content = luaL_checklstring(L, 3, &content_len); - struct rspamd_mime_part *part; + struct rspamd_lua_text *content_text; + const char *content = NULL; + gsize content_len = 0; + struct rspamd_mime_part *part, *original_part = NULL; struct rspamd_mime_text_part *txt_part; + gboolean is_table = FALSE; + + /* Accept string, rspamd_text, or table of texts */ + if (lua_type(L, 3) == LUA_TTABLE) { + is_table = TRUE; + /* Calculate total length first */ + lua_pushnil(L); + while (lua_next(L, 3) != 0) { + struct rspamd_lua_text *t = lua_check_text_or_string(L, -1); + if (t) { + content_len += t->len; + } + lua_pop(L, 1); + } + } + else { + content_text = lua_check_text_or_string(L, 3); + if (!content_text) { + return luaL_error(L, "invalid content argument (expected string, text, or table)"); + } + content = content_text->start; + content_len = content_text->len; + } + + /* Check for optional original_part parameter */ + if (lua_gettop(L) >= 4 && lua_isuserdata(L, 4)) { + original_part = *((struct rspamd_mime_part **) + rspamd_lua_check_udata_maybe(L, 4, rspamd_mimepart_classname)); + } if (task && task->message) { if (g_ascii_strcasecmp(type, "text") == 0) { @@ -2794,6 +2827,11 @@ lua_task_inject_part(lua_State *L) part->part_type = RSPAMD_MIME_PART_TEXT; part->flags |= RSPAMD_MIME_PART_COMPUTED; + /* Set parent part if provided */ + if (original_part) { + part->parent_part = original_part; + } + /* Basic headers setup */ part->ct = rspamd_mempool_alloc0(task->task_pool, sizeof(*part->ct)); @@ -2805,9 +2843,30 @@ lua_task_inject_part(lua_State *L) part->ct->charset.begin = "utf-8"; part->ct->charset.len = 5; - /* Content setup */ - part->parsed_data.begin = rspamd_mempool_strdup(task->task_pool, content); + /* Content setup - merge table or copy single content */ + part->parsed_data.begin = rspamd_mempool_alloc(task->task_pool, content_len + 1); part->parsed_data.len = content_len; + + if (is_table) { + /* Efficiently merge all text chunks */ + char *dst = (char *) part->parsed_data.begin; + lua_pushnil(L); + while (lua_next(L, 3) != 0) { + struct rspamd_lua_text *t = lua_check_text_or_string(L, -1); + if (t && t->len > 0) { + memcpy(dst, t->start, t->len); + dst += t->len; + } + lua_pop(L, 1); + } + *dst = '\0'; + } + else { + /* Single content */ + memcpy((char *) part->parsed_data.begin, content, content_len); + ((char *) part->parsed_data.begin)[content_len] = '\0'; + } + part->raw_data = part->parsed_data; /* Text part specific setup */ @@ -3100,18 +3159,28 @@ lua_task_get_parts(lua_State *L) unsigned int i; struct rspamd_task *task = lua_check_task(L, 1); struct rspamd_mime_part *part, **ppart; + gboolean include_virtual = FALSE; + + if (lua_gettop(L) >= 2) { + include_virtual = lua_toboolean(L, 2); + } if (task != NULL) { if (task->message) { lua_createtable(L, MESSAGE_FIELD(task, parts)->len, 0); + int idx = 1; PTR_ARRAY_FOREACH(MESSAGE_FIELD(task, parts), i, part) { + if (!include_virtual && (part->flags & RSPAMD_MIME_PART_COMPUTED)) { + continue; + } + ppart = lua_newuserdata(L, sizeof(struct rspamd_mime_part *)); *ppart = part; rspamd_lua_setclass(L, rspamd_mimepart_classname, -1); /* Make it array */ - lua_rawseti(L, -2, i + 1); + lua_rawseti(L, -2, idx++); } } else {