:require_command(true)
parser:option "-c --config"
- :description "Path to config file"
- :argname("<cfg>")
- :default(rspamd_paths["CONFDIR"] .. "/" .. "rspamd.conf")
+ :description "Path to config file"
+ :argname("<cfg>")
+ :default(rspamd_paths["CONFDIR"] .. "/" .. "rspamd.conf")
parser:mutex(
- parser:flag "-j --json"
- :description "JSON output",
- parser:flag "-U --ucl"
- :description "UCL output",
- parser:flag "-M --messagepack"
- :description "MessagePack output"
+ parser:flag "-j --json"
+ :description "JSON output",
+ parser:flag "-U --ucl"
+ :description "UCL output",
+ parser:flag "-M --messagepack"
+ :description "MessagePack output"
)
parser:flag "-C --compact"
- :description "Use compact format"
+ :description "Use compact format"
parser:flag "--no-file"
- :description "Do not print filename"
+ :description "Do not print filename"
-- Extract subcommand
local extract = parser:command "extract ex e"
- :description "Extracts data from MIME messages"
+ :description "Extracts data from MIME messages"
extract:argument "file"
- :description "File to process"
- :argname "<file>"
- :args "+"
+ :description "File to process"
+ :argname "<file>"
+ :args "+"
extract:flag "-t --text"
- :description "Extracts plain text data from a message"
+ :description "Extracts plain text data from a message"
extract:flag "-r --raw"
- :description "Load as raw file"
+ :description "Load as raw file"
extract:flag "-H --html"
- :description "Extracts htm data from a message"
+ :description "Extracts HTML data from a message"
extract:option "-o --output"
- :description "Output format ('raw', 'content', 'oneline', 'decoded', 'decoded_utf')"
- :argname("<type>")
- :convert {
- raw = "raw",
- content = "content",
- oneline = "content_oneline",
- decoded = "raw_parsed",
- decoded_utf = "raw_utf"
- }
- :default "content"
+ :description "Output format ('raw', 'content', 'oneline', 'decoded', 'decoded_utf')"
+ :argname("<type>")
+ :convert {
+ raw = "raw",
+ content = "content",
+ oneline = "content_oneline",
+ decoded = "raw_parsed",
+ decoded_utf = "raw_utf"
+}
+ :default "content"
extract:flag "-w --words"
- :description "Extracts words"
+ :description "Extracts words"
extract:flag "-p --part"
- :description "Show part info"
+ :description "Show part info"
extract:flag "-s --structure"
- :description "Show structure info (e.g. HTML tags)"
+ :description "Show structure info (e.g. HTML tags)"
extract:flag "-i --invisible"
- :description "Show invisible content for HTML parts"
+ :description "Show invisible content for HTML parts"
extract:option "-F --words-format"
- :description "Words format ('stem', 'norm', 'raw', 'full')"
- :argname("<type>")
- :convert {
- stem = "stem",
- norm = "norm",
- raw = "raw",
- full = "full",
- }
- :default "stem"
+ :description "Words format ('stem', 'norm', 'raw', 'full')"
+ :argname("<type>")
+ :convert {
+ stem = "stem",
+ norm = "norm",
+ raw = "raw",
+ full = "full",
+}
+ :default "stem"
local stat = parser:command "stat st s"
- :description "Extracts statistical data from MIME messages"
+ :description "Extracts statistical data from MIME messages"
stat:argument "file"
:description "File to process"
:argname "<file>"
:args "+"
stat:mutex(
- stat:flag "-m --meta"
- :description "Lua metatokens",
- stat:flag "-b --bayes"
- :description "Bayes tokens",
- stat:flag "-F --fuzzy"
- :description "Fuzzy hashes"
+ stat:flag "-m --meta"
+ :description "Lua metatokens",
+ stat:flag "-b --bayes"
+ :description "Bayes tokens",
+ stat:flag "-F --fuzzy"
+ :description "Fuzzy hashes"
)
stat:flag "-s --shingles"
:description "Show shingles for fuzzy hashes"
local urls = parser:command "urls url u"
- :description "Extracts URLs from MIME messages"
+ :description "Extracts URLs from MIME messages"
urls:argument "file"
:description "File to process"
:argname "<file>"
:args "+"
urls:mutex(
- urls:flag "-t --tld"
- :description "Get TLDs only",
- urls:flag "-H --host"
- :description "Get hosts only",
- urls:flag "-f --full"
- :description "Show piecewise urls as processed by Rspamd"
+ urls:flag "-t --tld"
+ :description "Get TLDs only",
+ urls:flag "-H --host"
+ :description "Get hosts only",
+ urls:flag "-f --full"
+ :description "Show piecewise urls as processed by Rspamd"
)
urls:flag "-u --unique"
:description "Reverse sort order"
local modify = parser:command "modify mod m"
- :description "Modifies MIME message"
+ :description "Modifies MIME message"
modify:argument "file"
- :description "File to process"
- :argname "<file>"
- :args "+"
+ :description "File to process"
+ :argname "<file>"
+ :args "+"
modify:option "-a --add-header"
- :description "Adds specific header"
- :argname "<header=value>"
- :count "*"
+ :description "Adds specific header"
+ :argname "<header=value>"
+ :count "*"
modify:option "-r --remove-header"
- :description "Removes specific header (all occurrences)"
- :argname "<header>"
- :count "*"
+ :description "Removes specific header (all occurrences)"
+ :argname "<header>"
+ :count "*"
modify:option "-R --rewrite-header"
- :description "Rewrites specific header, uses Lua string.format pattern"
- :argname "<header=pattern>"
- :count "*"
+ :description "Rewrites specific header, uses Lua string.format pattern"
+ :argname "<header=pattern>"
+ :count "*"
modify:option "-t --text-footer"
- :description "Adds footer to text/plain parts from a specific file"
- :argname "<file>"
+ :description "Adds footer to text/plain parts from a specific file"
+ :argname "<file>"
modify:option "-H --html-footer"
- :description "Adds footer to text/html parts from a specific file"
- :argname "<file>"
+ :description "Adds footer to text/html parts from a specific file"
+ :argname "<file>"
local strip = parser:command "strip"
- :description "Strip attachments from a message"
+ :description "Strip attachments from a message"
strip:argument "file"
- :description "File to process"
- :argname "<file>"
- :args "+"
+ :description "File to process"
+ :argname "<file>"
+ :args "+"
strip:flag "-i --keep-images"
- :description "Keep images"
+ :description "Keep images"
strip:option "--min-text-size"
- :description "Minimal text size to keep"
- :argname "<size>"
- :convert(tonumber)
- :default(0)
+ :description "Minimal text size to keep"
+ :argname "<size>"
+ :convert(tonumber)
+ :default(0)
strip:option "--max-text-size"
- :description "Max text size to keep"
- :argname "<size>"
- :convert(tonumber)
- :default(math.huge)
+ :description "Max text size to keep"
+ :argname "<size>"
+ :convert(tonumber)
+ :default(math.huge)
local anonymize = parser:command "anonymize"
- :description "Try to remove sensitive information from a message"
+ :description "Try to remove sensitive information from a message"
anonymize:argument "file"
- :description "File to process"
- :argname "<file>"
- :args "+"
+ :description "File to process"
+ :argname "<file>"
+ :args "+"
anonymize:option "--exclude-header -X"
- :description "Exclude specific headers from anonymization"
- :argname "<header>"
- :count "*"
+ :description "Exclude specific headers from anonymization"
+ :argname "<header>"
+ :count "*"
anonymize:option "--include-header -I"
- :description "Include specific headers from anonymization"
- :argname "<header>"
- :count "*"
+ :description "Include specific headers from anonymization"
+ :argname "<header>"
+ :count "*"
anonymize:flag "--gpt"
- :description "Use LLM model for anonymization (requires GPT plugin to be configured)"
+ :description "Use LLM model for anonymization (requires GPT plugin to be configured)"
anonymize:option "--model"
- :description "Model to use for anonymization"
- :argname "<model>"
+ :description "Model to use for anonymization"
+ :argname "<model>"
anonymize:option "--prompt"
- :description "Prompt to use for anonymization"
- :argname "<prompt>"
+ :description "Prompt to use for anonymization"
+ :argname "<prompt>"
local sign = parser:command "sign"
- :description "Performs DKIM signing"
+ :description "Performs DKIM signing"
sign:argument "file"
:description "File to process"
:argname "<file>"
:description "ARC or DKIM signing"
:argname("<arc|dkim>")
:convert {
- ['arc'] = 'arc',
- ['dkim'] = 'dkim',
- }
+ ['arc'] = 'arc',
+ ['dkim'] = 'dkim',
+}
:default 'dkim'
sign:option "-o --output"
:description "Output format"
:argname("<message|signature>")
:convert {
- ['message'] = 'message',
- ['signature'] = 'signature',
- }
+ ['message'] = 'message',
+ ['signature'] = 'signature',
+}
:default 'message'
local dump = parser:command "dump"
- :description "Dumps a raw message in different formats"
+ :description "Dumps a raw message in different formats"
dump:argument "file"
:description "File to process"
:argname "<file>"
:args "+"
-- Duplicate format for convenience
dump:mutex(
- parser:flag "-j --json"
- :description "JSON output",
- parser:flag "-U --ucl"
- :description "UCL output",
- parser:flag "-M --messagepack"
- :description "MessagePack output"
+ parser:flag "-j --json"
+ :description "JSON output",
+ parser:flag "-U --ucl"
+ :description "UCL output",
+ parser:flag "-M --messagepack"
+ :description "MessagePack output"
)
dump:flag "-s --split"
:description "Split the output file contents such that no content is embedded"
if not task:process_message() then
parser:error(string.format('cannot read message from %s: %s', fname,
- 'failed to parse'))
+ 'failed to parse'))
return nil
end
if not opts.json and not opts.ucl then
table.insert(out,
- rspamd_logger.slog('Part: %s: %s, language: %s, size: %s (%s raw), words: %s',
- part:get_mimepart():get_digest():sub(1, 8),
- t,
- part:get_language(),
- part:get_length(), part:get_raw_length(),
- part:get_words_count()))
+ rspamd_logger.slog('Part: %s: %s, language: %s, size: %s (%s raw), words: %s',
+ part:get_mimepart():get_digest():sub(1, 8),
+ t,
+ part:get_language(),
+ part:get_length(), part:get_raw_length(),
+ part:get_words_count()))
table.insert(out,
- rspamd_logger.slog('Stats: %s',
- fun.foldl(function(acc, k, v)
- if acc ~= '' then
- return string.format('%s, %s:%s', acc, k, v)
- else
- return string.format('%s:%s', k, v)
- end
- end, '', part:get_stats())))
+ rspamd_logger.slog('Stats: %s',
+ fun.foldl(function(acc, k, v)
+ if acc ~= '' then
+ return string.format('%s, %s:%s', acc, k, v)
+ else
+ return string.format('%s:%s', k, v)
+ end
+ end, '', part:get_stats())))
end
end
end
local mtype, msubtype = part:get_type()
local det_mtype, det_msubtype = part:get_detected_type()
table.insert(out,
- rspamd_logger.slog('Mime Part: %s: %s/%s (%s/%s detected), filename: %s (%s detected ext), size: %s',
- part:get_digest():sub(1, 8),
- mtype, msubtype,
- det_mtype, det_msubtype,
- part:get_filename(),
- part:get_detected_ext(),
- part:get_length()))
+ rspamd_logger.slog('Mime Part: %s: %s/%s (%s/%s detected), filename: %s (%s detected ext), size: %s',
+ part:get_digest():sub(1, 8),
+ mtype, msubtype,
+ det_mtype, det_msubtype,
+ part:get_filename(),
+ part:get_detected_ext(),
+ part:get_length()))
end
end
end
return table.concat(words, ' ')
else
return table.concat(
- fun.totable(
- fun.map(function(w)
- -- [1] - stemmed word
- -- [2] - normalised word
- -- [3] - raw word
- -- [4] - flags (table of strings)
- return string.format('%s|%s|%s(%s)',
- w[3], w[2], w[1], table.concat(w[4], ','))
- end, words)
- ),
- ' '
+ fun.totable(
+ fun.map(function(w)
+ -- [1] - stemmed word
+ -- [2] - normalised word
+ -- [3] - raw word
+ -- [4] - flags (table of strings)
+ return string.format('%s|%s|%s(%s)',
+ w[3], w[2], w[1], table.concat(w[4], ','))
+ end, words)
+ ),
+ ' '
)
end
end
if opts.words then
local how_words = opts['words_format'] or 'stem'
table.insert(out_elts[fname], 'meta_words: ' ..
- print_words(task:get_meta_words(how_words), how_words == 'full'))
+ print_words(task:get_meta_words(how_words), how_words == 'full'))
end
if opts.text or opts.html then
- local mp = task:get_parts() or {}
+ local mp_all = task:get_parts(true) or {}
+
+ -- Build map: parent part digest -> injected text part
+ local injected_map = {}
+ for _, p in ipairs(mp_all) do
+ if p:is_injected() and p:is_text() then
- for _, mime_part in ipairs(mp) do
+ local parent = p:get_parent()
+ if parent then
+ injected_map[parent:get_digest()] = p:get_text()
+ end
+ end
+ end
+
+ -- Build table: {{part, injected_text or nil}, ...}
+ local parts_to_process = {}
+ for _, p in ipairs(mp_all) do
+ if not p:is_injected() then
+ table.insert(parts_to_process, { p, injected_map[p:get_digest()] })
+ end
+ end
+
+ -- Process the parts
+ for _, entry in ipairs(parts_to_process) do
+ local mime_part = entry[1]
+ local injected_part = entry[2]
local how = opts.output
local part
+
if mime_part:is_text() then
part = mime_part:get_text()
end
if opts.words then
local how_words = opts['words_format'] or 'stem'
table.insert(out_elts[fname], print_words(part:get_words(how_words),
- how_words == 'full'))
+ how_words == 'full'))
else
table.insert(out_elts[fname], tostring(part:get_content(how)))
end
+ elseif injected_part and opts.text and not injected_part:is_html() then
+ -- Show parent part info but content from injected child
+ maybe_print_mime_part_info(mime_part, out_elts[fname])
+ if not opts.json and not opts.ucl then
+ table.insert(out_elts[fname], string.format('[Extracted text from %s]',
+ mime_part:get_filename() or 'attachment'))
+ table.insert(out_elts[fname], '\n')
+ end
+
+ if opts.words then
+ local how_words = opts['words_format'] or 'stem'
+ table.insert(out_elts[fname], print_words(injected_part:get_words(how_words),
+ how_words == 'full'))
+ else
+ table.insert(out_elts[fname], tostring(injected_part:get_content(how)))
+ end
elseif part and opts.html and part:is_html() then
maybe_print_text_part_info(part, out_elts[fname])
maybe_print_mime_part_info(mime_part, out_elts[fname])
if opts.words then
local how_words = opts['words_format'] or 'stem'
table.insert(out_elts[fname], print_words(part:get_words(how_words),
- how_words == 'full'))
+ how_words == 'full'))
else
if opts.structure then
local hc = part:get_html()
local fun = require "fun"
if type(elt) == 'table' then
return table.concat(fun.totable(
- fun.map(
- function(t)
- return rspamd_logger.slog("%s", t)
- end,
- elt)), '\n')
+ fun.map(
+ function(t)
+ return rspamd_logger.slog("%s", t)
+ end,
+ elt)), '\n')
else
return rspamd_logger.slog("%s", elt)
end
if opts.invisible then
local hc = part:get_html()
table.insert(out_elts[fname], string.format('invisible content: %s',
- tostring(hc:get_invisible())))
+ tostring(hc:get_invisible())))
end
end
- else
- -- Not a text part, check for PDF
- local _, msubtype = mime_part:get_type()
- if msubtype == 'pdf' and opts.text then
- local lua_content_pdf = require "lua_content.pdf"
- -- Get raw content of the part
- local content = mime_part:get_content()
- if content then
- local res = lua_content_pdf.process(content, mime_part, task)
- if res and res.extract_text then
- local text_data = res.extract_text(res)
- if text_data and #text_data > 0 then
- maybe_print_mime_part_info(mime_part, out_elts[fname])
- for _, txt in ipairs(text_data) do
- table.insert(out_elts[fname], tostring(txt))
- end
- end
- end
- end
- end
end
if not part then
out_elts[fname] = bt
process_func = function(e)
return string.format('%s (%d): "%s"+"%s", [%s]', e.data, e.win, e.t1 or "",
- e.t2 or "", table.concat(fun.totable(
- fun.map(function(k)
- return k
- end, e.flags)), ","))
+ e.t2 or "", table.concat(fun.totable(
+ fun.map(function(k)
+ return k
+ end, e.flags)), ","))
end
elseif opts.fuzzy then
local parts = task:get_parts() or {}
digest = digest,
shingles = shingles,
type = string.format('%s/%s',
- ({ part:get_type() })[1],
- ({ part:get_type() })[2])
+ ({ part:get_type() })[1],
+ ({ part:get_type() })[2])
})
else
table.insert(out_elts[fname], {
digest = part:get_digest(),
file = part:get_filename(),
type = string.format('%s/%s',
- ({ part:get_type() })[1],
- ({ part:get_type() })[2])
+ ({ part:get_type() })[1],
+ ({ part:get_type() })[2])
})
end
end
if hname == name then
local new_value = string.format(hpattern, hdr.decoded)
new_value = string.format('%s:%s%s',
- name, hdr.separator,
- rspamd_util.fold_header(name,
- rspamd_util.mime_header_encode(new_value),
- task:get_newlines_type()))
+ name, hdr.separator,
+ rspamd_util.fold_header(name,
+ rspamd_util.mime_header_encode(new_value),
+ task:get_newlines_type()))
out[#out + 1] = new_value
return
end
if rewrite.need_rewrite_ct then
if name:lower() == 'content-type' then
local nct = string.format('%s: %s/%s; charset=utf-8',
- 'Content-Type', rewrite.new_ct.type, rewrite.new_ct.subtype)
+ 'Content-Type', rewrite.new_ct.type, rewrite.new_ct.subtype)
out[#out + 1] = nct
return
elseif name:lower() == 'content-transfer-encoding' then
out[#out + 1] = string.format('%s: %s',
- 'Content-Transfer-Encoding', rewrite.new_cte or 'quoted-printable')
+ 'Content-Transfer-Encoding', rewrite.new_cte or 'quoted-printable')
seen_cte = true
return
end
if hname and hvalue then
out[#out + 1] = string.format('%s: %s', hname,
- rspamd_util.fold_header(hname, hvalue, task:get_newlines_type()))
+ rspamd_util.fold_header(hname, hvalue, task:get_newlines_type()))
end
end
if not seen_cte and rewrite.need_rewrite_ct then
out[#out + 1] = string.format('%s: %s',
- 'Content-Transfer-Encoding', rewrite.new_cte or 'quoted-printable')
+ 'Content-Transfer-Encoding', rewrite.new_cte or 'quoted-printable')
end
-- End of headers
io.flush()
else
local dkim_hdr = string.format('%s: %s%s',
- 'DKIM-Signature',
- rspamd_util.fold_header('DKIM-Signature',
- rspamd_util.mime_header_encode(sig),
- task:get_newlines_type()),
- newline(task))
+ 'DKIM-Signature',
+ rspamd_util.fold_header('DKIM-Signature',
+ rspamd_util.mime_header_encode(sig),
+ task:get_newlines_type()),
+ newline(task))
io.write(dkim_hdr)
io.flush()
task:get_content():save_in_file(1)
*/
LUA_FUNCTION_DEF(task, get_emails);
/***
- * @method task:inject_part(type, content)
+ * @method task:inject_part(type, content[, original_part])
* Injects a virtual mime part into the task structure
* @param {string} type part type (currently only "text" is supported)
- * @param {string} content part content
+ * @param {string/text/table} content part content (accepts string, rspamd_text, or table of rspamd_text chunks - will be efficiently concatenated in C)
+ * @param {rspamd_mimepart} original_part optional original mime part that this injected part is derived from (sets parent relationship)
* @return {boolean} true if part was injected
*/
LUA_FUNCTION_DEF(task, inject_part);
/***
- * @method task:get_text_parts()
- * Get all text (and HTML) parts found in a message
+ * @method task:get_text_parts([include_virtual])
+ * Get all text (and HTML) parts found in a message. By default, injected/virtual parts are excluded.
+ * @param {boolean} include_virtual if true, include injected/virtual parts (default: false)
* @return {table rspamd_text_part} list of text parts
*/
LUA_FUNCTION_DEF(task, get_text_parts);
/***
- * @method task:get_parts()
- * Get all mime parts found in a message
+ * @method task:get_parts([include_virtual])
+ * Get all mime parts found in a message. By default, injected/virtual parts are excluded.
+ * @param {boolean} include_virtual if true, include injected/virtual parts (default: false)
* @return {table rspamd_mime_part} list of mime parts
*/
LUA_FUNCTION_DEF(task, get_parts);
LUA_TRACE_POINT;
struct rspamd_task *task = lua_check_task(L, 1);
const char *type = luaL_checkstring(L, 2);
- gsize content_len;
- const char *content = luaL_checklstring(L, 3, &content_len);
- struct rspamd_mime_part *part;
+ struct rspamd_lua_text *content_text;
+ const char *content = NULL;
+ gsize content_len = 0;
+ struct rspamd_mime_part *part, *original_part = NULL;
struct rspamd_mime_text_part *txt_part;
+ gboolean is_table = FALSE;
+
+ /* Accept string, rspamd_text, or table of texts */
+ if (lua_type(L, 3) == LUA_TTABLE) {
+ is_table = TRUE;
+ /* Calculate total length first */
+ lua_pushnil(L);
+ while (lua_next(L, 3) != 0) {
+ struct rspamd_lua_text *t = lua_check_text_or_string(L, -1);
+ if (t) {
+ content_len += t->len;
+ }
+ lua_pop(L, 1);
+ }
+ }
+ else {
+ content_text = lua_check_text_or_string(L, 3);
+ if (!content_text) {
+ return luaL_error(L, "invalid content argument (expected string, text, or table)");
+ }
+ content = content_text->start;
+ content_len = content_text->len;
+ }
+
+ /*
+ * Check for optional original_part parameter. The userdata at index 4 is
+ * only accepted when it is a mimepart: rspamd_lua_check_udata_maybe() is
+ * expected to return NULL for any other class, so the result must be
+ * checked before it is dereferenced; otherwise original_part stays NULL.
+ */
+ if (lua_gettop(L) >= 4 && lua_isuserdata(L, 4)) {
+ struct rspamd_mime_part **orig_ppart = (struct rspamd_mime_part **)
+ rspamd_lua_check_udata_maybe(L, 4, rspamd_mimepart_classname);
+
+ if (orig_ppart != NULL) {
+ original_part = *orig_ppart;
+ }
+ }
if (task && task->message) {
if (g_ascii_strcasecmp(type, "text") == 0) {
part->part_type = RSPAMD_MIME_PART_TEXT;
part->flags |= RSPAMD_MIME_PART_COMPUTED;
+ /* Set parent part if provided */
+ if (original_part) {
+ part->parent_part = original_part;
+ }
+
/* Basic headers setup */
part->ct = rspamd_mempool_alloc0(task->task_pool, sizeof(*part->ct));
part->ct->charset.begin = "utf-8";
part->ct->charset.len = 5;
- /* Content setup */
- part->parsed_data.begin = rspamd_mempool_strdup(task->task_pool, content);
+ /* Content setup - merge table or copy single content */
+ part->parsed_data.begin = rspamd_mempool_alloc(task->task_pool, content_len + 1);
part->parsed_data.len = content_len;
+
+ if (is_table) {
+ /* Efficiently merge all text chunks */
+ char *dst = (char *) part->parsed_data.begin;
+ lua_pushnil(L);
+ while (lua_next(L, 3) != 0) {
+ struct rspamd_lua_text *t = lua_check_text_or_string(L, -1);
+ if (t && t->len > 0) {
+ memcpy(dst, t->start, t->len);
+ dst += t->len;
+ }
+ lua_pop(L, 1);
+ }
+ *dst = '\0';
+ }
+ else {
+ /* Single content */
+ memcpy((char *) part->parsed_data.begin, content, content_len);
+ ((char *) part->parsed_data.begin)[content_len] = '\0';
+ }
+
part->raw_data = part->parsed_data;
/* Text part specific setup */
unsigned int i;
struct rspamd_task *task = lua_check_task(L, 1);
struct rspamd_mime_part *part, **ppart;
+ gboolean include_virtual = FALSE;
+
+ if (lua_gettop(L) >= 2) {
+ include_virtual = lua_toboolean(L, 2);
+ }
if (task != NULL) {
if (task->message) {
lua_createtable(L, MESSAGE_FIELD(task, parts)->len, 0);
+ int idx = 1;
PTR_ARRAY_FOREACH(MESSAGE_FIELD(task, parts), i, part)
{
+ if (!include_virtual && (part->flags & RSPAMD_MIME_PART_COMPUTED)) {
+ continue;
+ }
+
ppart = lua_newuserdata(L, sizeof(struct rspamd_mime_part *));
*ppart = part;
rspamd_lua_setclass(L, rspamd_mimepart_classname, -1);
/* Make it array */
- lua_rawseti(L, -2, i + 1);
+ lua_rawseti(L, -2, idx++);
}
}
else {