From de1229f674bc8896a0373d030d3ead0c98f0e731 Mon Sep 17 00:00:00 2001 From: Vsevolod Stakhov Date: Tue, 26 Jun 2018 14:08:28 +0100 Subject: [PATCH] [Feature] Allow to show HTML structure on extraction --- lualib/rspamadm/mime.lua | 30 ++++++++++++++++++++++++++++-- src/lua/lua_html.c | 2 +- 2 files changed, 29 insertions(+), 3 deletions(-) diff --git a/lualib/rspamadm/mime.lua b/lualib/rspamadm/mime.lua index b44f647a0c..2cdec15139 100644 --- a/lualib/rspamadm/mime.lua +++ b/lualib/rspamadm/mime.lua @@ -74,6 +74,8 @@ extract:flag "-w --words" :description "Extracts words" extract:flag "-p --part" :description "Show part info" +extract:flag "-s --structure" + :description "Show structure info (e.g. HTML tags)" local stat = parser:command "stat st s" @@ -187,6 +189,7 @@ end local function extract_handler(opts) local out_elts = {} + local process_func if opts.words then -- Enable stemming @@ -249,7 +252,30 @@ local function extract_handler(opts) if opts.words then table.insert(out_elts[fname], table.concat(part:get_words(), ' ')) else - table.insert(out_elts[fname], tostring(part:get_content(how))) + if opts.structure then + local hc = part:get_html() + local res = {} + process_func = function(k, v) + return rspamd_logger.slog("%s = %s", k, v) + end + + hc:foreach_tag('any', function(tag) + local elt = {} + local ex = tag:get_extra() + elt.tag = tag:get_type() + if ex then + elt.extra = ex + end + local content = tag:get_content() + if content then + elt.content = content + end + table.insert(res, elt) + end) + out_elts[fname] = res + else + table.insert(out_elts[fname], tostring(part:get_content(how))) + end end end end @@ -260,7 +286,7 @@ local function extract_handler(opts) task:destroy() -- No automatic dtor end - print_elts(out_elts, opts) + print_elts(out_elts, opts, process_func) end local function stat_handler(opts) diff --git a/src/lua/lua_html.c b/src/lua/lua_html.c index 3734e37ff0..dc058745cf 100644 --- a/src/lua/lua_html.c +++ b/src/lua/lua_html.c @@ -492,7 +492,7 @@ lua_html_foreach_tag (lua_State *L) lua_pop (L, 1); } - if (hc && g_hash_table_size (ud.tags) > 0 && lua_isfunction (L, 3)) { + if (hc && (ud.any || g_hash_table_size (ud.tags) > 0) && lua_isfunction (L, 3)) { if (hc->html_tags) { lua_pushvalue (L, 3); -- 2.47.3