From: Vsevolod Stakhov Date: Mon, 25 Jun 2018 15:28:43 +0000 (+0100) Subject: [Feature] Allow to extract words in `rspamadm mime` X-Git-Tag: 1.7.7~30 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=592ce3ab0a40fe016b0d90424d8eacc1e3025412;p=thirdparty%2Frspamd.git [Feature] Allow to extract words in `rspamadm mime` --- diff --git a/lualib/rspamadm/mime.lua b/lualib/rspamadm/mime.lua index 300dd43aea..ceb2894c5e 100644 --- a/lualib/rspamadm/mime.lua +++ b/lualib/rspamadm/mime.lua @@ -72,6 +72,8 @@ extract:option "-o --output" decoded_utf = "raw_utf" } :default "content" +extract:flag "-w --words" + :description "Extracts words" local stat = parser:command "stat st s" @@ -185,19 +187,37 @@ end local function extract_handler(opts) local out_elts = {} + + if opts.words then + -- Enable stemming + rspamd_config:init_subsystem('langdet') + end + for _,fname in ipairs(opts.file) do local task = load_task(opts, fname) out_elts[fname] = {} + if not opts.text and not opts.html then + parser:error('please select html or text part to be extracted') + end + if opts.text or opts.html then local tp = task:get_text_parts() or {} for _,part in ipairs(tp) do local how = opts.output if opts.text and not part:is_html() then - table.insert(out_elts[fname], tostring(part:get_content(how))) + if opts.words then + table.insert(out_elts[fname], table.concat(part:get_words(), ' ')) + else + table.insert(out_elts[fname], tostring(part:get_content(how))) + end elseif opts.html and part:is_html() then - table.insert(out_elts[fname], tostring(part:get_content(how))) + if opts.words then + table.insert(out_elts[fname], table.concat(part:get_words(), ' ')) + else + table.insert(out_elts[fname], tostring(part:get_content(how))) + end end end end