From: Vsevolod Stakhov Date: Fri, 30 Nov 2018 14:10:20 +0000 (+0000) Subject: [Feature] Allow to choose words format in `rspamadm mime` X-Git-Tag: 1.8.3~21 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=34fd4ae9cfd85f58f0d18455cf929b79047d4bd7;p=thirdparty%2Frspamd.git [Feature] Allow to choose words format in `rspamadm mime` --- diff --git a/lualib/rspamadm/mime.lua b/lualib/rspamadm/mime.lua index 974d98bcfe..3fbcb014b8 100644 --- a/lualib/rspamadm/mime.lua +++ b/lualib/rspamadm/mime.lua @@ -76,6 +76,16 @@ extract:flag "-p --part" :description "Show part info" extract:flag "-s --structure" :description "Show structure info (e.g. HTML tags)" +extract:option "-F --words-format" + :description "Words format ('stem', 'norm', 'raw', 'full')" + :argname("") + :convert { + stem = "stem", + norm = "norm", + raw = "raw", + full = "full", + } + :default "stem" local stat = parser:command "stat st s" @@ -245,6 +255,28 @@ local function extract_handler(opts) end end + local function print_words(words, full) + local fun = require "fun" + + if not full then + return table.concat(words, ' ') + else + return table.concat( + fun.totable( + fun.map(function(w) + -- [1] - stemmed word + -- [2] - normalised word + -- [3] - raw word + -- [4] - flags (table of strings) + return string.format('%s|%s|%s(%s)', + w[3], w[2], w[1], table.concat(w[4], ',')) + end, words) + ), + ' ' + ) + end + end + for _,fname in ipairs(opts.file) do local task = load_task(opts, fname) out_elts[fname] = {} @@ -254,6 +286,12 @@ local function extract_handler(opts) opts.html = true end + if opts.words then + local howw = opts['words_format'] or 'stem' + table.insert(out_elts[fname], 'meta_words: ' .. + print_words(task:get_meta_words(howw), howw == 'full')) + end + if opts.text or opts.html then local mp = task:get_parts() or {} @@ -265,14 +303,18 @@ local function extract_handler(opts) if part and opts.text and not part:is_html() then maybe_print_text_part_info(part, out_elts[fname]) if opts.words then - table.insert(out_elts[fname], table.concat(part:get_words(), ' ')) + local howw = opts['words_format'] or 'stem' + table.insert(out_elts[fname], print_words(part:get_words(howw), + howw == 'full')) else table.insert(out_elts[fname], tostring(part:get_content(how))) end elseif part and opts.html and part:is_html() then maybe_print_text_part_info(part, out_elts[fname]) if opts.words then - table.insert(out_elts[fname], table.concat(part:get_words(), ' ')) + local howw = opts['words_format'] or 'stem' + table.insert(out_elts[fname], print_words(part:get_words(howw), + howw == 'full')) else if opts.structure then local hc = part:get_html() diff --git a/src/lua/lua_common.c b/src/lua/lua_common.c index fe463b7634..7bb45f3477 100644 --- a/src/lua/lua_common.c +++ b/src/lua/lua_common.c @@ -2516,7 +2516,7 @@ rspamd_lua_push_words (lua_State *L, GArray *words, lua_rawseti (L, -2, cnt ++); break; } - - return 1; } + + return 1; } \ No newline at end of file