From: Vsevolod Stakhov Date: Sun, 19 Jan 2020 09:39:06 +0000 (+0000) Subject: [Minor] Lua_content: Make text/urls extraction optional X-Git-Tag: 2.3~80 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=78ce6c313d9b8d1a104ba15d27363b5303cdc6c8;p=thirdparty%2Frspamd.git [Minor] Lua_content: Make text/urls extraction optional --- diff --git a/lualib/lua_content/pdf.lua b/lualib/lua_content/pdf.lua index 85f9398697..b577677e86 100644 --- a/lualib/lua_content/pdf.lua +++ b/lualib/lua_content/pdf.lua @@ -122,6 +122,8 @@ local exports = {} local config = { max_extraction_size = 512 * 1024, max_processing_size = 32 * 1024, + text_extraction = false, -- NYI feature + url_extraction = true, enabled = true, } @@ -626,7 +628,11 @@ local function process_dict(task, pdf, obj, dict) if obj.fonts[k] then local font = obj.fonts[k] - process_font(task, pdf, font, k) + + if config.text_extraction then + process_font(task, pdf, font, k) + end + lua_util.debugm(N, task, 'found font "%s" for object %s:%s -> %s', k, obj.major, obj.minor, font) end @@ -1047,8 +1053,12 @@ local function process_pdf(input, _, task) if pdf_output.start_objects and pdf_output.end_objects then -- Postprocess objects postprocess_pdf_objects(task, input, pdf_output) - search_text(task, pdf_output) - search_urls(task, pdf_output) + if config.text_extraction then + search_text(task, pdf_output) + end + if config.url_extraction then + search_urls(task, pdf_output) + end else pdf_output.flags.no_objects = true end