From eb120f830eecdbea31bf7c4090c45a7784de682b Mon Sep 17 00:00:00 2001
From: Vsevolod Stakhov
Date: Sun, 8 Sep 2019 09:35:01 +0100
Subject: [PATCH] [Project] Lua_magic: Add heuristics for Office 2007+

---
 lualib/lua_magic/heuristics.lua | 46 +++++++++++++++++++++++++++++++++++++++++++---
 lualib/lua_magic/types.lua      | 15 ++++++++++++++-
 2 files changed, 57 insertions(+), 4 deletions(-)

diff --git a/lualib/lua_magic/heuristics.lua b/lualib/lua_magic/heuristics.lua
index 6a407f5e90..167edd0c9c 100644
--- a/lualib/lua_magic/heuristics.lua
+++ b/lualib/lua_magic/heuristics.lua
@@ -165,9 +165,50 @@ local function detect_ole_format(input, log_obj)
   until directory_offset >= inplen
 end
 
-
 exports.ole_format_heuristic = detect_ole_format
 
+-- Scores an archive against container-based document formats (Office 2007+).
+-- For zip archives the entry names reported by arch:get_files() bump the
+-- per-extension confidence kept in res.
+-- NOTE: res is not consulted by the return statement yet, so callers always
+-- receive the lowercased archive type with a fixed confidence of 40.
+-- @param part mime part that owns the archive (currently unused)
+-- @param arch archive object providing get_type() and get_files()
+-- @return extension string and numeric confidence
+local function detect_archive_flaw(part, arch)
+  local arch_type = arch:get_type()
+  local res = {
+    docx = 0,
+    xlsx = 0,
+    pptx = 0,
+    jar = 0,
+  } -- ext + confidence pairs
+
+  -- General msoffice patterns
+  local function add_msoffice_confidence(incr)
+    res.docx = res.docx + incr
+    res.xlsx = res.xlsx + incr
+    res.pptx = res.pptx + incr
+  end
+
+  if arch_type == 'zip' then
+    -- Find specific files/folders in zip file
+    local files = arch:get_files() or {}
+    for _,file in ipairs(files) do
+      if file == '[Content_Types].xml' then
+        add_msoffice_confidence(10)
+      elseif file == 'xl/' then
+        res.xlsx = res.xlsx + 30
+      elseif file == 'word/' then
+        res.docx = res.docx + 30
+      elseif file == 'ppt/' then
+        res.pptx = res.pptx + 30
+      end
+    end
+  end
+
+  return arch_type:lower(),40
+end
 exports.mime_part_heuristic = function(part)
   if part:is_text() then
     if part:get_text():is_html() then
@@ -184,8 +225,7 @@ exports.mime_part_heuristic = function(part)
 
   if part:is_archive() then
     local arch = part:get_archive()
-    -- TODO: add files heuristics
-    return arch:get_type():lower(),60
+    return detect_archive_flaw(part, arch)
   end
 
   return nil
diff --git a/lualib/lua_magic/types.lua b/lualib/lua_magic/types.lua
index c8850cd18d..c5de552c80 100644
--- a/lualib/lua_magic/types.lua
+++ b/lualib/lua_magic/types.lua
@@ -157,7 +157,20 @@ local types = {
   },
   msg = {
     ct = 'application/vnd.ms-outlook',
-    type = 'executable'
+    type = 'msoffice'
+  },
+  -- newer office (2007+)
+  docx = {
+    ct = 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
+    type = 'msoffice'
+  },
+  xlsx = {
+    ct = 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
+    type = 'msoffice'
+  },
+  pptx = {
+    ct = 'application/vnd.openxmlformats-officedocument.presentationml.presentation',
+    type = 'msoffice'
   },
   -- other
   pgp = {
-- 
2.47.3