exports.ole_format_heuristic = detect_ole_format
+local function process_detected(res)
+ local extensions = lua_util.keys(res)
+
+ if #extensions > 0 then
+ table.sort(extensions, function(ex1, ex2)
+ return res[ex1] > res[ex2]
+ end)
+
+ return extensions,res[extensions[1]]
+ end
+
+ return nil
+end
+
local function detect_archive_flaw(part, arch)
local arch_type = arch:get_type()
local res = {
xlsx = 0,
pptx = 0,
jar = 0,
+ odt = 0,
+ odp = 0,
+ ods = 0
} -- ext + confidence pairs
-- General msoffice patterns
res.xlsx = res.docx + 30
elseif file == 'ppt/' then
res.xlsx = res.pptx + 30
+ elseif file == 'META-INF/manifest.xml' then
+ -- Apply ODT detection logic
+ local content = part:get_content()
+
+ if #content > 80 then
+ -- https://lists.oasis-open.org/archives/office/200505/msg00006.html
+ local start_span = content:span(30, 50)
+
+ local mp = tostring(start_span:span(1, 8))
+ if mp == 'mimetype' then
+ local spec_type = tostring(start_span:span(9))
+ if spec_type:find('vnd.oasis.opendocument.text') then
+ res.odt = 40
+ elseif spec_type:find('vnd.oasis.opendocument.spreadsheet') then
+ res.ods = 40
+ elseif spec_type:find('vnd.oasis.opendocument.formula') then
+ res.ods = 40
+ elseif spec_type:find('vnd.oasis.opendocument.chart') then
+ res.ods = 40
+ elseif spec_type:find('vnd.oasis.opendocument.presentation') then
+ res.odp = 40
+ elseif spec_type:find('vnd.oasis.opendocument.image') then
+ -- Assume image as odt
+ res.odt = 40
+ elseif spec_type:find('vnd.oasis.opendocument.graphics') then
+ -- Assume image as odt
+ res.odt = 40
+ end
+ end
+ end
end
end
+
+ local ext,weight = process_detected(res)
+
+ if weight >= 40 then
+ return ext,weight
+ end
end
return arch_type:lower(),40
-- Ole files
ole = {
ct = 'application/octet-stream',
- type = 'msoffice'
+ type = 'office'
},
doc = {
ct = 'application/msword',
- type = 'msoffice'
+ type = 'office'
},
xls = {
ct = 'application/vnd.ms-excel',
- type = 'msoffice'
+ type = 'office'
},
ppt = {
ct = 'application/vnd.ms-powerpoint',
- type = 'msoffice'
+ type = 'office'
},
vsd = {
ct = 'application/vnd.visio',
- type = 'msoffice'
+ type = 'office'
},
msi = {
ct = 'application/x-msi',
},
msg = {
ct = 'application/vnd.ms-outlook',
- type = 'msoffice'
+ type = 'office'
},
-- newer office (2007+)
docx = {
ct = 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
- type = 'msoffice'
+ type = 'office'
},
xlsx = {
ct = 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
- type = 'msoffice'
+ type = 'office'
},
pptx = {
ct = 'application/vnd.openxmlformats-officedocument.presentationml.presentation',
- type = 'msoffice'
+ type = 'office'
+ },
+ -- OpenOffice formats
+ odt = {
+ ct = 'application/vnd.oasis.opendocument.text',
+ type = 'office'
+ },
+ ods = {
+ ct = 'application/vnd.oasis.opendocument.spreadsheet',
+ type = 'office'
+ },
+ odp = {
+ ct = 'application/vnd.oasis.opendocument.presentation',
+ type = 'office'
},
-- other
pgp = {