local strs = {}
for ext,pats in pairs(msoffice_patterns) do
for _,pat in ipairs(pats) do
+ -- These are utf16 strings in fact...
strs[#strs + 1] = '^' ..
table.concat(
fun.totable(
strs = {}
for ext,pats in pairs(msoffice_clsids) do
for _,pat in ipairs(pats) do
+ -- Convert hex to re
local hex_table = {}
for i=1,#pat,2 do
local subc = pat:sub(i, i + 1)
until directory_offset >= inplen
end
+
exports.ole_format_heuristic = detect_ole_format
+-- Guesses the extension of a mime part from what the part reports about
+-- itself (text, image or archive), without looking at magic patterns.
+-- @param part mime part object
+-- @return extension string and its weight, or nil when the part is none of
+-- the recognised kinds
+exports.mime_part_heuristic = function(part)
+  -- Every positive answer of this heuristic carries the same weight
+  local weight = 60
+
+  if part:is_text() then
+    local kind = 'txt'
+    if part:get_text():is_html() then
+      kind = 'html'
+    end
+    return kind, weight
+  end
+
+  if part:is_image() then
+    local image_type = part:get_image():get_type()
+    return image_type:lower(), weight
+  end
+
+  if part:is_archive() then
+    -- TODO: add files heuristics
+    local archive_type = part:get_archive():get_type()
+    return archive_type:lower(), weight
+  end
+
+  return nil
+end
+
return exports
\ No newline at end of file
local patterns = require "lua_magic/patterns"
local types = require "lua_magic/types"
+local heuristics = require "lua_magic/heuristics"
local fun = require "fun"
local lua_util = require "lua_util"
return nil
end
+-- Detects the type of a mime part: the part-level heuristic is asked first,
+-- and the content-based `exports.detect` is used when it gives no answer or
+-- an answer whose weight does not exceed 20.
+-- @param part mime part object
+-- @param log_obj object passed through to `exports.detect`
+-- @return extension and the matching entry of the `types` table, or whatever
+-- `exports.detect` returns for the part content
+exports.detect_mime_part = function(part, log_obj)
+  local guessed, confidence = heuristics.mime_part_heuristic(part)
+  local trusted = guessed and confidence and confidence > 20
+
+  if not trusted then
+    return exports.detect(part:get_content(), log_obj)
+  end
+
+  return guessed, types[guessed]
+end
+
-- This parameter specifies how many bytes are checked in the input
-- Rspamd checks 2 chunks at start and 1 chunk at the end
exports.chunk_size = 32768
ct = 'application/x-uuencoded',
type = 'binary',
},
+ -- Types that are detected by Rspamd itself
+ -- Archives
+ zip = {
+ ct = 'application/zip',
+ type = 'archive',
+ },
+ rar = {
+ ct = 'application/x-rar',
+ type = 'archive',
+ },
+ ['7z'] = {
+ -- Full media type in 'type/subtype' form, like the sibling archive entries
+ ct = 'application/x-7z-compressed',
+ type = 'archive',
+ },
+ gz = {
+ ct = 'application/gzip',
+ type = 'archive',
+ },
+ -- Images
+ png = {
+ ct = 'image/png',
+ type = 'image',
+ },
+ gif = {
+ ct = 'image/gif',
+ type = 'image',
+ },
+ jpg = {
+ ct = 'image/jpeg',
+ type = 'image',
+ },
+ bmp = {
+ type = 'image',
+ ct = 'image/bmp',
+ },
+ -- Text
+ txt = {
+ type = 'text',
+ ct = 'text/plain',
+ },
+ html = {
+ type = 'text',
+ ct = 'text/html',
+ },
}
return types
\ No newline at end of file