From: Vsevolod Stakhov Date: Thu, 22 Apr 2021 13:22:52 +0000 (+0100) Subject: [Minor] Lua_magic: Return utf8 check as it is useful for many cases X-Git-Tag: 3.0~466 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=f4d5f9c4e45952d37d3aa2549f8a8cd133d09181;p=thirdparty%2Frspamd.git [Minor] Lua_magic: Return utf8 check as it is useful for many cases --- diff --git a/lualib/lua_magic/heuristics.lua b/lualib/lua_magic/heuristics.lua index aa8e9e8190..66e186906d 100644 --- a/lualib/lua_magic/heuristics.lua +++ b/lualib/lua_magic/heuristics.lua @@ -334,6 +334,21 @@ exports.text_part_heuristic = function(part, log_obj, _) local n8bit = 0 while b >= 127 and n8bit < remain do + -- utf8 part + if bit.band(b, 0xe0) == 0xc0 and remain > 1 and + bit.band(bytes[idx + 1], 0xc0) == 0x80 then + return true,1 + elseif bit.band(b, 0xf0) == 0xe0 and remain > 2 and + bit.band(bytes[idx + 1], 0xc0) == 0x80 and + bit.band(bytes[idx + 2], 0xc0) == 0x80 then + return true,2 + elseif bit.band(b, 0xf8) == 0xf0 and remain > 3 and + bit.band(bytes[idx + 1], 0xc0) == 0x80 and + bit.band(bytes[idx + 2], 0xc0) == 0x80 and + bit.band(bytes[idx + 3], 0xc0) == 0x80 then + return true,3 + end + n8bit = n8bit + 1 idx = idx + 1 b = bytes[idx]