From ac585813528a7ca13e63bd53704b0bbfb2d3b773 Mon Sep 17 00:00:00 2001 From: Vsevolod Stakhov Date: Fri, 18 Jan 2019 17:18:27 +0000 Subject: [PATCH] [Feature] Mime_types: Add MIME_BAD_UNICODE rule --- conf/scores.d/mime_types_group.conf | 5 +++++ src/lua/lua_util.c | 15 ++++++++------- src/plugins/lua/mime_types.lua | 25 +++++++++++++++++++------ 3 files changed, 32 insertions(+), 13 deletions(-) diff --git a/conf/scores.d/mime_types_group.conf b/conf/scores.d/mime_types_group.conf index 10cb1ba935..7a2847b1ef 100644 --- a/conf/scores.d/mime_types_group.conf +++ b/conf/scores.d/mime_types_group.conf @@ -56,4 +56,9 @@ symbols = { description = "Bad extension"; one_shot = true; } + "MIME_BAD_UNICODE" { + weight = 8.0; + description = "Filename with known obscured unicode characters"; + one_shot = true; + } } \ No newline at end of file diff --git a/src/lua/lua_util.c b/src/lua/lua_util.c index 94554faa10..94f3bb7834 100644 --- a/src/lua/lua_util.c +++ b/src/lua/lua_util.c @@ -400,11 +400,11 @@ LUA_FUNCTION_DEF (util, is_utf_spoofed); LUA_FUNCTION_DEF (util, is_valid_utf8); /*** - * @function util.has_obscured_utf(str) + * @function util.has_obscured_unicode(str) * Returns true if a string has obscure UTF symbols (zero width spaces, order marks), ignores invalid utf characters - * @return {boolean} true if a has obscured utf characters + * @return {boolean} true if a string has obscured unicode characters (followed by the character and its offset when one is found) */ -LUA_FUNCTION_DEF (util, has_obscured_utf); +LUA_FUNCTION_DEF (util, has_obscured_unicode); /*** * @function util.readline([prompt]) @@ -616,7 +616,7 @@ static const struct luaL_reg utillib_f[] = { LUA_INTERFACE_DEF (util, caseless_hash_fast), LUA_INTERFACE_DEF (util, is_utf_spoofed), LUA_INTERFACE_DEF (util, is_valid_utf8), - LUA_INTERFACE_DEF (util, has_obscured_utf), + LUA_INTERFACE_DEF (util, has_obscured_unicode), LUA_INTERFACE_DEF (util, readline), LUA_INTERFACE_DEF (util, readpassphrase), LUA_INTERFACE_DEF (util, file_exists), @@ 
-2618,24 +2618,25 @@ lua_util_is_valid_utf8 (lua_State *L) } static gint -lua_util_has_obscured_utf (lua_State *L) +lua_util_has_obscured_unicode (lua_State *L) { LUA_TRACE_POINT; const gchar *str; gsize len; - gint32 i = 0; + gint32 i = 0, prev_i; UChar32 uc; str = lua_tolstring (L, 1, &len); while (i < len) { + prev_i = i; U8_NEXT (str, i, len, uc); if (uc > 0) { if (IS_OBSCURED_CHAR (uc)) { lua_pushboolean (L, true); lua_pushnumber (L, uc); /* Character */ - lua_pushnumber (L, i); /* Offset */ + lua_pushnumber (L, prev_i); /* Offset */ return 3; } diff --git a/src/plugins/lua/mime_types.lua b/src/plugins/lua/mime_types.lua index e9a10bc3c2..a7a859e533 100644 --- a/src/plugins/lua/mime_types.lua +++ b/src/plugins/lua/mime_types.lua @@ -21,6 +21,7 @@ end -- This plugin implements mime types checks for mail messages local logger = require "rspamd_logger" local lua_util = require "lua_util" +local rspamd_util = require "rspamd_util" local N = "mime_types" local settings = { file = '', @@ -32,6 +33,7 @@ local settings = { symbol_archive_in_archive = 'MIME_ARCHIVE_IN_ARCHIVE', symbol_double_extension = 'MIME_DOUBLE_BAD_EXTENSION', symbol_bad_extension = 'MIME_BAD_EXTENSION', + symbol_bad_unicode = 'MIME_BAD_UNICODE', regexp = false, extension_map = { -- extension -> mime_type html = 'text/html', @@ -832,6 +834,17 @@ local function check_mime_type(task) end local function check_filename(fname, ct, is_archive, part) + + local has_bad_unicode, char, ch_pos = rspamd_util.has_obscured_unicode(fname) + if has_bad_unicode then + task:insert_result(settings.symbol_bad_unicode, 1.0, + string.format("0x%x after %s", char, + fname:sub(1, ch_pos))) + end + + -- Replace potentially bad characters with '?' 
+ fname = fname:gsub('[^%s%g]', '?') + local ext,ext2,parts = gen_extension(fname) -- ext is the last extension, LOWERCASED -- ext2 is the one before last extension LOWERCASED @@ -945,7 +958,6 @@ local function check_mime_type(task) end if filename then - filename = filename:gsub('[^%s%g]', '?') check_filename(filename, ct, false, p) end @@ -976,11 +988,6 @@ local function check_mime_type(task) local nfiles = #fl for _,f in ipairs(fl) do - -- Strip bad characters - if f['name'] then - f['name'] = f['name']:gsub('[\128-\255%s%G]', '?') - end - if f['encrypted'] then task:insert_result(settings['symbol_encrypted_archive'], 1.0, f['name']) @@ -1158,6 +1165,12 @@ if opts then parent = id, group = 'mime_types', }) + rspamd_config:register_symbol({ + type = 'virtual', + name = settings['symbol_bad_unicode'], + parent = id, + group = 'mime_types', + }) rspamd_config:register_symbol({ type = 'virtual,nostat', name = 'MIME_TRACE', -- 2.47.3