]> git.ipfire.org Git - thirdparty/rspamd.git/commitdiff
[Minor] Fix performance issue with is_utf_outside_range
authorMiecio Za <miecio@miecio.net>
Mon, 18 Mar 2019 13:06:56 +0000 (14:06 +0100)
committerMiecio Za <miecio@miecio.net>
Mon, 18 Mar 2019 13:06:56 +0000 (14:06 +0100)
Fix performance issue, add some checks and add a few tests

src/lua/lua_util.c
test/lua/unit/rspamd_util.lua [new file with mode: 0644]

index 71d61da62e3d72d318a2b929d081d736d33a66c6..7c98a0989ec6b74b83190014346a173c81770fef 100644 (file)
@@ -22,6 +22,7 @@
 #include "libmime/email_addr.h"
 #include "libmime/content_type.h"
 #include "libmime/mime_headers.h"
+#include "libutil/hash.h"
 #include "linenoise.h"
 #include <math.h>
 #include <glob.h>
@@ -2458,6 +2459,12 @@ lua_util_is_utf_spoofed (lua_State *L)
                        uspoof_setChecks (spc_sgl,
                                        USPOOF_INVISIBLE | USPOOF_MIXED_SCRIPT_CONFUSABLE | USPOOF_ANY_CASE,
                                        &uc_err);
+                       if (uc_err != U_ZERO_ERROR) {
+                               msg_err ("Cannot set proper checks for uspoof: %s", u_errorName (uc_err));
+                               lua_pushboolean (L, false);
+                               uspoof_close(spc);
+                               return 1;
+                       }
                }
 
                ret = uspoof_checkUTF8 (spc_sgl, s1, l1, NULL, &uc_err);
@@ -2533,28 +2540,52 @@ lua_util_is_utf_outside_range(lua_State *L)
        guint32 range_start = lua_tointeger (L, 2);
        guint32 range_end = lua_tointeger (L, 3);
 
-       USpoofChecker *spc_sgl;
-       USet * allowed_chars;
-       UErrorCode uc_err = U_ZERO_ERROR;
+       static rspamd_lru_hash_t *validators;
+
+       if (validators == NULL) {
+               validators = rspamd_lru_hash_new(16, g_free, (GDestroyNotify)uspoof_close);
+       }
 
        if (string_to_check) {
-               spc_sgl = uspoof_open (&uc_err);
-               if (uc_err != U_ZERO_ERROR) {
-                       msg_err ("cannot init spoof checker: %s", u_errorName (uc_err));
-                       lua_pushboolean (L, false);
-                       uspoof_close(spc_sgl);
-                       return 1;
-               }
+               guint64 hash_key = (guint64)range_end << 32 || range_start;
+
+               USpoofChecker *validator = rspamd_lru_hash_lookup(validators, &hash_key, time(NULL));
+
+               UErrorCode uc_err = U_ZERO_ERROR;
+
+               if (validator == NULL) {
+                       USet * allowed_chars;
+                       guint64 * creation_hash_key = g_malloc(sizeof(guint64));
+                       *creation_hash_key = hash_key;
+
+                       validator = uspoof_open (&uc_err);
+                       if (uc_err != U_ZERO_ERROR) {
+                               msg_err ("cannot init spoof checker: %s", u_errorName (uc_err));
+                               lua_pushboolean (L, false);
+                               uspoof_close(validator);
+                               return 1;
+                       }
+
+                       allowed_chars = uset_openEmpty();
+                       uset_addRange(allowed_chars, range_start, range_end);
+                       uspoof_setAllowedChars(validator, allowed_chars, &uc_err);
+
+                       uspoof_setChecks (validator,
+                               USPOOF_CHAR_LIMIT | USPOOF_ANY_CASE, &uc_err);
 
-               allowed_chars = uset_openEmpty();
-               uset_addRange(allowed_chars, range_start, range_end);
-               uspoof_setAllowedChars(spc_sgl, allowed_chars, &uc_err);
+                       uset_close(allowed_chars);
+
+                       if (uc_err != U_ZERO_ERROR) {
+                               msg_err ("Cannot configure uspoof: %s", u_errorName (uc_err));
+                               lua_pushboolean (L, false);
+                               uspoof_close(validator);
+                               return 1;
+                       }
+
+                       rspamd_lru_hash_insert(validators, creation_hash_key, validator, time(NULL), 0);
+               }
 
-               uspoof_setChecks (spc_sgl,
-                       USPOOF_CHAR_LIMIT | USPOOF_ANY_CASE, &uc_err);
-               ret = uspoof_checkUTF8 (spc_sgl, string_to_check, len_of_string, NULL, &uc_err);
-               uset_close(allowed_chars);
-               uspoof_close(spc_sgl);
+               ret = uspoof_checkUTF8 (validator, string_to_check, len_of_string, NULL, &uc_err);
        }
        else {
                return luaL_error (L, "invalid arguments");
diff --git a/test/lua/unit/rspamd_util.lua b/test/lua/unit/rspamd_util.lua
new file mode 100644 (file)
index 0000000..802b400
--- /dev/null
@@ -0,0 +1,67 @@
+-- Unit tests for two helpers exported by the `rspamd_util` Lua module:
+-- `is_utf_outside_range` and `get_string_stats`.
+context("Rspamd util for lua - check generic functions", function()
+    local util = require 'rspamd_util'
+
+    -- Table-driven cases for
+    -- util.is_utf_outside_range(input, range_start, range_end).
+    -- `result` is the value the call is expected to return. Every case uses
+    -- the code point range U+0000..U+017F: the Latin-only inputs (ASCII and
+    -- Polish diacritics) expect `false`, while every input that contains
+    -- Cyrillic letters expects `true`.
+    local cases = {
+        {
+            input = "test1",
+            result = false,
+            range_start = 0x0000,
+            range_end = 0x017f
+        },
+        {
+            input = "test test xxx",
+            result = false,
+            range_start = 0x0000,
+            range_end = 0x017f
+        },
+        {
+            input = "АбЫрвАлг",
+            result = true,
+            range_start = 0x0000,
+            range_end = 0x017f
+        },
+        {
+            input = "АбЫрвАлг example",
+            result = true,
+            range_start = 0x0000,
+            range_end = 0x017f
+        },
+        {
+            input = "example ąłśćżłóę",
+            result = false,
+            range_start = 0x0000,
+            range_end = 0x017f
+        },
+        {
+            input = "ąłśćżłóę АбЫрвАлг",
+            result = true,
+            range_start = 0x0000,
+            range_end = 0x017f
+        },
+    }
+
+    -- Register one test per table entry; the index is part of the test name
+    -- so that a failure points at the offending case.
+    for i, c in ipairs(cases) do
+        test("is_utf_outside_range, test case #" .. i, function()
+            local actual = util.is_utf_outside_range(c.input, c.range_start, c.range_end)
+
+            assert_equal(c.result, actual)
+        end)
+    end
+
+    -- Calls the function with 20 different ranges (U+0000..U+0001 up to
+    -- U+0000..U+0014). Nothing is asserted about the returned values: the
+    -- test only passes if none of the calls raises.
+    test("is_utf_outside_range, check cache", function ()
+        -- Must be `local`: a bare assignment would create a global that leaks
+        -- into every other test sharing this Lua state.
+        local cache_size = 20
+        for i = 1, cache_size do
+            util.is_utf_outside_range("a", 0x0000, 0x0000 + i)
+        end
+    end)
+
+    -- NOTE: despite the test name, no empty string is passed here.
+    -- assert_error is handed the bare function, so it is invoked without any
+    -- arguments and is expected to raise.
+    test("is_utf_outside_range, check empty string", function ()
+        assert_error(util.is_utf_outside_range)
+    end)
+
+    -- "this is test 99" contains 10 letters (4 + 2 + 4) and 2 digits.
+    test("get_string_stats, test case", function()
+        local res = util.get_string_stats("this is test 99")
+        assert_equal(res["letters"], 10)
+        assert_equal(res["digits"], 2)
+    end)
+end)