]> git.ipfire.org Git - thirdparty/rspamd.git/commitdiff
[Feature] Use shingles for images fuzzying
author Vsevolod Stakhov <vsevolod@highsecure.ru>
Fri, 9 Dec 2016 12:50:51 +0000 (12:50 +0000)
committer Vsevolod Stakhov <vsevolod@highsecure.ru>
Fri, 9 Dec 2016 12:51:08 +0000 (12:51 +0000)
src/plugins/fuzzy_check.c
test/rspamd_shingles_test.c

index f37ef45d08c8391c5dc1da775e78104869cfbb60..1f3561706ab24e2b311f769cc51dba17f2448c2c 100644 (file)
@@ -1242,7 +1242,7 @@ fuzzy_cmd_from_text_part (struct fuzzy_rule *rule,
                msg_debug_pool ("loading shingles of type %s with key %*xs",
                                rule->algorithm_str,
                                16, rule->shingles_key->str);
-               sh = rspamd_shingles_generate (words,
+               sh = rspamd_shingles_from_text (words,
                                rule->shingles_key->str, pool,
                                rspamd_shingles_default_filter, NULL,
                                rule->alg);
@@ -1299,9 +1299,8 @@ fuzzy_cmd_from_image_part (struct fuzzy_rule *rule,
 {
        struct rspamd_fuzzy_shingle_cmd *shcmd;
        struct rspamd_fuzzy_encrypted_shingle_cmd *encshcmd, *cached;
-       guint i;
        struct fuzzy_cmd_io *io;
-       guint64 shingles[RSPAMD_SHINGLE_SIZE];
+       struct rspamd_shingle *sh;
 
        cached = fuzzy_cmd_get_cached (rule, pool, img);
 
@@ -1318,25 +1317,23 @@ fuzzy_cmd_from_image_part (struct fuzzy_rule *rule,
                /*
                 * Generate shingles
                 */
-               G_STATIC_ASSERT (G_N_ELEMENTS (img->fuzzy_sig) == RSPAMD_SHINGLE_SIZE);
-
-               for (i = 0; i < RSPAMD_SHINGLE_SIZE; i ++) {
-                       shingles[i] = rspamd_cryptobox_fast_hash_specific (
-                                       RSPAMD_CRYPTOBOX_MUMHASH,
-                                       (const guchar *)&img->fuzzy_sig[i],
-                                       sizeof (img->fuzzy_sig[i]), 0);
+               sh = rspamd_shingles_from_image (img->dct,
+                               rule->shingles_key->str, pool,
+                               rspamd_shingles_default_filter, NULL,
+                               rule->alg);
+               if (sh != NULL) {
+                       memcpy (&shcmd->sgl, sh->hashes, sizeof (shcmd->sgl));
+                       shcmd->basic.shingles_count = RSPAMD_SHINGLE_SIZE;
                }
+
                rspamd_cryptobox_hash (shcmd->basic.digest,
-                               (const guchar *)img->fuzzy_sig, sizeof (img->fuzzy_sig),
+                               (const guchar *)img->dct, sizeof (gdouble) * 64 * 64,
                                rule->hash_key->str, rule->hash_key->len);
 
                msg_debug_pool ("loading shingles of type %s with key %*xs",
                                rule->algorithm_str,
                                16, rule->shingles_key->str);
 
-               memcpy (&shcmd->sgl, shingles, sizeof (shcmd->sgl));
-               shcmd->basic.shingles_count = RSPAMD_SHINGLE_SIZE;
-
                /*
                 * We always save encrypted command as it can handle both
                 * encrypted and unencrypted requests.
@@ -1605,9 +1602,10 @@ fuzzy_insert_result (struct fuzzy_client_session *session,
        nval *= rep->prob;
        msg_info_task (
                        "found fuzzy hash %*xs with weight: "
-                       "%.2f, in list: %s:%d%s",
+                       "%.2f, probability %.2f, in list: %s:%d%s",
                        (gint)sizeof (cmd->digest), cmd->digest,
                        nval,
+                       (gdouble)rep->prob,
                        symbol,
                        rep->flag,
                        map == NULL ? "(unknown)" : "");
index 401a26309274c5fdece29d018c76451689d2f6b1..e1367cca4c1ac7d5e89c37cc8a89c6bdb731929f 100644 (file)
@@ -117,11 +117,11 @@ test_case (gsize cnt, gsize max_len, gdouble perm_factor,
        ottery_rand_bytes (key, sizeof (key));
        input = generate_fuzzy_words (cnt, max_len);
        ts1 = rspamd_get_virtual_ticks ();
-       sgl = rspamd_shingles_generate (input, key, NULL,
+       sgl = rspamd_shingles_from_text (input, key, NULL,
                        rspamd_shingles_default_filter, NULL, alg);
        ts2 = rspamd_get_virtual_ticks ();
        permute_vector (input, perm_factor);
-       sgl_permuted = rspamd_shingles_generate (input, key, NULL,
+       sgl_permuted = rspamd_shingles_from_text (input, key, NULL,
                        rspamd_shingles_default_filter, NULL, alg);
 
        res = rspamd_shingles_compare (sgl, sgl_permuted);
@@ -203,28 +203,28 @@ rspamd_shingles_test_func (void)
                g_array_append_val (input, tok);
        }
 
-       sgl = rspamd_shingles_generate (input, key, NULL,
+       sgl = rspamd_shingles_from_text (input, key, NULL,
                                rspamd_shingles_default_filter, NULL, RSPAMD_SHINGLES_OLD);
        for (i = 0; i < RSPAMD_SHINGLE_SIZE; i ++) {
                g_assert (sgl->hashes[i] == expected_old[i]);
        }
        g_free (sgl);
 
-       sgl = rspamd_shingles_generate (input, key, NULL,
+       sgl = rspamd_shingles_from_text (input, key, NULL,
                        rspamd_shingles_default_filter, NULL, RSPAMD_SHINGLES_XXHASH);
        for (i = 0; i < RSPAMD_SHINGLE_SIZE; i ++) {
                g_assert (sgl->hashes[i] == expected_xxhash[i]);
        }
        g_free (sgl);
 
-       sgl = rspamd_shingles_generate (input, key, NULL,
+       sgl = rspamd_shingles_from_text (input, key, NULL,
                        rspamd_shingles_default_filter, NULL, RSPAMD_SHINGLES_MUMHASH);
        for (i = 0; i < RSPAMD_SHINGLE_SIZE; i ++) {
                g_assert (sgl->hashes[i] == expected_mumhash[i]);
        }
        g_free (sgl);
 
-       sgl = rspamd_shingles_generate (input, key, NULL,
+       sgl = rspamd_shingles_from_text (input, key, NULL,
                        rspamd_shingles_default_filter, NULL, RSPAMD_SHINGLES_FAST);
        for (i = 0; i < RSPAMD_SHINGLE_SIZE; i ++) {
                g_assert (sgl->hashes[i] == expected_fasthash[i]);