git.ipfire.org Git - thirdparty/rspamd.git/commitdiff
[Feature] Allow to use dictionaries for compression
author Vsevolod Stakhov <vsevolod@highsecure.ru>
Fri, 9 Sep 2016 10:20:29 +0000 (11:20 +0100)
committer Vsevolod Stakhov <vsevolod@highsecure.ru>
Fri, 9 Sep 2016 12:51:26 +0000 (13:51 +0100)
contrib/zstd/zstd.h
src/libserver/cfg_file.h
src/libserver/cfg_rcl.c
src/libserver/task.c
src/libutil/util.c
src/rspamd.h

index d768ded3550565678a64696f7b7ecff3b7bc2502..bd5d68b2700de1c12b1c4a6bcec6b9ade7b11e50 100644 (file)
@@ -232,6 +232,7 @@ ZSTDLIB_API size_t ZSTD_CStreamInSize(void);    /**< recommended size for input
 ZSTDLIB_API size_t ZSTD_CStreamOutSize(void);   /**< recommended size for output buffer */
 
 ZSTDLIB_API size_t ZSTD_initCStream(ZSTD_CStream* zcs, int compressionLevel);
+ZSTDLIB_API size_t ZSTD_initCStream_usingDict(ZSTD_CStream* zcs, const void* dict, size_t dictSize, int compressionLevel);
 ZSTDLIB_API size_t ZSTD_compressStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output, ZSTD_inBuffer* input);
 ZSTDLIB_API size_t ZSTD_flushStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output);
 ZSTDLIB_API size_t ZSTD_endStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output);
@@ -269,6 +270,7 @@ ZSTDLIB_API size_t ZSTD_DStreamOutSize(void);   /*!< recommended size for output
 
 ZSTDLIB_API size_t ZSTD_initDStream(ZSTD_DStream* zds);
 ZSTDLIB_API size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inBuffer* input);
+ZSTDLIB_API size_t ZSTD_initDStream_usingDict(ZSTD_DStream* zds, const void* dict, size_t dictSize);
 
 
 
@@ -408,7 +410,6 @@ ZSTDLIB_API size_t ZSTD_sizeof_DCtx(const ZSTD_DCtx* dctx);
 /*======   compression   ======*/
 
 ZSTDLIB_API ZSTD_CStream* ZSTD_createCStream_advanced(ZSTD_customMem customMem);
-ZSTDLIB_API size_t ZSTD_initCStream_usingDict(ZSTD_CStream* zcs, const void* dict, size_t dictSize, int compressionLevel);
 ZSTDLIB_API size_t ZSTD_initCStream_advanced(ZSTD_CStream* zcs, const void* dict, size_t dictSize,
                                  ZSTD_parameters params, unsigned long long pledgedSrcSize);
 ZSTDLIB_API size_t ZSTD_sizeof_CStream(const ZSTD_CStream* zcs);
@@ -419,7 +420,7 @@ ZSTDLIB_API size_t ZSTD_sizeof_CStream(const ZSTD_CStream* zcs);
 typedef enum { ZSTDdsp_maxWindowSize } ZSTD_DStreamParameter_e;
 
 ZSTDLIB_API ZSTD_DStream* ZSTD_createDStream_advanced(ZSTD_customMem customMem);
-ZSTDLIB_API size_t ZSTD_initDStream_usingDict(ZSTD_DStream* zds, const void* dict, size_t dictSize);
+
 ZSTDLIB_API size_t ZSTD_setDStreamParameter(ZSTD_DStream* zds, ZSTD_DStreamParameter_e paramType, unsigned paramValue);
 ZSTDLIB_API size_t ZSTD_sizeof_DStream(const ZSTD_DStream* zds);
 
index 7ce7f98a44b946937b2c3a22e0d3e42a51e012c8..9a0fa0d223821003c5aabeb40b0cc9efd2f62d0b 100644 (file)
@@ -417,7 +417,9 @@ struct rspamd_config {
        struct rspamd_config_post_load_script *on_load; /**< list of scripts executed on config load                    */
 
        gchar *ssl_ca_path;                                                             /**< path to CA certs                                                                   */
-       gchar *ssl_ciphers;                                                     /**< set of preferred ciphers                                                   */
+       gchar *ssl_ciphers;                                                             /**< set of preferred ciphers                                                   */
+       gchar *zstd_input_dictionary;                                   /**< path to zstd input dictionary                                              */
+       gchar *zstd_output_dictionary;                                  /**< path to zstd output dictionary                                             */
 
        ref_entry_t ref;                                                                /**< reference counter                                                                  */
 };
index 0cca28575660e4f0292f101c8dac83ddef1abe04..c89392207254087f3d2b9592af0f031d0e5c58f2 100644 (file)
@@ -1974,6 +1974,19 @@ rspamd_rcl_config_init (struct rspamd_config *cfg)
                        G_STRUCT_OFFSET (struct rspamd_config, max_message),
                        RSPAMD_CL_FLAG_INT_SIZE,
                        "Maximum size of the message to be scanned");
+       rspamd_rcl_add_default_handler (sub,
+                       "zstd_input_dictionary",
+                       rspamd_rcl_parse_struct_string,
+                       G_STRUCT_OFFSET (struct rspamd_config, zstd_input_dictionary),
+                       RSPAMD_CL_FLAG_STRING_PATH,
+                       "Dictionary for zstd inbound protocol compression");
+       rspamd_rcl_add_default_handler (sub,
+                       "zstd_output_dictionary",
+                       rspamd_rcl_parse_struct_string,
+                       G_STRUCT_OFFSET (struct rspamd_config, zstd_output_dictionary),
+                       RSPAMD_CL_FLAG_STRING_PATH,
+                       "Dictionary for outbound zstd compression");
+
        /* New DNS configuration */
        ssub = rspamd_rcl_add_section_doc (&sub->subsections, "dns", NULL, NULL,
                        UCL_OBJECT, FALSE, TRUE,
index f24ee08587a5c627e845160e11c56b987d5cd9fa..d4b5cb696a89afa89b9d7ab33fac4b4f8ca566cb 100644 (file)
@@ -492,6 +492,44 @@ rspamd_task_load_message (struct rspamd_task *task,
                        ZSTD_outBuffer zout;
                        guchar *out;
                        gsize outlen, r;
+                       gulong dict_id;
+
+                       tok = rspamd_task_get_request_header (task, "dictionary");
+
+                       if (tok != NULL) {
+                               /* We need to use custom dictionary */
+                               if (!rspamd_strtoul (tok->begin, tok->len, &dict_id)) {
+                                       g_set_error (&task->err, rspamd_task_quark(), RSPAMD_PROTOCOL_ERROR,
+                                                       "Non numeric dictionary");
+
+                                       return FALSE;
+                               }
+
+                               if (!task->cfg->libs_ctx->in_dict) {
+                                       g_set_error (&task->err, rspamd_task_quark(), RSPAMD_PROTOCOL_ERROR,
+                                                       "Unknown dictionary, undefined locally");
+
+                                       return FALSE;
+                               }
+
+                               if (task->cfg->libs_ctx->in_dict->id != dict_id) {
+                                       g_set_error (&task->err, rspamd_task_quark(), RSPAMD_PROTOCOL_ERROR,
+                                                       "Unknown dictionary, invalid dictionary id");
+
+                                       return FALSE;
+                               }
+
+                               zstream = ZSTD_createDStream ();
+                               g_assert (zstream != NULL);
+                               g_assert (!ZSTD_isError (ZSTD_initDStream_usingDict(zstream,
+                                               task->cfg->libs_ctx->in_dict->dict,
+                                               task->cfg->libs_ctx->in_dict->size)));
+                       }
+                       else {
+                               zstream = ZSTD_createDStream ();
+                               g_assert (zstream != NULL);
+                               g_assert (!ZSTD_isError (ZSTD_initDStream (zstream)));
+                       }
 
                        zin.pos = 0;
                        zin.src = start;
@@ -502,9 +540,6 @@ rspamd_task_load_message (struct rspamd_task *task,
                        }
 
                        out = g_malloc (outlen);
-                       zstream = ZSTD_createDStream ();
-                       g_assert (zstream != NULL);
-                       g_assert (!ZSTD_isError (ZSTD_initDStream (zstream)));
                        zout.dst = out;
                        zout.pos = 0;
                        zout.size = outlen;
index 889cc1c6c80d97a7deea60c74b94815ad629290a..fbc06c039e39c81772464667a8097c8faa42bb27 100644 (file)
@@ -23,6 +23,7 @@
 #include "ottery.h"
 #include "cryptobox.h"
 #include "libutil/map.h"
+#include "contrib/zstd/zdict.h"
 
 #ifdef HAVE_OPENSSL
 #include <openssl/rand.h>
@@ -2090,6 +2091,38 @@ rspamd_init_libs (void)
        return ctx;
 }
 
+/*
+ * Map the zstd dictionary file at `path` and wrap it in a descriptor.
+ *
+ * The file is mapped read-only with rspamd_file_xmap() and its dictionary
+ * id is read with ZDICT_getDictID().
+ *
+ * Returns a newly allocated descriptor, or NULL when the file cannot be
+ * mapped or ZDICT_getDictID() yields 0 for its content. A non-NULL result
+ * is released with rspamd_free_zstd_dictionary().
+ */
+static struct zstd_dictionary *
+rspamd_open_zstd_dictionary (const char *path)
+{
+       struct zstd_dictionary *dict;
+
+       dict = g_slice_alloc0 (sizeof (*dict));
+       dict->dict = rspamd_file_xmap (path, PROT_READ, &dict->size);
+
+       if (dict->dict == NULL) {
+               g_slice_free1 (sizeof (*dict), dict);
+               return NULL;
+       }
+
+       dict->id = ZDICT_getDictID (dict->dict, dict->size);
+
+       if (dict->id == 0) {
+               /*
+                * The file was mapped but is rejected: the mapping has to be
+                * dropped here, as the descriptor that owns it is discarded.
+                * The cast only removes the const qualifier of the field.
+                */
+               munmap ((gpointer)dict->dict, dict->size);
+               g_slice_free1 (sizeof (*dict), dict);
+               return NULL;
+       }
+
+       return dict;
+}
+
+/*
+ * Release a descriptor created by rspamd_open_zstd_dictionary(): unmap the
+ * dictionary data and free the descriptor itself. NULL is accepted and
+ * ignored.
+ */
+static void
+rspamd_free_zstd_dictionary (struct zstd_dictionary *dict)
+{
+       if (dict) {
+               /* munmap() takes a non-const pointer while the field is const */
+               munmap ((gpointer)dict->dict, dict->size);
+               g_slice_free1 (sizeof (*dict), dict);
+       }
+}
+
 void
 rspamd_config_libs (struct rspamd_external_libs_ctx *ctx,
                struct rspamd_config *cfg)
@@ -2131,6 +2164,23 @@ rspamd_config_libs (struct rspamd_external_libs_ctx *ctx,
                if (ctx->libmagic) {
                        magic_load (ctx->libmagic, cfg->magic_file);
                }
+
+               if (cfg->zstd_input_dictionary) {
+                       ctx->in_dict = rspamd_open_zstd_dictionary (cfg->zstd_input_dictionary);
+
+                       if (ctx->in_dict == NULL) {
+                               msg_err_config ("cannot open zstd dictionary in %s",
+                                               cfg->zstd_input_dictionary);
+                       }
+               }
+               if (cfg->zstd_output_dictionary) {
+                       ctx->out_dict = rspamd_open_zstd_dictionary (cfg->zstd_output_dictionary);
+
+                       if (ctx->out_dict == NULL) {
+                               msg_err_config ("cannot open zstd dictionary in %s",
+                                               cfg->zstd_output_dictionary);
+                       }
+               }
        }
 }
 
@@ -2151,6 +2201,8 @@ rspamd_deinit_libs (struct rspamd_external_libs_ctx *ctx)
                SSL_CTX_free (ctx->ssl_ctx);
 #endif
                rspamd_inet_library_destroy ();
+               rspamd_free_zstd_dictionary (ctx->in_dict);
+               rspamd_free_zstd_dictionary (ctx->out_dict);
                g_slice_free1 (sizeof (*ctx), ctx);
        }
 }
index e99e4c18dcb0beeb098cf4f6d41c79dcb21b60b8..545681bf57921f47e9d18ae689e1546f63186e8f 100644 (file)
@@ -299,12 +299,20 @@ struct controller_session {
        struct event_base *ev_base;                                 /**< Event base                                                                             */
 };
 
+/* Descriptor of a zstd dictionary loaded from a file */
+struct zstd_dictionary {
+       const void *dict;   /**< dictionary content                               */
+       gsize size;         /**< length of the content in bytes                   */
+       guint id;           /**< dictionary id as returned by ZDICT_getDictID     */
+};
+
 struct rspamd_external_libs_ctx {
        magic_t libmagic;
        radix_compressed_t **local_addrs;
        struct rspamd_cryptobox_library_ctx *crypto_ctx;
        struct ottery_config *ottery_cfg;
        SSL_CTX *ssl_ctx;
+       struct zstd_dictionary *in_dict;        /**< opened from cfg->zstd_input_dictionary; NULL if it could not be opened */
+       struct zstd_dictionary *out_dict;       /**< opened from cfg->zstd_output_dictionary; NULL if it could not be opened */
        ref_entry_t ref;
 };