]> git.ipfire.org Git - thirdparty/dovecot/core.git/commitdiff
fts: Add headers filters
authorMarco Bettini <marco.bettini@open-xchange.com>
Fri, 10 Dec 2021 10:32:37 +0000 (11:32 +0100)
committertimo.sirainen <timo.sirainen@open-xchange.com>
Wed, 15 Dec 2021 21:56:18 +0000 (21:56 +0000)
src/plugins/fts/fts-api-private.h
src/plugins/fts/fts-api.c
src/plugins/fts/fts-build-mail.c

index a2eac7a099a1b8e5b10c95188f9ac7c4d9434313..a07056432b38c43cb3f1e90bb9194b14db0b81b5 100644 (file)
@@ -69,12 +69,21 @@ enum fts_backend_flags {
        FTS_BACKEND_FLAG_TOKENIZED_INPUT        = 0x10
 };
 
+struct fts_header_filters { /* header include/exclude lists kept inside each struct fts_backend */
+       pool_t pool; /* created in fts_header_filters_init(), unreferenced in fts_header_filters_deinit() */
+       ARRAY_TYPE(const_string) includes; /* entries read from the fts_header_includes setting */
+       ARRAY_TYPE(const_string) excludes; /* entries read from the fts_header_excludes setting */
+       bool loaded:1; /* set once load_header_filters() has read both settings */
+       bool exclude_is_default:1; /* set when an fts_header_excludes entry begins with '*' */
+};
+
 struct fts_backend {
        const char *name;
        enum fts_backend_flags flags;
 
        struct fts_backend_vfuncs v;
        struct mail_namespace *ns;
+       struct fts_header_filters header_filters; /* set up by fts_backend_init(), filled on first use by load_header_filters() */
 
        bool updating:1;
 };
index 00d16c8f2f2ea4a77a8e7786716d7517d9518140..a6ea71672d41ddbe53ae76c6b48d69ae7ed23abe 100644 (file)
@@ -58,6 +58,23 @@ fts_backend_class_lookup(const char *backend_name)
        return NULL;
 }
 
+/* Creates the memory pool owned by the backend's header filters and
+ * initializes the (still empty) include and exclude arrays from that pool.
+ * The lists themselves are filled later, when the settings are first needed.
+ */
+static void
+fts_header_filters_init(struct fts_backend *backend)
+{
+       struct fts_header_filters *hf = &backend->header_filters;
+
+       hf->pool = pool_alloconly_create(MEMPOOL_GROWING"fts_header_filters",
+                                        256);
+       p_array_init(&hf->includes, hf->pool, 8);
+       p_array_init(&hf->excludes, hf->pool, 8);
+}
+
+/* Drops the reference to the pool created by fts_header_filters_init(). */
+static void
+fts_header_filters_deinit(struct fts_backend *backend)
+{
+       struct fts_header_filters *hf = &backend->header_filters;
+
+       pool_unref(&hf->pool);
+}
+
 int fts_backend_init(const char *backend_name, struct mail_namespace *ns,
                     const char **error_r, struct fts_backend **backend_r)
 {
@@ -76,6 +93,8 @@ int fts_backend_init(const char *backend_name, struct mail_namespace *ns,
                i_free(backend);
                return -1;
        }
+
+       fts_header_filters_init(backend);
        *backend_r = backend;
        return 0;
 }
@@ -84,6 +103,7 @@ void fts_backend_deinit(struct fts_backend **_backend)
 {
        struct fts_backend *backend = *_backend;
 
+       fts_header_filters_deinit(backend);
        *_backend = NULL;
        backend->v.deinit(backend);
 }
index 0e86231276e6e10833ae80e454f143fa41487865..523f2b50197cc62403f80e0e62925a65e6199274 100644 (file)
@@ -476,6 +476,84 @@ static int fts_body_parser_finish(struct fts_mail_build_context *ctx,
        return 0;
 }
 
+/* Reads the plugin setting named by key for the backend's user, splits it on
+ * spaces and appends each entry, passed through str_lcase(), to list. The
+ * list is sorted with i_strcmp_p afterwards.
+ *
+ * *matches_all_r is set to TRUE when an entry beginning with '*' is seen;
+ * that entry is still appended, but the remaining entries are not read. It
+ * is FALSE otherwise, including when the setting is unset or empty.
+ *
+ * NOTE(review): list is received by value; the pushes presumably reach the
+ * caller's array because the handle shares its underlying storage - confirm.
+ */
+static void
+load_header_filter(const char *key, struct fts_backend *backend,
+                  ARRAY_TYPE(const_string) list, bool *matches_all_r)
+{
+       const char *setting = mail_user_plugin_getenv(backend->ns->user, key);
+       char **names;
+       unsigned int i;
+
+       *matches_all_r = FALSE;
+       if (setting == NULL || setting[0] == '\0')
+               return;
+
+       /* the split strings live in the filters' pool, like the arrays */
+       names = p_strsplit_spaces(backend->header_filters.pool, setting, " ");
+       for (i = 0; names[i] != NULL; i++) {
+               const char *name = str_lcase(names[i]);
+
+               array_push_back(&list, &name);
+               if (name[0] == '*') {
+                       *matches_all_r = TRUE;
+                       break;
+               }
+       }
+       array_sort(&list, i_strcmp_p);
+}
+
+/* Returns the backend's header filters, reading the fts_header_includes and
+ * fts_header_excludes settings on the first call only. Later calls return
+ * the already loaded filters unchanged.
+ */
+static struct fts_header_filters *
+load_header_filters(struct fts_backend *backend)
+{
+       struct fts_header_filters *hf = &backend->header_filters;
+       bool includes_all, excludes_all;
+
+       if (hf->loaded)
+               return hf;
+
+       /* the match-all result of the include list is deliberately unused */
+       load_header_filter("fts_header_includes", backend,
+                          hf->includes, &includes_all);
+       load_header_filter("fts_header_excludes", backend,
+                          hf->excludes, &excludes_all);
+
+       hf->exclude_is_default = excludes_all;
+       hf->loaded = TRUE;
+       return hf;
+}
+
+/* Compares two strings, where the second one (the list item) may end with
+ * the wildcard '*'. If, after the common prefix, the item side consists of
+ * exactly a trailing '*', zero (match) is returned regardless of the
+ * remaining characters of the key.
+ *
+ * In every other case the result follows the same lexicographic order as
+ * i_strcmp_p() and strcmp(), which is why both bytes are cast to unsigned
+ * char before being subtracted.
+ *
+ * NOTE(review): the lists are sorted with i_strcmp_p but searched with this
+ * comparator. When a list holds a wildcard entry together with a longer
+ * entry sharing its prefix (e.g. "x-*" and "x-spam"), a binary search may
+ * step past the wildcard entry - confirm whether that needs handling.
+ */
+static int ATTR_PURE
+header_prefix_cmp(const char *const *pkey, const char *const *pitem)
+{
+       const char *k = *pkey;
+       const char *entry = *pitem;
+
+       /* skip the prefix both strings have in common */
+       while (*k != '\0' && *k == *entry) {
+               k++;
+               entry++;
+       }
+
+       if (entry[0] == '*' && entry[1] == '\0')
+               return 0;
+       return (unsigned char)*k - (unsigned char)*entry;
+}
+
+/* Decides whether the header carried by block should be indexed.
+ *
+ * The lower-cased header name is looked up in the include list first; a hit
+ * there always means TRUE. Otherwise the header is rejected when excluding
+ * is the default or when the name is found in the exclude list. Anything
+ * left over is indexed.
+ */
+static bool
+is_header_indexable(struct message_block *block, struct fts_backend *backend)
+{
+       struct fts_header_filters *hf = load_header_filters(backend);
+       bool indexable = TRUE;
+
+       T_BEGIN {
+               /* the lower-cased copy is only used inside this frame */
+               const char *name = t_str_lcase(block->hdr->name);
+
+               if (array_bsearch(&hf->includes, &name, header_prefix_cmp) == NULL) {
+                       if (hf->exclude_is_default ||
+                           array_bsearch(&hf->excludes, &name, header_prefix_cmp) != NULL)
+                               indexable = FALSE;
+               }
+       } T_END;
+       return indexable;
+}
+
 static int
 fts_build_mail_real(struct fts_backend_update_context *update_ctx,
                    struct mail *mail,
@@ -571,10 +649,12 @@ fts_build_mail_real(struct fts_backend_update_context *update_ctx,
                        continue;
 
                if (block.hdr != NULL) {
-                       fts_parse_mail_header(&ctx, &raw_block);
-                       if (fts_build_mail_header(&ctx, &block) < 0) {
-                               ret = -1;
-                               break;
+                       if (is_header_indexable(&block, update_ctx->backend)) {
+                               fts_parse_mail_header(&ctx, &raw_block);
+                               if (fts_build_mail_header(&ctx, &block) < 0) {
+                                       ret = -1;
+                                       break;
+                               }
                        }
                } else if (block.size == 0) {
                        /* end of headers */