git.ipfire.org Git - thirdparty/dovecot/core.git/commitdiff
fts: Don't pass NUL bytes to FTS backend. It can confuse them.
author Timo Sirainen <tss@iki.fi>
Wed, 3 Aug 2011 15:59:07 +0000 (18:59 +0300)
committer Timo Sirainen <tss@iki.fi>
Wed, 3 Aug 2011 15:59:07 +0000 (18:59 +0300)
src/plugins/fts/fts-build.c
src/plugins/fts/fts-parser.c
src/plugins/fts/fts-parser.h

index dddc45a033ae3e17a140f71b4c89f3f66fc44a0b..03ebf7f062648ac6aa15b99397655b06b25987e7 100644 (file)
@@ -56,6 +56,31 @@ static void fts_parse_mail_header(struct fts_storage_build_context *ctx,
                fts_build_parse_content_disposition(ctx, hdr);
 }
 
+/* Feeds an unstructured header's full value to the FTS backend.
+
+   NUL bytes can confuse backends, so they are replaced with spaces. The
+   header data is const, so the replacement is done in a temporary copy that
+   is allocated only once the first NUL is seen; NUL-free headers are passed
+   through without copying. */
+static void
+fts_build_unstructured_header(struct fts_storage_build_context *ctx,
+                             const struct message_header_line *hdr)
+{
+       const unsigned char *data = hdr->full_value;
+       unsigned char *buf = NULL;
+       unsigned int i;
+
+       /* @UNSAFE: if there are any NULs, replace them with spaces */
+       for (i = 0; i < hdr->full_value_len; i++) {
+               if (data[i] == '\0') {
+                       if (buf == NULL) {
+                               /* first NUL: start the copy with everything
+                                  that preceded it */
+                               buf = i_malloc(hdr->full_value_len);
+                               memcpy(buf, data, i);
+                       }
+                       buf[i] = ' ';
+               } else if (buf != NULL) {
+                       buf[i] = data[i];
+               }
+       }
+       /* use the sanitized copy when one was made; data still contains
+          the NULs */
+       (void)fts_backend_update_build_more(ctx->update_ctx,
+                                           buf != NULL ? buf : data,
+                                           hdr->full_value_len);
+       i_free(buf);
+}
+
 static void fts_build_mail_header(struct fts_storage_build_context *ctx,
                                  const struct message_block *block)
 {
@@ -78,9 +103,8 @@ static void fts_build_mail_header(struct fts_storage_build_context *ctx,
 
        if (!message_header_is_address(hdr->name)) {
                /* regular unstructured header */
-               (void)fts_backend_update_build_more(ctx->update_ctx,
-                                                   hdr->full_value,
-                                                   hdr->full_value_len);
+               // FIXME: get rid of potential NULs
+               fts_build_unstructured_header(ctx, hdr);
        } else T_BEGIN {
                /* message address. normalize it to give better
                   search results. */
@@ -136,6 +160,8 @@ fts_build_body_begin(struct fts_storage_build_context *ctx, bool *binary_body_r)
                *binary_body_r = TRUE;
                key.type = FTS_BACKEND_BUILD_KEY_BODY_PART_BINARY;
        }
+       if (ctx->body_parser == NULL)
+               ctx->body_parser = fts_parser_text_init();
        key.body_content_type = content_type;
        key.body_content_disposition = ctx->content_disposition;
        return fts_backend_update_set_build_key(ctx->update_ctx, &key);
index 5e8a59d4b6b4c3431a94967268c84c5038654be5..6c7362312cd685e079f5612e5d13aaddbaa2dc76 100644 (file)
@@ -26,11 +26,41 @@ bool fts_parser_init(struct mail_user *user,
        return FALSE;
 }
 
+/* Allocates a bare parser for plain text. No v.more/v.deinit callbacks are
+   filled in here; fts_parser_more() and fts_parser_deinit() check for NULL
+   callbacks (assumes i_new() zero-fills the struct - TODO confirm). */
+struct fts_parser *fts_parser_text_init(void)
+{
+       struct fts_parser *parser;
+
+       parser = i_new(struct fts_parser, 1);
+       return parser;
+}
+
+/* Returns TRUE if any of the first size bytes of data is a NUL byte,
+   FALSE otherwise (including when size is 0). */
+static bool data_has_nuls(const unsigned char *data, size_t size)
+{
+       /* the answer doesn't depend on scan order, so count size down
+          instead of keeping a separate index */
+       while (size > 0) {
+               size--;
+               if (data[size] == '\0')
+                       return TRUE;
+       }
+       return FALSE;
+}
+
+/* Overwrites, in place, every NUL byte in the data region reported by
+   buffer_get_modifiable_data() for buf with a space. */
+static void replace_nul_bytes(buffer_t *buf)
+{
+       unsigned char *bytes;
+       size_t left;
+
+       bytes = buffer_get_modifiable_data(buf, &left);
+       /* walk from the end towards the start; the resulting contents are
+          the same regardless of direction */
+       while (left > 0) {
+               left--;
+               if (bytes[left] == '\0')
+                       bytes[left] = ' ';
+       }
+}
+
 void fts_parser_more(struct fts_parser *parser, struct message_block *block)
 {
-       parser->v.more(parser, block);
+       if (parser->v.more != NULL)
+               parser->v.more(parser, block);
 
-       if (!uni_utf8_data_is_valid(block->data, block->size)) {
+       if (!uni_utf8_data_is_valid(block->data, block->size) ||
+           data_has_nuls(block->data, block->size)) {
                /* output isn't valid UTF-8. make it. */
                if (parser->utf8_output == NULL) {
                        parser->utf8_output =
@@ -40,6 +70,7 @@ void fts_parser_more(struct fts_parser *parser, struct message_block *block)
                }
                (void)uni_utf8_get_valid_data(block->data, block->size,
                                              parser->utf8_output);
+               replace_nul_bytes(parser->utf8_output);
                block->data = parser->utf8_output->data;
                block->size = parser->utf8_output->used;
        }
@@ -53,5 +84,8 @@ void fts_parser_deinit(struct fts_parser **_parser)
 
        if (parser->utf8_output != NULL)
                buffer_free(&parser->utf8_output);
-       parser->v.deinit(parser);
+       if (parser->v.deinit != NULL)
+               parser->v.deinit(parser);
+       else
+               i_free(parser);
 }
index 75a7bfadd4f3de77a3e028f5ec2b956a22fbe631..6b8e709d070f31d77006137b88179d151cf49e06 100644 (file)
@@ -23,6 +23,8 @@ extern struct fts_parser_vfuncs fts_parser_script;
 bool fts_parser_init(struct mail_user *user,
                     const char *content_type, const char *content_disposition,
                     struct fts_parser **parser_r);
+struct fts_parser *fts_parser_text_init(void);
+
 /* The parser is initially called with message body blocks. Once message is
    finished, it's still called with incoming size=0 while the parser increases
    it to non-zero. */