fts_build_parse_content_disposition(ctx, hdr);
}
+/* Feed the full value of an unstructured header to the FTS backend.
+   Any NUL bytes in the value are replaced with spaces in a temporary
+   copy, and that copy is what gets sent; hdr itself is never modified.
+   The backend's return value is intentionally ignored. */
+static void
+fts_build_unstructured_header(struct fts_storage_build_context *ctx,
+			      const struct message_header_line *hdr)
+{
+	const unsigned char *data = hdr->full_value;
+	unsigned char *buf = NULL;
+	size_t i;
+
+	/* @UNSAFE: if there are any NULs, replace them with spaces */
+	for (i = 0; i < hdr->full_value_len; i++) {
+		if (data[i] == '\0') {
+			if (buf == NULL) {
+				/* first NUL seen: start a private copy.
+				   data keeps pointing to the original
+				   bytes so the copying below stays valid. */
+				buf = i_malloc(hdr->full_value_len);
+				memcpy(buf, data, i);
+			}
+			buf[i] = ' ';
+		} else if (buf != NULL) {
+			buf[i] = data[i];
+		}
+	}
+	/* send the sanitized copy if one had to be made, otherwise the
+	   original value (it contained no NULs) */
+	(void)fts_backend_update_build_more(ctx->update_ctx,
+					    buf != NULL ? buf : data,
+					    hdr->full_value_len);
+	i_free(buf);
+}
+
static void fts_build_mail_header(struct fts_storage_build_context *ctx,
const struct message_block *block)
{
if (!message_header_is_address(hdr->name)) {
/* regular unstructured header */
- (void)fts_backend_update_build_more(ctx->update_ctx,
- hdr->full_value,
- hdr->full_value_len);
+ // FIXME: get rid of potential NULs
+ fts_build_unstructured_header(ctx, hdr);
} else T_BEGIN {
/* message address. normalize it to give better
search results. */
*binary_body_r = TRUE;
key.type = FTS_BACKEND_BUILD_KEY_BODY_PART_BINARY;
}
+ if (ctx->body_parser == NULL)
+ ctx->body_parser = fts_parser_text_init();
key.body_content_type = content_type;
key.body_content_disposition = ctx->content_disposition;
return fts_backend_update_set_build_key(ctx->update_ctx, &key);
return FALSE;
}
+/* Allocate a plain-text parser: a bare struct fts_parser obtained from
+   i_new(), with no parser-specific setup performed here. */
+struct fts_parser *fts_parser_text_init(void)
+{
+	struct fts_parser *parser;
+
+	parser = i_new(struct fts_parser, 1);
+	return parser;
+}
+
+/* Returns TRUE if any of the first size bytes of data is a NUL byte,
+   FALSE otherwise (including when size is 0). */
+static bool data_has_nuls(const unsigned char *data, size_t size)
+{
+	const unsigned char *p = data;
+	size_t left = size;
+
+	while (left > 0) {
+		if (*p == '\0')
+			return TRUE;
+		p++;
+		left--;
+	}
+	return FALSE;
+}
+
+/* Overwrite every NUL byte currently stored in buf with a space,
+   in place. */
+static void replace_nul_bytes(buffer_t *buf)
+{
+	size_t len, pos = 0;
+	unsigned char *bytes = buffer_get_modifiable_data(buf, &len);
+
+	while (pos < len) {
+		if (bytes[pos] == '\0')
+			bytes[pos] = ' ';
+		pos++;
+	}
+}
+
void fts_parser_more(struct fts_parser *parser, struct message_block *block)
{
- parser->v.more(parser, block);
+ if (parser->v.more != NULL)
+ parser->v.more(parser, block);
- if (!uni_utf8_data_is_valid(block->data, block->size)) {
+ if (!uni_utf8_data_is_valid(block->data, block->size) ||
+ data_has_nuls(block->data, block->size)) {
/* output isn't valid UTF-8. make it. */
if (parser->utf8_output == NULL) {
parser->utf8_output =
}
(void)uni_utf8_get_valid_data(block->data, block->size,
parser->utf8_output);
+ replace_nul_bytes(parser->utf8_output);
block->data = parser->utf8_output->data;
block->size = parser->utf8_output->used;
}
if (parser->utf8_output != NULL)
buffer_free(&parser->utf8_output);
- parser->v.deinit(parser);
+ if (parser->v.deinit != NULL)
+ parser->v.deinit(parser);
+ else
+ i_free(parser);
}
bool fts_parser_init(struct mail_user *user,
const char *content_type, const char *content_disposition,
struct fts_parser **parser_r);
+struct fts_parser *fts_parser_text_init(void);
+
/* The parser is initially called with message body blocks. Once message is
finished, it's still called with incoming size=0 while the parser increases
it to non-zero. */