xml_encode(str, backend->id_box_name);
}
-static int
-fts_backend_solr_build_more(struct fts_backend_build_context *_ctx,
- uint32_t uid, const unsigned char *data,
- size_t size, bool headers)
+/* Begin a new Solr <doc> for the given uid. If no POST is in progress yet,
+   one is started and the <add> element is opened; otherwise the previous
+   document's open field and the document itself are closed first. The new
+   document's prefix and its "id" field are then appended to the command
+   buffer. */
+static void
+fts_backend_solr_uid_changed(struct solr_fts_backend_build_context *ctx,
+			     uint32_t uid)
+{
+	string_t *cmd = ctx->cmd;
+
+	if (ctx->post != NULL) {
+		/* finish the previous message's document */
+		str_append(cmd, "</field></doc>");
+	} else {
+		/* very first message of this build */
+		ctx->post = solr_connection_post_begin(solr_conn);
+		str_append(cmd, "<add>");
+	}
+	ctx->headers = FALSE;
+	ctx->prev_uid = uid;
+
+	fts_backend_solr_add_doc_prefix(ctx, uid);
+	str_printfa(cmd, "<field name=\"id\">");
+	xml_encode_id(cmd, ctx->ctx.backend, uid, ctx->uid_validity);
+	str_append(cmd, "</field>");
+}
+
+/* Switch the Solr document for uid to its "hdr" field. When uid differs
+   from the previously seen one a new document is started; otherwise the
+   currently open field is closed first. Headers may be started only once
+   per uid (asserted). */
+static void
+fts_backend_solr_build_hdr(struct fts_backend_build_context *_ctx,
+ uint32_t uid)
{
struct solr_fts_backend_build_context *ctx =
(struct solr_fts_backend_build_context *)_ctx;
- string_t *cmd = ctx->cmd;
-
- /* body comes first, then headers */
- if (ctx->prev_uid != uid) {
- /* uid changed */
- if (ctx->post == NULL) {
- ctx->post = solr_connection_post_begin(solr_conn);
- str_append(cmd, "<add>");
- } else {
- str_append(cmd, "</field></doc>");
- }
- ctx->prev_uid = uid;
- fts_backend_solr_add_doc_prefix(ctx, uid);
- str_printfa(cmd, "<field name=\"id\">");
- xml_encode_id(cmd, _ctx->backend, uid, ctx->uid_validity);
- str_append(cmd, "</field>");
+ if (uid != ctx->prev_uid)
+ fts_backend_solr_uid_changed(ctx, uid);
+ else {
+ /* same message: close the field that is still open */
+ i_assert(!ctx->headers);
+ str_append(ctx->cmd, "</field>");
+ }
- ctx->headers = headers;
- if (headers) {
- str_append(cmd, "<field name=\"hdr\">");
- } else {
- str_append(cmd, "<field name=\"body\">");
- }
- } else if (headers && !ctx->headers) {
- str_append(cmd, "</field><field name=\"hdr\">");
- } else {
- i_assert(!(!headers && ctx->headers));
+ ctx->headers = TRUE;
+ str_append(ctx->cmd, "<field name=\"hdr\">");
+}
+
+/* Begin indexing a body part of message uid.
+
+   Returns FALSE (and changes nothing) when the content type is not one of
+   the default indexable types, so the caller skips the part. Otherwise
+   returns TRUE and guarantees that the document's "body" field is open.
+
+   The "body" field is opened only when a new document is started. Further
+   body parts of the same message keep appending to the already open field;
+   opening it again would nest <field> elements and leave one of them
+   unclosed when the document is finished, producing malformed XML. */
+static bool
+fts_backend_solr_build_body_begin(struct fts_backend_build_context *_ctx,
+				  uint32_t uid, const char *content_type,
+				  const char *content_disposition ATTR_UNUSED)
+{
+	struct solr_fts_backend_build_context *ctx =
+		(struct solr_fts_backend_build_context *)_ctx;
+
+	if (!fts_backend_default_can_index(content_type))
+		return FALSE;
+
+	if (uid != ctx->prev_uid) {
+		fts_backend_solr_uid_changed(ctx, uid);
+		str_append(ctx->cmd, "<field name=\"body\">");
+	} else {
+		/* body comes first, then headers. the body field opened by
+		   an earlier part of this message is still open. */
+		i_assert(!ctx->headers);
}
- xml_encode_data(cmd, data, size);
- if (str_len(cmd) > SOLR_CMDBUF_SIZE-128) {
- solr_connection_post_more(ctx->post, str_data(cmd),
- str_len(cmd));
- str_truncate(cmd, 0);
+	ctx->headers = FALSE;
+	return TRUE;
+}
+
+/* Append data, XML-encoded, to the currently open field in the command
+   buffer. Once the buffer gets close to SOLR_CMDBUF_SIZE it is sent to the
+   ongoing POST and emptied. Always returns 0. */
+static int
+fts_backend_solr_build_more(struct fts_backend_build_context *_ctx,
+ const unsigned char *data, size_t size)
+{
+ struct solr_fts_backend_build_context *ctx =
+ (struct solr_fts_backend_build_context *)_ctx;
+
+ xml_encode_data(ctx->cmd, data, size);
+ /* flush before the buffer fills up; 128 bytes of headroom are kept */
+ if (str_len(ctx->cmd) > SOLR_CMDBUF_SIZE-128) {
+ solr_connection_post_more(ctx->post, str_data(ctx->cmd),
+ str_len(ctx->cmd));
+ str_truncate(ctx->cmd, 0);
}
return 0;
}
fts_backend_solr_get_last_uid,
fts_backend_solr_get_all_last_uids,
fts_backend_solr_build_init,
+ fts_backend_solr_build_hdr,
+ fts_backend_solr_build_body_begin,
+ NULL,
fts_backend_solr_build_more,
fts_backend_solr_build_deinit,
fts_backend_solr_expunge,
struct squat_fts_backend_build_context {
struct fts_backend_build_context ctx;
struct squat_trie_build_context *build_ctx;
+ /* header/body selector and UID for the data passed to build_more();
+    both are set by the build_hdr()/build_body_begin() callbacks */
+ enum squat_index_type squat_type;
+ uint32_t uid;
};
static void
return 0;
}
+/* Remember that the data following this call is header data of message
+   uid; build_more() reads both values back from the context. */
+static void
+fts_backend_squat_build_hdr(struct fts_backend_build_context *_ctx,
+			    uint32_t uid)
+{
+	struct squat_fts_backend_build_context *squat_ctx =
+		(struct squat_fts_backend_build_context *)_ctx;
+
+	squat_ctx->uid = uid;
+	squat_ctx->squat_type = SQUAT_INDEX_TYPE_HEADER;
+}
+
+/* Decide whether a body part is indexed. For the default indexable content
+   types the context is switched to body data of message uid and TRUE is
+   returned; for anything else the context is left untouched and FALSE is
+   returned. */
+static bool
+fts_backend_squat_build_body_begin(struct fts_backend_build_context *_ctx,
+				   uint32_t uid, const char *content_type,
+				   const char *content_disposition ATTR_UNUSED)
+{
+	struct squat_fts_backend_build_context *squat_ctx =
+		(struct squat_fts_backend_build_context *)_ctx;
+	bool indexable = fts_backend_default_can_index(content_type);
+
+	if (indexable) {
+		squat_ctx->uid = uid;
+		squat_ctx->squat_type = SQUAT_INDEX_TYPE_BODY;
+	}
+	return indexable;
+}
+
+/* Feed data to the squat trie builder, using the UID and header/body type
+   stored in the build context. Returns squat_trie_build_more()'s result. */
static int
fts_backend_squat_build_more(struct fts_backend_build_context *_ctx,
- uint32_t uid, const unsigned char *data,
- size_t size, bool headers)
+ const unsigned char *data, size_t size)
{
struct squat_fts_backend_build_context *ctx =
(struct squat_fts_backend_build_context *)_ctx;
- enum squat_index_type squat_type;
- squat_type = headers ? SQUAT_INDEX_TYPE_HEADER :
- SQUAT_INDEX_TYPE_BODY;
- return squat_trie_build_more(ctx->build_ctx, uid, squat_type,
+ return squat_trie_build_more(ctx->build_ctx, ctx->uid, ctx->squat_type,
data, size);
}
fts_backend_squat_get_last_uid,
NULL,
fts_backend_squat_build_init,
+ fts_backend_squat_build_hdr,
+ fts_backend_squat_build_body_begin,
+ NULL,
fts_backend_squat_build_more,
fts_backend_squat_build_deinit,
fts_backend_squat_expunge,
int (*build_init)(struct fts_backend *backend, uint32_t *last_uid_r,
struct fts_backend_build_context **ctx_r);
- int (*build_more)(struct fts_backend_build_context *ctx, uint32_t uid,
- const unsigned char *data, size_t size, bool headers);
+ void (*build_hdr)(struct fts_backend_build_context *ctx, uint32_t uid);
+ bool (*build_body_begin)(struct fts_backend_build_context *ctx,
+ uint32_t uid, const char *content_type,
+ const char *content_disposition);
+ void (*build_body_end)(struct fts_backend_build_context *ctx);
+ int (*build_more)(struct fts_backend_build_context *ctx,
+ const unsigned char *data, size_t size);
int (*build_deinit)(struct fts_backend_build_context *ctx);
void (*expunge)(struct fts_backend *backend, struct mail *mail);
void fts_backend_register(const struct fts_backend *backend);
void fts_backend_unregister(const char *name);
+bool fts_backend_default_can_index(const char *content_type);
+
void fts_filter_uids(ARRAY_TYPE(seq_range) *definite_dest,
const ARRAY_TYPE(seq_range) *definite_filter,
ARRAY_TYPE(seq_range) *maybe_dest,
return ret;
}
-int fts_backend_build_more(struct fts_backend_build_context *ctx, uint32_t uid,
- const unsigned char *data, size_t size, bool headers)
+/* Forward to the backend's build_hdr callback. The callback is called
+   without a NULL check. */
+void fts_backend_build_hdr(struct fts_backend_build_context *ctx, uint32_t uid)
{
- return ctx->backend->v.build_more(ctx, uid, data, size, headers);
+ ctx->backend->v.build_hdr(ctx, uid);
+}
+
+/* Forward to the backend's build_body_begin callback and return its
+   verdict on whether this body part is going to be indexed. */
+bool fts_backend_build_body_begin(struct fts_backend_build_context *ctx,
+				  uint32_t uid, const char *content_type,
+				  const char *content_disposition)
+{
+	struct fts_backend *backend = ctx->backend;
+
+	return backend->v.build_body_begin(ctx, uid, content_type,
+					   content_disposition);
+}
+
+/* Notify the backend that the current body part is complete. The callback
+   is optional, so nothing happens when the backend leaves it NULL. */
+void fts_backend_build_body_end(struct fts_backend_build_context *ctx)
+{
+	if (ctx->backend->v.build_body_end == NULL)
+		return;
+
+	ctx->backend->v.build_body_end(ctx);
+}
+
+/* Forward a block of data to the backend's build_more callback and return
+   its result. */
+int fts_backend_build_more(struct fts_backend_build_context *ctx,
+ const unsigned char *data, size_t size)
+{
+ return ctx->backend->v.build_more(ctx, data, size);
}
int fts_backend_build_deinit(struct fts_backend_build_context **_ctx)
pool_unref(&ctx->pool);
return ret;
}
+
+/* Default indexing policy: TRUE for any "text/" subtype and for
+   "message/rfc822", compared case-insensitively. content_type must not
+   be NULL. */
+bool fts_backend_default_can_index(const char *content_type)
+{
+	const bool text_type = strncasecmp(content_type, "text/", 5) == 0;
+	const bool embedded_msg =
+		strcasecmp(content_type, "message/rfc822") == 0;
+
+	return text_type || embedded_msg;
+}
#include "seq-range-array.h"
enum fts_lookup_flags {
+ /* Search within header and/or body.
+ At least one of these must be set. */
FTS_LOOKUP_FLAG_HEADER = 0x01,
FTS_LOOKUP_FLAG_BODY = 0x02,
+
+ /* The key must NOT be found */
FTS_LOOKUP_FLAG_INVERT = 0x04
};
/* Get the last_uid for the mailbox. */
int fts_backend_get_last_uid(struct fts_backend *backend, uint32_t *last_uid_r);
/* Get last_uids for all mailboxes that might be backend mailboxes for a
- virtual mailbox. Depending on virtual mailbox configuration, this function
- may also return mailboxes that don't really even match the virtual mailbox
- patterns. The caller should filter out the list itself. */
+ virtual mailbox. The backend can use mailbox_get_virtual_backend_boxes() or
+ mailbox_get_virtual_box_patterns() functions to get the list of mailboxes.
+
+ Depending on virtual mailbox configuration, this function may also return
+ mailboxes that don't even match the virtual mailbox patterns. The caller
+ needs to be able to ignore the unnecessary ones. */
int fts_backend_get_all_last_uids(struct fts_backend *backend, pool_t pool,
ARRAY_TYPE(fts_backend_uid_map) *last_uids);
-/* Initialize adding new data to the index. last_uid_r is set to the last UID
- that exists in the index. */
+/* Initialize adding new data to the index. last_uid_r is set to the last
+ indexed message's IMAP UID. */
int fts_backend_build_init(struct fts_backend *backend, uint32_t *last_uid_r,
struct fts_backend_build_context **ctx_r);
-/* Add more contents to the index. The data must contain only full valid
- UTF-8 characters, but it doesn't need to be NUL-terminated. size contains
- the data size in bytes, not characters. headers is TRUE if the data contains
- message headers instead of message body. */
-int fts_backend_build_more(struct fts_backend_build_context *ctx, uint32_t uid,
- const unsigned char *data, size_t size,
- bool headers);
+/* Switch to building index for mail's headers or MIME part headers. */
+void fts_backend_build_hdr(struct fts_backend_build_context *ctx, uint32_t uid);
+/* Switch to building index for the next body part. If backend doesn't want
+ to index this body part (based on content type/disposition check), it can
+ return FALSE and caller will skip to next part. The backend must return
+ TRUE for all text/xxx and message/rfc822 content types.
+
+ The content_type contains a valid parsed "type/subtype" string. For messages
+ without (valid) Content-Type header, the content_type is set to "text/plain".
+ The content_disposition is passed without parsing/validation if it exists,
+ otherwise it's NULL. */
+bool fts_backend_build_body_begin(struct fts_backend_build_context *ctx,
+ uint32_t uid, const char *content_type,
+ const char *content_disposition);
+/* Called once when the whole body part has been sent. */
+void fts_backend_build_body_end(struct fts_backend_build_context *ctx);
+/* Add more content to the index for the currently selected header/body part.
+ The data must contain only full valid UTF-8 characters, but it doesn't need
+ to be NUL-terminated. size contains the data size in bytes, not characters.
+ This function may be called many times and the data block sizes may be
+ small. Backend returns 0 if ok, -1 if build should be aborted. */
+int fts_backend_build_more(struct fts_backend_build_context *ctx,
+ const unsigned char *data, size_t size);
/* Finish adding new data to the index. */
int fts_backend_build_deinit(struct fts_backend_build_context **ctx);
bool fts_backend_is_building(struct fts_backend *backend);
/* Expunge given mail from the backend. Note that the transaction may still
- fail later. */
+ fail later, so backend shouldn't do anything irreversible. */
void fts_backend_expunge(struct fts_backend *backend, struct mail *mail);
/* Called after transaction has been committed or rollbacked. */
void fts_backend_expunge_finish(struct fts_backend *backend,
struct mailbox *box, bool committed);
/* Lock/unlock the backend for multiple lookups. Returns 1 if locked, 0 if
- locking timeouted, -1 if error.
+ locking timed out, -1 if error. If backend doesn't require locking, it
+ always returns 1.
It's not required to call these functions manually, but if you're doing
multiple lookup/filter operations this avoids multiple lock/unlock calls. */
/* Start building a FTS lookup. */
struct fts_backend_lookup_context *
fts_backend_lookup_init(struct fts_backend *backend);
-/* Add a new search key to the lookup. */
+/* Add a new search key to the lookup. The keys are ANDed together. */
void fts_backend_lookup_add(struct fts_backend_lookup_context *ctx,
const char *key, enum fts_lookup_flags flags);
-/* Finish the lookup and return found UIDs. */
+/* Finish the lookup and return found UIDs. The definite_uids are returned
+ to the client directly, while for maybe_uids Dovecot first verifies (by
+ opening and reading the mail) that they really do contain the searched
+ keys. The maybe_uids are useful with backends that can only filter out
+ messages, but can't definitively say whether the search matched a message. */
int fts_backend_lookup_deinit(struct fts_backend_lookup_context **ctx,
ARRAY_TYPE(seq_range) *definite_uids,
ARRAY_TYPE(seq_range) *maybe_uids,
#include "str.h"
#include "istream.h"
#include "time-util.h"
+#include "rfc822-parser.h"
#include "message-parser.h"
#include "message-decoder.h"
#include "mail-namespace.h"
uint32_t uid;
string_t *headers;
+ char *content_type, *content_disposition;
};
struct fts_transaction_context {
if (str_len(ctx->headers) == 0)
return 0;
- if (fts_backend_build_more(ctx->build, ctx->uid, str_data(ctx->headers),
- str_len(ctx->headers), TRUE) < 0)
+ fts_backend_build_hdr(ctx->build, ctx->uid);
+ if (fts_backend_build_more(ctx->build, str_data(ctx->headers),
+ str_len(ctx->headers)) < 0)
return -1;
str_truncate(ctx->headers, 0);
return 0;
}
-static bool fts_build_want_index_part(const struct message_block *block)
+/* Parse the "type/subtype" from a Content-Type header's full value and
+   store a copy in ctx->content_type, replacing any earlier value. When
+   parsing fails the previously stored value is left unchanged. */
+static void fts_build_parse_content_type(struct fts_storage_build_context *ctx,
+ const struct message_header_line *hdr)
{
- /* we'll index only text/xxx and message/rfc822 parts for now */
- return (block->part->flags &
- (MESSAGE_PART_FLAG_TEXT |
- MESSAGE_PART_FLAG_MESSAGE_RFC822)) != 0;
+ struct rfc822_parser_context parser;
+ string_t *content_type;
+
+ rfc822_parser_init(&parser, hdr->full_value, hdr->full_value_len, NULL);
+ (void)rfc822_skip_lwsp(&parser);
+
+ /* NOTE(review): T_BEGIN/T_END presumably scope the temporary string
+    to a data stack frame - confirm. The result is copied with
+    i_strdup() before the frame ends. */
+ T_BEGIN {
+ content_type = t_str_new(64);
+ if (rfc822_parse_content_type(&parser, content_type) >= 0) {
+ i_free(ctx->content_type);
+ ctx->content_type = i_strdup(str_c(content_type));
+ }
+ } T_END;
+}
+
+/* Store a copy of the Content-Disposition header's full value in
+   ctx->content_disposition, replacing any earlier value. The value is not
+   parsed or validated here. */
+static void
+fts_build_parse_content_disposition(struct fts_storage_build_context *ctx,
+				    const struct message_header_line *hdr)
+{
+	char *raw_value = i_strndup(hdr->full_value, hdr->full_value_len);
+
+	/* the backend gets the value as-is */
+	i_free(ctx->content_disposition);
+	ctx->content_disposition = raw_value;
+}
+
+/* Pick up the headers that describe the upcoming body part: Content-Type
+   and Content-Disposition (names compared case-insensitively). All other
+   headers are ignored here. */
+static void fts_parse_mail_header(struct fts_storage_build_context *ctx,
+				  const struct message_block *raw_block)
+{
+	const struct message_header_line *line = raw_block->hdr;
+	const char *name = line->name;
+
+	if (strcasecmp(name, "Content-Type") == 0) {
+		fts_build_parse_content_type(ctx, line);
+		return;
+	}
+	if (strcasecmp(name, "Content-Disposition") == 0)
+		fts_build_parse_content_disposition(ctx, line);
}
static void fts_build_mail_header(struct fts_storage_build_context *ctx,
struct message_decoder_context *decoder;
struct message_block raw_block, block;
struct message_part *prev_part, *parts;
+ bool skip_body = FALSE, body_part = FALSE;
int ret;
ctx->uid = uid;
parser = message_parser_init(pool_datastack_create(), input,
MESSAGE_HEADER_PARSER_FLAG_CLEAN_ONELINE,
0);
- decoder = message_decoder_init(MESSAGE_DECODER_FLAG_DTCASE);
+ decoder = message_decoder_init(MESSAGE_DECODER_FLAG_DTCASE |
+ MESSAGE_DECODER_FLAG_RETURN_BINARY);
for (;;) {
ret = message_parser_parse_next_block(parser, &raw_block);
i_assert(ret != 0);
ret = 0;
break;
}
- if (raw_block.hdr == NULL && raw_block.size != 0 &&
- !fts_build_want_index_part(&raw_block)) {
- /* skipping this body */
- continue;
+
+ if (raw_block.part != prev_part) {
+ /* body part changed. we're now parsing the end of
+ boundary, possibly followed by message epilogue */
+ if (!skip_body && prev_part != NULL) {
+ i_assert(body_part);
+ fts_backend_build_body_end(ctx->build);
+ }
+ prev_part = raw_block.part;
+ i_free_and_null(ctx->content_type);
+ i_free_and_null(ctx->content_disposition);
+
+ if (raw_block.size != 0) {
+ /* multipart. skip until beginning of next
+ part's headers */
+ skip_body = TRUE;
+ }
+ }
+
+ if (raw_block.hdr != NULL) {
+ /* always handle headers */
+ } else if (raw_block.size == 0) {
+ /* end of headers */
+ const char *content_type = ctx->content_type == NULL ?
+ "text/plain" : ctx->content_type;
+
+ skip_body = !fts_backend_build_body_begin(ctx->build,
+ ctx->uid, content_type,
+ ctx->content_disposition);
+ body_part = TRUE;
+ } else {
+ if (skip_body)
+ continue;
}
if (!message_decoder_decode_next_block(decoder, &raw_block,
&block))
continue;
- if (block.hdr != NULL)
+ if (block.hdr != NULL) {
+ fts_parse_mail_header(ctx, &raw_block);
fts_build_mail_header(ctx, &block);
- else if (block.size == 0) {
+ } else if (block.size == 0) {
/* end of headers */
str_append_c(ctx->headers, '\n');
} else {
- if (fts_backend_build_more(ctx->build, ctx->uid,
- block.data, block.size,
- FALSE) < 0) {
+ i_assert(body_part);
+ if (fts_backend_build_more(ctx->build,
+ block.data, block.size) < 0) {
ret = -1;
break;
}
}
}
+ if (!skip_body && body_part)
+ fts_backend_build_body_end(ctx->build);
if (message_parser_deinit(&parser, &parts) < 0)
mail_set_cache_corrupted(ctx->mail, MAIL_FETCH_MESSAGE_PARTS);
message_decoder_deinit(&decoder);
str_free(&ctx->headers);
mail_search_args_unref(&ctx->search_args);
+ i_free(ctx->content_type);
+ i_free(ctx->content_disposition);
i_free(ctx);
return ret;
}