Message sort index handling rewrite to fix several race conditions when

author Timo Sirainen <tss@iki.fi>

Thu, 29 May 2008 03:16:06 +0000 (06:16 +0300)

committer Timo Sirainen <tss@iki.fi>

Thu, 29 May 2008 03:16:06 +0000 (06:16 +0300)
author Timo Sirainen <tss@iki.fi>
Thu, 29 May 2008 03:16:06 +0000 (06:16 +0300)
committer Timo Sirainen <tss@iki.fi>
Thu, 29 May 2008 03:16:06 +0000 (06:16 +0300)
diff --git a/src/lib-storage/index/Makefile.am b/src/lib-storage/index/Makefile.am

index 150769df0f2ea0f395ee398820557c6e4513c525..086d324031fe6f4a58ecbbfa5e35af7123f242a1 100644 (file)
--- a/src/lib-storage/index/Makefile.am
+++ b/src/lib-storage/index/Makefile.am
@@ -16,6 +16,7 @@ libstorage_index_a_SOURCES = \
         index-mailbox-check.c \
         index-search.c \
         index-sort.c \
+       index-sort-string.c \
         index-status.c \
         index-storage.c \
         index-sync.c \
@@ -25,6 +26,7 @@ libstorage_index_a_SOURCES = \
  headers = \
         index-mail.h \
         index-sort.h \
+       index-sort-private.h \
         index-storage.h \
         index-sync-changes.h
  
diff --git a/src/lib-storage/index/index-sort-private.h b/src/lib-storage/index/index-sort-private.h

new file mode 100644 (file)

index 0000000..83e2cfb
--- /dev/null
+++ b/src/lib-storage/index/index-sort-private.h
@@ -0,0 +1,33 @@
+#ifndef INDEX_SORT_PRIVATE_H
+#define INDEX_SORT_PRIVATE_H
+
+#include "index-sort.h"
+
+struct mail_search_sort_program {
+       struct mailbox_transaction_context *t;
+       enum mail_sort_type sort_program[MAX_SORT_PROGRAM_SIZE];
+       struct mail *temp_mail;
+
+       void (*sort_list_add)(struct mail_search_sort_program *program,
+                             struct mail *mail);
+       void (*sort_list_finish)(struct mail_search_sort_program *program);
+       void *context;
+
+       ARRAY_TYPE(uint32_t) seqs;
+       unsigned int iter_idx;
+
+       unsigned int reverse:1;
+};
+
+int index_sort_header_get(struct mail *mail, uint32_t seq,
+                         enum mail_sort_type sort_type, string_t *dest);
+int index_sort_node_cmp_type(struct mail *mail,
+                            const enum mail_sort_type *sort_program,
+                            uint32_t seq1, uint32_t seq2);
+
+void index_sort_list_init_string(struct mail_search_sort_program *program);
+void index_sort_list_add_string(struct mail_search_sort_program *program,
+                               struct mail *mail);
+void index_sort_list_finish_string(struct mail_search_sort_program *program);
+
+#endif
diff --git a/src/lib-storage/index/index-sort-string.c b/src/lib-storage/index/index-sort-string.c

new file mode 100644 (file)

index 0000000..c8806ca
--- /dev/null
+++ b/src/lib-storage/index/index-sort-string.c
@@ -0,0 +1,705 @@
+/* Copyright (c) 2006-2008 Dovecot authors, see the included COPYING file */
+
+/* The idea is that we use 32bit integers for string sort IDs which specifiy
+   the sort order for primary sort condition. The whole 32bit integer space is
+   used and whenever adding a string, the available space is halved and the new
+   ID is added in the middle. For example if we add one mail the first time, it
+   gets ID 2^31. If we then add two mails which are sorted before the first
+   one, they get IDs 2^31/3 and 2^31/3*2. Once we run out of the available
+   space between IDs, more space is made by renumbering some IDs.
+*/
+#include "lib.h"
+#include "array.h"
+#include "str.h"
+#include "index-storage.h"
+#include "index-sort-private.h"
+
+#include <stdlib.h>
+
+struct mail_sort_node {
+       uint32_t seq:29;
+       uint32_t wanted:1;
+       uint32_t no_update:1;
+       uint32_t sort_id_changed:1;
+       uint32_t sort_id;
+};
+ARRAY_DEFINE_TYPE(mail_sort_node, struct mail_sort_node);
+
+struct sort_string_context {
+       struct mail_search_sort_program *program;
+
+       ARRAY_TYPE(mail_sort_node) zero_nodes, nonzero_nodes, sorted_nodes;
+       const char **sort_strings;
+       pool_t sort_string_pool;
+       unsigned int first_missing_sort_id_idx;
+
+       uint32_t ext_id, last_seq, highest_reset_id;
+
+       unsigned int regetting:1;
+};
+
+static char expunged_msg;
+static struct sort_string_context *static_zero_cmp_context;
+static struct mail_search_sort_program *static_sort_node_cmp_context;
+
+static void index_sort_node_add(struct sort_string_context *ctx,
+                               struct mail_sort_node *node);
+
+void index_sort_list_init_string(struct mail_search_sort_program *program)
+{
+       struct index_mailbox *ibox = (struct index_mailbox *)program->t->box;
+       struct sort_string_context *ctx;
+       const char *name;
+
+       switch (program->sort_program[0] & MAIL_SORT_MASK) {
+       case MAIL_SORT_CC:
+               name = "sort-c";
+               break;
+       case MAIL_SORT_FROM:
+               name = "sort-f";
+               break;
+       case MAIL_SORT_SUBJECT:
+               name = "sort-s";
+               break;
+       case MAIL_SORT_TO:
+               name = "sort-t";
+               break;
+       default:
+               i_unreached();
+       }
+
+       program->context = ctx = i_new(struct sort_string_context, 1);
+       ctx->program = program;
+       ctx->ext_id = mail_index_ext_register(ibox->index, name, 0,
+                                             sizeof(uint32_t),
+                                             sizeof(uint32_t));
+       i_array_init(&ctx->zero_nodes, 128);
+       i_array_init(&ctx->nonzero_nodes, 128);
+}
+
+static void index_sort_generate_seqs(struct sort_string_context *ctx)
+{
+       struct mail_sort_node *nodes, *nodes2;
+       unsigned int i, j, count, count2;
+       uint32_t seq;
+
+       nodes = array_get_modifiable(&ctx->nonzero_nodes, &count);
+       nodes2 = array_get_modifiable(&ctx->zero_nodes, &count2);
+
+       if (!array_is_created(&ctx->program->seqs))
+               i_array_init(&ctx->program->seqs, count + count2);
+       else
+               array_clear(&ctx->program->seqs);
+
+       for (i = j = 0;;) {
+               if (i < count && j < count2) {
+                       if (nodes[i].seq < nodes2[j].seq)
+                               seq = nodes[i++].seq;
+                       else
+                               seq = nodes2[j++].seq;
+               } else if (i < count) {
+                       seq = nodes[i++].seq;
+               } else if (j < count2) {
+                       seq = nodes2[j++].seq;
+               } else {
+                       break;
+               }
+               array_append(&ctx->program->seqs, &seq, 1);
+       }
+}
+
+static void index_sort_reget_sort_ids(struct sort_string_context *ctx)
+{
+       struct mail_sort_node node;
+       const uint32_t *seqs;
+       unsigned int i, count;
+
+       i_assert(!ctx->regetting);
+       ctx->regetting = TRUE;
+
+       index_sort_generate_seqs(ctx);
+       array_clear(&ctx->zero_nodes);
+       array_clear(&ctx->nonzero_nodes);
+
+       memset(&node, 0, sizeof(node));
+       node.wanted = TRUE;
+       seqs = array_get(&ctx->program->seqs, &count);
+       for (i = 0; i < count; i++) {
+               node.seq = seqs[i];
+               index_sort_node_add(ctx, &node);
+       }
+       ctx->regetting = FALSE;
+}
+
+static void index_sort_node_add(struct sort_string_context *ctx,
+                               struct mail_sort_node *node)
+{
+       struct index_transaction_context *t =
+               (struct index_transaction_context *)ctx->program->t;
+       struct mail_index_map *map;
+       const void *data;
+       uint32_t reset_id;
+       bool expunged;
+
+       mail_index_lookup_ext_full(t->trans_view, node->seq,
+                                  ctx->ext_id, &map, &data, &expunged);
+       if (expunged) {
+               /* we don't want to update expunged messages' sort IDs */
+               node->no_update = TRUE;
+               /* we can't trust expunged messages' sort IDs. they might be
+                  valid, but it's also possible that sort IDs were updated
+                  and the expunged messages' sort IDs became invalid. we could
+                  use sort ID if we could know the extension's reset_id at the
+                  time of the expunge so we could compare it to
+                  highest_reset_id, but this isn't currently possible. */
+               node->sort_id = 0;
+       } else {
+               node->sort_id = data == NULL ? 0 : *(const uint32_t *)data;
+       }
+
+       if (node->sort_id != 0) {
+               /* if reset ID increases, lookup all existing messages' sort
+                  IDs again. if it decreases, ignore the sort ID. */
+               if (!mail_index_ext_get_reset_id(t->trans_view, map,
+                                                ctx->ext_id, &reset_id))
+                       reset_id = 0;
+               if (reset_id != ctx->highest_reset_id) {
+                       if (reset_id > ctx->highest_reset_id) {
+                               ctx->highest_reset_id = reset_id;
+                               index_sort_reget_sort_ids(ctx);
+                       } else {
+                               i_assert(expunged);
+                               node->sort_id = 0;
+                       }
+               }
+       }
+
+       if (node->sort_id == 0)
+               array_append(&ctx->zero_nodes, node, 1);
+       else
+               array_append(&ctx->nonzero_nodes, node, 1);
+       if (ctx->last_seq < node->seq)
+               ctx->last_seq = node->seq;
+}
+
+void index_sort_list_add_string(struct mail_search_sort_program *program,
+                               struct mail *mail)
+{
+       struct mail_sort_node node;
+
+       memset(&node, 0, sizeof(node));
+       node.seq = mail->seq;
+       node.wanted = TRUE;
+
+       index_sort_node_add(program->context, &node);
+}
+
+static int sort_node_zero_string_cmp(const void *p1, const void *p2)
+{
+       const struct mail_sort_node *n1 = p1, *n2 = p2;
+
+       return strcmp(static_zero_cmp_context->sort_strings[n1->seq],
+                     static_zero_cmp_context->sort_strings[n2->seq]);
+}
+
+static void index_sort_zeroes(struct sort_string_context *ctx)
+{
+       struct mail *mail = ctx->program->temp_mail;
+       enum mail_sort_type sort_type = ctx->program->sort_program[0];
+       string_t *str;
+       pool_t pool;
+       struct mail_sort_node *nodes;
+       unsigned int i, count;
+
+       /* first get all the messages' sort strings. although this takes more
+          memory, it makes error handling easier and probably also helps
+          CPU caching. */
+       ctx->sort_strings = i_new(const char *, ctx->last_seq + 1);
+       ctx->sort_string_pool = pool =
+               pool_alloconly_create("sort strings", 1024*64);
+       str = t_str_new(512);
+       nodes = array_get_modifiable(&ctx->zero_nodes, &count);
+       for (i = 0; i < count; i++) {
+               i_assert(nodes[i].seq <= ctx->last_seq);
+
+               index_sort_header_get(mail, nodes[i].seq, sort_type, str);
+               ctx->sort_strings[nodes[i].seq] = str_len(str) == 0 ? "" :
+                       p_strdup(pool, str_c(str));
+       }
+
+       /* we have all strings, sort nodes based on them */
+       static_zero_cmp_context = ctx;
+       qsort(nodes, count, sizeof(struct mail_sort_node),
+             sort_node_zero_string_cmp);
+}
+
+static const char *
+index_sort_get_expunged_string(struct sort_string_context *ctx, uint32_t idx,
+                              string_t *str)
+{
+       struct mail *mail = ctx->program->temp_mail;
+       enum mail_sort_type sort_type = ctx->program->sort_program[0];
+       const struct mail_sort_node *nodes;
+       const char *result = NULL;
+       unsigned int i, count;
+       uint32_t sort_id;
+
+       /* Look forwards and backwards to see if there are
+          identical sort_ids. If we do find them, try to get
+          their sort string and use it to update the rest. */
+       nodes = array_get(&ctx->nonzero_nodes, &count);
+       sort_id = nodes[idx].sort_id;
+       /* If previous sort ID is identical and its sort string is set, we can
+          trust it. If it's expunged, we already verified that there are no
+          non-expunged messages. */
+       if (idx > 0 && nodes[idx-1].sort_id == sort_id &&
+           ctx->sort_strings[nodes[idx].seq] != NULL)
+               return ctx->sort_strings[nodes[idx].seq];
+
+       /* Go forwards as long as there are identical sort IDs. If we find one
+          that's not expunged, update string table for all messages with
+          identical sort IDs. */
+       for (i = idx + 1; i < count; i++) {
+               if (nodes[i].sort_id != sort_id)
+                       break;
+
+               if (ctx->sort_strings[nodes[i].seq] != NULL) {
+                       /* usually we fill all identical sort_ids and this
+                          shouldn't happen, but we can get here if we skipped
+                          over messages when binary searching */
+                       result = ctx->sort_strings[nodes[i].seq];
+                       break;
+               }
+               if (index_sort_header_get(mail, nodes[i].seq,
+                                         sort_type, str) >= 0) {
+                       result = str_len(str) == 0 ? "" :
+                               p_strdup(ctx->sort_string_pool, str_c(str));
+                       break;
+               }
+       }
+       if (result == NULL) {
+               /* unknown */
+               return &expunged_msg;
+       }
+
+       /* fill all identical sort_ids with the same value */
+       for (i = idx; i > 0 && nodes[i-1].sort_id == sort_id; i--) ;
+       for (i = idx; i < count && nodes[i].sort_id == sort_id; i++)
+               ctx->sort_strings[nodes[i].seq] = result;
+       return result;
+}
+
+static const char *
+index_sort_get_string(struct sort_string_context *ctx,
+                     uint32_t idx, uint32_t seq)
+{
+       struct mail *mail = ctx->program->temp_mail;
+       int ret;
+
+       if (ctx->sort_strings[seq] == NULL) T_BEGIN {
+               string_t *str;
+
+               str = t_str_new(256);
+               ret = index_sort_header_get(mail, seq,
+                                           ctx->program->sort_program[0], str);
+               if (str_len(str) > 0) {
+                       ctx->sort_strings[seq] =
+                               p_strdup(ctx->sort_string_pool, str_c(str));
+               } else if (ret >= 0) {
+                       ctx->sort_strings[seq] = "";
+               } else {
+                       ctx->sort_strings[seq] = 
+                               index_sort_get_expunged_string(ctx, idx, str);
+               }
+       } T_END;
+
+       return ctx->sort_strings[seq];
+}
+
+static void
+index_sort_bsearch(struct sort_string_context *ctx, const char *key,
+                  unsigned int start_idx, unsigned int *idx_r,
+                  const char **prev_str_r)
+{
+       const struct mail_sort_node *nodes;
+       const char *str, *str2;
+       unsigned int idx, left_idx, right_idx, prev;
+       int ret;
+
+       nodes = array_get_modifiable(&ctx->nonzero_nodes, &right_idx);
+       idx = left_idx = start_idx;
+       while (left_idx < right_idx) {
+               idx = (left_idx + right_idx) / 2;
+               str = index_sort_get_string(ctx, idx, nodes[idx].seq);
+               if (str != &expunged_msg)
+                       ret = strcmp(key, str);
+               else {
+                       /* put expunged messages first */
+                       ret = 1;
+                       for (prev = idx; prev > 0; ) {
+                               prev--;
+                               str2 = index_sort_get_string(ctx, prev,
+                                                            nodes[prev].seq);
+                               if (str2 != &expunged_msg) {
+                                       ret = strcmp(key, str2);
+                                       if (ret <= 0) {
+                                               idx = prev;
+                                               str = str2;
+                                       }
+                                       break;
+                               }
+                       }
+               }
+               if (ret > 0)
+                       left_idx = idx+1;
+               else if (ret < 0)
+                       right_idx = idx;
+               else {
+                       *idx_r = idx + 1;
+                       *prev_str_r = str;
+                       return;
+               }
+       }
+
+       if (left_idx > idx)
+               idx++;
+
+       *idx_r = idx;
+       if (idx > start_idx) {
+               *prev_str_r = index_sort_get_string(ctx, idx - 1,
+                                                   nodes[idx-1].seq);
+       }
+}
+
+static void index_sort_merge(struct sort_string_context *ctx)
+{
+       struct mail_sort_node *znodes, *nznodes;
+       const char *zstr, *nzstr, *prev_str;
+       unsigned int zpos, nzpos, nz_next_pos, zcount, nzcount;
+       int ret;
+
+       /* both zero_nodes and nonzero_nodes are sorted. we'll now just have
+          to merge them together. use sorted_nodes as the result array. */
+       i_array_init(&ctx->sorted_nodes, array_count(&ctx->nonzero_nodes) +
+                    array_count(&ctx->zero_nodes));
+
+       znodes = array_get_modifiable(&ctx->zero_nodes, &zcount);
+       nznodes = array_get_modifiable(&ctx->nonzero_nodes, &nzcount);
+
+       prev_str = NULL;
+       for (zpos = nzpos = 0; zpos < zcount && nzpos < nzcount; ) {
+               zstr = ctx->sort_strings[znodes[zpos].seq];
+               nzstr = index_sort_get_string(ctx, nzpos, nznodes[nzpos].seq);
+
+               if (nzstr != &expunged_msg)
+                       ret = strcmp(zstr, nzstr);
+               else if (prev_str != NULL && strcmp(zstr, prev_str) == 0) {
+                       /* identical to previous message, must keep them
+                          together */
+                       ret = -1;
+               } else {
+                       /* we can't be yet sure about the order, but future
+                          nznodes may reveal that the znode must be added
+                          later. if future nznodes don't reveal that, we have
+                          no idea about these nodes' order. so just always
+                          put the expunged message first. */
+                       ret = 1;
+               }
+
+               if (ret <= 0) {
+                       array_append(&ctx->sorted_nodes, &znodes[zpos], 1);
+                       prev_str = nzstr;
+                       zpos++;
+               } else {
+                       array_append(&ctx->sorted_nodes, &nznodes[nzpos], 1);
+                       prev_str = nzstr;
+                       nzpos++;
+
+                       /* avoid looking up all existing messages' strings by
+                          binary searching the next zero-node position. don't
+                          bother if it looks like more work than linear
+                          scanning. */
+                       if (zcount - zpos < (nzcount - nzpos)/2) {
+                               index_sort_bsearch(ctx, zstr, nzpos,
+                                                  &nz_next_pos, &prev_str);
+                               array_append(&ctx->sorted_nodes,
+                                            &nznodes[nzpos],
+                                            nz_next_pos - nzpos);
+                               nzpos = nz_next_pos;
+                       }
+               }
+       }
+       /* only one of zero_nodes and nonzero_nodes can be non-empty now */
+       for (; zpos < zcount; zpos++)
+               array_append(&ctx->sorted_nodes, &znodes[zpos], 1);
+       for (; nzpos < nzcount; nzpos++)
+               array_append(&ctx->sorted_nodes, &nznodes[nzpos], 1);
+
+       /* future index_sort_get_string() calls use ctx->nonzero_nodes, but we
+          use only ctx->sorted_nodes. make them identical. */
+       array_free(&ctx->nonzero_nodes);
+       ctx->nonzero_nodes = ctx->sorted_nodes;
+}
+
+static void
+index_sort_add_ids_range(struct sort_string_context *ctx,
+                        unsigned int left_idx, unsigned int right_idx)
+{
+
+       struct mail_sort_node *nodes;
+       unsigned int i, count, rightmost_idx, skip;
+       const char *left_str = NULL, *right_str = NULL, *str;
+       uint32_t left_sort_id, right_sort_id;
+
+       nodes = array_get_modifiable(&ctx->sorted_nodes, &count);
+       rightmost_idx = count - 1;
+
+       /* get the sort IDs from left and right */
+       left_sort_id = nodes[left_idx].sort_id;
+       right_sort_id = nodes[right_idx].sort_id;
+       /* check if all of them should have the same sort IDs. we don't want
+          to hit the renumbering code in that situation. */
+       if ((left_sort_id == right_sort_id && left_sort_id != 0) ||
+           left_sort_id == (uint32_t)-1 || right_sort_id == 1) {
+               /* they should all have the same sort ID */
+               for (i = left_idx + 1; i < right_idx; i++) {
+                       nodes[i].sort_id = left_sort_id;
+                       nodes[i].sort_id_changed = TRUE;
+               }
+               return;
+       }
+
+       if (left_sort_id == 0) {
+               i_assert(left_idx == 0);
+               left_sort_id = 1;
+       }
+       if (right_sort_id == 0) {
+               i_assert(right_idx == rightmost_idx);
+               right_sort_id = (uint32_t)-1;
+       }
+       i_assert(left_sort_id <= right_sort_id);
+
+       while ((right_sort_id - left_sort_id) / (right_idx-left_idx + 2) == 0) {
+               /* we most likely don't have enough space. we have to
+                  renumber some of the existing sort IDs. do this by
+                  widening the area we're giving sort IDs. */
+               if (left_idx > 0) {
+                       left_idx--;
+                       left_sort_id = left_idx == 0 ? 1 :
+                               nodes[left_idx].sort_id;
+                       i_assert(left_sort_id != 0);
+               }
+
+               while (right_idx < rightmost_idx) {
+                       if (nodes[++right_idx].sort_id > 1)
+                               break;
+               }
+               right_sort_id = right_idx == rightmost_idx ? (uint32_t)-1 :
+                       nodes[right_idx].sort_id;
+               i_assert(left_sort_id < right_sort_id);
+       }
+
+       if (nodes[left_idx].sort_id != 0) {
+               left_str = index_sort_get_string(ctx, left_idx,
+                                                nodes[left_idx].seq);
+               if (left_str == &expunged_msg) {
+                       /* not equivalent with any message */
+                       left_str = NULL;
+               }
+               left_idx++;
+       }
+       if (nodes[right_idx].sort_id != 0) {
+               right_str = index_sort_get_string(ctx, right_idx,
+                                                 nodes[right_idx].seq);
+               if (right_str == &expunged_msg) {
+                       /* not equivalent with any message */
+                       right_str = NULL;
+               }
+               right_idx--;
+       }
+       i_assert(left_idx <= right_idx);
+
+       /* give (new) sort IDs to all nodes in left_idx..right_idx range.
+          divide the available space so that each message gets an equal sized
+          share. some messages' sort strings may be equivalent, so give them
+          the same sort IDs. */
+       for (i = left_idx; i <= right_idx; i++) {
+               str = index_sort_get_string(ctx, i, nodes[i].seq);
+               if (str == &expunged_msg) {
+                       /* it doesn't really matter what we give to this
+                          message, since it's only temporary and we don't
+                          know its correct position anyway. so let's assume
+                          it's equivalent to previous message. */
+                       nodes[i].sort_id = left_sort_id;
+                       continue;
+               }
+
+               if (left_str != NULL && strcmp(str, left_str) == 0)
+                       nodes[i].sort_id = left_sort_id;
+               else if (right_str != NULL && strcmp(str, right_str) == 0) {
+                       /* the rest of the sort IDs should be the same */
+                       nodes[i].sort_id = right_sort_id;
+                       left_sort_id = right_sort_id;
+               } else {
+                       /* divide the available space equally. leave the same
+                          sized space also between the first and the last
+                          messages */
+                       if (left_str != NULL)
+                               i_assert(strcmp(left_str, str) < 0);
+                       if (right_str != NULL)
+                               i_assert(strcmp(right_str, str) > 0);
+                       skip = (right_sort_id - left_sort_id) /
+                               (right_idx - i + 2);
+                       i_assert(skip > 0);
+                       left_sort_id += skip;
+                       i_assert(left_sort_id < right_sort_id);
+
+                       nodes[i].sort_id = left_sort_id;
+                       left_str = str;
+               }
+               nodes[i].sort_id_changed = TRUE;
+       }
+}
+
+static void
+index_sort_add_sort_ids(struct sort_string_context *ctx)
+{
+       const struct mail_sort_node *nodes;
+       unsigned int i, left_idx, right_idx, count;
+
+       nodes = array_get(&ctx->sorted_nodes, &count);
+       for (i = 0; i < count; i++) {
+               if (nodes[i].sort_id != 0)
+                       continue;
+
+               /* get the range for all sort_id=0 nodes. include the nodes
+                  left and right of the range as well */
+               for (right_idx = i + 1; right_idx < count; right_idx++) {
+                       if (nodes[right_idx].sort_id != 0)
+                               break;
+               }
+               if (right_idx == count)
+                       right_idx--;
+               left_idx = i == 0 ? 0 : i - 1;
+               index_sort_add_ids_range(ctx, left_idx, right_idx);
+       }
+}
+
+static void index_sort_write_changed_sort_ids(struct sort_string_context *ctx)
+{
+       struct index_transaction_context *t =
+               (struct index_transaction_context *)ctx->program->t;
+       uint32_t ext_id = ctx->ext_id;
+       const struct mail_sort_node *nodes;
+       unsigned int i, count;
+
+       mail_index_ext_reset_inc(t->trans, ext_id, ctx->highest_reset_id, FALSE);
+
+       /* add the missing sort IDs to index */
+       nodes = array_get_modifiable(&ctx->sorted_nodes, &count);
+       for (i = 0; i < count; i++) {
+               i_assert(nodes[i].sort_id != 0);
+               if (!nodes[i].sort_id_changed || nodes[i].no_update)
+                       continue;
+
+               mail_index_update_ext(t->trans, nodes[i].seq, ext_id,
+                                     &nodes[i].sort_id, NULL);
+       }
+}
+
+static int sort_node_cmp(const void *p1, const void *p2)
+{
+       struct mail_search_sort_program *program = static_sort_node_cmp_context;
+       const struct mail_sort_node *n1 = p1, *n2 = p2;
+
+       if (n1->sort_id < n2->sort_id)
+               return -1;
+       if (n1->sort_id > n2->sort_id)
+               return 1;
+
+       return index_sort_node_cmp_type(program->temp_mail,
+                                       program->sort_program + 1,
+                                       n1->seq, n2->seq);
+}
+
+static void index_sort_add_missing(struct sort_string_context *ctx)
+{
+       struct mail_sort_node node;
+       const uint32_t *seqs;
+       unsigned int i, count;
+       uint32_t seq, next_seq;
+
+
+       seqs = array_get(&ctx->program->seqs, &count);
+       for (i = 0, next_seq = 1; i < count; i++) {
+               if (seqs[i] == next_seq)
+                       next_seq++;
+               else {
+                       i_assert(next_seq < seqs[i]);
+                       for (seq = next_seq; seq < seqs[i]; seq++) {
+                               memset(&node, 0, sizeof(node));
+                               node.seq = seq;
+                               index_sort_node_add(ctx, &node);
+                       }
+                       next_seq = seqs[i] + 1;
+               }
+       }
+}
+
+void index_sort_list_finish_string(struct mail_search_sort_program *program)
+{
+       struct sort_string_context *ctx = program->context;
+       struct mail_sort_node *nodes;
+       unsigned int i, count;
+       uint32_t seq;
+
+       nodes = array_get_modifiable(&ctx->nonzero_nodes, &count);
+
+       static_sort_node_cmp_context = program;
+       if (array_count(&ctx->zero_nodes) == 0) {
+               /* fast path: we have all sort IDs */
+               qsort(nodes, count, sizeof(struct mail_sort_node),
+                     sort_node_cmp);
+
+               i_array_init(&program->seqs, count);
+               for (i = 0; i < count; i++) {
+                       seq = nodes[i].seq;
+                       array_append(&program->seqs, &seq, 1);
+               }
+               array_free(&ctx->nonzero_nodes);
+       } else {
+               /* we have to add some sort IDs. we'll do this for all
+                  messages, so first remember what messages we wanted
+                  to know about. */
+               index_sort_generate_seqs(ctx);
+               /* add messages not in seqs list */
+               index_sort_add_missing(ctx);
+               /* sort all messages with sort IDs */
+               nodes = array_get_modifiable(&ctx->nonzero_nodes, &count);
+               qsort(nodes, count, sizeof(struct mail_sort_node),
+                     sort_node_cmp);
+               /* sort all messages without sort IDs */
+               index_sort_zeroes(ctx);
+               /* merge zero and non-zero arrays into sorted_nodes */
+               index_sort_merge(ctx);
+               /* give sort IDs to messages missing them */
+               index_sort_add_sort_ids(ctx);
+               index_sort_write_changed_sort_ids(ctx);
+
+               nodes = array_get_modifiable(&ctx->sorted_nodes, &count);
+               array_clear(&program->seqs);
+               for (i = 0; i < count; i++) {
+                       if (nodes[i].wanted) {
+                               seq = nodes[i].seq;
+                               array_append(&program->seqs, &seq, 1);
+                       }
+               }
+               pool_unref(&ctx->sort_string_pool);
+               i_free(ctx->sort_strings);
+               array_free(&ctx->sorted_nodes);
+               /* NOTE: we already freed nonzero_nodes and made it point to
+                  sorted_nodes. */
+       }
+
+       array_free(&ctx->zero_nodes);
+}
diff --git a/src/lib-storage/index/index-sort.c b/src/lib-storage/index/index-sort.c

index 41124fe010724756c753061a2709821e54d4da0d..33abaec43b9e311185d52575ae8d9a1cd279e103 100644 (file)
--- a/src/lib-storage/index/index-sort.c
+++ b/src/lib-storage/index/index-sort.c
@@ -1,56 +1,27 @@
  /* Copyright (c) 2006-2008 Dovecot authors, see the included COPYING file */
  
-/* The idea in here is that we use a 32bit integer (sort ID) which specifies
-   the sort order for primary sort condition. With fixed size fields (time,
-   size) we use the field itself as the sort ID. They can be looked up fast
-   enough from cache file, so we don't add them to index file.
-
-   Strings can't be used as sort IDs directly. The way they're currently
-   handled is that the whole 32bit integer space is used for them and whenever
-   adding a string, the available space is halved and the new ID is added in
-   the middle. For example if we add one mail the first time, it gets ID
-   2^31. If we then add two mails which are sorted before the first one, they
-   get IDs 2^31/3 and 2^31/3*2. Once we run out of the available space between
-   IDs, a large amount of the IDs are renumbered.
-*/
-
  #include "lib.h"
  #include "array.h"
-#include "bsearch-insert-pos.h"
  #include "str.h"
  #include "unichar.h"
  #include "message-address.h"
  #include "imap-base-subject.h"
  #include "index-storage.h"
-#include "index-sort.h"
+#include "index-sort-private.h"
  
  #include <stdlib.h>
  
-#define RENUMBER_SPACE 100
-
-struct mail_sort_node {
+struct mail_sort_node_date {
         uint32_t seq;
-       uint32_t sort_id;
+       time_t date;
  };
-ARRAY_DEFINE_TYPE(mail_sort_node, struct mail_sort_node);
-
-struct mail_search_sort_program {
-       struct mailbox_transaction_context *t;
-       enum mail_sort_type sort_program[MAX_SORT_PROGRAM_SIZE];
-       struct mail *temp_mail;
-
-       ARRAY_TYPE(mail_sort_node) nodes, all_nodes;
-       const struct mail_sort_node *nodes_ptr;
-       unsigned int nodes_count, iter_idx;
+ARRAY_DEFINE_TYPE(mail_sort_node_date, struct mail_sort_node_date);
  
-       uint32_t ext_id;
-       unsigned int first_missing_sort_id_idx;
-       uint32_t (*get_sort_id)(struct mail *);
-
-       unsigned int reverse:1;
-       unsigned int sort_ids_added:1;
-       unsigned int missing_sort_ids:1;
+struct mail_sort_node_size {
+       uint32_t seq;
+       uoff_t size;
  };
+ARRAY_DEFINE_TYPE(mail_sort_node_size, struct mail_sort_node_size);
  
  struct sort_cmp_context {
         struct mail_search_sort_program *program;
@@ -59,17 +30,154 @@ struct sort_cmp_context {
  
  static struct sort_cmp_context static_node_cmp_context;
  
-static uint32_t sort_get_arrival(struct mail *mail);
-static uint32_t sort_get_date(struct mail *mail);
-static uint32_t sort_get_size(struct mail *mail);
+static void
+index_sort_list_add_arrival(struct mail_search_sort_program *program,
+                           struct mail *mail)
+{
+       ARRAY_TYPE(mail_sort_node_date) *nodes = program->context;
+       struct mail_sort_node_date *node;
+
+       node = array_append_space(nodes);
+       node->seq = mail->seq;
+       if (mail_get_received_date(mail, &node->date) < 0)
+               node->date = 0;
+}
+
+static void
+index_sort_list_add_date(struct mail_search_sort_program *program,
+                        struct mail *mail)
+{
+       ARRAY_TYPE(mail_sort_node_date) *nodes = program->context;
+       struct mail_sort_node_date *node;
+
+       node = array_append_space(nodes);
+       node->seq = mail->seq;
+       if (mail_get_date(mail, &node->date, NULL) < 0)
+               node->date = 0;
+       else if (node->date == 0) {
+               if (mail_get_received_date(mail, &node->date) < 0)
+                       node->date = 0;
+       }
+}
+
+static void
+index_sort_list_add_size(struct mail_search_sort_program *program,
+                        struct mail *mail)
+{
+       ARRAY_TYPE(mail_sort_node_size) *nodes = program->context;
+       struct mail_sort_node_size *node;
+
+       node = array_append_space(nodes);
+       node->seq = mail->seq;
+       if (mail_get_virtual_size(mail, &node->size) < 0)
+               node->size = 0;
+}
+
+void index_sort_list_add(struct mail_search_sort_program *program,
+                        struct mail *mail)
+{
+       i_assert(mail->transaction == program->t);
+
+       program->sort_list_add(program, mail);
+}
+
+static int sort_node_date_cmp(const void *p1, const void *p2)
+{
+       struct sort_cmp_context *ctx = &static_node_cmp_context;
+       const struct mail_sort_node_date *n1 = p1, *n2 = p2;
+
+       if (n1->date < n2->date)
+               return -1;
+       if (n2->date > n2->date)
+               return 1;
+
+       return index_sort_node_cmp_type(ctx->mail,
+                                       ctx->program->sort_program + 1,
+                                       n1->seq, n2->seq);
+}
+
+static void
+index_sort_list_finish_date(struct mail_search_sort_program *program)
+{
+       ARRAY_TYPE(mail_sort_node_date) *nodes = program->context;
+       struct mail_sort_node_date *date_nodes;
+       unsigned int count;
+
+       date_nodes = array_get_modifiable(nodes, &count);
+       qsort(date_nodes, count, sizeof(struct mail_sort_node_date),
+             sort_node_date_cmp);
+       memcpy(&program->seqs, nodes, sizeof(program->seqs));
+       i_free(nodes);
+       program->context = NULL;
+}
+
+static int sort_node_size_cmp(const void *p1, const void *p2)
+{
+       struct sort_cmp_context *ctx = &static_node_cmp_context;
+       const struct mail_sort_node_size *n1 = p1, *n2 = p2;
+
+       if (n1->size < n2->size)
+               return -1;
+       if (n2->size > n2->size)
+               return 1;
+
+       return index_sort_node_cmp_type(ctx->mail,
+                                       ctx->program->sort_program + 1,
+                                       n1->seq, n2->seq);
+}
+
+static void
+index_sort_list_finish_size(struct mail_search_sort_program *program)
+{
+       ARRAY_TYPE(mail_sort_node_size) *nodes = program->context;
+       struct mail_sort_node_size *size_nodes;
+       unsigned int count;
+
+       size_nodes = array_get_modifiable(nodes, &count);
+       qsort(size_nodes, count, sizeof(struct mail_sort_node_size),
+             sort_node_size_cmp);
+       memcpy(&program->seqs, nodes, sizeof(program->seqs));
+       i_free(nodes);
+       program->context = NULL;
+}
+
+void index_sort_list_finish(struct mail_search_sort_program *program)
+{
+       memset(&static_node_cmp_context, 0, sizeof(static_node_cmp_context));
+       static_node_cmp_context.program = program;
+       static_node_cmp_context.mail = program->temp_mail;
+
+       program->sort_list_finish(program);
+
+       if (program->reverse)
+               program->iter_idx = array_count(&program->seqs);
+}
+
+bool index_sort_list_next(struct mail_search_sort_program *program,
+                         struct mail *mail)
+{
+       const uint32_t *seqp;
+
+       if (!program->reverse) {
+               if (program->iter_idx == array_count(&program->seqs))
+                       return FALSE;
+
+               seqp = array_idx(&program->seqs, program->iter_idx++);
+       } else {
+               if (program->iter_idx == 0)
+                       return FALSE;
+
+               seqp = array_idx(&program->seqs, --program->iter_idx);
+       }
+       mail_set_seq(mail, *seqp);
+       return TRUE;
+}
  
  struct mail_search_sort_program *
  index_sort_program_init(struct mailbox_transaction_context *t,
                         const enum mail_sort_type *sort_program)
  {
-       struct index_mailbox *ibox = (struct index_mailbox *)t->box;
         struct mail_search_sort_program *program;
-       const char *name = NULL;
         unsigned int i;
  
         if (sort_program == NULL || sort_program[0] == MAIL_SORT_END)
@@ -79,7 +187,6 @@ index_sort_program_init(struct mailbox_transaction_context *t,
         program = i_new(struct mail_search_sort_program, 1);
         program->t = t;
         program->temp_mail = mail_alloc(t, 0, NULL);
-       i_array_init(&program->nodes, 64);
  
         /* primary reversion isn't stored to sort_program. we handle it by
            iterating backwards at the end. */
@@ -95,32 +202,42 @@ index_sort_program_init(struct mailbox_transaction_context *t,
  
         switch (program->sort_program[0] & MAIL_SORT_MASK) {
         case MAIL_SORT_ARRIVAL:
-               program->get_sort_id = sort_get_arrival;
+       case MAIL_SORT_DATE: {
+               ARRAY_TYPE(mail_sort_node_date) *nodes;
+
+               nodes = i_malloc(sizeof(*nodes));
+               i_array_init(nodes, 128);
+
+               if ((program->sort_program[0] &
+                    MAIL_SORT_MASK) == MAIL_SORT_ARRIVAL)
+                       program->sort_list_add = index_sort_list_add_arrival;
+               else
+                       program->sort_list_add = index_sort_list_add_date;
+               program->sort_list_finish = index_sort_list_finish_date;
+               program->context = nodes;
                 break;
-       case MAIL_SORT_DATE:
-               program->get_sort_id = sort_get_date;
-               break;
-       case MAIL_SORT_SIZE:
-               program->get_sort_id = sort_get_size;
+       }
+       case MAIL_SORT_SIZE: {
+               ARRAY_TYPE(mail_sort_node_size) *nodes;
+
+               nodes = i_malloc(sizeof(*nodes));
+               i_array_init(nodes, 128);
+               program->sort_list_add = index_sort_list_add_size;
+               program->sort_list_finish = index_sort_list_finish_size;
+               program->context = nodes;
                 break;
+       }
         case MAIL_SORT_CC:
-               name = "sort-c";
-               break;
         case MAIL_SORT_FROM:
-               name = "sort-f";
-               break;
         case MAIL_SORT_SUBJECT:
-               name = "sort-s";
-               break;
         case MAIL_SORT_TO:
-               name = "sort-t";
+               program->sort_list_add = index_sort_list_add_string;
+               program->sort_list_finish = index_sort_list_finish_string;
+               index_sort_list_init_string(program);
                 break;
         default:
                 i_unreached();
         }
-       program->ext_id = name == NULL ? (uint32_t)-1 :
-               mail_index_ext_register(ibox->index, name, 0,
-                                       sizeof(uint32_t), sizeof(uint32_t));
         return program;
  }
  
@@ -130,106 +247,70 @@ void index_sort_program_deinit(struct mail_search_sort_program **_program)
  
         *_program = NULL;
         mail_free(&program->temp_mail);
-       array_free(&program->nodes);
+       array_free(&program->seqs);
         i_free(program);
  }
  
-static const char *get_first_mailbox(struct mail *mail, const char *header)
+static int
+get_first_mailbox(struct mail *mail, const char *header, const char **mailbox_r)
  {
         struct message_address *addr;
         const char *str;
+       int ret;
  
-       if (mail_get_first_header_utf8(mail, header, &str) <= 0)
-               return "";
+       if ((ret = mail_get_first_header_utf8(mail, header, &str)) <= 0) {
+               *mailbox_r = "";
+               return ret;
+       }
  
         addr = message_address_parse(pool_datastack_create(),
                                      (const unsigned char *)str,
                                      strlen(str), 1, TRUE);
-       return addr != NULL ? addr->mailbox : "";
+       *mailbox_r = addr != NULL ? addr->mailbox : "";
+       return 0;
  }
  
-static void
-sort_header_get(string_t *dest, enum mail_sort_type sort_type,
-               struct mail *mail, uint32_t seq)
+int index_sort_header_get(struct mail *mail, uint32_t seq,
+                         enum mail_sort_type sort_type, string_t *dest)
  {
         const char *str;
+       int ret;
  
         mail_set_seq(mail, seq);
+       str_truncate(dest, 0);
+
         switch (sort_type & MAIL_SORT_MASK) {
         case MAIL_SORT_SUBJECT:
-               if (mail_get_first_header(mail, "Subject", &str) <= 0)
-                       return;
+               if ((ret = mail_get_first_header(mail, "Subject", &str)) <= 0)
+                       return ret;
                 str = imap_get_base_subject_cased(pool_datastack_create(),
                                                   str, NULL);
                 str_append(dest, str);
-               return;
+               return 0;
         case MAIL_SORT_CC:
-               str = get_first_mailbox(mail, "Cc");
+               ret = get_first_mailbox(mail, "Cc", &str);
                 break;
         case MAIL_SORT_FROM:
-               str = get_first_mailbox(mail, "From");
+               ret = get_first_mailbox(mail, "From", &str);
                 break;
         case MAIL_SORT_TO:
-               str = get_first_mailbox(mail, "To");
+               ret = get_first_mailbox(mail, "To", &str);
                 break;
         default:
                 i_unreached();
         }
  
         (void)uni_utf8_to_decomposed_titlecase(str, (size_t)-1, dest);
+       return ret;
  }
  
-static uint32_t sort_get_arrival(struct mail *mail)
-{
-       time_t t;
-
-       if (mail_get_received_date(mail, &t) < 0)
-               t = 0;
-
-       i_assert(t != (time_t)-1);
-       /* FIXME: truncation isn't good.. */
-       return t <= 0 ? 1 :
-               ((uint64_t)t >= (uint32_t)-1 ? (uint32_t)-1 : (uint32_t)t + 1);
-}
-
-static uint32_t sort_get_date(struct mail *mail)
-{
-       time_t t;
-
-       if (mail_get_date(mail, &t, NULL) < 0)
-               t = 0;
-       if (t == 0) {
-               if (mail_get_received_date(mail, &t) < 0)
-                       return 1;
-       }
-       i_assert(t != (time_t)-1);
-       /* FIXME: truncation isn't good.. */
-       return t <= 0 ? 1 :
-               ((uint64_t)t >= (uint32_t)-1 ? (uint32_t)-1 : (uint32_t)t + 1);
-}
-
-static uint32_t sort_get_size(struct mail *mail)
-{
-       uoff_t size;
-
-       if (mail_get_virtual_size(mail, &size) < 0)
-               return 1;
-
-       /* FIXME: elsewhere we support 64bit message sizes, but here
-          we support only 32bit sizes.. It's a bit too much trouble
-          to support 64bit here currently, so until such messages
-          actually start showing up somewhere, 32bit is enough */
-       i_assert(size < (uint32_t)-1);
-       return size + 1;
-}
-
-static int sort_node_cmp_type(struct sort_cmp_context *ctx,
-                             const enum mail_sort_type *sort_program,
-                             const struct mail_sort_node *n1,
-                             const struct mail_sort_node *n2)
+int index_sort_node_cmp_type(struct mail *mail,
+                            const enum mail_sort_type *sort_program,
+                            uint32_t seq1, uint32_t seq2)
  {
         enum mail_sort_type sort_type;
-       uint32_t time1, time2, size1, size2;
+       time_t time1, time2;
+       uoff_t size1, size2;
         int ret = 0;
  
         sort_type = *sort_program & MAIL_SORT_MASK;
@@ -243,353 +324,71 @@ static int sort_node_cmp_type(struct sort_cmp_context *ctx,
  
                         str1 = t_str_new(256);
                         str2 = t_str_new(256);
-                       sort_header_get(str1, sort_type, ctx->mail, n1->seq);
-                       sort_header_get(str2, sort_type, ctx->mail, n2->seq);
+                       index_sort_header_get(mail, seq1, sort_type, str1);
+                       index_sort_header_get(mail, seq2, sort_type, str2);
  
                         ret = strcmp(str_c(str1), str_c(str2));
                 } T_END;
                 break;
         case MAIL_SORT_ARRIVAL:
-               mail_set_seq(ctx->mail, n1->seq);
-               time1 = sort_get_arrival(ctx->mail);
+               mail_set_seq(mail, seq1);
+               if (mail_get_received_date(mail, &time1) < 0)
+                       time1 = 0;
  
-               mail_set_seq(ctx->mail, n2->seq);
-               time2 = sort_get_arrival(ctx->mail);
+               mail_set_seq(mail, seq2);
+               if (mail_get_received_date(mail, &time2) < 0)
+                       time1 = 0;
  
                 ret = time1 < time2 ? -1 :
                         (time1 > time2 ? 1 : 0);
                 break;
         case MAIL_SORT_DATE:
-               mail_set_seq(ctx->mail, n1->seq);
-               time1 = sort_get_date(ctx->mail);
+               mail_set_seq(mail, seq1);
+               if (mail_get_date(mail, &time1, NULL) < 0)
+                       time1 = 0;
+               else if (time1 == 0) {
+                       if (mail_get_received_date(mail, &time1) < 0)
+                               time1 = 0;
+               }
  
-               mail_set_seq(ctx->mail, n2->seq);
-               time2 = sort_get_date(ctx->mail);
+               mail_set_seq(mail, seq2);
+               if (mail_get_date(mail, &time2, NULL) < 0)
+                       time2 = 0;
+               else if (time2 == 0) {
+                       if (mail_get_received_date(mail, &time2) < 0)
+                               time2 = 0;
+               }
  
                 ret = time1 < time2 ? -1 :
                         (time1 > time2 ? 1 : 0);
                 break;
         case MAIL_SORT_SIZE:
-               mail_set_seq(ctx->mail, n1->seq);
-               size1 = sort_get_size(ctx->mail);
+               mail_set_seq(mail, seq1);
+               if (mail_get_virtual_size(mail, &size1) < 0)
+                       size1 = 0;
  
-               mail_set_seq(ctx->mail, n2->seq);
-               size2 = sort_get_size(ctx->mail);
+               mail_set_seq(mail, seq2);
+               if (mail_get_virtual_size(mail, &size2) < 0)
+                       size2 = 0;
  
                 ret = size1 < size2 ? -1 :
                         (size1 > size2 ? 1 : 0);
                 break;
         case MAIL_SORT_END:
-               return n1->seq < n2->seq ? -1 :
-                       (n1->seq > n2->seq ? 1 : 0);
+               return seq1 < seq2 ? -1 :
+                       (seq1 > seq2 ? 1 : 0);
         case MAIL_SORT_MASK:
         case MAIL_SORT_FLAG_REVERSE:
                 i_unreached();
         }
  
-       if (ret == 0)
-               return sort_node_cmp_type(ctx, sort_program+1, n1, n2);
+       if (ret == 0) {
+               return index_sort_node_cmp_type(mail, sort_program+1,
+                                               seq1, seq2);
+       }
  
         /* primary reversion isn't in sort_program */
         if ((*sort_program & MAIL_SORT_FLAG_REVERSE) != 0)
                 ret = ret < 0 ? 1 : -1;
         return ret;
  }
-
-static int sort_node_cmp(const void *p1, const void *p2)
-{
-       struct sort_cmp_context *ctx = &static_node_cmp_context;
-       const struct mail_sort_node *n1 = p1, *n2 = p2;
-
-       if (n1->sort_id < n2->sort_id)
-               return -1;
-       if (n1->sort_id > n2->sort_id)
-               return 1;
-
-       return sort_node_cmp_type(ctx, ctx->program->sort_program + 1, n1, n2);
-}
-
-static int sort_node_cmp_nozero_sort_id(const void *p1, const void *p2)
-{
-       struct sort_cmp_context *ctx = &static_node_cmp_context;
-       const struct mail_sort_node *n1 = p1, *n2 = p2;
-       const enum mail_sort_type *sort_program;
-
-       /* Use sort IDs only if both have them */
-       if (n1->sort_id != 0 && n2->sort_id != 0) {
-               if (n1->sort_id < n2->sort_id)
-                       return -1;
-               if (n1->sort_id > n2->sort_id)
-                       return 1;
-               sort_program = ctx->program->sort_program + 1;
-       } else {
-               sort_program = ctx->program->sort_program;
-       }
-
-       return sort_node_cmp_type(ctx, sort_program, n1, n2);
-}
-
-static void
-index_sort_add_ids_range(struct mail_search_sort_program *program,
-                        struct mail *mail, unsigned int left_idx,
-                        unsigned int right_idx)
-{
-       struct mail_sort_node *nodes;
-       unsigned int i, count, rightmost_idx;
-       uint32_t left_sort_id, right_sort_id;
-       string_t *right_str, *left_str, *str;
-       unsigned int skip;
-       bool have_right_sort_id = FALSE;
-
-       nodes = array_get_modifiable(&program->all_nodes, &count);
-       rightmost_idx = count - 1;
-
-       /* get the sort IDs from left and right */
-       left_sort_id = left_idx == 0 ? 1 : nodes[left_idx].sort_id;
-       right_sort_id = right_idx == rightmost_idx ? (uint32_t)-1 :
-               nodes[right_idx].sort_id;
-       i_assert(left_sort_id != 0 && right_sort_id != 0);
-
-       while ((right_sort_id - left_sort_id) / (right_idx-left_idx + 2) == 0) {
-               /* we most likely don't have enough space. we have to
-                  renumber some of the existing sort IDs. do this by
-                  widening the area we're giving sort IDs. */
-               if (left_idx > 0) {
-                       left_idx--;
-                       left_sort_id = left_idx == 0 ? 1 :
-                               nodes[left_idx].sort_id;
-                       i_assert(left_sort_id != 0);
-               }
-
-               while (right_idx < rightmost_idx) {
-                       if (nodes[++right_idx].sort_id != 0)
-                               break;
-               }
-               right_sort_id = right_idx == rightmost_idx ? (uint32_t)-1 :
-                       nodes[right_idx].sort_id;
-               i_assert(left_sort_id < right_sort_id);
-       }
-
-       left_str = t_str_new(256);
-       right_str = t_str_new(256);
-       if (nodes[left_idx].sort_id != 0) {
-               sort_header_get(left_str, program->sort_program[0], mail,
-                               nodes[left_idx].seq);
-               left_idx++;
-       }
-       if (nodes[right_idx].sort_id != 0) {
-               have_right_sort_id = TRUE;
-               sort_header_get(right_str, program->sort_program[0], mail,
-                               nodes[right_idx].seq);
-               right_idx--;
-       }
-
-       /* give (new) sort IDs to all nodes in left_idx..right_idx range.
-          divide the available space so that each messagets an equal sized
-          share. some messages' sort strings may be equivalent, so give them
-          the same sort IDs. */
-       str = str_new(default_pool, 256);
-       for (i = left_idx; i <= right_idx; i++) {
-               T_BEGIN {
-                       sort_header_get(str, program->sort_program[0], mail,
-                                       nodes[i].seq);
-               } T_END;
-
-               if (left_idx != 0 && str_equals(str, left_str))
-                       nodes[i].sort_id = left_sort_id;
-               else if (have_right_sort_id && str_equals(str, right_str)) {
-                       /* the rest of the sort IDs should be the same */
-                       nodes[i].sort_id = right_sort_id;
-               } else {
-                       /* divide the available space equally. leave the same
-                          sized space also between the first and the last
-                          messages */
-                       skip = (right_sort_id - left_sort_id) /
-                               (right_idx - i + 2);
-                       i_assert(skip > 0);
-                       left_sort_id += skip;
-                       nodes[i].sort_id = left_sort_id;
-
-                       str_truncate(left_str, 0);
-                       str_append_str(left_str, str);
-               }
-       }
-       str_free(&str);
-}
-
-static void
-index_sort_add_ids(struct mail_search_sort_program *program, struct mail *mail)
-{
-       const struct mail_sort_node *nodes;
-       unsigned int i, left_idx = 0, right_idx, count;
-
-       nodes = array_get(&program->all_nodes, &count);
-       for (i = 0; i < count; i++) {
-               if (nodes[i].sort_id != 0)
-                       continue;
-
-               /* get the range for all sort_id=0 nodes. include the nodes
-                  left and right of the range as well */
-               for (right_idx = i + 1; right_idx < count; right_idx++) {
-                       if (nodes[right_idx].sort_id != 0)
-                               break;
-               }
-               if (right_idx == count)
-                       right_idx--;
-               left_idx = i == 0 ? 0 : i - 1;
-               T_BEGIN {
-                       index_sort_add_ids_range(program, mail,
-                                                left_idx, right_idx);
-               } T_END;
-       }
-}
-
-static void
-index_sort_add_string_sort_ids(struct mail_search_sort_program *program,
-                              uint32_t last_seq)
-{
-       ARRAY_TYPE(mail_sort_node) seq_nodes_arr;
-       struct mail_sort_node *nodes, node, *seq_nodes;
-       unsigned int i, count, count2;
-
-       /* insert missing nodes */
-       memset(&node, 0, sizeof(node));
-       node.seq = array_count(&program->all_nodes) + 1;
-       for (; node.seq <= last_seq; node.seq++)
-               array_append(&program->all_nodes, &node, 1);
-
-       /* sort everything. use sort_ids whenever possible */
-       nodes = array_get_modifiable(&program->all_nodes, &count);
-       i_assert(count == last_seq);
-       qsort(nodes, count, sizeof(struct mail_sort_node),
-             sort_node_cmp_nozero_sort_id);
-
-       /* we can now build the sort_ids */
-       index_sort_add_ids(program, static_node_cmp_context.mail);
-
-       /* @UNSAFE: and finally get the range sorted back by sequence */
-       i_array_init(&seq_nodes_arr, count);
-       (void)array_idx_modifiable(&seq_nodes_arr, count-1);
-       seq_nodes = array_get_modifiable(&seq_nodes_arr, &count2);
-       i_assert(count2 == count);
-       for (i = 0; i < count; i++)
-               seq_nodes[nodes[i].seq-1] = nodes[i];
-
-       array_free(&program->all_nodes);
-       program->all_nodes = seq_nodes_arr;
-}
-
-static void index_sort_build(struct mail_search_sort_program *program)
-{
-       struct index_transaction_context *t =
-               (struct index_transaction_context *)program->t;
-       struct mail_sort_node node, *all_nodes, *nodes;
-       const void *data;
-       uint32_t last_seq;
-       unsigned int seq, i, count, count2;
-
-       /* add messages that have sort_ids. they're always at the beginning
-          of the mailbox. */
-       memset(&node, 0, sizeof(node));
-       last_seq = mail_index_view_get_messages_count(t->ibox->view);
-       i_array_init(&program->all_nodes, last_seq);
-       for (seq = 1; seq <= last_seq; seq++) {
-               node.seq = seq;
-
-               mail_index_lookup_ext(t->ibox->view, seq, program->ext_id,
-                                     &data, NULL);
-               node.sort_id = data == NULL ? 0 : *(const uint32_t *)data;
-               if (node.sort_id == 0) {
-                       /* the rest don't have sort_ids either */
-                       break;
-               }
-               array_append(&program->all_nodes, &node, 1);
-       }
-       i_assert(seq <= last_seq);
-       index_sort_add_string_sort_ids(program, last_seq);
-
-       /* add the missing sort IDs to index */
-       all_nodes = array_get_modifiable(&program->all_nodes, &count);
-       for (; seq <= count; seq++) {
-               i_assert(all_nodes[seq-1].sort_id != 0);
-               mail_index_update_ext(t->trans, seq, program->ext_id,
-                                     &all_nodes[seq-1].sort_id, NULL);
-       }
-
-       /* set missing sort_ids to wanted nodes */
-       nodes = array_get_modifiable(&program->nodes, &count2);
-       for (i = program->first_missing_sort_id_idx; i < count2; i++)
-               nodes[i].sort_id = all_nodes[nodes[i].seq-1].sort_id;
-       array_free(&program->all_nodes);
-}
-
-void index_sort_list_add(struct mail_search_sort_program *program,
-                        struct mail *mail)
-{
-       struct index_transaction_context *t =
-               (struct index_transaction_context *)program->t;
-       const void *data;
-       struct mail_sort_node node;
-
-       i_assert(mail->transaction == program->t);
-
-       node.seq = mail->seq;
-       if (program->ext_id == (uint32_t)-1) {
-               /* no indexing for this field */
-               node.sort_id = program->get_sort_id(mail);
-               array_append(&program->nodes, &node, 1);
-               return;
-       }
-
-       mail_index_lookup_ext(t->trans_view, mail->seq,
-                             program->ext_id, &data, NULL);
-       node.sort_id = data == NULL ? 0 : *(const uint32_t *)data;
-       if (node.sort_id == 0 && !program->missing_sort_ids) {
-               program->missing_sort_ids = TRUE;
-               program->first_missing_sort_id_idx =
-                       array_count(&program->nodes);
-       }
-       array_append(&program->nodes, &node, 1);
-}
-
-void index_sort_list_finish(struct mail_search_sort_program *program)
-{
-       struct mail_sort_node *nodes;
-
-       memset(&static_node_cmp_context, 0, sizeof(static_node_cmp_context));
-       static_node_cmp_context.program = program;
-       static_node_cmp_context.mail = program->temp_mail;
-
-       if (program->missing_sort_ids) {
-               i_assert(program->ext_id != (uint32_t)-1);
-               index_sort_build(program);
-       }
-
-       nodes = array_get_modifiable(&program->nodes, &program->nodes_count);
-       qsort(nodes, program->nodes_count, sizeof(struct mail_sort_node),
-             sort_node_cmp);
-
-       program->nodes_ptr = nodes;
-       if (program->reverse)
-               program->iter_idx = program->nodes_count;
-}
-
-bool index_sort_list_next(struct mail_search_sort_program *program,
-                         struct mail *mail)
-{
-       const struct mail_sort_node *node;
-
-       if (!program->reverse) {
-               if (program->iter_idx == program->nodes_count)
-                       return FALSE;
-
-               node = &program->nodes_ptr[program->iter_idx++];
-       } else {
-               if (program->iter_idx == 0)
-                       return FALSE;
-
-               node = &program->nodes_ptr[--program->iter_idx];
-       }
-       mail_set_seq(mail, node->seq);
-       return TRUE;
-}
author	Timo Sirainen <tss@iki.fi>
	Thu, 29 May 2008 03:16:06 +0000 (06:16 +0300)
committer	Timo Sirainen <tss@iki.fi>
	Thu, 29 May 2008 03:16:06 +0000 (06:16 +0300)
src/lib-storage/index/Makefile.am		patch \| blob \| blame \| history
src/lib-storage/index/index-sort-private.h	[new file with mode: 0644]	patch \| blob
src/lib-storage/index/index-sort-string.c	[new file with mode: 0644]	patch \| blob
src/lib-storage/index/index-sort.c		patch \| blob \| blame \| history