]> git.ipfire.org Git - thirdparty/dovecot/core.git/commitdiff
Initial code to support Apache Solr (Lucene indexing server).
author: Timo Sirainen <tss@iki.fi>
Thu, 10 Jul 2008 20:14:13 +0000 (01:44 +0530)
committer: Timo Sirainen <tss@iki.fi>
Thu, 10 Jul 2008 20:14:13 +0000 (01:44 +0530)
--HG--
branch : HEAD

configure.in
src/plugins/Makefile.am
src/plugins/fts-solr/Makefile.am [new file with mode: 0644]
src/plugins/fts-solr/fts-backend-solr.c [new file with mode: 0644]
src/plugins/fts-solr/fts-solr-plugin.c [new file with mode: 0644]
src/plugins/fts-solr/fts-solr-plugin.h [new file with mode: 0644]
src/plugins/fts-solr/schema.xml [new file with mode: 0644]
src/plugins/fts-solr/solr-connection.c [new file with mode: 0644]
src/plugins/fts-solr/solr-connection.h [new file with mode: 0644]
src/plugins/fts/fts-storage.c

index f6df617713d78a5d612298910301bfe2199433b5..e1d09883d10df1d6651f4fa0b5e8a9ce9ec4d4b4 100644 (file)
@@ -263,6 +263,15 @@ AC_ARG_WITH(lucene,
        want_lucene=no)
 AM_CONDITIONAL(BUILD_LUCENE, test "$want_lucene" = "yes")
 
+dnl Solr full text search backend. "no" and "auto" are kept as given, any
+dnl other value is treated as "yes". Without the option want_solr is "no".
+AC_ARG_WITH(solr,
+[  --with-solr             Build with Solr full text search support],
+       if test x$withval = xno || test x$withval = xauto; then
+               want_solr=$withval
+       else
+               want_solr=yes
+       fi,
+       want_solr=no)
+
 AC_ARG_WITH(ssl,
 [  --with-ssl=gnutls|openssl Build with GNUTLS or OpenSSL (default)],
        if test x$withval = xno; then
@@ -2204,6 +2213,36 @@ if test -f /usr/include/rpcsvc/rquota.x && test -n "$RPCGEN"; then
 fi
 AM_CONDITIONAL(HAVE_RQUOTA, test "$have_rquota" = "yes")
 
+dnl Solr support needs libcurl (found via curl-config) and libexpat
+dnl (library and header). A missing piece is fatal only when want_solr is
+dnl "yes"; otherwise have_solr is simply left unset and the plugin isn't
+dnl built.
+if test "$want_solr" != "no"; then
+  AC_CHECK_PROG(CURLCONFIG, curl-config, YES, NO)
+  if test $CURLCONFIG = YES; then
+    CURL_CFLAGS=`curl-config --cflags`
+    CURL_LIBS=`curl-config --libs`
+    
+    dnl libcurl found, also need libexpat
+    AC_CHECK_LIB(expat, XML_Parse, [
+      AC_CHECK_HEADER(expat.h, [
+        AC_SUBST(CURL_CFLAGS)
+        AC_SUBST(CURL_LIBS)
+       have_solr=yes
+      ], [
+       if test $want_solr = yes; then
+         AC_ERROR([Can't build with Solr support: expat.h not found])
+       fi
+      ])
+    ], [
+      if test $want_solr = yes; then
+       AC_ERROR([Can't build with Solr support: libexpat not found])
+      fi
+    ])
+  else
+    if test $want_solr = yes; then
+      AC_ERROR([Can't build with Solr support: curl-config not found])
+    fi
+  fi
+fi
+dnl BUILD_SOLR is true only when every dependency above was found
+AM_CONDITIONAL(BUILD_SOLR, test "$have_solr" = "yes")
+
 dnl **
 dnl ** capabilities
 dnl **
@@ -2264,6 +2303,7 @@ src/plugins/convert/Makefile
 src/plugins/expire/Makefile
 src/plugins/fts/Makefile
 src/plugins/fts-lucene/Makefile
+src/plugins/fts-solr/Makefile
 src/plugins/fts-squat/Makefile
 src/plugins/lazy-expunge/Makefile
 src/plugins/mail-log/Makefile
index 59187a3703e142a06776aeecb36021f97ed596de..a5d3f362458b99f6248dda17a05ca559e925065f 100644 (file)
@@ -6,7 +6,11 @@ if BUILD_LUCENE
 FTS_LUCENE = fts-lucene
 endif
 
+# Solr backend subdirectory, referenced as $(FTS_SOLR) in SUBDIRS. It must
+# not reuse FTS_LUCENE, otherwise enabling Solr drops the Lucene plugin.
+if BUILD_SOLR
+FTS_SOLR = fts-solr
+endif
+
 SUBDIRS = \
        acl convert expire fts fts-squat lazy-expunge mail-log mbox-snarf \
        quota imap-quota trash virtual \
-       $(ZLIB) $(FTS_LUCENE)
+       $(ZLIB) $(FTS_LUCENE) $(FTS_SOLR)
diff --git a/src/plugins/fts-solr/Makefile.am b/src/plugins/fts-solr/Makefile.am
new file mode 100644 (file)
index 0000000..0827df0
--- /dev/null
@@ -0,0 +1,32 @@
+# Include paths for the Dovecot libraries plus the compiler flags reported
+# by curl-config --cflags (the sources include <curl/curl.h>).
+AM_CPPFLAGS = \
+       -I$(top_srcdir)/src/lib \
+       -I$(top_srcdir)/src/lib-mail \
+       -I$(top_srcdir)/src/lib-index \
+       -I$(top_srcdir)/src/lib-storage \
+       -I$(top_srcdir)/src/plugins/fts \
+       $(CURL_CFLAGS)
+
+lib21_fts_solr_plugin_la_LDFLAGS = -module -avoid-version
+
+module_LTLIBRARIES = \
+       lib21_fts_solr_plugin.la
+
+# Link against libcurl (curl-config --libs) and libexpat.
+lib21_fts_solr_plugin_la_LIBADD = \
+       $(CURL_LIBS) -lexpat
+
+lib21_fts_solr_plugin_la_SOURCES = \
+       fts-backend-solr.c \
+       fts-solr-plugin.c \
+       solr-connection.c
+
+noinst_HEADERS = \
+       fts-solr-plugin.h \
+       solr-connection.h
+
+# Example Solr schema, shipped in the tarball but not installed.
+EXTRA_DIST = schema.xml
+
+# Symlink the installed plugin into the imap/ and lda/ module
+# subdirectories, replacing any existing link.
+install-exec-local:
+       for d in imap lda; do \
+         $(mkdir_p) $(DESTDIR)$(moduledir)/$$d; \
+         rm -f $(DESTDIR)$(moduledir)/$$d/lib21_fts_solr_plugin$(MODULE_SUFFIX); \
+         $(LN_S) ../lib21_fts_solr_plugin$(MODULE_SUFFIX) $(DESTDIR)$(moduledir)/$$d; \
+       done
diff --git a/src/plugins/fts-solr/fts-backend-solr.c b/src/plugins/fts-solr/fts-backend-solr.c
new file mode 100644 (file)
index 0000000..7454a01
--- /dev/null
@@ -0,0 +1,299 @@
+/* Copyright (c) 2006-2008 Dovecot authors, see the included COPYING file */
+
+#include "lib.h"
+#include "array.h"
+#include "str.h"
+#include "mail-storage-private.h"
+#include "solr-connection.h"
+#include "fts-solr-plugin.h"
+
+#include <stdlib.h>
+#include <curl/curl.h>
+
+/* State of one indexing run, from build_init() to build_deinit(). */
+struct solr_fts_backend_build_context {
+       struct fts_backend_build_context ctx;
+
+       /* streaming HTTP POST that the <add> document is written to */
+       struct solr_connection_post *post;
+       /* prev_uid: UID passed to the previous build_more() call, 0 before
+          the first one. uid_validity: mailbox UIDVALIDITY read in
+          build_init(). */
+       uint32_t prev_uid, uid_validity;
+       /* scratch buffer for the XML markup sent around the mail data */
+       string_t *cmd;
+       /* "headers" flag of the chunk that started the current message;
+          used to detect the body -> header switch */
+       bool headers;
+};
+
+/* Connection shared by all backends in this process. Created by the first
+   fts_backend_solr_init() call. */
+static struct solr_connection *solr_conn = NULL;
+
+/* Append str to dest as a quoted, URL-encoded query value using the shared
+   connection. */
+static void solr_quote_str(string_t *dest, const char *str)
+{
+       solr_connection_quote_str(solr_conn, dest, str);
+}
+
+/* Append the NUL-terminated str to dest, replacing the XML markup
+   characters '&', '<' and '>' with their entity references. Every other
+   character is copied unchanged. */
+static void xml_encode(string_t *dest, const char *str)
+{
+       const char *p = str;
+
+       while (*p != '\0') {
+               char c = *p++;
+
+               if (c == '&')
+                       str_append(dest, "&amp;");
+               else if (c == '<')
+                       str_append(dest, "&lt;");
+               else if (c == '>')
+                       str_append(dest, "&gt;");
+               else
+                       str_append_c(dest, c);
+       }
+}
+
+/* Create a backend instance as a copy of the fts_backend_solr template.
+   The first call also creates the shared Solr connection, configured from
+   the FTS_SOLR environment variable. */
+static struct fts_backend *
+fts_backend_solr_init(struct mailbox *box ATTR_UNUSED)
+{
+       struct fts_backend *backend;
+
+       if (solr_conn == NULL)
+               solr_conn = solr_connection_init(getenv("FTS_SOLR"));
+
+       backend = i_new(struct fts_backend, 1);
+       *backend = fts_backend_solr;
+       return backend;
+}
+
+/* Free the backend instance. The shared Solr connection is left open. */
+static void fts_backend_solr_deinit(struct fts_backend *backend)
+{
+       i_free(backend);
+}
+
+/* Look up the highest UID indexed for this user's mailbox under the
+   current UIDVALIDITY. Stores 0 in last_uid_r when nothing is indexed.
+   Returns 0 on success, -1 if the query fails or the reply isn't exactly
+   one UID. */
+static int fts_backend_solr_get_last_uid(struct fts_backend *backend,
+                                        uint32_t *last_uid_r)
+{
+       struct mailbox *box = backend->box;
+       struct mailbox_status status;
+       ARRAY_TYPE(seq_range) uids;
+       const struct seq_range *range;
+       unsigned int range_count;
+       string_t *query = t_str_new(256);
+
+       /* sorted by uid descending and limited to a single row, so the
+          only returned row is the highest uid */
+       str_append(query, "fl=uid&rows=1&sort=uid%20desc&q=");
+
+       mailbox_get_status(box, STATUS_UIDVALIDITY, &status);
+       str_printfa(query, "uidv:%u%%20box:", status.uidvalidity);
+       solr_quote_str(query, box->name);
+       str_append(query, "%20user:");
+       solr_quote_str(query, box->storage->user);
+
+       t_array_init(&uids, 1);
+       if (solr_connection_select(solr_conn, str_c(query), &uids) < 0)
+               return -1;
+
+       range = array_get(&uids, &range_count);
+       if (range_count == 0) {
+               /* nothing indexed yet for this mailbox */
+               *last_uid_r = 0;
+               return 0;
+       }
+       if (range_count != 1 || range[0].seq1 != range[0].seq2) {
+               i_error("fts_solr: Last UID lookup returned multiple rows");
+               return -1;
+       }
+       *last_uid_r = range[0].seq1;
+       return 0;
+}
+
+/* Start an indexing run. The HTTP POST itself is started lazily by the
+   first build_more() call, so a run that indexes nothing never leaves the
+   connection in "posting" state. Always returns 0. */
+static int
+fts_backend_solr_build_init(struct fts_backend *backend, uint32_t *last_uid_r,
+                           struct fts_backend_build_context **ctx_r)
+{
+       struct solr_fts_backend_build_context *ctx;
+       struct mailbox_status status;
+
+       /* (uint32_t)-1 tells fts-storage to skip its "last uid changed
+          while locking" check */
+       *last_uid_r = (uint32_t)-1;
+
+       ctx = i_new(struct solr_fts_backend_build_context, 1);
+       ctx->ctx.backend = backend;
+       ctx->post = NULL;
+       ctx->cmd = str_new(default_pool, 256);
+
+       mailbox_get_status(backend->box, STATUS_UIDVALIDITY, &status);
+       ctx->uid_validity = status.uidvalidity;
+
+       *ctx_r = &ctx->ctx;
+       return 0;
+}
+
+/* Append len bytes of mail data to dest as XML character data: markup
+   characters are replaced with entities, and control characters that
+   XML 1.0 doesn't allow are replaced with a space. */
+static void
+solr_xml_encode_data(string_t *dest, const unsigned char *data, size_t len)
+{
+       size_t i;
+
+       for (i = 0; i < len; i++) {
+               switch (data[i]) {
+               case '&':
+                       str_append(dest, "&amp;");
+                       break;
+               case '<':
+                       str_append(dest, "&lt;");
+                       break;
+               case '>':
+                       str_append(dest, "&gt;");
+                       break;
+               case '\t':
+               case '\n':
+               case '\r':
+                       str_append_c(dest, data[i]);
+                       break;
+               default:
+                       str_append_c(dest, data[i] < 32 ? ' ' : data[i]);
+                       break;
+               }
+       }
+}
+
+/* Add a chunk of message data to the document of message uid. All chunks
+   of one message must be given consecutively, body chunks before header
+   chunks. Always returns 0; a failed transfer is reported by
+   build_deinit(). */
+static int
+fts_backend_solr_build_more(struct fts_backend_build_context *_ctx,
+                           uint32_t uid, const unsigned char *data,
+                           size_t size, bool headers)
+{
+       struct solr_fts_backend_build_context *ctx =
+               (struct solr_fts_backend_build_context *)_ctx;
+       struct mailbox *box = _ctx->backend->box;
+       string_t *cmd = ctx->cmd;
+
+       str_truncate(cmd, 0);
+
+       /* body comes first, then headers */
+       if (ctx->prev_uid != uid) {
+               /* uid changed */
+               if (ctx->post == NULL)
+                       ctx->post = solr_connection_post_begin(solr_conn);
+               if (ctx->prev_uid == 0)
+                       str_append(cmd, "<add>");
+               else
+                       str_append(cmd, "</field></doc>");
+               ctx->prev_uid = uid;
+
+               str_printfa(cmd, "<doc>"
+                           "<field name=\"uid\">%u</field>"
+                           "<field name=\"uidv\">%u</field>",
+                           uid, ctx->uid_validity);
+
+               str_append(cmd, "<field name=\"box\">");
+               xml_encode(cmd, box->name);
+               str_append(cmd, "</field><field name=\"user\">");
+               xml_encode(cmd, box->storage->user);
+
+               /* unique key: uid/uidvalidity/user/mailbox */
+               str_printfa(cmd, "</field><field name=\"id\">%u/%u/",
+                           uid, ctx->uid_validity);
+               xml_encode(cmd, box->storage->user);
+               str_append_c(cmd, '/');
+               xml_encode(cmd, box->name);
+               str_append(cmd, "</field>");
+
+               ctx->headers = headers;
+               if (headers) {
+                       str_append(cmd, "<field name=\"hdr\">");
+               } else {
+                       str_append(cmd, "<field name=\"body\">");
+               }
+       } else if (headers && !ctx->headers) {
+               /* switch from body to headers. remember it, so that the
+                  following header chunks continue the same field. */
+               str_append(cmd, "</field><field name=\"hdr\">");
+               ctx->headers = TRUE;
+       } else {
+               i_assert(!(!headers && ctx->headers));
+       }
+
+       /* the mail data is untrusted: escape it so that it can't break or
+          extend the XML document */
+       solr_xml_encode_data(cmd, data, size);
+
+       /* a zero sized post means "end of upload" to the connection code,
+          so never send one from here */
+       if (ctx->post != NULL && str_len(cmd) > 0) {
+               solr_connection_post_more(ctx->post, str_data(cmd),
+                                         str_len(cmd));
+       }
+       return 0;
+}
+
+/* Finish the indexing run: close the document, end the POST and commit.
+   Nothing is sent if no message was added. Returns 0 on success, -1 if the
+   upload or the commit failed. */
+static int
+fts_backend_solr_build_deinit(struct fts_backend_build_context *_ctx)
+{
+       struct solr_fts_backend_build_context *ctx =
+               (struct solr_fts_backend_build_context *)_ctx;
+       int ret = 0;
+
+       if (ctx->post != NULL) {
+               str_truncate(ctx->cmd, 0);
+               str_append(ctx->cmd, "</field></doc></add>");
+               solr_connection_post_more(ctx->post, str_data(ctx->cmd),
+                                         str_len(ctx->cmd));
+               ret = solr_connection_end(ctx->post);
+               /* commit and wait until the documents we just indexed are
+                  visible to the following search */
+               if (solr_connection_post(solr_conn,
+                                        "<commit waitFlush=\"false\" "
+                                        "waitSearcher=\"true\"/>") < 0)
+                       ret = -1;
+       }
+       str_free(&ctx->cmd);
+       i_free(ctx);
+       return ret;
+}
+
+/* Delete the expunged mail's document from the index. The id has the same
+   "uid/uidvalidity/user/mailbox" form that indexing writes. */
+static void
+fts_backend_solr_expunge(struct fts_backend *backend ATTR_UNUSED,
+                        struct mail *mail)
+{
+       struct mailbox *box = mail->box;
+       struct mailbox_status status;
+
+       mailbox_get_status(box, STATUS_UIDVALIDITY, &status);
+
+       T_BEGIN {
+               string_t *del = t_str_new(256);
+
+               str_printfa(del, "<delete><id>%u/%u/",
+                           mail->uid, status.uidvalidity);
+               xml_encode(del, box->storage->user);
+               str_append_c(del, '/');
+               xml_encode(del, box->name);
+               str_append(del, "</id></delete>");
+
+               /* best effort: the result of the delete is ignored */
+               (void)solr_connection_post(solr_conn, str_c(del));
+       } T_END;
+}
+
+/* Nothing to do once expunging has finished. */
+static void
+fts_backend_solr_expunge_finish(struct fts_backend *backend ATTR_UNUSED,
+                               struct mailbox *box ATTR_UNUSED,
+                               bool committed ATTR_UNUSED)
+{
+}
+
+/* No locking is done by this backend: always reports success (1). */
+static int fts_backend_solr_lock(struct fts_backend *backend ATTR_UNUSED)
+{
+       return 1;
+}
+
+/* Counterpart of fts_backend_solr_lock(): nothing to release. */
+static void fts_backend_solr_unlock(struct fts_backend *backend ATTR_UNUSED)
+{
+}
+
+/* Search this user's mailbox for key in the body, the headers or both,
+   as selected by flags (inverted lookups aren't supported). Matching UIDs
+   are added to definite_uids; maybe_uids is always emptied. Returns the
+   result of the Solr query (negative on failure). */
+static int
+fts_backend_solr_lookup(struct fts_backend *backend, const char *key,
+                       enum fts_lookup_flags flags,
+                       ARRAY_TYPE(seq_range) *definite_uids,
+                       ARRAY_TYPE(seq_range) *maybe_uids)
+{
+       struct mailbox *box = backend->box;
+       struct mailbox_status status;
+       const char *field;
+       string_t *query;
+
+       i_assert((flags & FTS_LOOKUP_FLAG_INVERT) == 0);
+
+       query = t_str_new(256);
+       str_append(query, "fl=uid&q=");
+
+       /* pick the schema field to search from */
+       if ((flags & FTS_LOOKUP_FLAG_HEADER) == 0) {
+               /* body only */
+               i_assert((flags & FTS_LOOKUP_FLAG_BODY) != 0);
+               field = "body:";
+       } else if ((flags & FTS_LOOKUP_FLAG_BODY) == 0) {
+               /* header only */
+               field = "hdr:";
+       } else {
+               /* both */
+               field = "any:";
+       }
+       str_append(query, field);
+       solr_quote_str(query, key);
+
+       /* restrict the matches to this mailbox of this user */
+       mailbox_get_status(box, STATUS_UIDVALIDITY, &status);
+       str_printfa(query, "%%20uidv:%u%%20box:", status.uidvalidity);
+       solr_quote_str(query, box->name);
+       str_append(query, "%20user:");
+       solr_quote_str(query, box->storage->user);
+
+       array_clear(maybe_uids);
+       return solr_connection_select(solr_conn, str_c(query), definite_uids);
+}
+
+/* Backend template named "solr". fts_backend_solr_init() copies it into
+   every backend instance. */
+struct fts_backend fts_backend_solr = {
+       MEMBER(name) "solr",
+       MEMBER(flags) 0,
+
+       {
+               fts_backend_solr_init,
+               fts_backend_solr_deinit,
+               fts_backend_solr_get_last_uid,
+               fts_backend_solr_build_init,
+               fts_backend_solr_build_more,
+               fts_backend_solr_build_deinit,
+               fts_backend_solr_expunge,
+               fts_backend_solr_expunge_finish,
+               fts_backend_solr_lock,
+               fts_backend_solr_unlock,
+               fts_backend_solr_lookup,
+               /* NOTE(review): last callback left unimplemented; check
+                  the vfuncs struct in fts-api-private.h for which one */
+               NULL
+       }
+};
diff --git a/src/plugins/fts-solr/fts-solr-plugin.c b/src/plugins/fts-solr/fts-solr-plugin.c
new file mode 100644 (file)
index 0000000..4544adb
--- /dev/null
@@ -0,0 +1,16 @@
+/* Copyright (c) 2006-2008 Dovecot authors, see the included COPYING file */
+
+#include "lib.h"
+#include "fts-solr-plugin.h"
+
+/* Version string exported by the plugin. */
+const char *fts_solr_plugin_version = PACKAGE_VERSION;
+
+/* Plugin entry point: register the "solr" FTS backend. */
+void fts_solr_plugin_init(void)
+{
+       fts_backend_register(&fts_backend_solr);
+}
+
+/* Plugin exit point: unregister the backend by its name. */
+void fts_solr_plugin_deinit(void)
+{
+       fts_backend_unregister(fts_backend_solr.name);
+}
diff --git a/src/plugins/fts-solr/fts-solr-plugin.h b/src/plugins/fts-solr/fts-solr-plugin.h
new file mode 100644 (file)
index 0000000..bd9465f
--- /dev/null
@@ -0,0 +1,11 @@
+#ifndef FTS_SOLR_PLUGIN_H
+#define FTS_SOLR_PLUGIN_H
+
+#include "fts-api-private.h"
+
+/* Backend definition, implemented in fts-backend-solr.c */
+extern struct fts_backend fts_backend_solr;
+
+/* Register / unregister the Solr backend. */
+void fts_solr_plugin_init(void);
+void fts_solr_plugin_deinit(void);
+
+#endif
diff --git a/src/plugins/fts-solr/schema.xml b/src/plugins/fts-solr/schema.xml
new file mode 100644 (file)
index 0000000..18d4988
--- /dev/null
@@ -0,0 +1,50 @@
+<?xml version="1.0" encoding="UTF-8" ?>
+
+<schema name="dovecot" version="1.1">
+  <types>
+    <!-- IMAP has 32bit unsigned ints but java ints are signed, so use longs -->
+    <fieldType name="string" class="solr.StrField" omitNorms="true"/>
+    <fieldType name="long" class="solr.LongField" omitNorms="true"/>
+    <fieldType name="slong" class="solr.SortableLongField" omitNorms="true"/>
+    <fieldType name="float" class="solr.FloatField" omitNorms="true"/>
+
+    <fieldType name="text" class="solr.TextField" positionIncrementGap="100">
+      <analyzer type="index">
+        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+        <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/>
+        <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
+        <filter class="solr.LowerCaseFilterFactory"/>
+        <filter class="solr.EnglishPorterFilterFactory" protected="protwords.txt"/>
+        <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
+      </analyzer>
+      <analyzer type="query">
+        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+        <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
+        <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/>
+        <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0"/>
+        <filter class="solr.LowerCaseFilterFactory"/>
+        <filter class="solr.EnglishPorterFilterFactory" protected="protwords.txt"/>
+        <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
+      </analyzer>
+    </fieldType>
+ </types>
+
+
+ <fields>
+   <!-- id is written by the plugin as "uid/uidvalidity/user/mailbox" -->
+   <field name="id" type="string" indexed="true" stored="true" required="true" /> 
+   <!-- uid uses the sortable type: the plugin's last-uid query sorts by it -->
+   <field name="uid" type="slong" indexed="true" stored="true" required="true" /> 
+   <field name="uidv" type="long" indexed="true" stored="true" required="true" /> 
+   <field name="box" type="string" indexed="true" stored="true" required="true" /> 
+   <field name="user" type="string" indexed="true" stored="true" required="true" /> 
+   <field name="hdr" type="text" indexed="true" stored="false" /> 
+   <field name="body" type="text" indexed="true" stored="false" /> 
+   <field name="any" type="text" indexed="true" stored="false" multiValued="true" />
+ </fields>
+
+ <!-- "any" receives both headers and body, for searches covering both -->
+ <copyField source="hdr" dest="any" />
+ <copyField source="body" dest="any" />
+
+ <uniqueKey>id</uniqueKey>
+ <defaultSearchField>any</defaultSearchField>
+ <solrQueryParser defaultOperator="AND" />
+</schema>
diff --git a/src/plugins/fts-solr/solr-connection.c b/src/plugins/fts-solr/solr-connection.c
new file mode 100644 (file)
index 0000000..41a3926
--- /dev/null
@@ -0,0 +1,454 @@
+/* Copyright (c) 2006-2008 Dovecot authors, see the included COPYING file */
+
+/* curl: 7.16.0 curl_multi_timeout */
+
+#include "lib.h"
+#include "str.h"
+#include "strescape.h"
+#include "solr-connection.h"
+
+#include <curl/curl.h>
+#include <expat.h>
+
+/* Position of the reply parser in the response -> result -> doc -> field
+   hierarchy. The numeric value is compared against the element depth, so
+   the order of the values matters. */
+enum solr_xml_response_state {
+       SOLR_XML_RESPONSE_STATE_ROOT,
+       SOLR_XML_RESPONSE_STATE_RESPONSE,
+       SOLR_XML_RESPONSE_STATE_RESULT,
+       SOLR_XML_RESPONSE_STATE_DOC,
+       SOLR_XML_RESPONSE_STATE_CONTENT
+};
+
+/* Which field of a <doc> the character data currently belongs to. */
+enum solr_xml_content_state {
+       SOLR_XML_CONTENT_STATE_NONE = 0,
+       SOLR_XML_CONTENT_STATE_UID,
+       SOLR_XML_CONTENT_STATE_SCORE
+};
+
+/* Parser state for one select reply. */
+struct solr_lookup_xml_context {
+       enum solr_xml_response_state state;
+       enum solr_xml_content_state content_state;
+       /* number of currently open XML elements */
+       int depth;
+
+       /* result array the parsed UIDs are added to */
+       ARRAY_TYPE(seq_range) *uids;
+};
+
+/* One streaming POST request. data/size/pos describe the chunk that is
+   currently being handed to libcurl by curl_output_func(). */
+struct solr_connection_post {
+       struct solr_connection *conn;
+       const unsigned char *data;
+       size_t size, pos;
+
+       /* set once sending has failed; later chunks are then dropped */
+       unsigned int failed:1;
+};
+
+/* Connection to one Solr server: a single curl easy handle, which is
+   attached to the multi handle for the duration of a POST. */
+struct solr_connection {
+       CURL *curl;
+       CURLM *curlm;
+
+       char curl_errorbuf[CURL_ERROR_SIZE];
+       /* header lists for normal requests and for streaming POSTs */
+       struct curl_slist *headers, *headers_post;
+       XML_Parser xml_parser;
+
+       /* base URL from the "url=" setting; "select?" / "update" are
+          appended to it as is */
+       char *url;
+
+       unsigned int debug:1;
+       /* a POST is in progress (post_begin called, end not yet) */
+       unsigned int posting:1;
+       /* the reply being parsed has already produced a parse error */
+       unsigned int xml_failed:1;
+};
+
+/* Parse the space separated settings string ("url=<base url>" and
+   "debug") into conn. Dies with a fatal error on an unknown setting or
+   when no url was given, which includes a missing (NULL) settings string:
+   the url is dereferenced unconditionally by later requests. */
+static void
+solr_conn_init_settings(struct solr_connection *conn, const char *str)
+{
+       const char *const *tmp;
+
+       if (str != NULL) {
+               tmp = t_strsplit_spaces(str, " ");
+               for (; *tmp != NULL; tmp++) {
+                       if (strncmp(*tmp, "url=", 4) == 0) {
+                               i_free(conn->url);
+                               conn->url = i_strdup(*tmp + 4);
+                       } else if (strcmp(*tmp, "debug") == 0) {
+                               conn->debug = TRUE;
+                       } else {
+                               i_fatal("fts_solr: Invalid setting: %s", *tmp);
+                       }
+               }
+       }
+       if (conn->url == NULL)
+               i_fatal("fts_solr: url setting missing");
+}
+
+/* libcurl read callback: copy as much of the pending POST chunk as fits
+   into libcurl's buffer and advance the chunk position. Returns the number
+   of bytes copied, which is 0 when the chunk has been used up. */
+static size_t
+curl_output_func(void *data, size_t element_size, size_t nmemb, void *context)
+{
+       struct solr_connection_post *post = context;
+       size_t wanted = element_size * nmemb;
+       size_t left = post->size - post->pos;
+       size_t count = wanted > left ? left : wanted;
+
+       /* @UNSAFE */
+       memcpy(data, post->data + post->pos, count);
+       post->pos += count;
+       return count;
+}
+
+/* Feed a piece of the reply to the XML parser; done marks the final
+   piece. Returns 0 on success and -1 on a parse error, which is logged
+   once and remembered in conn->xml_failed. XML_ERROR_FINISHED isn't
+   treated as an error. */
+static int solr_xml_parse(struct solr_connection *conn,
+                         const void *data, size_t size, bool done)
+{
+       enum XML_Error err;
+       int line;
+
+       if (conn->xml_failed)
+               return -1;
+       if (XML_Parse(conn->xml_parser, data, size, done))
+               return 0;
+
+       err = XML_GetErrorCode(conn->xml_parser);
+       if (err == XML_ERROR_FINISHED)
+               return 0;
+
+       line = XML_GetCurrentLineNumber(conn->xml_parser);
+       i_error("fts_solr: Invalid XML input at line %d: %s",
+               line, XML_ErrorString(err));
+       conn->xml_failed = TRUE;
+       return -1;
+}
+
+/* libcurl write callback: pass the received reply bytes to the XML
+   parser. Always reports all bytes as consumed, so a parse error doesn't
+   abort the transfer. */
+static size_t
+curl_input_func(void *data, size_t element_size, size_t nmemb, void *context)
+{
+       struct solr_connection *conn = context;
+       size_t size = element_size * nmemb;
+
+       (void)solr_xml_parse(conn, data, size, FALSE);
+       return size;
+}
+
+/* Create a connection from the given settings string. Allocates the curl
+   easy and multi handles, the request header lists and the XML parser.
+   Any allocation failure is fatal. */
+struct solr_connection *solr_connection_init(const char *settings)
+{
+       struct solr_connection *conn;
+
+       conn = i_new(struct solr_connection, 1);
+       solr_conn_init_settings(conn, settings);
+
+       conn->curlm = curl_multi_init();
+       conn->curl = curl_easy_init();
+       if (conn->curl == NULL || conn->curlm == NULL) {
+               i_fatal_status(FATAL_OUTOFMEM,
+                              "fts_solr: Failed to allocate curl");
+       }
+
+       /* set global curl options */
+       curl_easy_setopt(conn->curl, CURLOPT_ERRORBUFFER, conn->curl_errorbuf);
+       if (conn->debug)
+               curl_easy_setopt(conn->curl, CURLOPT_VERBOSE, 1L);
+
+       curl_easy_setopt(conn->curl, CURLOPT_NOPROGRESS, 1L);
+       curl_easy_setopt(conn->curl, CURLOPT_NOSIGNAL, 1L);
+       /* the same callbacks serve every request made with this handle */
+       curl_easy_setopt(conn->curl, CURLOPT_READFUNCTION, curl_output_func);
+       curl_easy_setopt(conn->curl, CURLOPT_WRITEFUNCTION, curl_input_func);
+       curl_easy_setopt(conn->curl, CURLOPT_WRITEDATA, conn);
+
+       conn->headers = curl_slist_append(NULL, "Content-Type: text/xml");
+       /* streaming POSTs are sent chunked. NOTE(review): the empty
+          "Expect:" presumably disables libcurl's "Expect: 100-continue"
+          header - confirm. */
+       conn->headers_post = curl_slist_append(NULL, "Content-Type: text/xml");
+       conn->headers_post = curl_slist_append(conn->headers_post,
+                                              "Transfer-Encoding: chunked");
+       conn->headers_post = curl_slist_append(conn->headers_post,
+                                              "Expect:");
+       curl_easy_setopt(conn->curl, CURLOPT_HTTPHEADER, conn->headers);
+
+       conn->xml_parser = XML_ParserCreate("UTF-8");
+       if (conn->xml_parser == NULL) {
+               i_fatal_status(FATAL_OUTOFMEM,
+                              "fts_solr: Failed to allocate XML parser");
+       }
+       return conn;
+}
+
+/* Free the connection and everything solr_connection_init() allocated for
+   it, including the XML parser. */
+void solr_connection_deinit(struct solr_connection *conn)
+{
+       curl_slist_free_all(conn->headers);
+       curl_slist_free_all(conn->headers_post);
+       curl_multi_cleanup(conn->curlm);
+       curl_easy_cleanup(conn->curl);
+       XML_ParserFree(conn->xml_parser);
+       i_free(conn->url);
+       i_free(conn);
+}
+
+/* Append str to dest as a double-quoted query value: str is first escaped
+   with str_escape(), then URL-encoded, and wrapped in URL-encoded quotes
+   (%22). Failing to encode is fatal, since curl_easy_escape() returns NULL
+   on failure and NULL must not be passed to "%s". */
+void solr_connection_quote_str(struct solr_connection *conn, string_t *dest,
+                              const char *str)
+{
+       char *encoded;
+
+       encoded = curl_easy_escape(conn->curl, str_escape(str), 0);
+       if (encoded == NULL) {
+               i_fatal_status(FATAL_OUTOFMEM,
+                              "fts_solr: curl_easy_escape() failed");
+       }
+       str_printfa(dest, "%%22%s%%22", encoded);
+       curl_free(encoded);
+}
+
+/* Return the value of the "name" attribute from a NULL-terminated list of
+   attribute name/value pairs, or "" if there is no such attribute. */
+static const char *attrs_get_name(const char **attrs)
+{
+       const char **attr = attrs;
+
+       while (*attr != NULL) {
+               if (strcmp(attr[0], "name") == 0)
+                       return attr[1];
+               attr += 2;
+       }
+       return "";
+}
+
+/* Element start handler. Follows the response -> result -> doc -> field
+   path: the state advances only when the expected element is opened
+   directly below the previous one, every other element is skipped. Inside
+   a <doc> the element's "name" attribute selects what the following
+   character data means. */
+static void
+solr_lookup_xml_start(void *context, const char *name, const char **attrs)
+{
+       struct solr_lookup_xml_context *ctx = context;
+       const char *name_attr;
+
+       i_assert(ctx->depth >= (int)ctx->state);
+
+       ctx->depth++;
+       /* the element is interesting only if its parent is the element
+          that brought us to the current state */
+       if (ctx->depth - 1 > (int)ctx->state) {
+               /* skipping over unwanted elements */
+               return;
+       }
+
+       /* response -> result -> doc */
+       switch (ctx->state) {
+       case SOLR_XML_RESPONSE_STATE_ROOT:
+               if (strcmp(name, "response") == 0)
+                       ctx->state++;
+               break;
+       case SOLR_XML_RESPONSE_STATE_RESPONSE:
+               if (strcmp(name, "result") == 0)
+                       ctx->state++;
+               break;
+       case SOLR_XML_RESPONSE_STATE_RESULT:
+               if (strcmp(name, "doc") == 0)
+                       ctx->state++;
+               break;
+       case SOLR_XML_RESPONSE_STATE_DOC:
+               /* fields are identified by their name attribute, the
+                  element name itself isn't checked */
+               name_attr = attrs_get_name(attrs);
+               if (strcmp(name_attr, "uid") == 0)
+                       ctx->content_state = SOLR_XML_CONTENT_STATE_UID;
+               else if (strcmp(name_attr, "score") == 0)
+                       ctx->content_state = SOLR_XML_CONTENT_STATE_SCORE;
+               else 
+                       break;
+               ctx->state++;
+               break;
+       case SOLR_XML_RESPONSE_STATE_CONTENT:
+               break;
+       }
+}
+
+/* Element end handler. When the closed element is the one that advanced
+   the state (depth equals state), step the state back and stop treating
+   character data as field content. */
+static void solr_lookup_xml_end(void *context, const char *name ATTR_UNUSED)
+{
+       struct solr_lookup_xml_context *ctx = context;
+
+       i_assert(ctx->depth >= (int)ctx->state);
+
+       if (ctx->depth == (int)ctx->state) {
+               ctx->state--;
+               ctx->content_state = SOLR_XML_CONTENT_STATE_NONE;
+       }
+       ctx->depth--;
+}
+
+/* Character data handler. Inside a "uid" field the text is parsed as a
+   decimal number and added to the result array; text containing anything
+   but digits is logged and dropped. All other text is ignored. */
+static void solr_lookup_xml_data(void *context, const char *str, int len)
+{
+       struct solr_lookup_xml_context *ctx = context;
+       uint32_t uid = 0;
+       int pos;
+
+       if (ctx->content_state != SOLR_XML_CONTENT_STATE_UID) {
+               /* not inside a field, or inside "score" (FIXME: scores
+                  aren't used yet) */
+               return;
+       }
+
+       for (pos = 0; pos < len; pos++) {
+               if (str[pos] < '0' || str[pos] > '9') {
+                       i_error("fts_solr: received invalid uid");
+                       return;
+               }
+               uid = uid*10 + str[pos]-'0';
+       }
+       seq_range_array_add(ctx->uids, 0, uid);
+}
+
+/* Run a select query (query is the already URL-encoded parameter string)
+   and add the UIDs of the returned documents to uids. Must not be called
+   while a POST is in progress. Returns 0 on success, -1 if the request
+   fails or the reply can't be parsed. */
+int solr_connection_select(struct solr_connection *conn, const char *query,
+                          ARRAY_TYPE(seq_range) *uids)
+{
+       struct solr_lookup_xml_context solr_lookup_context;
+       string_t *str;
+       CURLcode ret;
+
+       i_assert(!conn->posting);
+
+       memset(&solr_lookup_context, 0, sizeof(solr_lookup_context));
+       solr_lookup_context.uids = uids;
+
+       /* The parser is shared by all queries. Once it has finished a
+          document it rejects further input with XML_ERROR_FINISHED, so it
+          has to be reset before each reply. The reset also clears the
+          handlers and user data, which is why they are set after it. */
+       if (!XML_ParserReset(conn->xml_parser, "UTF-8")) {
+               i_error("fts_solr: XML_ParserReset() failed");
+               return -1;
+       }
+       conn->xml_failed = FALSE;
+       XML_SetElementHandler(conn->xml_parser,
+                             solr_lookup_xml_start, solr_lookup_xml_end);
+       XML_SetCharacterDataHandler(conn->xml_parser, solr_lookup_xml_data);
+       XML_SetUserData(conn->xml_parser, &solr_lookup_context);
+
+       str = t_str_new(256);
+       str_append(str, conn->url);
+       str_append(str, "select?");
+       str_append(str, query);
+
+       curl_easy_setopt(conn->curl, CURLOPT_URL, str_c(str));
+       ret = curl_easy_perform(conn->curl);
+       if (ret != 0) {
+               i_error("fts_solr: HTTP GET failed: %s",
+                       conn->curl_errorbuf);
+               return -1;
+       }
+       /* tell the parser that the document is complete */
+       return solr_xml_parse(conn, NULL, 0, TRUE);
+}
+
+/* Begin a streaming POST to the "update" URL. Only one POST can be in
+   progress per connection. The returned handle is freed by
+   solr_connection_end(). If the easy handle can't be attached to the multi
+   handle, the post is marked failed and later chunks are dropped. */
+struct solr_connection_post *
+solr_connection_post_begin(struct solr_connection *conn)
+{
+       struct solr_connection_post *post;
+       CURLMcode merr;
+       string_t *str;
+
+       post = i_new(struct solr_connection_post, 1);
+       post->conn = conn;
+
+       i_assert(!conn->posting);
+       conn->posting = TRUE;
+
+       /* curl_output_func() reads the data to send from post */
+       curl_easy_setopt(conn->curl, CURLOPT_READDATA, post);
+       merr = curl_multi_add_handle(conn->curlm, conn->curl);
+       if (merr != CURLM_OK) {
+               i_error("fts_solr: curl_multi_add_handle() failed: %s",
+                       curl_multi_strerror(merr));
+               post->failed = TRUE;
+       } else {
+               str = t_str_new(256);
+               str_append(str, conn->url);
+               str_append(str, "update");
+
+               /* these are switched back by solr_connection_end() */
+               curl_easy_setopt(conn->curl, CURLOPT_URL, str_c(str));
+               curl_easy_setopt(conn->curl, CURLOPT_HTTPHEADER,
+                                conn->headers_post);
+               curl_easy_setopt(conn->curl, CURLOPT_POST, (long)1);
+       }
+       return post;
+}
+
+/* Send one chunk of the POST body and block until libcurl has consumed
+   all of it. A zero sized chunk ends the upload and waits until the
+   transfer has finished. On any failure the post is marked failed and all
+   later chunks are silently dropped. data must stay valid only for the
+   duration of the call. */
+void solr_connection_post_more(struct solr_connection_post *post,
+                              const unsigned char *data, size_t size)
+{
+       fd_set fdread;
+       fd_set fdwrite;
+       fd_set fdexcep;
+       struct timeval timeout_tv;
+       long timeout;
+       CURLMsg *msg;
+       CURLMcode merr;
+       int ret, handles, maxfd, msgs_left;
+
+       i_assert(post->conn->posting);
+
+       if (post->failed)
+               return;
+
+       post->data = data;
+       post->size = size;
+       post->pos = 0;
+
+       for (;;) {
+               merr = curl_multi_perform(post->conn->curlm, &handles);
+               if (merr == CURLM_CALL_MULTI_PERFORM)
+                       continue;
+               if (merr != CURLM_OK) {
+                       i_error("fts_solr: curl_multi_perform() failed: %s",
+                               curl_multi_strerror(merr));
+                       break;
+               }
+               if ((post->pos == post->size && post->size != 0) ||
+                   (handles == 0 && post->size == 0)) {
+                       /* everything sent successfully */
+                       return;
+               }
+               if (handles == 0) {
+                       /* the transfer ended although we still have data
+                          to send, e.g. connecting failed or the server
+                          replied early. there's nothing left to wait
+                          for. */
+                       msg = curl_multi_info_read(post->conn->curlm,
+                                                  &msgs_left);
+                       if (msg != NULL && msg->msg == CURLMSG_DONE &&
+                           msg->data.result != CURLE_OK) {
+                               i_error("fts_solr: HTTP POST failed: %s",
+                                       curl_easy_strerror(msg->data.result));
+                       } else {
+                               i_error("fts_solr: HTTP POST ended before "
+                                       "all data was sent");
+                       }
+                       break;
+               }
+
+               /* everything wasn't sent - wait. just use select,
+                  since libcurl interface is easiest with it. */
+               FD_ZERO(&fdread);
+               FD_ZERO(&fdwrite);
+               FD_ZERO(&fdexcep);
+
+               merr = curl_multi_fdset(post->conn->curlm, &fdread, &fdwrite,
+                                       &fdexcep, &maxfd);
+               if (merr != CURLM_OK) {
+                       i_error("fts_solr: curl_multi_fdset() failed: %s",
+                               curl_multi_strerror(merr));
+                       break;
+               }
+
+               merr = curl_multi_timeout(post->conn->curlm, &timeout);
+               if (merr != CURLM_OK) {
+                       i_error("fts_solr: curl_multi_timeout() failed: %s",
+                               curl_multi_strerror(merr));
+                       break;
+               }
+               if (maxfd < 0 && (timeout < 0 || timeout > 100)) {
+                       /* libcurl has no descriptors to wait on right
+                          now. that's a normal state: poll it again
+                          after a short sleep. */
+                       timeout = 100;
+               }
+
+               if (timeout < 0) {
+                       timeout_tv.tv_sec = 1;
+                       timeout_tv.tv_usec = 0;
+               } else {
+                       timeout_tv.tv_sec = timeout / 1000;
+                       timeout_tv.tv_usec = (timeout % 1000) * 1000;
+               }
+               /* with maxfd == -1 this only sleeps for the timeout */
+               ret = select(maxfd+1, &fdread, &fdwrite, &fdexcep, &timeout_tv);
+               if (ret < 0) {
+                       i_error("fts_solr: select() failed: %m");
+                       break;
+               }
+       }
+       post->failed = TRUE;
+}
+
+/* Finish a POST started with solr_connection_post_begin(): end the
+   upload, wait for the reply, restore the handle's normal request options
+   and free post. Returns 0 if everything was sent and the server replied
+   200, otherwise -1. */
+int solr_connection_end(struct solr_connection_post *post)
+{
+       struct solr_connection *conn = post->conn;
+       long httpret = 0;
+       int ret;
+
+       i_assert(conn->posting);
+
+       /* a zero sized chunk ends the upload and waits for the reply */
+       solr_connection_post_more(post, NULL, 0);
+       /* check the failure state only now, so that a failure of the
+          final flush is noticed as well */
+       ret = post->failed ? -1 : 0;
+
+       if (ret == 0) {
+               curl_easy_getinfo(conn->curl, CURLINFO_RESPONSE_CODE,
+                                 &httpret);
+               if (httpret != 200) {
+                       i_error("fts_solr: Indexing failed with %ld", httpret);
+                       ret = -1;
+               }
+       }
+
+       curl_easy_setopt(conn->curl, CURLOPT_READDATA, NULL);
+       curl_easy_setopt(conn->curl, CURLOPT_POST, (long)0);
+       curl_easy_setopt(conn->curl, CURLOPT_HTTPHEADER, conn->headers);
+
+       (void)curl_multi_remove_handle(conn->curlm, conn->curl);
+       i_free(post);
+
+       conn->posting = FALSE;
+       return ret;
+}
+
+/* Send the NUL-terminated XML command cmd as one complete POST. Returns
+   the result of solr_connection_end(). */
+int solr_connection_post(struct solr_connection *conn, const char *cmd)
+{
+       struct solr_connection_post *post;
+
+       post = solr_connection_post_begin(conn);
+       solr_connection_post_more(post, (const unsigned char *)cmd,
+                                 strlen(cmd));
+       return solr_connection_end(post);
+}
diff --git a/src/plugins/fts-solr/solr-connection.h b/src/plugins/fts-solr/solr-connection.h
new file mode 100644 (file)
index 0000000..5d83dc7
--- /dev/null
@@ -0,0 +1,22 @@
+#ifndef SOLR_CONNECTION_H
+#define SOLR_CONNECTION_H
+
+#include "seq-range-array.h"
+
+/* Create a connection from a settings string: space separated
+   "url=<base url>" and optional "debug". */
+struct solr_connection *solr_connection_init(const char *settings);
+void solr_connection_deinit(struct solr_connection *conn);
+
+/* Append str to dest as a quoted, URL-encoded query value. */
+void solr_connection_quote_str(struct solr_connection *conn, string_t *dest,
+                              const char *str);
+
+/* Run a select query and add the returned UIDs to uids.
+   Returns 0 on success, -1 on failure. */
+int solr_connection_select(struct solr_connection *conn, const char *query,
+                          ARRAY_TYPE(seq_range) *uids);
+/* Send cmd as one complete POST. Returns 0 on success, -1 on failure. */
+int solr_connection_post(struct solr_connection *conn, const char *cmd);
+
+/* Streaming POST: begin, send any number of chunks with post_more(), then
+   finish with solr_connection_end(), which also frees the post and
+   returns 0 on success, -1 on failure. */
+struct solr_connection_post *
+solr_connection_post_begin(struct solr_connection *conn);
+void solr_connection_post_more(struct solr_connection_post *post,
+                              const unsigned char *data, size_t size);
+int solr_connection_end(struct solr_connection_post *post);
+
+#endif
index 247d5b915820cde03abac5f0e60bfd3b7fc4fac6..4a2142ce0d209c43d3e99730d37d0e41ee79f338 100644 (file)
@@ -188,7 +188,7 @@ static int fts_build_init(struct fts_search_context *fctx)
 
        if (fts_backend_build_init(backend, &last_uid_locked, &build) < 0)
                return -1;
-       if (last_uid != last_uid_locked) {
+       if (last_uid != last_uid_locked && last_uid_locked != (uint32_t)-1) {
                /* changed, need to get again the sequences */
                i_assert(last_uid < last_uid_locked);