]> git.ipfire.org Git - thirdparty/dovecot/core.git/commitdiff
plugins: fts-solr: solr-response - Make lookup response parser a separate entity.
authorStephan Bosch <stephan.bosch@open-xchange.com>
Tue, 8 Oct 2019 16:57:00 +0000 (18:57 +0200)
committeraki.tuomi <aki.tuomi@open-xchange.com>
Thu, 9 Jan 2020 12:52:43 +0000 (12:52 +0000)
This improves the structure of the code and allows writing a unit test for the
parser, which is included in this commit.

src/plugins/fts-solr/Makefile.am
src/plugins/fts-solr/solr-connection.c
src/plugins/fts-solr/solr-response.c
src/plugins/fts-solr/solr-response.h
src/plugins/fts-solr/test-solr-response.c [new file with mode: 0644]

index e44452017fb5f394d54ba4787824f7e79d2147aa..cd1e17b8b9cba6e272350b0b94f305531b63698f 100644 (file)
@@ -26,12 +26,39 @@ lib21_fts_solr_plugin_la_SOURCES = \
        fts-backend-solr.c \
        fts-backend-solr-old.c \
        fts-solr-plugin.c \
+       solr-response.c \
        solr-connection.c
 
-EXTRA_DIST = \
-       solr-response.c
-
 noinst_HEADERS = \
        fts-solr-plugin.h \
        solr-response.h \
        solr-connection.h
+
+test_programs = \
+       test-solr-response
+
+test_libs = \
+       ../../lib-test/libtest.la \
+       ../../lib-charset/libcharset.la \
+       ../../lib/liblib.la \
+       $(MODULE_LIBS)
+
+noinst_PROGRAMS = test-solr-response
+
+test_solr_response_CPPFLAGS = \
+       $(AM_CPPFLAGS) \
+       -I$(top_srcdir)/src/lib-test
+test_solr_response_SOURCES = \
+       solr-response.c \
+       test-solr-response.c
+test_solr_response_LDADD = \
+       $(test_libs) -lexpat
+
+pkginc_libdir=$(pkgincludedir)
+pkginc_lib_HEADERS = $(headers)
+
+check: check-am check-test
+check-test: all-am
+       for bin in $(test_programs); do \
+         if ! $(RUN_TEST) ./$$bin; then exit 1; fi; \
+       done
index 4ee5482161d12a5c8295c311bb37e6e01210c0f1..ae720b5e2870a852c1b6c440939e3c7c0fa72b5c 100644 (file)
 
 #include <expat.h>
 
-struct solr_response_parser;
-
 struct solr_lookup_context {
-       struct solr_connection *conn;
-
+       pool_t result_pool;
        struct istream *payload;
        struct io *io;
 
        int request_status;
 
        struct solr_response_parser *parser;
+       struct solr_result **results;
 };
 
 struct solr_connection_post {
@@ -49,8 +47,6 @@ struct solr_connection {
        bool http_ssl:1;
 };
 
-#include "solr-response.c"
-
 /* Regardless of the specified URL, make sure path ends in '/' */
 static char *solr_connection_create_http_base_url(struct http_url *http_url)
 {
@@ -128,27 +124,24 @@ void solr_connection_deinit(struct solr_connection **_conn)
 
 static void solr_connection_payload_input(struct solr_lookup_context *lctx)
 {
-       const unsigned char *data;
-       size_t size;
        int ret;
 
        /* read payload */
-       while ((ret = i_stream_read_more(lctx->payload, &data, &size)) > 0) {
-               (void)solr_xml_parse(lctx->parser, data, size, FALSE);
-               i_stream_skip(lctx->payload, size);
-       }
+       ret = solr_response_parse(lctx->parser, &lctx->results);
 
        if (ret == 0) {
                /* we will be called again for more data */
        } else {
                if (lctx->payload->stream_errno != 0) {
+                       i_assert(ret < 0);
                        i_error("fts_solr: "
                                "failed to read payload from HTTP server: %s",
                                i_stream_get_error(lctx->payload));
-                       lctx->request_status = -1;
                }
+               if (ret < 0)
+                       lctx->request_status = -1;
+               solr_response_parser_deinit(&lctx->parser);
                io_remove(&lctx->io);
-               i_stream_unref(&lctx->payload);
        }
 }
 
@@ -169,7 +162,8 @@ solr_connection_select_response(const struct http_response *response,
                return;
        }
 
-       i_stream_ref(response->payload);
+       lctx->parser = solr_response_parser_init(lctx->result_pool,
+                                                response->payload);
        lctx->payload = response->payload;
        lctx->io = io_add_istream(response->payload,
                                  solr_connection_payload_input, lctx);
@@ -179,17 +173,12 @@ solr_connection_select_response(const struct http_response *response,
 int solr_connection_select(struct solr_connection *conn, const char *query,
                           pool_t pool, struct solr_result ***box_results_r)
 {
-       struct solr_response_parser parser;
        struct solr_lookup_context lctx;
        struct http_client_request *http_req;
        const char *url;
-       int parse_ret;
 
        i_zero(&lctx);
-       lctx.conn = conn;
-
-       solr_response_parser_init(&parser, pool);
-       lctx.parser = &parser;
+       lctx.result_pool = pool;
 
        i_free_and_null(conn->http_failure);
        url = t_strconcat(conn->http_base_url, "select?", query, NULL);
@@ -209,16 +198,11 @@ int solr_connection_select(struct solr_connection *conn, const char *query,
        lctx.request_status = 0;
        http_client_wait(solr_http_client);
 
-       if (lctx.request_status < 0 ||
-           parser.content_state == SOLR_XML_CONTENT_STATE_ERROR)
+       if (lctx.request_status < 0)
                return -1;
 
-       parse_ret = solr_xml_parse(&parser, "", 0, TRUE);
-       solr_response_parser_deinit(&parser);
-
-       array_append_zero(&parser.results);
-       *box_results_r = array_front_modifiable(&parser.results);
-       return parse_ret;
+       *box_results_r = lctx.results;
+       return 0;
 }
 
 static void
index e0022c1ac64425169724bb0e902a909fc7220056..1a16b69e0a51fb13d14f97d6bbe518853091cc42 100644 (file)
@@ -1,6 +1,10 @@
 /* Copyright (c) 2006-2018 Dovecot authors, see the included COPYING file */
 
 #include "lib.h"
+#include "array.h"
+#include "hash.h"
+#include "str.h"
+#include "istream.h"
 #include "solr-response.h"
 
 #include <expat.h>
@@ -25,6 +29,7 @@ enum solr_xml_content_state {
 
 struct solr_response_parser {
        XML_Parser xml_parser;
+       struct istream *input;
 
        enum solr_xml_response_state state;
        enum solr_xml_content_state content_state;
@@ -284,10 +289,12 @@ static void solr_lookup_xml_data(void *context, const char *str, int len)
        }
 }
 
-void solr_response_parser_init(struct solr_response_parser *parser,
-                              pool_t result_pool)
+struct solr_response_parser *
+solr_response_parser_init(pool_t result_pool, struct istream *input)
 {
-       i_zero(parser);
+       struct solr_response_parser *parser;
+
+       parser = i_new(struct solr_response_parser, 1);
 
        parser->xml_parser = XML_ParserCreate("UTF-8");
        if (parser->xml_parser == NULL) {
@@ -299,17 +306,69 @@ void solr_response_parser_init(struct solr_response_parser *parser,
                          str_hash, strcmp);
 
        parser->result_pool = result_pool;
+       pool_ref(result_pool);
        p_array_init(&parser->results, result_pool, 32);
 
+       parser->input = input;
+       i_stream_ref(input);
+
        parser->xml_failed = FALSE;
        XML_SetElementHandler(parser->xml_parser,
                              solr_lookup_xml_start, solr_lookup_xml_end);
        XML_SetCharacterDataHandler(parser->xml_parser, solr_lookup_xml_data);
        XML_SetUserData(parser->xml_parser, parser);
+
+       return parser;
 }
 
-void solr_response_parser_deinit(struct solr_response_parser *parser)
+void solr_response_parser_deinit(struct solr_response_parser **_parser)
 {
+       struct solr_response_parser *parser = *_parser;
+
+       *_parser = NULL;
+
+       if (parser == NULL)
+               return;
+
        hash_table_destroy(&parser->mailboxes);
        XML_ParserFree(parser->xml_parser);
+       i_stream_unref(&parser->input);
+       pool_unref(&parser->result_pool);
+       i_free(parser);
+}
+
+int solr_response_parse(struct solr_response_parser *parser,
+                       struct solr_result ***box_results_r)
+{
+       const unsigned char *data;
+       size_t size;
+       int stream_errno, ret;
+
+       i_assert(parser->input != NULL);
+       i_zero(box_results_r);
+
+       /* read payload */
+       while ((ret = i_stream_read_more(parser->input, &data, &size)) > 0) {
+               (void)solr_xml_parse(parser, data, size, FALSE);
+               i_stream_skip(parser->input, size);
+       }
+
+       if (ret == 0) {
+               /* we will be called again for more data */
+               return 0;
+       }
+
+       stream_errno = parser->input->stream_errno;
+       i_stream_unref(&parser->input);
+
+       if (parser->content_state == SOLR_XML_CONTENT_STATE_ERROR)
+               return -1;
+       if (stream_errno != 0)
+               return -1;
+
+       ret = solr_xml_parse(parser, "", 0, TRUE);
+
+       array_append_zero(&parser->results);
+       *box_results_r = array_front_modifiable(&parser->results);
+       return (ret == 0 ? 1 : -1);
 }
index aabc9ba1162ae974c2ad63c5e4926b7f68cf5f3f..1d5cdd5ee25cfabc8ac1f952262baed6353ba820 100644 (file)
@@ -13,8 +13,11 @@ struct solr_result {
        ARRAY_TYPE(fts_score_map) scores;
 };
 
-void solr_response_parser_init(struct solr_response_parser *parser,
-                              pool_t result_pool);
-void solr_response_parser_deinit(struct solr_response_parser *parser);
+struct solr_response_parser *
+solr_response_parser_init(pool_t result_pool, struct istream *input);
+void solr_response_parser_deinit(struct solr_response_parser **_parser);
+
+int solr_response_parse(struct solr_response_parser *parser,
+                       struct solr_result ***box_results_r);
 
 #endif
diff --git a/src/plugins/fts-solr/test-solr-response.c b/src/plugins/fts-solr/test-solr-response.c
new file mode 100644 (file)
index 0000000..630632d
--- /dev/null
@@ -0,0 +1,277 @@
+/* Copyright (c) 2019 Dovecot authors, see the included COPYING file */
+
+#include "lib.h"
+#include "str.h"
+#include "array.h"
+#include "istream.h"
+#include "solr-response.h"
+#include "test-common.h"
+
+#include <unistd.h>
+
+static bool debug = FALSE;
+
+struct solr_response_test_result {
+       const char *box_id;
+       struct fts_score_map *scores;
+};
+
+struct solr_response_test {
+       const char *input;
+
+       struct solr_response_test_result *results;
+};
+
+struct fts_score_map test_results1_scores[] = {
+       { .score = 0.042314477, .uid = 1 },
+       { .score = 0.06996078, .uid = 2, },
+       { .score = 0.020381179, .uid = 3 },
+       { .score = 0.020381179, .uid = 4 },
+       { .score = 5.510487E-4, .uid = 6 },
+       { .score = 0.0424253, .uid = 7 },
+       { .score = 0.04215967, .uid = 8 },
+       { .score = 0.02470572, .uid = 9 },
+       { .score = 0.05936369, .uid = 10 },
+       { .score = 0.048221838, .uid = 11 },
+       { .score = 7.793006E-4, .uid = 12 },
+       { .score = 2.7900032E-4, .uid = 13 },
+       { .score = 0.02088323, .uid = 14 },
+       { .score = 0.011646388, .uid = 15 },
+       { .score = 1.3776218E-4, .uid = 17 },
+       { .score = 2.386111E-4, .uid = 19 },
+       { .score = 2.7552436E-4, .uid = 20 },
+       { .score = 4.772222E-4, .uid = 23 },
+       { .score = 4.772222E-4, .uid = 24 },
+       { .score = 5.965277E-4, .uid = 25 },
+       { .score = 0.0471366, .uid = 26 },
+       { .score = 0.0471366, .uid = 50 },
+       { .score = 0.047274362, .uid = 51 },
+       { .score = 0.053303234, .uid = 56 },
+       { .score = 5.445528E-4, .uid = 62 },
+       { .score = 2.922377E-4, .uid = 66 },
+       { .score = 0.02623833, .uid = 68 },
+       { .score = 3.4440547E-4, .uid = 70 },
+       { .score = 2.922377E-4, .uid = 74 },
+       { .score = 2.7552436E-4, .uid = 76 },
+       { .score = 1.3776218E-4, .uid = 77 },
+       { .score = 0, .uid = 0 },
+};
+
+struct solr_response_test_result test_results1[] = {
+       {
+               .box_id = "",
+               .scores = test_results1_scores,
+       },
+       {
+               .box_id = NULL
+       }
+};
+
+static const struct solr_response_test tests[] = {
+       {
+               .input =
+                       "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"
+                       "<response>\n"
+                       "<lst name=\"responseHeader\"><int name=\"status\""
+                       ">0</int><int name=\"QTime\">3</int><lst name=\"pa"
+                       "rams\"><str name=\"wt\">xml</str><str name=\"fl\""
+                       ">uid,score</str><str name=\"rows\">4023</str><str"
+                       " name=\"sort\">uid asc</str><str name=\"q\">{!luc"
+                       "ene q.op=AND}subject:pierreserveur OR from:pierre"
+                       "serveur OR to:pierreserveur OR cc:pierreserveur O"
+                       "R bcc:pierreserveur OR body:pierreserveur</str><s"
+                       "tr name=\"fq\">+box:fa74101044cb607d5f0900001de14"
+                       "712 +user:jpierreserveur</str></lst></lst><result"
+                       " name=\"response\" numFound=\"31\" start=\"0\" ma"
+                       "xScore=\"0.06996078\"><doc><float name=\"score\">"
+                       "0.042314477</float><long name=\"uid\">1</long></d"
+                       "oc><doc><float name=\"score\">0.06996078</float><"
+                       "long name=\"uid\">2</long></doc><doc><float name="
+                       "\"score\">0.020381179</float><long name=\"uid\">3"
+                       "</long></doc><doc><float name=\"score\">0.0203811"
+                       "79</float><long name=\"uid\">4</long></doc><doc><"
+                       "float name=\"score\">5.510487E-4</float><long nam"
+                       "e=\"uid\">6</long></doc><doc><float name=\"score\""
+                       ">0.0424253</float><long name=\"uid\">7</long></do"
+                       "c><doc><float name=\"score\">0.04215967</float><l"
+                       "ong name=\"uid\">8</long></doc><doc><float name=\""
+                       "score\">0.02470572</float><long name=\"uid\">9</l"
+                       "ong></doc><doc><float name=\"score\">0.05936369</"
+                       "float><long name=\"uid\">10</long></doc><doc><flo"
+                       "at name=\"score\">0.048221838</float><long name=\""
+                       "uid\">11</long></doc><doc><float name=\"score\">7"
+                       ".793006E-4</float><long name=\"uid\">12</long></d"
+                       "oc><doc><float name=\"score\">2.7900032E-4</float"
+                       "><long name=\"uid\">13</long></doc><doc><float na"
+                       "me=\"score\">0.02088323</float><long name=\"uid\""
+                       ">14</long></doc><doc><float name=\"score\">0.0116"
+                       "46388</float><long name=\"uid\">15</long></doc><d"
+                       "oc><float name=\"score\">1.3776218E-4</float><lon"
+                       "g name=\"uid\">17</long></doc><doc><float name=\""
+                       "score\">2.386111E-4</float><long name=\"uid\">19<"
+                       "/long></doc><doc><float name=\"score\">2.7552436E"
+                       "-4</float><long name=\"uid\">20</long></doc><doc>"
+                       "<float name=\"score\">4.772222E-4</float><long na"
+                       "me=\"uid\">23</long></doc><doc><float name=\"scor"
+                       "e\">4.772222E-4</float><long name=\"uid\">24</lon"
+                       "g></doc><doc><float name=\"score\">5.965277E-4</f"
+                       "loat><long name=\"uid\">25</long></doc><doc><floa"
+                       "t name=\"score\">0.0471366</float><long name=\"ui"
+                       "d\">26</long></doc><doc><float name=\"score\">0.0"
+                       "471366</float><long name=\"uid\">50</long></doc><"
+                       "doc><float name=\"score\">0.047274362</float><lon"
+                       "g name=\"uid\">51</long></doc><doc><float name=\""
+                       "score\">0.053303234</float><long name=\"uid\">56<"
+                       "/long></doc><doc><float name=\"score\">5.445528E-"
+                       "4</float><long name=\"uid\">62</long></doc><doc><"
+                       "float name=\"score\">2.922377E-4</float><long nam"
+                       "e=\"uid\">66</long></doc><doc><float name=\"score"
+                       "\">0.02623833</float><long name=\"uid\">68</long>"
+                       "</doc><doc><float name=\"score\">3.4440547E-4</fl"
+                       "oat><long name=\"uid\">70</long></doc><doc><float"
+                       " name=\"score\">2.922377E-4</float><long name=\"u"
+                       "id\">74</long></doc><doc><float name=\"score\">2."
+                       "7552436E-4</float><long name=\"uid\">76</long></d"
+                       "oc><doc><float name=\"score\">1.3776218E-4</float"
+                       "><long name=\"uid\">77</long></doc></result>\n"
+                       "</response>\n",
+               .results = test_results1,
+       },
+};
+
+static const unsigned tests_count = N_ELEMENTS(tests);
+
+static void
+test_solr_result(const struct solr_response_test_result *test_results,
+                struct solr_result **parse_results)
+{
+       unsigned int rcount, i;
+
+       for (i = 0; test_results[i].box_id != NULL; i++);
+       rcount = i;
+
+       for (i = 0; parse_results[i] != NULL; i++);
+
+       test_out_quiet("result count equal", i == rcount);
+       if (test_has_failed())
+               return;
+
+       for (i = 0; i < rcount; i++) {
+               unsigned int scount, j;
+               const struct fts_score_map *tscores = test_results[i].scores;
+               const struct fts_score_map *pscores =
+                       array_get(&parse_results[i]->scores, &scount);
+
+               test_out_quiet(t_strdup_printf("box id equal[%u]", i),
+                              strcmp(test_results[i].box_id,
+                                     parse_results[i]->box_id) == 0);
+
+               for (j = 0; tscores[j].uid != 0; j++);
+               test_out_quiet(t_strdup_printf("scores count equal[%u]", i),
+                              j == scount);
+               if (j != scount)
+                       continue;
+
+               for (j = 0; j < scount; j++) {
+                       test_out_quiet(
+                               t_strdup_printf("score uid equal[%u/%u]", i, j),
+                               pscores[j].uid == tscores[j].uid);
+                       test_out_quiet(
+                               t_strdup_printf("score value equal[%u/%u]", i, j),
+                               pscores[j].score == tscores[j].score);
+               }
+       }
+}
+
+static void test_solr_response_parser(void)
+{
+       unsigned int i;
+
+       for (i = 0; i < tests_count; i++) T_BEGIN {
+               const struct solr_response_test *test;
+               const char *text;
+               unsigned int text_len;
+               struct istream *input;
+               struct solr_response_parser *parser;
+               struct solr_result **box_results;
+               const char *error = NULL;
+               pool_t pool;
+               int ret = 0;
+
+               test = &tests[i];
+               text = test->input;
+               text_len = strlen(text);
+
+               test_begin(t_strdup_printf("solr response [%d]", i));
+
+               input = test_istream_create_data(text, text_len);
+               pool = pool_alloconly_create("solr response", 4096);
+               parser = solr_response_parser_init(pool, input);
+
+               ret = solr_response_parse(parser, &box_results);
+
+               test_out_reason("parse ok (buffer)", ret > 0, error);
+               if (ret > 0)
+                       test_solr_result(test->results, box_results);
+
+               solr_response_parser_deinit(&parser);
+               pool_unref(&pool);
+               i_stream_unref(&input);
+
+               test_end();
+
+       } T_END;
+}
+
+static void test_solr_response_file(const char *file)
+{
+       pool_t pool;
+       struct istream *input;
+       struct solr_response_parser *parser;
+       struct solr_result **box_results;
+       int ret = 0;
+
+       pool = pool_alloconly_create("solr response", 4096);
+       input = i_stream_create_file(file, 1024);
+       parser = solr_response_parser_init(pool, input);
+
+       while ((ret = solr_response_parse(parser, &box_results)) == 0);
+
+       if (ret < 0)
+               i_fatal("Failed to read response");
+
+       solr_response_parser_deinit(&parser);
+       i_stream_unref(&input);
+       pool_unref(&pool);
+}
+
+int main(int argc, char *argv[])
+{
+       int c;
+
+       static void (*test_functions[])(void) = {
+               test_solr_response_parser,
+               NULL
+       };
+
+       while ((c = getopt(argc, argv, "D")) > 0) {
+               switch (c) {
+               case 'D':
+                       debug = TRUE;
+                       break;
+               default:
+                       i_fatal("Usage: %s [-D]", argv[0]);
+               }
+       }
+       argc -= optind;
+       argv += optind;
+
+       if (argc > 0) {
+               test_solr_response_file(argv[0]);
+               return 0;
+       }
+
+       return test_run(test_functions);
+}
+
+