From: Stephan Bosch Date: Tue, 8 Oct 2019 16:57:00 +0000 (+0200) Subject: plugins: fts-solr: solr-response - Make lookup response parser a separate entity. X-Git-Tag: 2.3.10~148 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=aecdefa5b09594f7271315afa72b1fe51983926c;p=thirdparty%2Fdovecot%2Fcore.git plugins: fts-solr: solr-response - Make lookup response parser a separate entity. This improves the structure of the code and allows writing a unit test for the parser, which is included in this commit. --- diff --git a/src/plugins/fts-solr/Makefile.am b/src/plugins/fts-solr/Makefile.am index e44452017f..cd1e17b8b9 100644 --- a/src/plugins/fts-solr/Makefile.am +++ b/src/plugins/fts-solr/Makefile.am @@ -26,12 +26,39 @@ lib21_fts_solr_plugin_la_SOURCES = \ fts-backend-solr.c \ fts-backend-solr-old.c \ fts-solr-plugin.c \ + solr-response.c \ solr-connection.c -EXTRA_DIST = \ - solr-response.c - noinst_HEADERS = \ fts-solr-plugin.h \ solr-response.h \ solr-connection.h + +test_programs = \ + test-solr-response + +test_libs = \ + ../../lib-test/libtest.la \ + ../../lib-charset/libcharset.la \ + ../../lib/liblib.la \ + $(MODULE_LIBS) + +noinst_PROGRAMS = test-solr-response + +test_solr_response_CPPFLAGS = \ + $(AM_CPPFLAGS) \ + -I$(top_srcdir)/src/lib-test +test_solr_response_SOURCES = \ + solr-response.c \ + test-solr-response.c +test_solr_response_LDADD = \ + $(test_libs) -lexpat + +pkginc_libdir=$(pkgincludedir) +pkginc_lib_HEADERS = $(headers) + +check: check-am check-test +check-test: all-am + for bin in $(test_programs); do \ + if ! $(RUN_TEST) ./$$bin; then exit 1; fi; \ + done diff --git a/src/plugins/fts-solr/solr-connection.c b/src/plugins/fts-solr/solr-connection.c index 4ee5482161..ae720b5e28 100644 --- a/src/plugins/fts-solr/solr-connection.c +++ b/src/plugins/fts-solr/solr-connection.c @@ -14,17 +14,15 @@ #include -struct solr_response_parser; - struct solr_lookup_context { - struct solr_connection *conn; - + pool_t result_pool; struct istream *payload; struct io *io; int request_status; struct solr_response_parser *parser; + struct solr_result **results; }; struct solr_connection_post { @@ -49,8 +47,6 @@ struct solr_connection { bool http_ssl:1; }; -#include "solr-response.c" - /* Regardless of the specified URL, make sure path ends in '/' */ static char *solr_connection_create_http_base_url(struct http_url *http_url) { @@ -128,27 +124,24 @@ void solr_connection_deinit(struct solr_connection **_conn) static void solr_connection_payload_input(struct solr_lookup_context *lctx) { - const unsigned char *data; - size_t size; int ret; /* read payload */ - while ((ret = i_stream_read_more(lctx->payload, &data, &size)) > 0) { - (void)solr_xml_parse(lctx->parser, data, size, FALSE); - i_stream_skip(lctx->payload, size); - } + ret = solr_response_parse(lctx->parser, &lctx->results); if (ret == 0) { /* we will be called again for more data */ } else { if (lctx->payload->stream_errno != 0) { + i_assert(ret < 0); i_error("fts_solr: " "failed to read payload from HTTP server: %s", i_stream_get_error(lctx->payload)); - lctx->request_status = -1; } + if (ret < 0) + lctx->request_status = -1; + solr_response_parser_deinit(&lctx->parser); io_remove(&lctx->io); - i_stream_unref(&lctx->payload); } } @@ -169,7 +162,8 @@ solr_connection_select_response(const struct http_response *response, return; } - i_stream_ref(response->payload); + lctx->parser = solr_response_parser_init(lctx->result_pool, + response->payload); lctx->payload = response->payload; lctx->io = io_add_istream(response->payload, solr_connection_payload_input, lctx); @@ -179,17 +173,12 @@ solr_connection_select_response(const struct http_response *response, int solr_connection_select(struct solr_connection *conn, const char *query, pool_t pool, struct solr_result ***box_results_r) { - struct solr_response_parser parser; struct solr_lookup_context lctx; struct http_client_request *http_req; const char *url; - int parse_ret; i_zero(&lctx); - lctx.conn = conn; - - solr_response_parser_init(&parser, pool); - lctx.parser = &parser; + lctx.result_pool = pool; i_free_and_null(conn->http_failure); url = t_strconcat(conn->http_base_url, "select?", query, NULL); @@ -209,16 +198,11 @@ int solr_connection_select(struct solr_connection *conn, const char *query, lctx.request_status = 0; http_client_wait(solr_http_client); - if (lctx.request_status < 0 || - parser.content_state == SOLR_XML_CONTENT_STATE_ERROR) + if (lctx.request_status < 0) return -1; - parse_ret = solr_xml_parse(&parser, "", 0, TRUE); - solr_response_parser_deinit(&parser); - - array_append_zero(&parser.results); - *box_results_r = array_front_modifiable(&parser.results); - return parse_ret; + *box_results_r = lctx.results; + return 0; } static void diff --git a/src/plugins/fts-solr/solr-response.c b/src/plugins/fts-solr/solr-response.c index e0022c1ac6..1a16b69e0a 100644 --- a/src/plugins/fts-solr/solr-response.c +++ b/src/plugins/fts-solr/solr-response.c @@ -1,6 +1,10 @@ /* Copyright (c) 2006-2018 Dovecot authors, see the included COPYING file */ #include "lib.h" +#include "array.h" +#include "hash.h" +#include "str.h" +#include "istream.h" #include "solr-response.h" #include @@ -25,6 +29,7 @@ enum solr_xml_content_state { struct solr_response_parser { XML_Parser xml_parser; + struct istream *input; enum solr_xml_response_state state; enum solr_xml_content_state content_state; @@ -284,10 +289,12 @@ static void solr_lookup_xml_data(void *context, const char *str, int len) } } -void solr_response_parser_init(struct solr_response_parser *parser, - pool_t result_pool) +struct solr_response_parser * +solr_response_parser_init(pool_t result_pool, struct istream *input) { - i_zero(parser); + struct solr_response_parser *parser; + + parser = i_new(struct solr_response_parser, 1); parser->xml_parser = XML_ParserCreate("UTF-8"); if (parser->xml_parser == NULL) { @@ -299,17 +306,69 @@ void solr_response_parser_init(struct solr_response_parser *parser, str_hash, strcmp); parser->result_pool = result_pool; + pool_ref(result_pool); p_array_init(&parser->results, result_pool, 32); + parser->input = input; + i_stream_ref(input); + parser->xml_failed = FALSE; XML_SetElementHandler(parser->xml_parser, solr_lookup_xml_start, solr_lookup_xml_end); XML_SetCharacterDataHandler(parser->xml_parser, solr_lookup_xml_data); XML_SetUserData(parser->xml_parser, parser); + + return parser; } -void solr_response_parser_deinit(struct solr_response_parser *parser) +void solr_response_parser_deinit(struct solr_response_parser **_parser) { + struct solr_response_parser *parser = *_parser; + + *_parser = NULL; + + if (parser == NULL) + return; + hash_table_destroy(&parser->mailboxes); XML_ParserFree(parser->xml_parser); + i_stream_unref(&parser->input); + pool_unref(&parser->result_pool); + i_free(parser); +} + +int solr_response_parse(struct solr_response_parser *parser, + struct solr_result ***box_results_r) +{ + const unsigned char *data; + size_t size; + int stream_errno, ret; + + i_assert(parser->input != NULL); + i_zero(box_results_r); + + /* read payload */ + while ((ret = i_stream_read_more(parser->input, &data, &size)) > 0) { + (void)solr_xml_parse(parser, data, size, FALSE); + i_stream_skip(parser->input, size); + } + + if (ret == 0) { + /* we will be called again for more data */ + return 0; + } + + stream_errno = parser->input->stream_errno; + i_stream_unref(&parser->input); + + if (parser->content_state == SOLR_XML_CONTENT_STATE_ERROR) + return -1; + if (stream_errno != 0) + return -1; + + ret = solr_xml_parse(parser, "", 0, TRUE); + + array_append_zero(&parser->results); + *box_results_r = array_front_modifiable(&parser->results); + return (ret == 0 ? 1 : -1); } diff --git a/src/plugins/fts-solr/solr-response.h b/src/plugins/fts-solr/solr-response.h index aabc9ba116..1d5cdd5ee2 100644 --- a/src/plugins/fts-solr/solr-response.h +++ b/src/plugins/fts-solr/solr-response.h @@ -13,8 +13,11 @@ struct solr_result { ARRAY_TYPE(fts_score_map) scores; }; -void solr_response_parser_init(struct solr_response_parser *parser, - pool_t result_pool); -void solr_response_parser_deinit(struct solr_response_parser *parser); +struct solr_response_parser * +solr_response_parser_init(pool_t result_pool, struct istream *input); +void solr_response_parser_deinit(struct solr_response_parser **_parser); + +int solr_response_parse(struct solr_response_parser *parser, + struct solr_result ***box_results_r); #endif diff --git a/src/plugins/fts-solr/test-solr-response.c b/src/plugins/fts-solr/test-solr-response.c new file mode 100644 index 0000000000..630632d8f4 --- /dev/null +++ b/src/plugins/fts-solr/test-solr-response.c @@ -0,0 +1,277 @@ +/* Copyright (c) 2019 Dovecot authors, see the included COPYING file */ + +#include "lib.h" +#include "str.h" +#include "array.h" +#include "istream.h" +#include "solr-response.h" +#include "test-common.h" + +#include + +static bool debug = FALSE; + +struct solr_response_test_result { + const char *box_id; + struct fts_score_map *scores; +}; + +struct solr_response_test { + const char *input; + + struct solr_response_test_result *results; +}; + +struct fts_score_map test_results1_scores[] = { + { .score = 0.042314477, .uid = 1 }, + { .score = 0.06996078, .uid = 2, }, + { .score = 0.020381179, .uid = 3 }, + { .score = 0.020381179, .uid = 4 }, + { .score = 5.510487E-4, .uid = 6 }, + { .score = 0.0424253, .uid = 7 }, + { .score = 0.04215967, .uid = 8 }, + { .score = 0.02470572, .uid = 9 }, + { .score = 0.05936369, .uid = 10 }, + { .score = 0.048221838, .uid = 11 }, + { .score = 7.793006E-4, .uid = 12 }, + { .score = 2.7900032E-4, .uid = 13 }, + { .score = 0.02088323, .uid = 14 }, + { .score = 0.011646388, .uid = 15 }, + { .score = 1.3776218E-4, .uid = 17 }, + { .score = 2.386111E-4, .uid = 19 }, + { .score = 2.7552436E-4, .uid = 20 }, + { .score = 4.772222E-4, .uid = 23 }, + { .score = 4.772222E-4, .uid = 24 }, + { .score = 5.965277E-4, .uid = 25 }, + { .score = 0.0471366, .uid = 26 }, + { .score = 0.0471366, .uid = 50 }, + { .score = 0.047274362, .uid = 51 }, + { .score = 0.053303234, .uid = 56 }, + { .score = 5.445528E-4, .uid = 62 }, + { .score = 2.922377E-4, .uid = 66 }, + { .score = 0.02623833, .uid = 68 }, + { .score = 3.4440547E-4, .uid = 70 }, + { .score = 2.922377E-4, .uid = 74 }, + { .score = 2.7552436E-4, .uid = 76 }, + { .score = 1.3776218E-4, .uid = 77 }, + { .score = 0, .uid = 0 }, +}; + +struct solr_response_test_result test_results1[] = { + { + .box_id = "", + .scores = test_results1_scores, + }, + { + .box_id = NULL + } +}; + +static const struct solr_response_test tests[] = { + { + .input = + "\n" + "\n" + "03xmluid,score4023uid asc{!luc" + "ene q.op=AND}subject:pierreserveur OR from:pierre" + "serveur OR to:pierreserveur OR cc:pierreserveur O" + "R bcc:pierreserveur OR body:pierreserveur+box:fa74101044cb607d5f0900001de14" + "712 +user:jpierreserveur" + "0.04231447710.06996078<" + "long name=\"uid\">20.0203811793" + "0.0203811" + "794<" + "float name=\"score\">5.510487E-460.042425370.0421596780.0247057290.05936369100.048221838117" + ".793006E-4122.7900032E-4130.02088323140.0116" + "46388151.3776218E-4172.386111E-419<" + "/long>2.7552436E" + "-420" + "4.772222E-4234.772222E-4245.965277E-4250.0471366260.0" + "47136650<" + "doc>0.047274362510.05330323456<" + "/long>5.445528E-" + "462<" + "float name=\"score\">2.922377E-4660.0262383368" + "3.4440547E-4702.922377E-4742." + "7552436E-4761.3776218E-477\n" + "\n", + .results = test_results1, + }, +}; + +static const unsigned tests_count = N_ELEMENTS(tests); + +static void +test_solr_result(const struct solr_response_test_result *test_results, + struct solr_result **parse_results) +{ + unsigned int rcount, i; + + for (i = 0; test_results[i].box_id != NULL; i++); + rcount = i; + + for (i = 0; parse_results[i] != NULL; i++); + + test_out_quiet("result count equal", i == rcount); + if (test_has_failed()) + return; + + for (i = 0; i < rcount; i++) { + unsigned int scount, j; + const struct fts_score_map *tscores = test_results[i].scores; + const struct fts_score_map *pscores = + array_get(&parse_results[i]->scores, &scount); + + test_out_quiet(t_strdup_printf("box id equal[%u]", i), + strcmp(test_results[i].box_id, + parse_results[i]->box_id) == 0); + + for (j = 0; tscores[j].uid != 0; j++); + test_out_quiet(t_strdup_printf("scores count equal[%u]", i), + j == scount); + if (j != scount) + continue; + + for (j = 0; j < scount; j++) { + test_out_quiet( + t_strdup_printf("score uid equal[%u/%u]", i, j), + pscores[j].uid == tscores[j].uid); + test_out_quiet( + t_strdup_printf("score value equal[%u/%u]", i, j), + pscores[j].score == tscores[j].score); + } + } +} + +static void test_solr_response_parser(void) +{ + unsigned int i; + + for (i = 0; i < tests_count; i++) T_BEGIN { + const struct solr_response_test *test; + const char *text; + unsigned int text_len; + struct istream *input; + struct solr_response_parser *parser; + struct solr_result **box_results; + const char *error = NULL; + pool_t pool; + int ret = 0; + + test = &tests[i]; + text = test->input; + text_len = strlen(text); + + test_begin(t_strdup_printf("solr response [%d]", i)); + + input = test_istream_create_data(text, text_len); + pool = pool_alloconly_create("solr response", 4096); + parser = solr_response_parser_init(pool, input); + + ret = solr_response_parse(parser, &box_results); + + test_out_reason("parse ok (buffer)", ret > 0, error); + if (ret > 0) + test_solr_result(test->results, box_results); + + solr_response_parser_deinit(&parser); + pool_unref(&pool); + i_stream_unref(&input); + + test_end(); + + } T_END; +} + +static void test_solr_response_file(const char *file) +{ + pool_t pool; + struct istream *input; + struct solr_response_parser *parser; + struct solr_result **box_results; + int ret = 0; + + pool = pool_alloconly_create("solr response", 4096); + input = i_stream_create_file(file, 1024); + parser = solr_response_parser_init(pool, input); + + while ((ret = solr_response_parse(parser, &box_results)) == 0); + + if (ret < 0) + i_fatal("Failed to read response"); + + solr_response_parser_deinit(&parser); + i_stream_unref(&input); + pool_unref(&pool); +} + +int main(int argc, char *argv[]) +{ + int c; + + static void (*test_functions[])(void) = { + test_solr_response_parser, + NULL + }; + + while ((c = getopt(argc, argv, "D")) > 0) { + switch (c) { + case 'D': + debug = TRUE; + break; + default: + i_fatal("Usage: %s [-D]", argv[0]); + } + } + argc -= optind; + argv += optind; + + if (argc > 0) { + test_solr_response_file(argv[0]); + return 0; + } + + return test_run(test_functions); +} + +