]> git.ipfire.org Git - thirdparty/dovecot/core.git/commitdiff
lib-http: client: Fixed peer reconnection failure handling.
authorStephan Bosch <stephan.bosch@dovecot.fi>
Thu, 2 Feb 2017 00:36:50 +0000 (01:36 +0100)
committerGitLab <gitlab@git.dovecot.net>
Thu, 2 Feb 2017 17:07:15 +0000 (19:07 +0200)
The addressed problem occurs in a very specific situation in which the original successful connection is dropped, yet a new connection fails.
It manifests as an assertion failure or panic:

Panic: file ioloop-epoll.c: line 189 (io_loop_handler_run_internal): assertion failed: (msecs >= 0)
Panic: BUG: No IOs or timeouts set. Not waiting for infinity.

The timing is very critical. However, this doesn't mean that the occurrence of this problem is very unlikely; it can happen frequently under high load.

src/lib-http/http-client-queue.c
src/lib-http/test-http-client-errors.c

index bf1abb538da18b8c477815b1cb197d2e25ca3159..ba3e668d5f6bcb2c04ab9279d1e662d3da943890 100644 (file)
@@ -478,18 +478,6 @@ http_client_queue_connection_failure(struct http_client_queue *queue,
        struct http_client_peer *failed_peer;
        struct http_client_peer *const *peer_idx;
 
-       if (queue->cur_peer != NULL) {
-               /* The peer still has some working connections, which means that
-                  pending requests wait until they're picked up by those connections
-                  or the remaining connections fail as well. In the latter case,
-                  connecting to different peer can resolve the situation, but only
-                  if there is more than one IP. In any other case, the requests will
-                  eventually fail. In the future we could start connections to the next
-                  IP at this point already, but that is no small change. */
-               i_assert(array_count(&queue->pending_peers) == 0);
-               return;
-       }
-
        http_client_queue_debug(queue,
                "Failed to set up connection to %s%s: %s "
                "(%u peers pending, %u requests pending)",
@@ -499,25 +487,44 @@ http_client_queue_connection_failure(struct http_client_queue *queue,
                reason, array_count(&queue->pending_peers),
                array_count(&queue->requests));
 
-       /* we're still doing the initial connections to this hport. if
-                we're also doing parallel connections with soft timeouts
-                (pending_peer_count>1), wait for them to finish
-                first. */
-       failed_peer = NULL;
-       array_foreach(&queue->pending_peers, peer_idx) {
-               if (http_client_peer_addr_cmp(&(*peer_idx)->addr, addr) == 0) {
-                       failed_peer = *peer_idx;
-                       array_delete(&queue->pending_peers,
-                               array_foreach_idx(&queue->pending_peers, peer_idx), 1);
-                       break;
+       if (queue->cur_peer != NULL) {
+               if (http_client_peer_is_connected(queue->cur_peer)) {
+                       /* The peer still has some working connections, which means that
+                                pending requests wait until they're picked up by those connections
+                                or the remaining connections fail as well. In the latter case,
+                                connecting to different peer can resolve the situation, but only
+                                if there is more than one IP. In any other case, the requests will
+                                eventually fail. In the future we could start connections to the next
+                                IP at this point already, but that is no small change. */
+                       i_assert(array_count(&queue->pending_peers) == 0);
+                       return;
+               }
+
+               failed_peer = queue->cur_peer;
+               http_client_peer_unlink_queue(queue->cur_peer, queue);
+               queue->cur_peer = NULL;
+
+       } else {
+               /* we're still doing the initial connections to this hport. if
+                        we're also doing parallel connections with soft timeouts
+                        (pending_peer_count>1), wait for them to finish
+                        first. */
+               failed_peer = NULL;
+               array_foreach(&queue->pending_peers, peer_idx) {
+                       if (http_client_peer_addr_cmp(&(*peer_idx)->addr, addr) == 0) {
+                               failed_peer = *peer_idx;
+                               array_delete(&queue->pending_peers,
+                                       array_foreach_idx(&queue->pending_peers, peer_idx), 1);
+                               break;
+                       }
+               }
+               i_assert(failed_peer != NULL);
+               if (array_count(&queue->pending_peers) > 0) {
+                       http_client_queue_debug(queue,
+                               "Waiting for remaining pending peers.");
+                       http_client_peer_unlink_queue(failed_peer, queue);
+                       return;
                }
-       }
-       i_assert(failed_peer != NULL);
-       if (array_count(&queue->pending_peers) > 0) {
-               http_client_queue_debug(queue,
-                       "Waiting for remaining pending peers.");
-               http_client_peer_unlink_queue(failed_peer, queue);
-               return;
        }
 
        /* one of the connections failed. if we're not using soft timeouts,
index bae89ba136190a359d94d1bf88d6204dbb0fe707..59c6ebb892b2c686285becbd36b8492af4f618ca 100644 (file)
@@ -2453,6 +2453,157 @@ static void test_peer_reuse_failure(void)
        test_end();
 }
 
+/*
+ * Reconnect failure
+ */
+
+/* dns */
+
+static void
+test_dns_reconnect_failure_input(struct server_connection *conn)
+{
+       static unsigned int count = 0;
+       const char *line;
+
+       while ((line=i_stream_read_next_line(conn->conn.input)) != NULL) {
+               if (debug)
+                       i_debug("DNS REQUEST %u: %s", count, line);
+
+               if (count == 0) {
+                       o_stream_nsend_str(conn->conn.output,
+                               "0 1\n127.0.0.1\n");
+               } else {
+                       o_stream_nsend_str(conn->conn.output,
+                               t_strdup_printf("%d\n", EAI_FAIL));
+                       if (count > 4) {
+                               server_connection_deinit(&conn);
+                               return;
+                       }
+               }
+               count++;
+       }
+}
+
+static void test_dns_reconnect_failure(void)
+{
+       test_server_input = test_dns_reconnect_failure_input;
+       test_server_run(0);
+}
+/* server */
+
+static void
+test_reconnect_failure_input(struct server_connection *conn)
+{
+       static const char *resp =
+               "HTTP/1.1 200 OK\r\n"
+               "Content-Length: 18\r\n"
+               "\r\n"
+               "Everything is OK\r\n";
+
+       o_stream_nsend_str(conn->conn.output, resp);
+       i_close_fd(&fd_listen);
+       sleep(500);
+}
+
+static void test_server_reconnect_failure(unsigned int index)
+{
+       test_server_input = test_reconnect_failure_input;
+       test_server_run(index);
+}
+
+/* client */
+
+struct _reconnect_failure_ctx {
+       struct timeout *to;
+};
+
+static void
+test_client_reconnect_failure_response2(
+       const struct http_response *resp,
+       struct _reconnect_failure_ctx *ctx)
+{
+       if (debug)
+               i_debug("RESPONSE: %u %s", resp->status, resp->reason);
+
+       test_assert(resp->status == 9002);
+       test_assert(resp->reason != NULL && *resp->reason != '\0');
+
+       io_loop_stop(ioloop);
+       i_free(ctx);
+}
+
+static void
+test_client_reconnect_failure_next(
+       struct _reconnect_failure_ctx *ctx)
+{
+       struct http_client_request *hreq;
+
+       if (debug)
+               i_debug("NEXT REQUEST");
+
+       timeout_remove(&ctx->to);
+
+       hreq = http_client_request(http_client,
+               "GET", "example.com", "/reconnect-failure-2.txt",
+               test_client_reconnect_failure_response2, ctx);
+       http_client_request_set_port(hreq, bind_ports[0]);
+       http_client_request_submit(hreq);
+}
+
+static void
+test_client_reconnect_failure_response1(
+       const struct http_response *resp,
+       struct _reconnect_failure_ctx *ctx)
+{
+       if (debug)
+               i_debug("RESPONSE: %u %s", resp->status, resp->reason);
+
+       test_assert(resp->status == 200);
+       test_assert(resp->reason != NULL && *resp->reason != '\0');
+
+       ctx->to = timeout_add_short(999,
+               test_client_reconnect_failure_next, ctx);
+}
+
+static bool
+test_client_reconnect_failure(const struct http_client_settings *client_set)
+{
+       struct http_client_request *hreq;
+       struct _reconnect_failure_ctx *ctx;
+
+       ctx = i_new(struct _reconnect_failure_ctx, 1);
+       
+       http_client = http_client_init(client_set);
+
+       hreq = http_client_request(http_client,
+               "GET", "example.com", "/reconnect-failure-1.txt",
+               test_client_reconnect_failure_response1, ctx);
+       http_client_request_set_port(hreq, bind_ports[0]);
+       http_client_request_submit(hreq);
+
+       return TRUE;
+}
+
+/* test */
+
+static void test_reconnect_failure(void)
+{
+       struct http_client_settings http_client_set;
+
+       test_client_defaults(&http_client_set);
+       http_client_set.dns_client_socket_path = "./dns-test";
+       http_client_set.dns_ttl_msecs = 2000;
+       http_client_set.max_idle_time_msecs = 1000;
+       http_client_set.max_attempts = 1;
+       http_client_set.request_timeout_msecs = 1000;
+
+       test_begin("reconnect failure");
+       test_run_client_server(&http_client_set,
+               test_client_reconnect_failure,
+               test_server_reconnect_failure, 1,
+               test_dns_reconnect_failure);
+       test_end();
+}
 
 /*
  * All tests
@@ -2482,6 +2633,7 @@ static void (*const test_functions[])(void) = {
        test_dns_lookup_failure,
        test_dns_lookup_ttl,
        test_peer_reuse_failure,
+       test_reconnect_failure,
        NULL
 };