]> git.ipfire.org Git - thirdparty/tor.git/commitdiff
Fix cases where edge connections can stall.
authorDavid Goulet <dgoulet@torproject.org>
Sun, 2 Apr 2023 21:18:25 +0000 (21:18 +0000)
committerMike Perry <mikeperry-git@torproject.org>
Thu, 6 Apr 2023 15:57:11 +0000 (15:57 +0000)
We discovered two cases where edge connections can stall during testing:
  1. Due to final data sitting in the edge inbuf when it was resumed
  2. Due to flag synchronization between the token bucket and XON/XOFF

The first issue has always existed in C-Tor, but we were able to tickle it
in scp testing. If the last data from the protocol is able to fit in the
inbuf, but not large enough to send, if an XOFF or connection block comes in
at exactly that point, when the edge connection resumes, there will be no
data to read from the socket, but the inbuf can just sit there, never
draining.

We noticed the second issue along the way to finding the first. It seems
wrong, but it didn't seem to affect anything in practice.

These are extremely rare in normal operation, but with conflux, XON/XOFF
activity is more common, so we hit these.

Signed-off-by: David Goulet <dgoulet@torproject.org>
src/core/mainloop/connection.c
src/core/mainloop/connection.h
src/core/mainloop/mainloop.c

index 7204b69e54b44e2ccaa14eb9d1384ec86aaf1d76..5a769f38bec6f45e5c1a4aee47f3c401af37df5b 100644 (file)
@@ -187,7 +187,6 @@ static int connection_reached_eof(connection_t *conn);
 static int connection_buf_read_from_socket(connection_t *conn,
                                            ssize_t *max_to_read,
                                            int *socket_error);
-static int connection_process_inbuf(connection_t *conn, int package_partial);
 static void client_check_address_changed(tor_socket_t sock);
 static void set_constrained_socket_buffers(tor_socket_t sock, int size);
 
@@ -3744,9 +3743,16 @@ void
 connection_read_bw_exhausted(connection_t *conn, bool is_global_bw)
 {
   (void)is_global_bw;
-  conn->read_blocked_on_bw = 1;
-  connection_stop_reading(conn);
-  reenable_blocked_connection_schedule();
+  // Double-calls to stop-reading are correlated with stalling for
+  // ssh uploads. Might as well prevent this from happening,
+  // especially the read_blocked_on_bw flag. That was clearly getting
+  // set when it should not be, during an already-blocked XOFF
+  // condition.
+  if (!CONN_IS_EDGE(conn) || !TO_EDGE_CONN(conn)->xoff_received) {
+    conn->read_blocked_on_bw = 1;
+    connection_stop_reading(conn);
+    reenable_blocked_connection_schedule();
+  }
 }
 
 /**
@@ -3923,10 +3929,17 @@ reenable_blocked_connections_cb(mainloop_event_t *ev, void *arg)
   (void)ev;
   (void)arg;
   SMARTLIST_FOREACH_BEGIN(get_connection_array(), connection_t *, conn) {
-    if (conn->read_blocked_on_bw == 1) {
+    /* For conflux, we noticed logs of connection_start_reading() called
+     * multiple times while we were blocked from a previous XOFF, and this
+     * was log was correlated with stalls during ssh uploads. So we added
+     * this additional check, to avoid connection_start_reading() without
+     * getting an XON. The most important piece is always allowing
+     * the read_blocked_on_bw to get cleared, either way. */
+    if (conn->read_blocked_on_bw == 1 &&
+        (!CONN_IS_EDGE(conn) || !TO_EDGE_CONN(conn)->xoff_received)) {
       connection_start_reading(conn);
-      conn->read_blocked_on_bw = 0;
     }
+    conn->read_blocked_on_bw = 0;
     if (conn->write_blocked_on_bw == 1) {
       connection_start_writing(conn);
       conn->write_blocked_on_bw = 0;
@@ -5198,7 +5211,7 @@ set_constrained_socket_buffers(tor_socket_t sock, int size)
  * connection_*_process_inbuf() function. It also passes in
  * package_partial if wanted.
  */
-static int
+int
 connection_process_inbuf(connection_t *conn, int package_partial)
 {
   tor_assert(conn);
index 8b378b15a4fe1ddaf3cb9b203535da7499c083bb..88bf47a4976f83b7f58de68657873cf4d9f54e39 100644 (file)
@@ -256,6 +256,7 @@ int connection_wants_to_flush(struct connection_t *conn);
 int connection_outbuf_too_full(struct connection_t *conn);
 int connection_handle_write(struct connection_t *conn, int force);
 int connection_flush(struct connection_t *conn);
+int connection_process_inbuf(struct connection_t *conn, int package_partial);
 
 MOCK_DECL(void, connection_write_to_buf_impl_,
           (const char *string, size_t len, struct connection_t *conn,
index e1c9786b2ecf9062c1dfe562b7d7d8640dd4f068..3702f3a74ed19d6ce5dd31dc20dcb1f8587478f6 100644 (file)
@@ -645,6 +645,16 @@ connection_start_reading,(connection_t *conn))
                "to watched: %s",
                (int)conn->s,
                tor_socket_strerror(tor_socket_errno(conn->s)));
+
+    /* Process the inbuf if it is not empty because the only way to empty it is
+     * through a read event or a SENDME which might not come if the package
+     * window is proper or if the application has nothing more for us to read.
+     *
+     * If this is not done here, we risk having data lingering in the inbuf
+     * forever. */
+    if (conn->inbuf && buf_datalen(conn->inbuf) > 0) {
+      connection_process_inbuf(conn, 1);
+    }
   }
 }