From e6a3ff699fecb7d6fd83c037119101be676d06f6 Mon Sep 17 00:00:00 2001
From: Alex Rousskov <rousskov@measurement-factory.com>
Date: Sat, 9 Dec 2023 04:46:55 +0000
Subject: [PATCH] Bug 5274: Successful tunnels logged as TCP_TUNNEL/500 (#1608)

Stop calling retryOrBail() when the tunneled Squid-server connection
(that we have committed to use) closes. Our retryOrBail() is dedicated
to handling errors. Most[^1] serverClosed() calls are _not_ related to
errors because our tunneling code abuses asynchronous connection closure
callbacks for TunnelStateData work termination. Depending on the
transaction details (e.g., TLS interception vs. true CONNECT), calling
retryOrBail() on these no-error code paths may result in retryOrBail()
"catch all other errors" code creating bogus ERR_CANNOT_FORWARD errors.

Most tunneling errors are already detailed, and retryOrBail() does not
have enough information to correctly detail the remaining ones anyway.
Removing this retryOrBail() call selects the arguably lesser evil.

The client-Squid connection closure callback, clientClosed(), already
uses the same logic.

This change does not resurrect Bug 5132 fixed by commit 752fa20 that
added the now-replaced retryOrBail() call to serverClosed(). That commit
fixed the leak by calling deleteThis() (via retryOrBail()). Our
finishWritingAndDelete() call preserves that logic. That commit also
claimed to allow more retries, but that claim was a mistake: To-server
closure callback registration (e.g. commitToServer()) bans retries.

[^1]: The fact that severClosed() is called for both successful and
problematic outcomes prevents TunnelStateData from properly handling
certain (rare) errors. We tried to fix that as well, but the changes
quickly snowballed, so we left a few XXXs instead.
---
 src/tunnel.cc | 54 ++++++++++++++++++++++++++++++++++++++++-----------
 1 file changed, 43 insertions(+), 11 deletions(-)

diff --git a/src/tunnel.cc b/src/tunnel.cc
index af3d50ffeb..72487ba55e 100644
--- a/src/tunnel.cc
+++ b/src/tunnel.cc
@@ -260,6 +260,7 @@ private:
     /// resumes operations after the (possibly failed) HTTP CONNECT exchange
     void tunnelEstablishmentDone(Http::TunnelerAnswer &answer);
 
+    void finishWritingAndDelete(Connection &);
     void deleteThis();
 
     void cancelStep(const char *reason);
@@ -309,7 +310,9 @@ TunnelStateData::serverClosed()
 {
     server.noteClosure();
 
-    retryOrBail(__FUNCTION__);
+    request->hier.stopPeerClock(false);
+
+    finishWritingAndDelete(client);
 }
 
 /// TunnelStateData::clientClosed() wrapper
@@ -324,12 +327,48 @@ void
 TunnelStateData::clientClosed()
 {
     client.noteClosure();
+    finishWritingAndDelete(server);
+}
 
+/// Gracefully handles non-retriable connection closure. If necessary, either
+/// starts closing the given connection or waits for its pending write to
+/// finish. Otherwise, immediately destroys the tunnel object.
+/// \prec The other Connection is not open.
+void
+TunnelStateData::finishWritingAndDelete(Connection &remainingConnection)
+{
     if (noConnections())
         return deleteThis();
 
-    if (!server.writer)
-        server.conn->close();
+    // XXX: The code below should precede the noConnections() check above. When
+    // there is no writer, we should trigger an immediate noConnections()
+    // outcome instead of waiting for an asynchronous call to our own closure
+    // callback (that will call this method again). We should not move this code
+    // until a noConnections() outcome guarantees a nil writer because such a
+    // move will unnecessary delay deleteThis().
+
+    if (remainingConnection.writer) {
+        debugs(26, 5, "waiting to finish writing to " << remainingConnection.conn);
+        // the write completion callback must close its remainingConnection
+        // after noticing that the other connection is gone
+        return;
+    }
+
+    // XXX: Stop abusing connection closure callback for terminating tunneling
+    // in cases like this, where our code knows that tunneling must end. The
+    // closure callback should be dedicated to handling rare connection closures
+    // originated _outside_ of TunnelStateData (e.g., during shutdown). In all
+    // other cases, our own close()-calling code must detail the
+    // closure-triggering error (if any) _and_ clear all callbacks: Our code
+    // does not need to be (asynchronously) notified of the closure that it
+    // itself has initiated! Until that (significant) refactoring,
+    // serverClosed() and clientClosed() callbacks will continue to mishandle
+    // those rare closures as regular ones, and access.log records will continue
+    // to lack some tunneling error indicators/details.
+    //
+    // This asynchronous close() leads to another finishWritingAndDelete() call
+    // but with true noConnections() that finally triggers deleteThis().
+    remainingConnection.conn->close();
 }
 
 /// destroys the tunnel (after performing potentially-throwing cleanup)
@@ -452,14 +491,7 @@ TunnelStateData::retryOrBail(const char *context)
         return sendError(savedError, bailDescription ? bailDescription : context);
     *status_ptr = savedError->httpStatus;
 
-    if (noConnections())
-        return deleteThis();
-
-    // This is a "Comm::IsConnOpen(client.conn) but !canSendError" case.
-    // Closing the connection (after finishing writing) is the best we can do.
-    if (!client.writer)
-        client.conn->close();
-    // else writeClientDone() must notice a closed server and close the client
+    finishWritingAndDelete(client);
 }
 
 int
-- 
2.39.5