From: Eduard Bagdasaryan Date: Sun, 30 Jun 2019 10:57:58 +0000 (+0000) Subject: Prevent TLS transaction stalls by preserving flags.read_pending (#423) X-Git-Tag: SQUID_5_0_1~74 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=ed4c68636f77c402436c7580199cf158606d09d3;p=thirdparty%2Fsquid.git Prevent TLS transaction stalls by preserving flags.read_pending (#423) DoSelect() cleared flags.read_pending just like it clears the read handler, but that was wrong: Unlike the read handler, the "pending read" state is not managed by the read _handler_. It is managed by the reading _method_. DoSelect() calls read handler so it has the right to clear the handler before that call -- the called handler is given an opportunity to set another read handler as needed. DoSelect() does not call the reading method. In most cases, the reading method is called by the reading handler so the code appears to "work". However, if the reading handler has to defer the read, it will not call the reading method. Deferred reads are naturally ignorant about the high-level read_pending flag so they cannot (and should not) attempt to preserve that flag. The flag and the associated "I have buffered data to give to the reader" state get lost. When the deferred read is kicked, it starts waiting for socket I/O. The loss of the flag usually leads to (minor) response time increases because the peer socket will have more data available for reading, triggering a call to the read handler which (this time) calls the reading method which supplies the previously buffered data. Thus, the code usually appears to "work" even when the bug is triggered. However, when the buffered data contained the _last_ response byte, the peer socket often remains idle (not ready for reading), the transaction gets stalled and eventually timeouts/aborts without delivering the last byte(s) to the client. Such bugs have been observed in the wild. This change delegates read_pending management to the reading method. When the method is not called (e.g., because reading is deferred), the flag remains set and continues to reflect the current reading state. When the reading method is "downgraded" from tls_read_method() to default_read_method(), the new code clears flags.read_pending to avoid endless "this FD is ready for reading" looping. The downgrade itself is a dangerous operation because it may result in the loss of the already buffered data. I hope Squid does not lose data when downgrading (and added comments to justify that hope), but I cannot guarantee that. This change itself does not introduce data losses. I also made fde I/O methods private to improve our chances of preventing a similar bug if future code changes carelessly reset them. --- diff --git a/src/client_side.cc b/src/client_side.cc index 0510409760..4b885ce992 100644 --- a/src/client_side.cc +++ b/src/client_side.cc @@ -3058,11 +3058,10 @@ ConnStateData::splice() { // normally we can splice here, because we just got client hello message - if (fd_table[clientConnection->fd].ssl.get()) { - // Restore default read methods - fd_table[clientConnection->fd].read_method = &default_read_method; - fd_table[clientConnection->fd].write_method = &default_write_method; - } + // fde::ssl/tls_read_method() probably reads from our own inBuf. If so, then + // we should not lose any raw bytes when switching to raw I/O here. + if (fd_table[clientConnection->fd].ssl.get()) + fd_table[clientConnection->fd].useDefaultIo(); // XXX: assuming that there was an HTTP/1.1 CONNECT to begin with... // reset the current protocol to HTTP/1.1 (was "HTTPS" for the bumping process) diff --git a/src/comm/ModDevPoll.cc b/src/comm/ModDevPoll.cc index 2ee4d790b7..15845d3d8d 100644 --- a/src/comm/ModDevPoll.cc +++ b/src/comm/ModDevPoll.cc @@ -384,7 +384,6 @@ Comm::DoSelect(int msec) HERE << "Calling read handler on FD " << fd ); PROF_start(comm_read_handler); - F->flags.read_pending = 0; F->read_handler = NULL; hdl(fd, F->read_data); PROF_stop(comm_read_handler); diff --git a/src/comm/ModEpoll.cc b/src/comm/ModEpoll.cc index 976059d9ea..b456fdfaa3 100644 --- a/src/comm/ModEpoll.cc +++ b/src/comm/ModEpoll.cc @@ -263,7 +263,6 @@ Comm::DoSelect(int msec) if ((hdl = F->read_handler) != NULL) { debugs(5, DEBUG_EPOLL ? 0 : 8, HERE << "Calling read handler on FD " << fd); PROF_start(comm_write_handler); - F->flags.read_pending = 0; F->read_handler = NULL; hdl(fd, F->read_data); PROF_stop(comm_write_handler); diff --git a/src/comm/ModKqueue.cc b/src/comm/ModKqueue.cc index 4cb99eb5d2..0fb9188427 100644 --- a/src/comm/ModKqueue.cc +++ b/src/comm/ModKqueue.cc @@ -262,7 +262,6 @@ Comm::DoSelect(int msec) if (ke[i].filter == EVFILT_READ || F->flags.read_pending) { if ((hdl = F->read_handler) != NULL) { F->read_handler = NULL; - F->flags.read_pending = 0; hdl(fd, F->read_data); } } diff --git a/src/comm/ModPoll.cc b/src/comm/ModPoll.cc index 2c3a9041a0..cd0d13b909 100644 --- a/src/comm/ModPoll.cc +++ b/src/comm/ModPoll.cc @@ -476,7 +476,6 @@ Comm::DoSelect(int msec) if ((hdl = F->read_handler)) { PROF_start(comm_read_handler); F->read_handler = NULL; - F->flags.read_pending = false; hdl(fd, F->read_data); PROF_stop(comm_read_handler); ++ statCounter.select_fds; diff --git a/src/comm/ModSelect.cc b/src/comm/ModSelect.cc index 594b239d0d..a7466677ac 100644 --- a/src/comm/ModSelect.cc +++ b/src/comm/ModSelect.cc @@ -512,7 +512,6 @@ Comm::DoSelect(int msec) (void) 0; else { F->read_handler = NULL; - F->flags.read_pending = 0; commUpdateReadBits(fd, NULL); hdl(fd, F->read_data); ++ statCounter.select_fds; diff --git a/src/comm/ModSelectWin32.cc b/src/comm/ModSelectWin32.cc index 7a655b0c53..f83fee1fc9 100644 --- a/src/comm/ModSelectWin32.cc +++ b/src/comm/ModSelectWin32.cc @@ -503,7 +503,6 @@ Comm::DoSelect(int msec) if ((hdl = F->read_handler)) { F->read_handler = NULL; - F->flags.read_pending = 0; commUpdateReadBits(fd, NULL); hdl(fd, F->read_data); ++ statCounter.select_fds; diff --git a/src/fd.cc b/src/fd.cc index 53f6403320..beb83cfcbd 100644 --- a/src/fd.cc +++ b/src/fd.cc @@ -208,15 +208,13 @@ fd_open(int fd, unsigned int type, const char *desc) case FD_SOCKET: case FD_PIPE: - F->read_method = &socket_read_method; - F->write_method = &socket_write_method; + F->setIo(&socket_read_method, &socket_write_method); break; case FD_FILE: case FD_LOG: - F->read_method = &file_read_method; - F->write_method = &file_write_method; + F->setIo(&file_read_method, &file_write_method); break; default: @@ -227,13 +225,11 @@ fd_open(int fd, unsigned int type, const char *desc) switch (type) { case FD_MSGHDR: - F->read_method = &msghdr_read_method; - F->write_method = &msghdr_write_method; + F->setIo(&msghdr_read_method, &msghdr_write_method); break; default: - F->read_method = &default_read_method; - F->write_method = &default_write_method; + F->setIo(&default_read_method, &default_write_method); break; } diff --git a/src/fde.cc b/src/fde.cc index 16326db690..3ffee5562a 100644 --- a/src/fde.cc +++ b/src/fde.cc @@ -6,10 +6,11 @@ * Please see the COPYING and CONTRIBUTORS files for details. */ -/* DEBUG: none FDE */ +/* DEBUG: section 05 Comm */ #include "squid.h" #include "comm/Read.h" +#include "Debug.h" #include "fd.h" #include "fde.h" #include "globals.h" @@ -18,6 +19,47 @@ fde *fde::Table = nullptr; +void +fde::setIo(READ_HANDLER *reader, WRITE_HANDLER *writer) +{ + assert(reader); + assert(writer); + assert(!flags.read_pending); // this method is only meant for new FDs + + readMethod_ = reader; + writeMethod_ = writer; +} + +void +fde::useDefaultIo() +{ + debugs(5, 7, "old read_pending=" << flags.read_pending); + + // Some buffering readers are using external-to-them buffers (e.g., inBuf) + // and might leave true flags.read_pending behind without losing data. We + // must clear the flag here because default I/O methods do not know about it + // and will leave it set forever, resulting in I/O loops. + flags.read_pending = false; + + readMethod_ = default_read_method; + writeMethod_ = default_write_method; +} + +/// use I/O methods that maintain an internal-to-them buffer +void +fde::useBufferedIo(READ_HANDLER *bufferingReader, WRITE_HANDLER *bufferingWriter) +{ + debugs(5, 7, "read_pending=" << flags.read_pending); + + assert(bufferingReader); + assert(bufferingWriter); + // flags.read_pending ought to be false here, but these buffering methods + // can handle a stale true flag so we do not check or reset it + + readMethod_ = bufferingReader; + writeMethod_ = bufferingWriter; +} + bool fde::readPending(int fdNumber) const { diff --git a/src/fde.h b/src/fde.h index 1021e9d362..0b57293acf 100644 --- a/src/fde.h +++ b/src/fde.h @@ -55,8 +55,8 @@ public: *desc = 0; read_handler = nullptr; write_handler = nullptr; - read_method = nullptr; - write_method = nullptr; + readMethod_ = nullptr; + writeMethod_ = nullptr; } /// Clear the fde class back to NULL equivalent. @@ -65,6 +65,19 @@ public: /// True if comm_close for this fd has been called bool closing() const { return flags.close_request; } + /// set I/O methods for a freshly opened descriptor + void setIo(READ_HANDLER *, WRITE_HANDLER *); + + /// Use default I/O methods. When called after useBufferedIo(), the caller + /// is responsible for any (unread or unwritten) buffered data. + void useDefaultIo(); + + /// use I/O methods that maintain an internal-to-them buffer + void useBufferedIo(READ_HANDLER *, WRITE_HANDLER *); + + int read(int fd, char *buf, int len) { return readMethod_(fd, buf, len); } + int write(int fd, const char *buf, int len) { return writeMethod_(fd, buf, len); } + /* NOTE: memset is used on fdes today. 20030715 RBC */ static void DumpStats(StoreEntry *); @@ -103,6 +116,7 @@ public: bool called_connect = false; bool nodelay = false; bool close_on_exec = false; + /// buffering readMethod_ has data to give (regardless of socket state) bool read_pending = false; //bool write_pending; //XXX seems not to be used bool transparent = false; @@ -133,8 +147,6 @@ public: void *lifetime_data = nullptr; AsyncCall::Pointer closeHandler; AsyncCall::Pointer halfClosedReader; /// read handler for half-closed fds - READ_HANDLER *read_method; - WRITE_HANDLER *write_method; Security::SessionPointer ssl; Security::ContextPointer dynamicTlsContext; ///< cached and then freed when fd is closed #if _SQUID_WINDOWS_ @@ -152,14 +164,28 @@ public: nfmarkToServer in that this is the value we *receive* from the, connection, whereas nfmarkToServer is the value to set on packets *leaving* Squid. */ + +private: + // I/O methods connect Squid to the device/stack/library fde represents + READ_HANDLER *readMethod_ = nullptr; ///< imports bytes into Squid + WRITE_HANDLER *writeMethod_ = nullptr; ///< exports Squid bytes }; #define fd_table fde::Table int fdNFree(void); -#define FD_READ_METHOD(fd, buf, len) (*fde::Table[fd].read_method)(fd, buf, len) -#define FD_WRITE_METHOD(fd, buf, len) (*fde::Table[fd].write_method)(fd, buf, len) +inline int +FD_READ_METHOD(int fd, char *buf, int len) +{ + return fd_table[fd].read(fd, buf, len); +} + +inline int +FD_WRITE_METHOD(int fd, const char *buf, int len) +{ + return fd_table[fd].write(fd, buf, len); +} #endif /* SQUID_FDE_H */ diff --git a/src/security/Session.cc b/src/security/Session.cc index 6698d26457..e60f82382e 100644 --- a/src/security/Session.cc +++ b/src/security/Session.cc @@ -159,9 +159,9 @@ CreateSession(const Security::ContextPointer &ctx, const Comm::ConnectionPointer #endif debugs(83, 5, "link FD " << fd << " to TLS session=" << (void*)session.get()); + fd_table[fd].ssl = session; - fd_table[fd].read_method = &tls_read_method; - fd_table[fd].write_method = &tls_write_method; + fd_table[fd].useBufferedIo(&tls_read_method, &tls_write_method); fd_note(fd, squidCtx); return true; } diff --git a/src/tunnel.cc b/src/tunnel.cc index c3bd96baa9..0406207a97 100644 --- a/src/tunnel.cc +++ b/src/tunnel.cc @@ -1174,8 +1174,8 @@ switchToTunnel(HttpRequest *request, Comm::ConnectionPointer &clientConn, Comm:: TunnelStateData *tunnelState = new TunnelStateData(context->http); - fd_table[clientConn->fd].read_method = &default_read_method; - fd_table[clientConn->fd].write_method = &default_write_method; + // tunnelStartShoveling() drains any buffered from-client data (inBuf) + fd_table[clientConn->fd].useDefaultIo(); request->hier.resetPeerNotes(srvConn, tunnelState->getHost()); @@ -1199,8 +1199,9 @@ switchToTunnel(HttpRequest *request, Comm::ConnectionPointer &clientConn, Comm:: AsyncCall::Pointer timeoutCall = commCbCall(5, 4, "tunnelTimeout", CommTimeoutCbPtrFun(tunnelTimeout, tunnelState)); commSetConnTimeout(srvConn, Config.Timeout.read, timeoutCall); - fd_table[srvConn->fd].read_method = &default_read_method; - fd_table[srvConn->fd].write_method = &default_write_method; + + // we drain any already buffered from-server data below (rBufData) + fd_table[srvConn->fd].useDefaultIo(); auto ssl = fd_table[srvConn->fd].ssl.get(); assert(ssl);