From: Thomas Markwalder Date: Wed, 6 Nov 2019 19:00:10 +0000 (-0500) Subject: [#964] Applied patch from master to fix HA stuck socket spin X-Git-Tag: Kea-1.6.1~8 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=4c081d0ba048ecf0e415a343049bfb6321fb558c;p=thirdparty%2Fkea.git [#964] Applied patch from master to fix HA stuck socket spin --- diff --git a/ChangeLog b/ChangeLog index 05542e0ab9..b7c4f8298f 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,15 @@ +1659. [bug]* tmark + Added logic to core code and HA hook lib to allow HA peers + to detect and handle out of bandwidth socket events. This + corrects a defect introduced in Kea 1.6.0 that can cause + an HA server to become unresponsive when an HA socket has + been closed by a peer. Note that there is a change to the + signature of the external socket callback handler invoked + by IfaceMgr. Custome hook libraries happen register external + sockets with IfaceMgr will require modification and + recompilation. + (Gitlab #964) + Kea 1.6.0 released on Aug 28, 2019 1658. [bug] tmark diff --git a/src/bin/dhcp4/dhcp4to6_ipc.cc b/src/bin/dhcp4/dhcp4to6_ipc.cc index 1c698abffd..b78f41e957 100644 --- a/src/bin/dhcp4/dhcp4to6_ipc.cc +++ b/src/bin/dhcp4/dhcp4to6_ipc.cc @@ -52,7 +52,7 @@ void Dhcp4to6Ipc::open() { } } -void Dhcp4to6Ipc::handler() { +void Dhcp4to6Ipc::handler(int /* fd */) { Dhcp4to6Ipc& ipc = Dhcp4to6Ipc::instance(); Pkt6Ptr pkt; diff --git a/src/bin/dhcp4/dhcp4to6_ipc.h b/src/bin/dhcp4/dhcp4to6_ipc.h index 6b098585c5..ba2e1a6a93 100644 --- a/src/bin/dhcp4/dhcp4to6_ipc.h +++ b/src/bin/dhcp4/dhcp4to6_ipc.h @@ -47,7 +47,7 @@ public: /// /// The handler processes the DHCPv4-query DHCPv6 packet and /// sends the DHCPv4-response DHCPv6 packet back to the DHCPv6 server - static void handler(); + static void handler(int /* fd */); }; } // namespace isc diff --git a/src/bin/dhcp6/dhcp6to4_ipc.cc b/src/bin/dhcp6/dhcp6to4_ipc.cc index 02e275db80..0022aced20 100644 --- a/src/bin/dhcp6/dhcp6to4_ipc.cc +++ b/src/bin/dhcp6/dhcp6to4_ipc.cc @@ -54,7 +54,7 @@ void Dhcp6to4Ipc::open() { } } -void Dhcp6to4Ipc::handler() { +void Dhcp6to4Ipc::handler(int /* fd */) { Dhcp6to4Ipc& ipc = Dhcp6to4Ipc::instance(); Pkt6Ptr pkt; diff --git a/src/bin/dhcp6/dhcp6to4_ipc.h b/src/bin/dhcp6/dhcp6to4_ipc.h index d039e8f1b4..1d393e9884 100644 --- a/src/bin/dhcp6/dhcp6to4_ipc.h +++ b/src/bin/dhcp6/dhcp6to4_ipc.h @@ -44,7 +44,7 @@ public: /// @brief On receive handler /// /// The handler sends the DHCPv6 packet back to the remote address - static void handler(); + static void handler(int /* fd */); /// @param client_port UDP port where all responses are sent to. /// Not zero is mostly useful for testing purposes. diff --git a/src/hooks/dhcp/high_availability/ha_service.cc b/src/hooks/dhcp/high_availability/ha_service.cc index 4589a8ad57..77106bef18 100644 --- a/src/hooks/dhcp/high_availability/ha_service.cc +++ b/src/hooks/dhcp/high_availability/ha_service.cc @@ -1632,7 +1632,9 @@ HAService::clientConnectHandler(const boost::system::error_code& ec, int tcp_nat // run by an explicit call IOService ready in kea-dhcp code. // We are registerin the socket only to interrupt main-thread // select(). - IfaceMgr::instance().addExternalSocket(tcp_native_fd, 0); + IfaceMgr::instance().addExternalSocket(tcp_native_fd, + boost::bind(&HAService::socketReadyHandler, this, _1) + ); } // If ec.value() == boost::asio::error::already_connected, we should already @@ -1642,6 +1644,15 @@ HAService::clientConnectHandler(const boost::system::error_code& ec, int tcp_nat return (true); } +void +HAService::socketReadyHandler(int tcp_native_fd) { + // If the socket is ready but does not belong to one of our client's + // ongoing transactions, we close it. This will unregister it from + // IfaceMgr and ensure the client starts over with a fresh connection + // if it needs to do so. + client_.closeIfOutOfBandwidth(tcp_native_fd); +} + void HAService::clientCloseHandler(int tcp_native_fd) { if (tcp_native_fd >= 0) { diff --git a/src/hooks/dhcp/high_availability/ha_service.h b/src/hooks/dhcp/high_availability/ha_service.h index 6765f7b0ad..14486e1a80 100644 --- a/src/hooks/dhcp/high_availability/ha_service.h +++ b/src/hooks/dhcp/high_availability/ha_service.h @@ -738,6 +738,24 @@ protected: /// error we want Connection logic to process it. bool clientConnectHandler(const boost::system::error_code& ec, int tcp_native_fd); + /// @brief IfaceMgr external socket ready callback handler + /// + /// IfaceMgr invokes this call back when a registered socket has been + /// flagged as ready to read. It is installed by the invocation to + /// register the socket with IfaceMgr made in @ref clientConnectHandler. + /// + /// The handler calls @ref HttpClient::closeIfOutOfBandwidth() to catch + /// and close any sockets that have gone ready outside of transactions. + /// + /// We do this in case the other peer closed the socket (e.g. idle timeout), + /// as this will cause the socket to appear ready to read to the + /// IfaceMgr::select(). If this happens while no transcations are + /// in progess, we won't have anything to deal with the socket event. + /// This causes IfaceMgr::select() to endlessly interrupt on the socket. + /// + /// @param tcp_native_fd socket descriptor of the ready socket + void socketReadyHandler(int tcp_native_fd); + /// @brief HttpClient close callback handler /// /// Passed into HttpClient calls to allow unregistration of client's diff --git a/src/lib/dhcp/iface_mgr.cc b/src/lib/dhcp/iface_mgr.cc index c7d4bdd227..142d3d45b3 100644 --- a/src/lib/dhcp/iface_mgr.cc +++ b/src/lib/dhcp/iface_mgr.cc @@ -1090,7 +1090,7 @@ Pkt4Ptr IfaceMgr::receive4Indirect(uint32_t timeout_sec, uint32_t timeout_usec / // layer access without integrating any specific features // in IfaceMgr if (s.callback_) { - s.callback_(); + s.callback_(s.socket_); } return (Pkt4Ptr()); @@ -1185,7 +1185,7 @@ Pkt4Ptr IfaceMgr::receive4Direct(uint32_t timeout_sec, uint32_t timeout_usec /* // layer access without integrating any specific features // in IfaceMgr if (s.callback_) { - s.callback_(); + s.callback_(s.socket_); } return (Pkt4Ptr()); @@ -1314,7 +1314,7 @@ IfaceMgr::receive6Direct(uint32_t timeout_sec, uint32_t timeout_usec /* = 0 */ ) // layer access without integrating any specific features // in IfaceMgr if (s.callback_) { - s.callback_(); + s.callback_(s.socket_); } return (Pkt6Ptr()); @@ -1431,7 +1431,7 @@ IfaceMgr::receive6Indirect(uint32_t timeout_sec, uint32_t timeout_usec /* = 0 */ // layer access without integrating any specific features // in IfaceMgr if (s.callback_) { - s.callback_(); + s.callback_(s.socket_); } return (Pkt6Ptr()); diff --git a/src/lib/dhcp/iface_mgr.h b/src/lib/dhcp/iface_mgr.h index 450ff6d395..5557433701 100644 --- a/src/lib/dhcp/iface_mgr.h +++ b/src/lib/dhcp/iface_mgr.h @@ -478,7 +478,8 @@ boost::function IfaceMgrErrorMsgCallback; class IfaceMgr : public boost::noncopyable { public: /// Defines callback used when data is received over external sockets. - typedef boost::function SocketCallback; + /// @param fd socket descriptor of the ready socket + typedef boost::function SocketCallback; /// Keeps callback information for external sockets. struct SocketCallbackInfo { diff --git a/src/lib/dhcp/tests/iface_mgr_unittest.cc b/src/lib/dhcp/tests/iface_mgr_unittest.cc index e372c758b9..8c732d19df 100644 --- a/src/lib/dhcp/tests/iface_mgr_unittest.cc +++ b/src/lib/dhcp/tests/iface_mgr_unittest.cc @@ -695,14 +695,18 @@ public: int pipefd[2]; EXPECT_TRUE(pipe(pipefd) == 0); EXPECT_NO_THROW(ifacemgr->addExternalSocket(pipefd[0], - [&callback_ok](){ callback_ok = true; })); + [&callback_ok, &pipefd](int fd) { + callback_ok = (pipefd[0] == fd); + })); // Let's create a second pipe and register it as well int secondpipe[2]; EXPECT_TRUE(pipe(secondpipe) == 0); EXPECT_NO_THROW(ifacemgr->addExternalSocket(secondpipe[0], - [&callback2_ok](){ callback2_ok = true; })); + [&callback2_ok, &secondpipe](int fd) { + callback2_ok = (secondpipe[0] == fd); + })); // Verify a call with no data and normal external sockets works ok. Pkt4Ptr pkt4; @@ -778,14 +782,18 @@ public: int pipefd[2]; EXPECT_TRUE(pipe(pipefd) == 0); EXPECT_NO_THROW(ifacemgr->addExternalSocket(pipefd[0], - [&callback_ok](){ callback_ok = true; })); + [&callback_ok, &pipefd](int fd) { + callback_ok = (pipefd[0] == fd); + })); // Let's create a second pipe and register it as well int secondpipe[2]; EXPECT_TRUE(pipe(secondpipe) == 0); EXPECT_NO_THROW(ifacemgr->addExternalSocket(secondpipe[0], - [&callback2_ok](){ callback2_ok = true; })); + [&callback2_ok, &secondpipe](int fd) { + callback2_ok = (secondpipe[0] == fd); + })); // Verify a call with no data and normal external sockets works ok. Pkt6Ptr pkt6; @@ -2749,11 +2757,11 @@ TEST_F(IfaceMgrTest, detectIfaces) { volatile bool callback_ok; volatile bool callback2_ok; -void my_callback(void) { +void my_callback(int /* fd */) { callback_ok = true; } -void my_callback2(void) { +void my_callback2(int /* fd */) { callback2_ok = true; } diff --git a/src/lib/http/client.cc b/src/lib/http/client.cc index 50a581fcfb..cc7d6b7fc7 100644 --- a/src/lib/http/client.cc +++ b/src/lib/http/client.cc @@ -139,6 +139,13 @@ public: /// @return true if transaction has been initiated, false otherwise. bool isTransactionOngoing() const; + /// @brief Checks if a socket descriptor belongs to this connection. + /// + /// @param socket_fd socket descriptor to check + /// + /// @return True if the socket fd belongs to this connection. + bool isMySocket(int socket_fd) const; + /// @brief Checks and logs if premature transaction timeout is suspected. /// /// There are cases when the premature timeout occurs, e.g. as a result of @@ -439,6 +446,44 @@ public: queue_.clear(); } + /// @brief Closes a connection if it has an out-of-bandwidth socket event + /// + /// If the pool contains a connection using the given socket and that + /// connection is currently in a transaction the method returns as this + /// indicates a normal ready event. If the connection is not in an + /// ongoing transaction, then the connection is closed. + /// + /// This is method is intended to be used to detect and clean up then + /// sockets that are marked ready outside of transactions. The most comman + /// case is the other end of the socket being closed. + /// + /// @param socket_fd socket descriptor to check + void closeIfOutOfBandwidth(int socket_fd) { + // First we look for a connection with the socket. + for (auto conns_it = conns_.begin(); conns_it != conns_.end(); + ++conns_it) { + + if (!conns_it->second->isMySocket(socket_fd)) { + // Not this connection. + continue; + } + + if (conns_it->second->isTransactionOngoing()) { + // Matches but is in a transaction, all is well. + return; + } + + // Socket has no transaction, so any ready event is + // out-of-bandwidth (other end probably closed), so + // let's close it. Note we do not remove any queued + // requests, as this might somehow be occurring in + // between them. + conns_it->second->close(); + conns_.erase(conns_it); + break; + } + } + private: /// @brief Holds reference to the IO service. @@ -607,6 +652,11 @@ Connection::isTransactionOngoing() const { return (static_cast(current_request_)); } +bool +Connection::isMySocket(int socket_fd) const { + return (socket_.getNative() == socket_fd); +} + bool Connection::checkPrematureTimeout(const uint64_t transid) { // If there is no transaction but the handlers are invoked it means @@ -923,6 +973,11 @@ HttpClient::asyncSendRequest(const Url& url, const HttpRequestPtr& request, request_callback, connect_callback, close_callback); } +void +HttpClient::closeIfOutOfBandwidth(int socket_fd) { + return (impl_->conn_pool_->closeIfOutOfBandwidth(socket_fd)); +} + void HttpClient::stop() { impl_->conn_pool_->closeAll(); diff --git a/src/lib/http/client.h b/src/lib/http/client.h index b72605222a..9b9aff751f 100644 --- a/src/lib/http/client.h +++ b/src/lib/http/client.h @@ -191,6 +191,20 @@ public: /// @brief Closes all connections. void stop(); + /// @brief Closes a connection if it has an out-of-bandwidth socket event + /// + /// If the client owns a connection using the given socket and that + /// connection is currently in a transaction the method returns as this + /// indicates a normal ready event. If the connection is not in an + /// ongoing transaction, then the connection is closed. + /// + /// This is method is intended to be used to detect and clean up then + /// sockets that are marked ready outside of transactions. The most comman + /// case is the other end of the socket being closed. + /// + /// @param socket_fd socket descriptor to check + void closeIfOutOfBandwidth(int socket_fd); + private: /// @brief Pointer to the HTTP client implementation. diff --git a/src/lib/http/tests/server_client_unittests.cc b/src/lib/http/tests/server_client_unittests.cc index dd846eb131..7db256f2ea 100644 --- a/src/lib/http/tests/server_client_unittests.cc +++ b/src/lib/http/tests/server_client_unittests.cc @@ -1402,6 +1402,7 @@ public: if (++resp_num > 1) { io_service_.stop(); } + EXPECT_FALSE(ec); }, HttpClient::RequestTimeout(10000), @@ -1458,6 +1459,140 @@ public: EXPECT_EQ(-1, monitor.registered_fd_); } + /// @brief Tests detection and handling out-of-bandwidth socket events + /// + /// It initiates a transacation and verifies that a mid-transacation call + /// to HttpClient::closeIfOutOfBandwidth() has no affect on the connection. + /// After succesful completion of the transaction, a second call is made + /// HttpClient::closeIfOutOfBandwidth(). This should result in the connection + /// being closed. + /// This step is repeated to verify that after an OOB closure, transactions + /// to the same destination can be processed. + /// + /// Lastly, we verify that HttpClient::stop() closes the connection correctly. + /// + /// @param version HTTP version to be used. + void testCloseIfOutOfBandwidth(const HttpVersion& version) { + // Start the server. + ASSERT_NO_THROW(listener_.start()); + + // Create a client and specify the URL on which the server can be reached. + HttpClient client(io_service_); + Url url("http://127.0.0.1:18123"); + + // Initiate request to the server. + PostHttpRequestJsonPtr request1 = createRequest("sequence", 1, version); + HttpResponseJsonPtr response1(new HttpResponseJson()); + unsigned resp_num = 0; + ExternalMonitor monitor; + + ASSERT_NO_THROW(client.asyncSendRequest(url, request1, response1, + [this, &client, &resp_num, &monitor](const boost::system::error_code& ec, + const HttpResponsePtr&, + const std::string&) { + if (++resp_num == 1) { + io_service_.stop(); + } + + EXPECT_EQ(1, monitor.connect_cnt_); // We should have 1 connect. + EXPECT_EQ(0, monitor.close_cnt_); // We should have 0 closes + ASSERT_GT(monitor.registered_fd_, -1); // We should have a valid fd. + int orig_fd = monitor.registered_fd_; + + // Test our socket for OOBness. + client.closeIfOutOfBandwidth(monitor.registered_fd_); + + // Since we're in a transaction, we should have no closes and + // the same valid fd. + EXPECT_EQ(0, monitor.close_cnt_); + ASSERT_EQ(monitor.registered_fd_, orig_fd); + + EXPECT_FALSE(ec); + }, + HttpClient::RequestTimeout(10000), + boost::bind(&ExternalMonitor::connectHandler, &monitor, _1, _2), + boost::bind(&ExternalMonitor::closeHandler, &monitor, _1) + )); + + // Actually trigger the requests. The requests should be handlded by the + // server one after another. While the first request is being processed + // the server should queue another one. + ASSERT_NO_THROW(runIOService()); + + // Make sure that we received a response. + ASSERT_TRUE(response1); + ConstElementPtr sequence1 = response1->getJsonElement("sequence"); + ASSERT_TRUE(sequence1); + EXPECT_EQ(1, sequence1->intValue()); + + // We should have had 1 connect invocations, no closes + // and a valid registered fd + EXPECT_EQ(1, monitor.connect_cnt_); + EXPECT_EQ(0, monitor.close_cnt_); + EXPECT_GT(monitor.registered_fd_, -1); + + // Test our socket for OOBness. + client.closeIfOutOfBandwidth(monitor.registered_fd_); + + // Since we're in a transaction, we should have no closes and + // the same valid fd. + EXPECT_EQ(1, monitor.close_cnt_); + EXPECT_EQ(-1, monitor.registered_fd_); + + // Now let's do another request to the destination to verify that + // we'll reopen the connection without issue. + PostHttpRequestJsonPtr request2 = createRequest("sequence", 2, version); + HttpResponseJsonPtr response2(new HttpResponseJson()); + resp_num = 0; + ASSERT_NO_THROW(client.asyncSendRequest(url, request2, response2, + [this, &client, &resp_num, &monitor](const boost::system::error_code& ec, + const HttpResponsePtr&, + const std::string&) { + if (++resp_num == 1) { + io_service_.stop(); + } + + EXPECT_EQ(2, monitor.connect_cnt_); // We should have 1 connect. + EXPECT_EQ(1, monitor.close_cnt_); // We should have 0 closes + ASSERT_GT(monitor.registered_fd_, -1); // We should have a valid fd. + int orig_fd = monitor.registered_fd_; + + // Test our socket for OOBness. + client.closeIfOutOfBandwidth(monitor.registered_fd_); + + // Since we're in a transaction, we should have no closes and + // the same valid fd. + EXPECT_EQ(1, monitor.close_cnt_); + ASSERT_EQ(monitor.registered_fd_, orig_fd); + + EXPECT_FALSE(ec); + }, + HttpClient::RequestTimeout(10000), + boost::bind(&ExternalMonitor::connectHandler, &monitor, _1, _2), + boost::bind(&ExternalMonitor::closeHandler, &monitor, _1) + )); + + // Actually trigger the requests. The requests should be handlded by the + // server one after another. While the first request is being processed + // the server should queue another one. + ASSERT_NO_THROW(runIOService()); + + // Make sure that we received the second response. + ASSERT_TRUE(response2); + ConstElementPtr sequence2 = response2->getJsonElement("sequence"); + ASSERT_TRUE(sequence2); + EXPECT_EQ(2, sequence2->intValue()); + + // Stopping the client the close the connection. + client.stop(); + + // We should have had 2 connect invocations, 2 closes + // and an invalid registered fd + EXPECT_EQ(2, monitor.connect_cnt_); + EXPECT_EQ(2, monitor.close_cnt_); + EXPECT_EQ(-1, monitor.registered_fd_); + } + /// @brief Simulates external registery of Connection TCP sockets /// /// Provides methods compatible with Connection callbacks for connnect @@ -1827,4 +1962,9 @@ TEST_F(HttpClientTest, connectCloseCallbacks) { ASSERT_NO_FATAL_FAILURE(testConnectCloseCallbacks(HttpVersion(1, 1))); } +/// Tests that HttpClient::closeIfOutOfBandwidth works correctly. +TEST_F(HttpClientTest, closeIfOutOfBandwidth) { + ASSERT_NO_FATAL_FAILURE(testCloseIfOutOfBandwidth(HttpVersion(1, 1))); +} + }