From a27fcaed83e77e1299e0d88883c123b84a7b78d4 Mon Sep 17 00:00:00 2001 From: Christos Tsantilas Date: Thu, 8 Sep 2016 12:08:02 +0300 Subject: [PATCH] Squid crashes on shutdown while cleaning up idle ICAP connections. The global Adaptation::Icap::TheConfig object is automatically destroyed when Squid exits. Its destructor destroys Icap::ServiceRep objects that, in turn, close all open connections in the idle connections pool. Since this happens after comm_exit has destroyed all Comm structures associated with those connections, Squid crashes. This patch uses updated RegisteredRunners API to close all connections in existing connections pools before Squid cleans up the Comm subsystem. Also added a new IndependentRunner class to the RegistersRunner API, which must be used for runners that are destroyed by external forces, possibly while still being registered. IndependentRunner objects unregister themselves from Runners registry when they are destroyed. The finishShutdown() method is now virtual and may be overloaded to implement independent runner cleanup during main loop (and/or to support complex cleanup actions that require calling other virtual methods). The method is still called for all registered runners but it no longer destroys the runner. For dependent runners, the destruction happens soon after the finishShutdown event ends but also during the main loop (unless the runner has managed to register after the main loop ended). This patch replaces the r14575 temporary fix. This is a Measurement Factory project. --- src/base/RunnersRegistry.cc | 82 ++++++++++++++++++++++++++----------- src/base/RunnersRegistry.h | 23 +++++++---- src/client_side.cc | 5 --- src/client_side.h | 2 +- src/pconn.cc | 43 +++++++++++-------- src/pconn.h | 8 ++-- 6 files changed, 106 insertions(+), 57 deletions(-) diff --git a/src/base/RunnersRegistry.cc b/src/base/RunnersRegistry.cc index 4c50cdead6..4b86cb7e26 100644 --- a/src/base/RunnersRegistry.cc +++ b/src/base/RunnersRegistry.cc @@ -14,44 +14,78 @@ typedef std::set Runners; /// all known runners static Runners *TheRunners = NULL; +/// used to avoid re-creating deleted TheRunners after shutdown finished. +static bool RunnersGone = false; -/// safely returns registered runners, initializing structures as needed -static Runners & -GetRunners() +/// creates the registered runners container if needed +/// \return either registered runners (if they should exist) or nil (otherwise) +static Runners * +FindRunners() { - if (!TheRunners) + if (!TheRunners && !RunnersGone) TheRunners = new Runners; - return *TheRunners; + return TheRunners; } -int -RegisterRunner(RegisteredRunner *rr) +static inline void +GetRidOfRunner(RegisteredRunner *rr) { - Runners &runners = GetRunners(); - runners.insert(rr); - return runners.size(); + if (!dynamic_cast(rr)) + delete rr; + // else ignore; IndependentRunner } -int -DeregisterRunner(RegisteredRunner *rr) +bool +RegisterRunner(RegisteredRunner *rr) { - Runners &runners = GetRunners(); - runners.erase(rr); - return runners.size(); + if (Runners *runners = FindRunners()) { + runners->insert(rr); + return true; + } + + // past finishShutdown + GetRidOfRunner(rr); + return false; } void -RunRegistered(const RegisteredRunner::Method &m) +RunRegistered(const RegisteredRunner::Method &event) { - Runners &runners = GetRunners(); - typedef Runners::iterator RRI; - for (RRI i = runners.begin(); i != runners.end(); ++i) - ((*i)->*m)(); - - if (m == &RegisteredRunner::finishShutdown) { - delete TheRunners; - TheRunners = NULL; + if (Runners *runners = FindRunners()) { + // Many things may happen during the loop below. We copy to withstand + // runner removal/addition and avoid surprises due to registrations from + // parent constructors (with a half-baked "this"!). This copy also + // simplifies overall RR logic as it guarantees that registering a + // runner during event X loop does not execute runner::X(). + Runners oldRunners(*runners); + for (auto runner: oldRunners) { + if (runners->find(runner) != runners->end()) // still registered + (runner->*event)(); + } } + + if (event != &RegisteredRunner::finishShutdown) + return; + + // this is the last event; delete registry-dependent runners (and only them) + if (Runners *runners = FindRunners()) { + RunnersGone = true; + TheRunners = nullptr; + // from now on, no runners can be registered or unregistered + for (auto runner: *runners) + GetRidOfRunner(runner); // leaves a dangling pointer in runners + delete runners; + } +} + +/* IndependentRunner */ + +void +IndependentRunner::unregisterRunner() +{ + if (Runners *runners = FindRunners()) + runners->erase(this); + // else it is too late, finishShutdown() has been called } bool diff --git a/src/base/RunnersRegistry.h b/src/base/RunnersRegistry.h index 63d6d46fcf..5af14cfb29 100644 --- a/src/base/RunnersRegistry.h +++ b/src/base/RunnersRegistry.h @@ -77,24 +77,33 @@ public: /// Called after stopping the main loop and before releasing memory. /// Meant for quick/basic cleanup that does not require any other modules. virtual ~RegisteredRunner() {} - /// exists to simplify caller interface; override the destructor instead - void finishShutdown() { delete this; } + + /// Meant for cleanup of services needed by the already destroyed objects. + virtual void finishShutdown() {} /// a pointer to one of the above notification methods typedef void (RegisteredRunner::*Method)(); }; -/// registers a given runner with the given registry and returns registry count -int RegisterRunner(RegisteredRunner *rr); - -/// de-registers a given runner with the given registry and returns registry count -int DeregisterRunner(RegisteredRunner *rr); +/// registers a given runner with the given registry and returns true on success +bool RegisterRunner(RegisteredRunner *rr); /// Calls a given method of all runners. /// All runners are destroyed after the finishShutdown() call. void RunRegistered(const RegisteredRunner::Method &m); +/// A RegisteredRunner with lifetime determined by forces outside the Registry. +class IndependentRunner: public RegisteredRunner +{ +public: + virtual ~IndependentRunner() { unregisterRunner(); } + +protected: + void registerRunner() {RegisterRunner(this);} + void unregisterRunner(); ///< unregisters self; safe to call multiple times +}; + /// convenience macro to describe/debug the caller and the method being called #define RunRegisteredHere(m) \ debugs(1, 2, "running " # m); \ diff --git a/src/client_side.cc b/src/client_side.cc index 1d83a43fb0..417259abeb 100644 --- a/src/client_side.cc +++ b/src/client_side.cc @@ -587,7 +587,6 @@ ConnStateData::swanSong() checkLogging(); flags.readMore = false; - DeregisterRunner(this); clientdbEstablished(clientConnection->remote, -1); /* decrement */ pipeline.terminateAll(0); @@ -1043,10 +1042,6 @@ ConnStateData::endingShutdown() // swanSong() in the close handler will cleanup. if (Comm::IsConnOpen(clientConnection)) clientConnection->close(); - - // deregister now to ensure finalShutdown() does not kill us prematurely. - // fd_table purge will cleanup if close handler was not fast enough. - DeregisterRunner(this); } char * diff --git a/src/client_side.h b/src/client_side.h index 9fa181faea..1cad88c704 100644 --- a/src/client_side.h +++ b/src/client_side.h @@ -62,7 +62,7 @@ class ServerBump; * managing, or for graceful half-close use the stopReceiving() or * stopSending() methods. */ -class ConnStateData : public Server, public HttpControlMsgSink, public RegisteredRunner +class ConnStateData : public Server, public HttpControlMsgSink, private IndependentRunner { public: diff --git a/src/pconn.cc b/src/pconn.cc index 5b18f42e29..a038cec5b6 100644 --- a/src/pconn.cc +++ b/src/pconn.cc @@ -31,14 +31,19 @@ CBDATA_CLASS_INIT(IdleConnList); /* ========== IdleConnList ============================================ */ -IdleConnList::IdleConnList(const char *key, PconnPool *thePool) : +IdleConnList::IdleConnList(const char *aKey, PconnPool *thePool) : capacity_(PCONN_FDS_SZ), size_(0), parent_(thePool) { - hash.key = xstrdup(key); - hash.next = NULL; + //Initialize hash_link members + key = xstrdup(aKey); + next = NULL; + theList_ = new Comm::ConnectionPointer[capacity_]; + + RegisterRunner(this); + // TODO: re-attach to MemPools. WAS: theList = (?? *)pconn_fds_pool->alloc(); } @@ -54,7 +59,7 @@ IdleConnList::~IdleConnList() delete[] theList_; - xfree(hash.key); + xfree(key); } /** Search the list. Matches by FD socket number. @@ -95,7 +100,7 @@ IdleConnList::removeAt(int index) if (parent_) { parent_->noteConnectionRemoved(); if (size_ == 0) { - debugs(48, 3, HERE << "deleting " << hashKeyStr(&hash)); + debugs(48, 3, "deleting " << hashKeyStr(this)); delete this; } } @@ -146,7 +151,7 @@ IdleConnList::closeN(size_t n) } if (parent_ && size_ == 0) { - debugs(48, 3, HERE << "deleting " << hashKeyStr(&hash)); + debugs(48, 3, "deleting " << hashKeyStr(this)); delete this; } } @@ -237,13 +242,13 @@ IdleConnList::pop() * quite a bit of CPU. Just keep it in mind. */ Comm::ConnectionPointer -IdleConnList::findUseable(const Comm::ConnectionPointer &key) +IdleConnList::findUseable(const Comm::ConnectionPointer &aKey) { assert(size_); // small optimization: do the constant bool tests only once. - const bool keyCheckAddr = !key->local.isAnyAddr(); - const bool keyCheckPort = key->local.port() > 0; + const bool keyCheckAddr = !aKey->local.isAnyAddr(); + const bool keyCheckPort = aKey->local.port() > 0; for (int i=size_-1; i>=0; --i) { @@ -251,11 +256,11 @@ IdleConnList::findUseable(const Comm::ConnectionPointer &key) continue; // local end port is required, but dont match. - if (keyCheckPort && key->local.port() != theList_[i]->local.port()) + if (keyCheckPort && aKey->local.port() != theList_[i]->local.port()) continue; // local address is required, but does not match. - if (keyCheckAddr && key->local.matchIPAddr(theList_[i]->local) != 0) + if (keyCheckAddr && aKey->local.matchIPAddr(theList_[i]->local) != 0) continue; // our connection timeout handler is scheduled to run already. unsafe for now. @@ -314,6 +319,12 @@ IdleConnList::Timeout(const CommTimeoutCbParams &io) list->findAndClose(io.conn); } +void +IdleConnList::endingShutdown() +{ + closeN(size_); +} + /* ========== PconnPool PRIVATE FUNCTIONS ============================================ */ const char * @@ -411,10 +422,10 @@ PconnPool::push(const Comm::ConnectionPointer &conn, const char *domain) if (list == NULL) { list = new IdleConnList(aKey, this); - debugs(48, 3, HERE << "new IdleConnList for {" << hashKeyStr(&list->hash) << "}" ); - hash_join(table, &list->hash); + debugs(48, 3, "new IdleConnList for {" << hashKeyStr(list) << "}" ); + hash_join(table, list); } else { - debugs(48, 3, HERE << "found IdleConnList for {" << hashKeyStr(&list->hash) << "}" ); + debugs(48, 3, "found IdleConnList for {" << hashKeyStr(list) << "}" ); } list->push(conn); @@ -442,7 +453,7 @@ PconnPool::pop(const Comm::ConnectionPointer &dest, const char *domain, bool kee notifyManager("pop failure"); return Comm::ConnectionPointer(); } else { - debugs(48, 3, HERE << "found " << hashKeyStr(&list->hash) << + debugs(48, 3, "found " << hashKeyStr(list) << (keepOpen ? " to use" : " to kill")); } @@ -489,7 +500,7 @@ PconnPool::unlinkList(IdleConnList *list) { theCount -= list->count(); assert(theCount >= 0); - hash_remove_link(table, &list->hash); + hash_remove_link(table, list); } void diff --git a/src/pconn.h b/src/pconn.h index decd505060..63bd8ed3bb 100644 --- a/src/pconn.h +++ b/src/pconn.h @@ -10,6 +10,7 @@ #define SQUID_PCONN_H #include "base/CbcPointer.h" +#include "base/RunnersRegistry.h" #include "mgr/forward.h" #include @@ -35,7 +36,7 @@ class PeerPoolMgr; /** \ingroup PConnAPI * A list of connections currently open to a particular destination end-point. */ -class IdleConnList +class IdleConnList: public hash_link, private IndependentRunner { CBDATA_CLASS(IdleConnList); @@ -62,6 +63,8 @@ public: int count() const { return size_; } void closeN(size_t count); + // IndependentRunner API + virtual void endingShutdown(); private: bool isAvailable(int i) const; bool removeAt(int index); @@ -70,9 +73,6 @@ private: static IOCB Read; static CTCB Timeout; -public: - hash_link hash; /** must be first */ - private: /** List of connections we are holding. * Sorted as FIFO list for most efficient speeds on pop() and findUsable() -- 2.47.2