From: Eduard Bagdasaryan Date: Fri, 10 Jul 2020 16:45:50 +0000 (+0000) Subject: Report SMP store queues state (mgr:store_queues) (#690) X-Git-Tag: 4.15-20210522-snapshot~79 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=d8ee9e8dff4831fa49d2a5ed661ca3ad81b7100e;p=thirdparty%2Fsquid.git Report SMP store queues state (mgr:store_queues) (#690) The state of two Store queues is reported: Transients notification queue (a.k.a. Collapsed Forwarding queue) and SMP I/O request queue (used for communication with rock diskers). Each worker and disker kid reports its view of that kid's incoming and outgoing queues state, including a small sample (up to 7 items) of queued messages. These kid-specific reports are YAML-compliant. With the exception of a field labeled "other", each queue report is self-consistent despite accessing data shared among kids -- the reported combination of values did exist at the snapshot collection time. The special "other" field represents a message counter maintained by the other side of the queue. In most cases, that field will be close to its correct value, but, due to modifications by the other process of a non-atomic value, it may be virtually anything. We think it is better to report (without officially naming) this field because it may be useful in triage regardless of these caveats. Making the counter atomic just for these occasional reports is not worth the performance overheads. Also fixed testStore linking problems (on some platforms) that were caused by the wrong order of libdiskio and libipc dependencies. --- diff --git a/src/CollapsedForwarding.cc b/src/CollapsedForwarding.cc index 9fc2c09047..1169aa7582 100644 --- a/src/CollapsedForwarding.cc +++ b/src/CollapsedForwarding.cc @@ -35,6 +35,9 @@ class CollapsedForwardingMsg public: CollapsedForwardingMsg(): sender(-1), xitIndex(-1) {} + /// prints message parameters; suitable for cache manager reports + void stat(std::ostream &); + public: int sender; ///< kid ID of sending process @@ -42,6 +45,12 @@ public: sfileno xitIndex; }; +void +CollapsedForwardingMsg::stat(std::ostream &os) +{ + os << "sender: " << sender << ", xitIndex: " << xitIndex; +} + // CollapsedForwarding void @@ -137,6 +146,15 @@ CollapsedForwarding::HandleNotification(const Ipc::TypedMsgHdr &msg) HandleNewData("after notification"); } +void +CollapsedForwarding::StatQueue(std::ostream &os) +{ + if (queue.get()) { + os << "Transients queues:\n"; + queue->stat(os); + } +} + /// initializes shared queue used by CollapsedForwarding class CollapsedForwardingRr: public Ipc::Mem::RegisteredRunner { diff --git a/src/CollapsedForwarding.h b/src/CollapsedForwarding.h index 4c2e1af3e8..e7b226fffd 100644 --- a/src/CollapsedForwarding.h +++ b/src/CollapsedForwarding.h @@ -42,6 +42,9 @@ public: /// handle queue push notifications from worker or disker static void HandleNotification(const Ipc::TypedMsgHdr &msg); + /// prints IPC message queue state; suitable for cache manager reports + static void StatQueue(std::ostream &); + private: typedef Ipc::MultiQueue Queue; static std::unique_ptr queue; ///< IPC queue diff --git a/src/DiskIO/IpcIo/IpcIoFile.cc b/src/DiskIO/IpcIo/IpcIoFile.cc index 00aec4d8d4..e97487de59 100644 --- a/src/DiskIO/IpcIo/IpcIoFile.cc +++ b/src/DiskIO/IpcIo/IpcIoFile.cc @@ -67,9 +67,30 @@ std::ostream & operator <<(std::ostream &os, const SipcIo &sio) { return os << "ipcIo" << sio.worker << '.' << sio.msg.requestId << - (sio.msg.command == IpcIo::cmdRead ? 'r' : 'w') << sio.disker; + sio.msg.command << sio.disker; } +/* IpcIo::Command */ + +std::ostream & +operator <<(std::ostream &os, const IpcIo::Command command) +{ + switch (command) { + case IpcIo::cmdNone: + return os << '-'; + case IpcIo::cmdOpen: + return os << 'o'; + case IpcIo::cmdRead: + return os << 'r'; + case IpcIo::cmdWrite: + return os << 'w'; + } + // unreachable code + return os << static_cast(command); +} + +/* IpcIoFile */ + IpcIoFile::IpcIoFile(char const *aDb): dbName(aDb), diskId(-1), error_(false), lastRequestId(0), olderRequests(&requestMap1), newerRequests(&requestMap2), @@ -501,6 +522,15 @@ IpcIoFile::HandleNotification(const Ipc::TypedMsgHdr &msg) HandleResponses("after notification"); } +void +IpcIoFile::StatQueue(std::ostream &os) +{ + if (queue.get()) { + os << "SMP disk I/O queues:\n"; + queue->stat(os); + } +} + /// handles open request timeout void IpcIoFile::OpenTimeout(void *const param) @@ -632,6 +662,21 @@ IpcIoMsg::IpcIoMsg(): start.tv_usec = 0; } +void +IpcIoMsg::stat(std::ostream &os) +{ + timeval elapsedTime; + tvSub(elapsedTime, start, current_time); + os << "id: " << requestId << + ", offset: " << offset << + ", size: " << len << + ", page: " << page << + ", command: " << command << + ", start: " << start << + ", elapsed: " << elapsedTime << + ", errno: " << xerrno; +} + /* IpcIoPendingRequest */ IpcIoPendingRequest::IpcIoPendingRequest(const IpcIoFile::Pointer &aFile): diff --git a/src/DiskIO/IpcIo/IpcIoFile.h b/src/DiskIO/IpcIo/IpcIoFile.h index 2589f29263..1c5e5c4ae8 100644 --- a/src/DiskIO/IpcIo/IpcIoFile.h +++ b/src/DiskIO/IpcIo/IpcIoFile.h @@ -34,12 +34,17 @@ typedef enum { cmdNone, cmdOpen, cmdRead, cmdWrite } Command; } // namespace IpcIo +std::ostream &operator <<(std::ostream &, IpcIo::Command); + /// converts DiskIO requests to IPC queue messages class IpcIoMsg { public: IpcIoMsg(); + /// prints message parameters; suitable for cache manager reports + void stat(std::ostream &); + public: unsigned int requestId; ///< unique for requestor; matches request w/ response @@ -86,6 +91,9 @@ public: /// handle queue push notifications from worker or disker static void HandleNotification(const Ipc::TypedMsgHdr &msg); + /// prints IPC message queue state; suitable for cache manager reports + static void StatQueue(std::ostream &); + DiskFile::Config config; ///< supported configuration options protected: diff --git a/src/Makefile.am b/src/Makefile.am index 6452db0abd..8b2278def8 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -1595,12 +1595,12 @@ tests_testStore_LDADD= \ ip/libip.la \ fs/libfs.la \ mgr/libmgr.la \ - ipc/libipc.la \ anyp/libanyp.la \ mem/libmem.la \ store/libstore.la \ sbuf/libsbuf.la \ DiskIO/libdiskio.la \ + ipc/libipc.la \ $(top_builddir)/lib/libmisccontainers.la \ $(top_builddir)/lib/libmiscencoding.la \ $(top_builddir)/lib/libmiscutil.la \ @@ -1951,6 +1951,7 @@ tests_test_http_range_SOURCES = \ event.cc \ tests/stub_external_acl.cc \ tests/stub_fatal.cc \ + tests/stub_IpcIoFile.cc \ fatal.h \ fd.cc \ fd.h \ @@ -2368,6 +2369,7 @@ tests_testHttpRequest_SOURCES = \ tests/stub_libsecurity.cc \ tests/stub_libstore.cc \ tests/stub_main_cc.cc \ + tests/stub_IpcIoFile.cc \ mem_node.cc \ mime.cc \ mime.h \ @@ -2670,6 +2672,7 @@ tests_testCacheManager_SOURCES = \ tests/stub_libsecurity.cc \ tests/stub_libstore.cc \ tests/stub_main_cc.cc \ + tests/stub_IpcIoFile.cc \ mem_node.cc \ mime.cc \ mime.h \ @@ -2992,6 +2995,7 @@ tests_testEvent_SOURCES = \ tests/stub_libsecurity.cc \ tests/stub_libstore.cc \ tests/stub_main_cc.cc \ + tests/stub_IpcIoFile.cc \ mem_node.cc \ mime.cc \ mime.h \ diff --git a/src/MemObject.cc b/src/MemObject.cc index 00fc25589b..e54aa04372 100644 --- a/src/MemObject.cc +++ b/src/MemObject.cc @@ -171,7 +171,7 @@ MemObject::dump() const debugs(20, DBG_IMPORTANT, "MemObject->data.origin_offset: " << (data_hdr.head ? data_hdr.head->nodeBuffer.offset : 0)); #endif - debugs(20, DBG_IMPORTANT, "MemObject->start_ping: " << start_ping.tv_sec << "."<< std::setfill('0') << std::setw(6) << start_ping.tv_usec); + debugs(20, DBG_IMPORTANT, "MemObject->start_ping: " << start_ping); debugs(20, DBG_IMPORTANT, "MemObject->inmem_hi: " << data_hdr.endOffset()); debugs(20, DBG_IMPORTANT, "MemObject->inmem_lo: " << inmem_lo); debugs(20, DBG_IMPORTANT, "MemObject->nclients: " << nclients); diff --git a/src/SquidTime.h b/src/SquidTime.h index eda43890b8..111e9fd20d 100644 --- a/src/SquidTime.h +++ b/src/SquidTime.h @@ -14,6 +14,7 @@ #include "rfc1123.h" #include +#include /* NP: sys/time.h is provided by libcompat */ /* Use uint64_t to store milliseconds */ @@ -97,6 +98,9 @@ operator ==(const timeval &a, const timeval &b) return !(a != b); } +/// prints . +std::ostream &operator <<(std::ostream &, const timeval &); + namespace Time { diff --git a/src/icmp/Icmp.cc b/src/icmp/Icmp.cc index 39a4408a7e..c695b029ca 100644 --- a/src/icmp/Icmp.cc +++ b/src/icmp/Icmp.cc @@ -87,9 +87,7 @@ Icmp::ipHops(int ttl) void Icmp::Log(const Ip::Address &addr, const uint8_t type, const char* pkt_str, const int rtt, const int hops) { - debugs(42, 2, "pingerLog: " << std::setw(9) << current_time.tv_sec << - "." << std::setfill('0') << std::setw(6) << - current_time.tv_usec << " " << std::left << std::setfill(' ') << + debugs(42, 2, "pingerLog: " << current_time << " " << std::left << std::setw(45) << addr << " " << type << " " << std::setw(15) << pkt_str << " " << rtt << "ms " << hops << " hops"); diff --git a/src/ipc/Queue.cc b/src/ipc/Queue.cc index 20e8586df2..6184f85de9 100644 --- a/src/ipc/Queue.cc +++ b/src/ipc/Queue.cc @@ -95,6 +95,25 @@ Ipc::OneToOneUniQueue::Items2Bytes(const unsigned int maxItemSize, const int siz return sizeof(OneToOneUniQueue) + maxItemSize * size; } +/// start state reporting (by reporting queue parameters) +/// The labels reflect whether the caller owns theIn or theOut data member and, +/// hence, cannot report the other value reliably. +void +Ipc::OneToOneUniQueue::statOpen(std::ostream &os, const char *inLabel, const char *outLabel, const uint32_t count) const +{ + os << "{ size: " << count << + ", capacity: " << theCapacity << + ", " << inLabel << ": " << theIn << + ", " << outLabel << ": " << theOut; +} + +/// end state reporting started by statOpen() +void +Ipc::OneToOneUniQueue::statClose(std::ostream &os) const +{ + os << "}\n"; +} + /* OneToOneUniQueues */ Ipc::OneToOneUniQueues::OneToOneUniQueues(const int aCapacity, const unsigned int maxItemSize, const int queueCapacity): theCapacity(aCapacity) diff --git a/src/ipc/Queue.h b/src/ipc/Queue.h index 4bd87bf3f6..5ada9fdb33 100644 --- a/src/ipc/Queue.h +++ b/src/ipc/Queue.h @@ -15,6 +15,7 @@ #include "ipc/mem/Pointer.h" #include "util.h" +#include #include class String; @@ -114,10 +115,21 @@ public: /// returns true iff the value was set; the value may be stale! template bool peek(Value &value) const; + /// prints incoming queue state; suitable for cache manager reports + template void statIn(std::ostream &, int localProcessId, int remoteProcessId) const; + /// prints outgoing queue state; suitable for cache manager reports + template void statOut(std::ostream &, int localProcessId, int remoteProcessId) const; + private: + void statOpen(std::ostream &, const char *inLabel, const char *outLabel, uint32_t count) const; + void statClose(std::ostream &) const; + template void statSamples(std::ostream &, unsigned int start, uint32_t size) const; + template void statRange(std::ostream &, unsigned int start, uint32_t n) const; - unsigned int theIn; ///< input index, used only in push() - unsigned int theOut; ///< output index, used only in pop() + // optimization: these non-std::atomic data members are in shared memory, + // but each is used only by one process (aside from obscured reporting) + unsigned int theIn; ///< current push() position; reporting aside, used only in push() + unsigned int theOut; ///< current pop() position; reporting aside, used only in pop()/peek() std::atomic theSize; ///< number of items in the queue const unsigned int theMaxItemSize; ///< maximum item size @@ -167,6 +179,9 @@ public: /// peeks at the item likely to be pop()ed next template bool peek(int &remoteProcessId, Value &value) const; + /// prints current state; suitable for cache manager reports + template void stat(std::ostream &) const; + /// returns local reader's balance QueueReader::Balance &localBalance() { return localReader().balance; } @@ -410,6 +425,92 @@ OneToOneUniQueue::push(const Value &value, QueueReader *const reader) return wasEmpty && (!reader || reader->raiseSignal()); } +template +void +OneToOneUniQueue::statIn(std::ostream &os, const int localProcessId, const int remoteProcessId) const +{ + os << " kid" << localProcessId << " receiving from kid" << remoteProcessId << ": "; + // Nobody can modify our theOut so, after capturing some valid theSize value + // in count, we can reliably report all [theOut, theOut+count) items that + // were queued at theSize capturing time. We will miss new items push()ed by + // the other side, but it is OK -- we report state at the capturing time. + const auto count = theSize.load(); + statOpen(os, "other", "popIndex", count); + statSamples(os, theOut, count); + statClose(os); +} + +template +void +OneToOneUniQueue::statOut(std::ostream &os, const int localProcessId, const int remoteProcessId) const +{ + os << " kid" << localProcessId << " sending to kid" << remoteProcessId << ": "; + // Nobody can modify our theIn so, after capturing some valid theSize value + // in count, we can reliably report all [theIn-count, theIn) items that were + // queued at theSize capturing time. We may report items already pop()ed by + // the other side, but that is OK because pop() does not modify items -- it + // only increments theOut. + const auto count = theSize.load(); + statOpen(os, "pushIndex", "other", count); + statSamples(os, theIn - count, count); // unsigned offset underflow OK + statClose(os); +} + +/// report a sample of [start, start + size) items +template +void +OneToOneUniQueue::statSamples(std::ostream &os, const unsigned int start, const uint32_t count) const +{ + if (!count) { + os << " "; + return; + } + + os << ", items: [\n"; + // report a few leading and trailing items, without repetitions + const auto sampleSize = std::min(3U, count); // leading (and max) sample + statRange(os, start, sampleSize); + if (sampleSize < count) { // the first sample did not show some items + // The `start` offset aside, the first sample reported all items + // below the sampleSize offset. The second sample needs to report + // the last sampleSize items (i.e. starting at count-sampleSize + // offset) except those already reported by the first sample. + const auto secondSampleOffset = std::max(sampleSize, count - sampleSize); + const auto secondSampleSize = std::min(sampleSize, count - sampleSize); + + // but first we print a sample separator, unless there are no items + // between the samples or the separator hides the only unsampled item + const auto bothSamples = sampleSize + secondSampleSize; + if (bothSamples + 1U == count) + statRange(os, start + sampleSize, 1); + else if (count > bothSamples) + os << " # ... " << (count - bothSamples) << " items not shown ...\n"; + + statRange(os, start + secondSampleOffset, secondSampleSize); + } + os << " ]"; +} + +/// statSamples() helper that reports n items from start +template +void +OneToOneUniQueue::statRange(std::ostream &os, const unsigned int start, const uint32_t n) const +{ + assert(sizeof(Value) <= theMaxItemSize); + auto offset = start; + for (uint32_t i = 0; i < n; ++i) { + // XXX: Throughout this C++ header, these overflow wrapping tricks work + // only because theCapacity currently happens to be a power of 2 (e.g., + // the highest offset (0xF...FFF) % 3 is 0 and so is the next offset). + const auto pos = (offset++ % theCapacity) * theMaxItemSize; + Value value; + memcpy(&value, theBuffer + pos, sizeof(value)); + os << " { "; + value.stat(os); + os << " },\n"; + } +} + // OneToOneUniQueues inline OneToOneUniQueue & @@ -474,6 +575,23 @@ BaseMultiQueue::peek(int &remoteProcessId, Value &value) const return false; // most likely, no process had anything to pop } +template +void +BaseMultiQueue::stat(std::ostream &os) const +{ + for (int processId = remotesIdOffset(); processId < remotesIdOffset() + remotesCount(); ++processId) { + const auto &queue = inQueue(processId); + queue.statIn(os, theLocalProcessId, processId); + } + + os << "\n"; + + for (int processId = remotesIdOffset(); processId < remotesIdOffset() + remotesCount(); ++processId) { + const auto &queue = outQueue(processId); + queue.statOut(os, theLocalProcessId, processId); + } +} + // FewToFewBiQueue template diff --git a/src/log/access_log.cc b/src/log/access_log.cc index e64dd76414..38887dd10e 100644 --- a/src/log/access_log.cc +++ b/src/log/access_log.cc @@ -302,8 +302,7 @@ HierarchyLogEntry::startPeerClock() void HierarchyLogEntry::stopPeerClock(const bool force) { - debugs(46, 5, "First connection started: " << firstConnStart_.tv_sec << "." << - std::setfill('0') << std::setw(6) << firstConnStart_.tv_usec << + debugs(46, 5, "First connection started: " << firstConnStart_ << ", current total response time value: " << (totalResponseTime_.tv_sec * 1000 + totalResponseTime_.tv_usec/1000) << (force ? ", force fixing" : "")); if (!force && totalResponseTime_.tv_sec != -1) diff --git a/src/store.cc b/src/store.cc index c69468f822..094c00c1a2 100644 --- a/src/store.cc +++ b/src/store.cc @@ -10,11 +10,16 @@ #include "squid.h" #include "base/AsyncCbdataCalls.h" +#include "base/PackableStream.h" #include "base/TextException.h" #include "CacheDigest.h" #include "CacheManager.h" +#include "CollapsedForwarding.h" #include "comm/Connection.h" #include "comm/Read.h" +#if HAVE_DISKIO_MODULE_IPCIO +#include "DiskIO/IpcIo/IpcIoFile.h" +#endif #include "ETag.h" #include "event.h" #include "fde.h" @@ -122,6 +127,20 @@ Store::Stats(StoreEntry * output) Root().stat(*output); } +/// reports the current state of Store-related queues +static void +StatQueues(StoreEntry *e) +{ + assert(e); + PackableStream stream(*e); + CollapsedForwarding::StatQueue(stream); + #if HAVE_DISKIO_MODULE_IPCIO + stream << "\n"; + IpcIoFile::StatQueue(stream); + #endif + stream.flush(); +} + // XXX: new/delete operators need to be replaced with MEMPROXY_CLASS // definitions but doing so exposes bug 4370, and maybe 4354 and 4355 void * @@ -1285,6 +1304,7 @@ storeRegisterWithCacheManager(void) Mgr::RegisterAction("store_io", "Store IO Interface Stats", &Mgr::StoreIoAction::Create, 0, 1); Mgr::RegisterAction("store_check_cachable_stats", "storeCheckCachable() Stats", storeCheckCachableStats, 0, 1); + Mgr::RegisterAction("store_queues", "SMP Transients and Caching Queues", StatQueues, 0, 1); } void diff --git a/src/tests/stub_CollapsedForwarding.cc b/src/tests/stub_CollapsedForwarding.cc index ff5453f876..b5a1dd1f94 100644 --- a/src/tests/stub_CollapsedForwarding.cc +++ b/src/tests/stub_CollapsedForwarding.cc @@ -14,4 +14,5 @@ void CollapsedForwarding::Broadcast(StoreEntry const&, const bool) STUB void CollapsedForwarding::Broadcast(const sfileno, const bool) STUB +void CollapsedForwarding::StatQueue(std::ostream &) STUB diff --git a/src/tests/stub_IpcIoFile.cc b/src/tests/stub_IpcIoFile.cc new file mode 100644 index 0000000000..b8554e11c9 --- /dev/null +++ b/src/tests/stub_IpcIoFile.cc @@ -0,0 +1,21 @@ +/* + * Copyright (C) 1996-2020 The Squid Software Foundation and contributors + * + * Squid software is distributed under GPLv2+ license and includes + * contributions from numerous individuals and organizations. + * Please see the COPYING and CONTRIBUTORS files for details. + */ + +#include "squid.h" + +#if HAVE_DISKIO_MODULE_IPCIO +#include "DiskIO/IpcIo/IpcIoFile.h" + +#define STUB_API "DiskIO/IocIo/IpcIoFile.cc" + +#include "tests/STUB.h" + +void IpcIoFile::StatQueue(std::ostream &) STUB + +#endif /* HAVE_DISKIO_MODULE_IPCIO */ + diff --git a/src/tests/stub_time.cc b/src/tests/stub_time.cc index a5a8e6b466..35e1bdf24f 100644 --- a/src/tests/stub_time.cc +++ b/src/tests/stub_time.cc @@ -27,3 +27,5 @@ const char * Time::FormatHttpd(time_t ) STUB_RETVAL("") void TimeEngine::tick() STUB TimeEngine::~TimeEngine() {STUB_NOP} +std::ostream &operator <<(std::ostream &os, const timeval &) STUB_RETVAL(os) + diff --git a/src/time.cc b/src/time.cc index 815b1c75e0..51ae3db5c0 100644 --- a/src/time.cc +++ b/src/time.cc @@ -11,6 +11,9 @@ #include "squid.h" #include "SquidTime.h" +#include +#include + struct timeval current_time; double current_dtime; time_t squid_curtime = 0; @@ -78,6 +81,16 @@ TimeEngine::tick() getCurrentTime(); } +std::ostream & +operator <<(std::ostream &os, const timeval &t) +{ + os << t.tv_sec << "."; + const auto savedFill = os.fill('0'); + os << std::setw(6) << t.tv_usec; + os.fill(savedFill); + return os; +} + const char * Time::FormatStrf(time_t t) {