]> git.ipfire.org Git - thirdparty/squid.git/commitdiff
%busy_time: Time spent in transaction-related code (#1216)
authorAlex Rousskov <rousskov@measurement-factory.com>
Tue, 3 Jan 2023 12:20:27 +0000 (12:20 +0000)
committerSquid Anubis <squid-anubis@squid-cache.org>
Wed, 4 Jan 2023 13:56:48 +0000 (13:56 +0000)
Configuration tuning, problems triage, and performance optimization
often benefit from knowing how much time Squid spent on a particular
transaction (or a typical transaction in a category like "cache hits").

We already log total transaction response time (%tr), but that time
includes transaction queuing time (i.e. the time when Squid could
advance our transaction but was advancing other transactions) and
various waiting periods (e.g., waiting for a helper response). The new
measurement focuses on time periods when Squid was actively working on
(as in "was blocked on") our transaction.

CodeContext coverage problems do affect this measurement, but the
existing coverage is already good enough for usable measurements in some
cases, and it is getting better. It will take us a long time to cover
all cases; we should not wait for that to happen (as long as we warn
admins about these problems).

src/base/CodeContext.cc
src/base/CodeContext.h
src/base/Makefile.am
src/base/Stopwatch.cc [new file with mode: 0644]
src/base/Stopwatch.h [new file with mode: 0644]
src/base/forward.h
src/cf.data.pre
src/format/ByteCode.h
src/format/Format.cc
src/format/Token.cc

index 0f885391ba679f0445d27a5f8f84992e41a322f9..7ead548b567774a6cade18c3758bdd2b1f61f27e 100644 (file)
@@ -44,6 +44,7 @@ CodeContext::ForgetCurrent()
     static const RefCount<FadingCodeContext> fadingCodeContext = new FadingCodeContext();
     auto &current = Instance();
     assert(current);
+    current->busyTime.pause();
     fadingCodeContext->gist = current->codeContextGist();
     current = fadingCodeContext;
 }
@@ -57,6 +58,7 @@ CodeContext::Entering(const Pointer &codeCtx)
     if (current)
         ForgetCurrent(); // ensure orderly closure of the old context
     current = codeCtx;
+    codeCtx->busyTime.resume();
     debugs(1, 5, codeCtx->codeContextGist());
 }
 
index 91d60b1754ea0893daa967e2ca219bc2f1330b89..25e7627b752575562376491e52d77f12d0e5910a 100644 (file)
@@ -11,6 +11,7 @@
 
 #include "base/InstanceId.h"
 #include "base/RefCount.h"
+#include "base/Stopwatch.h"
 
 #include <iosfwd>
 
@@ -71,6 +72,9 @@ public:
     /// appends human-friendly context description line(s) to a cache.log record
     virtual std::ostream &detailCodeContext(std::ostream &os) const = 0;
 
+    /// time spent in this context (see also: %busy_time)
+    Stopwatch busyTime;
+
 private:
     static void ForgetCurrent();
     static void Entering(const Pointer &codeCtx);
index 2c828128036cb446f1fba674720bbe91cce67df0..3966f3c06c61c6ae8cb96fab677738ba59b5cbb7 100644 (file)
@@ -63,6 +63,8 @@ libbase_la_SOURCES = \
        RegexPattern.h \
        RunnersRegistry.cc \
        RunnersRegistry.h \
+       Stopwatch.cc \
+       Stopwatch.h \
        Subscription.h \
        SupportOrVeto.h \
        TextException.cc \
diff --git a/src/base/Stopwatch.cc b/src/base/Stopwatch.cc
new file mode 100644 (file)
index 0000000..ff89ac0
--- /dev/null
@@ -0,0 +1,50 @@
+/*
+ * Copyright (C) 1996-2022 The Squid Software Foundation and contributors
+ *
+ * Squid software is distributed under GPLv2+ license and includes
+ * contributions from numerous individuals and organizations.
+ * Please see the COPYING and CONTRIBUTORS files for details.
+ */
+
+#include "squid.h"
+#include "base/Stopwatch.h"
+#include "debug/Stream.h"
+
+static_assert(Stopwatch::Clock::is_steady,
+              "Stopwatch::Clock is suitable for measuring real-time intervals");
+
+Stopwatch::Stopwatch():
+    subtotal_(Clock::duration::zero())
+{
+}
+
+Stopwatch::Clock::duration
+Stopwatch::total() const
+{
+    auto result = subtotal_;
+    if (running())
+        result += Clock::now() - runStart_;
+    return result;
+}
+
+void
+Stopwatch::resume()
+{
+    if (!running()) {
+        runStart_ = Clock::now();
+        debugs(1, 7, "period " << resumes_<< " started after " << subtotal_.count());
+    }
+    ++resumes_;
+}
+
+/// ends the current measurement period if needed; requires prior resume()
+void Stopwatch::pause()
+{
+    ++pauses_;
+    if (!running()) {
+        const auto runtime = Clock::now() - runStart_;
+        subtotal_ += runtime;
+        debugs(1, 7, "period " << resumes_ << " ran for " << runtime.count());
+    }
+}
+
diff --git a/src/base/Stopwatch.h b/src/base/Stopwatch.h
new file mode 100644 (file)
index 0000000..afa4d57
--- /dev/null
@@ -0,0 +1,67 @@
+/*
+ * Copyright (C) 1996-2022 The Squid Software Foundation and contributors
+ *
+ * Squid software is distributed under GPLv2+ license and includes
+ * contributions from numerous individuals and organizations.
+ * Please see the COPYING and CONTRIBUTORS files for details.
+ */
+
+#ifndef SQUID_SRC_BASE_STOPWATCH_H
+#define SQUID_SRC_BASE_STOPWATCH_H
+
+#include <chrono>
+#include <cstdint>
+
+/// Quickly accumulates related real-time (a.k.a. "physical time" or "wall
+/// clock") periods. Continues to run if the caller is blocked on a system call.
+/// Usually suitable for measuring CPU overheads of non-sleeping code sequences.
+class Stopwatch
+{
+public:
+    // std::clock() is not precise enough and raises (minor) overflow concerns;
+    // clock_gettime(CLOCK_PROCESS_CPUTIME_ID) may not be portable, and both may
+    // include CPU ticks accumulated by [AUFS] threads. When the code does
+    // not sleep (i.e. always does something), wall clock time is better.
+
+    // Do not be tempted by std::high_resolution_clock spelling! That clock has
+    // unpredictable properties. It may not be steady (i.e. !is_steady). Its use
+    // is discouraged[1,3]. In most implementations, the steady_clock resolution
+    // is just as high[1,2,3].
+    // [1]: https://en.cppreference.com/w/cpp/chrono/high_resolution_clock
+    // [2]: https://howardhinnant.github.io/clock_survey.html
+    // [3]: https://stackoverflow.com/questions/38252022/does-standard-c11-guarantee-that-high-resolution-clock-measure-real-time-non
+
+    /// the underlying time measuring mechanism
+    using Clock = std::chrono::steady_clock;
+
+    Stopwatch();
+
+    /// whether we are currently measuring time (i.e. between resume() and pause())
+    bool running() const { return resumes_ > pauses_; }
+
+    /// whether we ever measured time (i.e. resume() has been called)
+    bool ran() const { return resumes_ > 0; }
+
+    /// the sum of all measurement period durations (or zero)
+    /// includes the current measurement period, if any
+    Clock::duration total() const;
+
+    /// (re)starts or continues the current measurement period; each resume()
+    /// call must be paired with a dedicated future pause() call
+    void resume();
+
+    /// ends the current measurement period if needed; each pause() call
+    /// requires a prior dedicated resume() call
+    void pause();
+
+private:
+    Clock::time_point runStart_; ///< when the current period was initiated
+
+    Clock::duration subtotal_; ///< the sum of all _finished_ periods
+
+    uint64_t resumes_ = 0; ///< the total number of resume() calls
+    uint64_t pauses_ = 0; ///< the total number of pause() calls
+};
+
+#endif /* SQUID_SRC_BASE_STOPWATCH_H */
+
index e595542fc7d198c190da43e4690ffd5c9df29085..5dbd507fd3590e2e2b2e2281ba5a5dd8f7c8d6c1 100644 (file)
@@ -15,9 +15,10 @@ class AsyncJob;
 class CallDialer;
 class CodeContext;
 class DelayedAsyncCalls;
-class ScopedId;
 class Raw;
 class RegexPattern;
+class ScopedId;
+class Stopwatch;
 
 template<class Cbc> class CbcPointer;
 template<class RefCountableKid> class RefCount;
index 751d47cf9d5fc131536aa171a48870fe3be9e8ca..5f4b24784d0547a04f2b40d3c5cb2b1118b641bb 100644 (file)
@@ -4796,6 +4796,20 @@ DOC_START
                        similar to the default access.log "current time" field
                        (%ts.%03tu).
 
+               busy_time       Time spent in transaction-related code (nanoseconds)
+                       This cumulative measurement excludes periods of time when the
+                       transaction was waiting (e.g., for a server or helper response)
+                       while Squid worked on other transactions or was engaged in
+                       transaction-unrelated activities (e.g., generating a cache index).
+                       In other words, this measurement represents the total amount of
+                       physical time when Squid was busy working on this transaction.
+
+                       WARNING: This measurement relies on Squid transaction context
+                       tracking features that currently have known context leak bugs and
+                       coverage gaps. Until those features are fully implemented, logged
+                       values may significantly understate or exaggerate actual times.
+                       Do not use this measurement unless you know it works in your case.
+
        Access Control related format codes:
 
                et      Tag returned by external acl
index 9a7da3ae1efdade496915bf1789617ca9a018ae0..d65c785ffd9588817301c1bcdc959db847017628 100644 (file)
@@ -166,6 +166,7 @@ typedef enum {
     LFT_PEER_RESPONSE_TIME,
     LFT_TOTAL_SERVER_SIDE_RESPONSE_TIME,
     LFT_DNS_WAIT_TIME,
+    LFT_BUSY_TIME,
 
     /* Squid internal processing details */
     LFT_SQUID_STATUS,
index 6ad34935d510e6ffed1fcaf1a9f74e2a5a6f10e6..5bbba9c8c37aeacca28710658a0cc1015c7113b2 100644 (file)
@@ -603,6 +603,18 @@ Format::Format::assemble(MemBuf &mb, const AccessLogEntry::Pointer &al, int logS
             doSec = 1;
             break;
 
+        case LFT_BUSY_TIME: {
+            const auto &stopwatch = al->busyTime;
+            if (stopwatch.ran()) {
+                // make sure total() returns nanoseconds compatible with outoff
+                using nanos = std::chrono::duration<decltype(outoff), std::nano>;
+                const nanos n = stopwatch.total();
+                outoff = n.count();
+                dooff = true;
+            }
+        }
+        break;
+
         case LFT_TIME_TO_HANDLE_REQUEST:
             outtv = al->cache.trTime;
             doMsec = 1;
index 6a0ea80f5be020c73a421e824dab8d53be6108bc..6b826dcc70818069b5f17c442ca79d1afe17537a 100644 (file)
@@ -67,6 +67,7 @@ static TokenTableEntry TokenTable2C[] = {
     TokenTableEntry("<pt", LFT_PEER_RESPONSE_TIME),
     TokenTableEntry("<tt", LFT_TOTAL_SERVER_SIDE_RESPONSE_TIME),
     TokenTableEntry("dt", LFT_DNS_WAIT_TIME),
+    TokenTableEntry("busy_time", LFT_BUSY_TIME),
 
     TokenTableEntry(">ha", LFT_ADAPTED_REQUEST_HEADER),
     TokenTableEntry(">ha", LFT_ADAPTED_REQUEST_ALL_HEADERS),