]> git.ipfire.org Git - thirdparty/squid.git/commitdiff
Author: Alex Rousskov
authorChristos Tsantilas <chtsanti@users.sourceforge.net>
Mon, 5 Apr 2010 10:29:50 +0000 (13:29 +0300)
committerChristos Tsantilas <chtsanti@users.sourceforge.net>
Mon, 5 Apr 2010 10:29:50 +0000 (13:29 +0300)
Modify icap_service_failure_limit option to forget old ICAP errors.

A busy or remote ICAP server may produce a steady but shallow stream of
errors. Any ICAP server may become nearly unusable in a short period of
time, producing a burst of errors. To avoid disabling a generally usable
service, it is important to distinguish these two cases. Just counting
the number of errors and suspending the service after
icap_service_failure_limit is reached often either suspends the service
in both cases or never suspends it at all, depending on the option
value.

One way to distinguish a large burst of errors from a steady but shallow
error stream is to forget about old errors. The icap_service_failure_limit
option is modified to instruct Squid to ignore errors that
are "too old" to be counted as a part of a burst.

Another way to look at this feature is to say that the modified
icap_service_failure_limit limits the ICAP error _rate_. For example,
   # suspend service usage after 10 failures in 5 seconds:
   icap_service_failure_limit 10 in 5 seconds

Squid does not remember every transaction error that occurred within the
allowed "oldest error" time period. That would result in a precise
but too expensive implementation, especially during error bursts on a
busy server. Instead, Squid divides the period into ten slots, counts the
number of errors that occurred in each slot, and forgets the oldest
slot(s) as needed. Thus, the algorithm has about 90% precision as far as
timing of the failures is concerned. That 90% precision ought to be good
enough for any deployment.

src/FadingCounter.cc [new file with mode: 0644]
src/FadingCounter.h [new file with mode: 0644]
src/Makefile.am
src/adaptation/Config.h
src/adaptation/icap/ServiceRep.cc
src/adaptation/icap/ServiceRep.h
src/adaptation/icap/Xaction.cc
src/cache_cf.cc
src/cf.data.depend
src/cf.data.pre

diff --git a/src/FadingCounter.cc b/src/FadingCounter.cc
new file mode 100644 (file)
index 0000000..8c896c2
--- /dev/null
@@ -0,0 +1,69 @@
+/*
+ * $Id$
+ */
+
+#include "squid.h"
+#include "base/TextException.h"
+#include "SquidTime.h"
+#include "FadingCounter.h"
+
+/// Starts with a negative horizon and delta (the "forget nothing" mode of
+/// count()) and ten zeroed counting slots.
+FadingCounter::FadingCounter():
+    horizon(-1),
+    precision(10),
+    delta(-1),
+    lastTime(0),
+    total(0)
+{
+    counters.reserve(precision);
+    for (int slot = 0; slot < precision; ++slot)
+        counters.push_back(0);
+}
+
+/// Zeroes the running total and every counting slot, and records the
+/// current time as the time of the last update.
+void FadingCounter::clear()
+{
+    total = 0;
+    lastTime = current_dtime;
+    for (int slot = precision - 1; slot >= 0; --slot)
+        counters[slot] = 0;
+}
+
+/// Switches to a new memory horizon, given in seconds. A request that
+/// differs from the current horizon by less than one millisecond is
+/// ignored; any other change discards all remembered events.
+void FadingCounter::configure(double newHorizon)
+{
+    const bool changed = fabs(newHorizon - horizon) >= 1e-3; // diff exceeds one millisecond
+    if (!changed)
+        return;
+
+    clear(); // for simplicity
+    horizon = newHorizon;
+    delta = horizon / precision; // may become negative or zero
+}
+
+int FadingCounter::count(int howMany)
+{
+    Must(howMany >= 0);
+
+    if (delta < 0)
+        return total += howMany; // forget nothing
+
+    if (horizon < 1e-3) // (e.g., zero)
+        return howMany; // remember nothing
+
+    const double deltas = (current_dtime - lastTime) / delta;
+    if (deltas >= precision || current_dtime < lastTime) {
+        clear(); // forget all values
+    } else {
+        // forget stale values, if any
+        // fmod() or "current_dtime/delta" will overflow int for small deltas
+        const int lastSlot = static_cast<int>(fmod(lastTime, horizon) / delta);
+        const int staleSlots = static_cast<int>(deltas);
+        for (int i = 0, s = lastSlot + 1; i < staleSlots; ++i, ++s) {
+            const int idx = s % precision;
+            total -= counters[idx];
+            counters[idx] = 0;
+            Must(total >= 0);
+        }
+    }
+
+    // apply new information
+    lastTime = current_dtime;
+    const int curSlot = static_cast<int>(fmod(lastTime, horizon) / delta);
+    counters[curSlot % precision] += howMany;
+    total += howMany;
+    Must(total >= 0);
+
+    return total;
+}
diff --git a/src/FadingCounter.h b/src/FadingCounter.h
new file mode 100644 (file)
index 0000000..429a8d5
--- /dev/null
@@ -0,0 +1,35 @@
+/*
+ * $Id$
+ */
+
+#ifndef SQUID_FADING_COUNTER_H
+#define SQUID_FADING_COUNTER_H
+
+#include "Array.h"
+
+/// Counts events, forgetting old ones. Useful for "3 errors/minute" limits.
+class FadingCounter {
+    public:
+        FadingCounter();
+
+        /// 0=remember nothing; -1=forget nothing; new value triggers clear()
+        void configure(double horizonSeconds);
+
+        void clear(); ///< forgets all events
+
+        int count(int howMany); ///< counts howMany fresh events; returns #events remembered
+        int remembered() const { return total; } ///< possibly stale #events (does not age anything)
+
+        /// read-only memory horizon in seconds; older events are forgotten
+        double horizon;
+
+    private:
+        const int precision; ///< #counting slots; controls measurement accuracy
+        double delta; ///< sub-interval duration = horizon/precision
+
+        double lastTime; ///< time of the last count() update or clear()
+        Vector<int> counters; ///< events per delta (possibly stale)
+        int total; ///< number of remembered events (possibly stale)
+};
+
+#endif /* SQUID_FADING_COUNTER_H */
index d0e7c1c7e5776952f3d2d7ef062a8b77720dbeab..9c2edb3ed1ccb52ec69c98ef0d86a3dcd36b91be 100644 (file)
@@ -312,6 +312,8 @@ squid_SOURCES = \
        ExternalACL.h \
        ExternalACLEntry.cc \
        ExternalACLEntry.h \
+       FadingCounter.h \
+       FadingCounter.cc \
        fd.cc \
        fde.cc \
        fde.h \
@@ -1226,6 +1228,7 @@ tests_testCacheManager_SOURCES = \
        useragent.cc \
        wccp2.cc \
        whois.cc \
+       FadingCounter.cc \
        $(WIN32_SOURCE) \
        wordlist.cc
 nodist_tests_testCacheManager_SOURCES = \
@@ -1407,6 +1410,7 @@ tests_testEvent_SOURCES = \
        useragent.cc \
        wccp2.cc \
        whois.cc \
+       FadingCounter.cc \
        $(WIN32_SOURCE) \
        wordlist.cc
 nodist_tests_testEvent_SOURCES = \
@@ -1560,6 +1564,7 @@ tests_testEventLoop_SOURCES = \
        useragent.cc \
        wccp2.cc \
        whois.cc \
+       FadingCounter.cc \
        $(WIN32_SOURCE) \
        wordlist.cc
 nodist_tests_testEventLoop_SOURCES = \
@@ -1706,6 +1711,7 @@ tests_test_http_range_SOURCES = \
        useragent.cc \
        wccp2.cc \
        whois.cc \
+       FadingCounter.cc \
        $(WIN32_SOURCE) \
        wordlist.cc \
        Packer.cc \
@@ -1861,6 +1867,7 @@ tests_testHttpRequest_SOURCES = \
        useragent.cc \
        wccp2.cc \
        whois.cc \
+       FadingCounter.cc \
        $(WIN32_SOURCE) \
        wordlist.cc
 nodist_tests_testHttpRequest_SOURCES = \
@@ -2229,6 +2236,7 @@ tests_testURL_SOURCES = \
        useragent.cc \
        wccp2.cc \
        whois.cc \
+       FadingCounter.cc \
        $(WIN32_SOURCE) \
        wordlist.cc
 nodist_tests_testURL_SOURCES = \
index d35da51817b94359e8f763c7669eaec1be9e774b..cdc177797e2703ef9cb2b4980305c016db33fd56 100644 (file)
@@ -38,6 +38,7 @@ public:
     int send_client_ip;
     int send_client_username;
     int service_failure_limit;
+    time_t oldest_service_failure;
     int service_revival_delay;
     int icap_uses_indirect_client;
 
index 7e788be5193a9225da11ba7939d74182e3f31736..daf457426fbd6fa470ea1a1cebcc56a36a316698 100644 (file)
@@ -18,7 +18,7 @@ CBDATA_NAMESPACED_CLASS_INIT(Adaptation::Icap, ServiceRep);
 Adaptation::Icap::ServiceRep::ServiceRep(const Adaptation::ServiceConfig &svcCfg):
         AsyncJob("Adaptation::Icap::ServiceRep"), Adaptation::Service(svcCfg),
         theOptions(NULL), theOptionsFetcher(0), theLastUpdate(0),
-        theSessionFailures(0), isSuspended(0), notifying(false),
+        isSuspended(0), notifying(false),
         updateScheduled(false), self(NULL),
         wasAnnouncedUp(true) // do not announce an "up" service at startup
 {}
@@ -53,6 +53,9 @@ Adaptation::Icap::ServiceRep::finalize()
             writeableCfg().port = 1344;
         }
     }
+
+    theSessionFailures.configure(TheConfig.oldest_service_failure > 0 ?
+        TheConfig.oldest_service_failure : -1);
 }
 
 void Adaptation::Icap::ServiceRep::invalidate()
@@ -69,15 +72,16 @@ void Adaptation::Icap::ServiceRep::invalidate()
 
 void Adaptation::Icap::ServiceRep::noteFailure()
 {
-    ++theSessionFailures;
-    debugs(93,4, HERE << " failure " << theSessionFailures << " out of " <<
-           TheConfig.service_failure_limit << " allowed " << status());
+    const int failures = theSessionFailures.count(1);
+    debugs(93,4, HERE << " failure " << failures << " out of " <<
+           TheConfig.service_failure_limit << " allowed in " <<
+           TheConfig.oldest_service_failure << "sec " << status());
 
     if (isSuspended)
         return;
 
     if (TheConfig.service_failure_limit >= 0 &&
-            theSessionFailures > TheConfig.service_failure_limit)
+            failures > TheConfig.service_failure_limit)
         suspend("too many failures");
 
     // TODO: Should bypass setting affect how much Squid tries to talk to
@@ -235,7 +239,7 @@ void Adaptation::Icap::ServiceRep::changeOptions(Adaptation::Icap::Options *newO
 
     delete theOptions;
     theOptions = newOptions;
-    theSessionFailures = 0;
+    theSessionFailures.clear();
     isSuspended = 0;
     theLastUpdate = squid_curtime;
 
@@ -463,8 +467,8 @@ const char *Adaptation::Icap::ServiceRep::status() const
     if (notifying)
         buf.append(",notif", 6);
 
-    if (theSessionFailures > 0)
-        buf.Printf(",fail%d", theSessionFailures);
+    if (const int failures = theSessionFailures.remembered())
+        buf.Printf(",fail%d", failures);
 
     buf.append("]", 1);
     buf.terminate();
index 0bbaaa5a1ff6505e33f2f6f97067d2e6aac06fe6..3d955e39a92db172865f03a8a6c89c7ff1c19aa2 100644 (file)
@@ -35,6 +35,7 @@
 #define SQUID_ICAPSERVICEREP_H
 
 #include "cbdata.h"
+#include "FadingCounter.h"
 #include "adaptation/Service.h"
 #include "adaptation/forward.h"
 #include "adaptation/Initiator.h"
@@ -134,8 +135,7 @@ private:
     Adaptation::Initiate *theOptionsFetcher; // pending ICAP OPTIONS transaction
     time_t theLastUpdate; // time the options were last updated
 
-    static const int TheSessionFailureLimit;
-    int theSessionFailures;
+    FadingCounter theSessionFailures;
     const char *isSuspended; // also stores suspension reason for debugging
 
     bool notifying; // may be true in any state except for the initial
index 18c11dc228eefc894d0d39361b71bc8ddc092e00..f05caefda03e18a5ab9b355892ff61157a8bf772 100644 (file)
@@ -215,7 +215,6 @@ void Adaptation::Icap::Xaction::dieOnConnectionFailure()
 {
     debugs(93, 2, HERE << typeName <<
            " failed to connect to " << service().cfg().uri);
-    theService->noteFailure();
     throw TexcHere("cannot connect to the ICAP service");
 }
 
@@ -259,8 +258,6 @@ void Adaptation::Icap::Xaction::handleCommTimedout()
            theService->cfg().methodStr() << " " <<
            theService->cfg().uri << status());
     reuseConnection = false;
-    service().noteFailure();
-
     throw TexcHere(connector != NULL ?
                    "timed out while connecting to the ICAP service" :
                    "timed out while talking to the ICAP service");
@@ -281,6 +278,7 @@ void Adaptation::Icap::Xaction::handleCommClosed()
 void Adaptation::Icap::Xaction::callException(const std::exception  &e)
 {
     setOutcome(xoError);
+    service().noteFailure();
     Adaptation::Initiate::callException(e);
 }
 
index 76fa702e234b7366878c03c16bb3cc81f46d72d6..e1aa75ecf2be99c361b370048cff942c691cceba 100644 (file)
@@ -86,6 +86,10 @@ static void dump_icap_service_type(StoreEntry *, const char *, const Adaptation:
 static void free_icap_service_type(Adaptation::Icap::Config *);
 static void parse_icap_class_type();
 static void parse_icap_access_type();
+
+static void parse_icap_service_failure_limit(Adaptation::Icap::Config *);
+static void dump_icap_service_failure_limit(StoreEntry *, const char *, const Adaptation::Icap::Config &);
+static void free_icap_service_failure_limit(Adaptation::Icap::Config *);
 #endif
 
 #if USE_ECAP
@@ -3719,3 +3723,56 @@ dump_ecap_service_type(StoreEntry * entry, const char *name, const Adaptation::E
 }
 
 #endif /* USE_ECAP */
+
+#if ICAP_CLIENT
+static void parse_icap_service_failure_limit(Adaptation::Icap::Config *cfg)
+{
+    char *token;
+    time_t d;
+    time_t m;
+    cfg->service_failure_limit = GetInteger();
+
+    if ((token = strtok(NULL, w_space)) == NULL)
+        return;
+
+    if (strcmp(token,"in") != 0) {
+        debugs(3, 0, "expecting 'in' on'"  << config_input_line << "'");
+        self_destruct();
+    }
+
+    if ((token = strtok(NULL, w_space)) == NULL) {
+        self_destruct();
+    }
+
+    d = static_cast<time_t> (xatoi(token));
+    
+    m = static_cast<time_t> (1);
+
+    if (0 == d)
+        (void) 0;
+    else if ((token = strtok(NULL, w_space)) == NULL) {
+        debugs(3, 0, "No time-units on '" << config_input_line << "'");
+        self_destruct();
+    }
+    else if ((m = parseTimeUnits(token)) == 0)
+        self_destruct();
+
+    cfg->oldest_service_failure = (m * d);
+}
+
+/// Writes the directive to entry as "name limit", followed by
+/// " in N seconds" when a positive memory depth is configured.
+static void dump_icap_service_failure_limit(StoreEntry *entry, const char *name, const Adaptation::Icap::Config &cfg)
+{
+    storeAppendPrintf(entry, "%s %d", name, cfg.service_failure_limit);
+
+    const time_t memoryDepth = cfg.oldest_service_failure;
+    if (memoryDepth > 0)
+        storeAppendPrintf(entry, " in %d seconds", static_cast<int>(memoryDepth));
+
+    storeAppendPrintf(entry, "\n");
+}
+
+/// Resets both parts of the directive, the failure limit and the memory
+/// depth, to zero.
+static void free_icap_service_failure_limit(Adaptation::Icap::Config *cfg)
+{
+    cfg->service_failure_limit = 0;
+    cfg->oldest_service_failure = 0;
+}
+
+#endif
index 75fe5ec502614b38dde298a10912d39c4d3928c3..a3b62464957fc47c22515804f2b5c5a2c0924a6f 100644 (file)
@@ -32,6 +32,7 @@ adaptation_service_chain_type icap_service ecap_service
 icap_access_type       icap_class acl
 icap_class_type                icap_service
 icap_service_type
+icap_service_failure_limit
 ecap_service_type
 int
 kb_int64_t
index f2076e1b26b6c981ad0a23c4c87266d6a6e3764e..11c00bbd5bf67c19ad12a7bd23ea07aa05ff7649 100644 (file)
@@ -5772,21 +5772,38 @@ DOC_START
 DOC_END
 
 NAME: icap_service_failure_limit
-TYPE: int
+COMMENT: limit [in memory-depth time-units]
+TYPE: icap_service_failure_limit
 IFDEF: ICAP_CLIENT
-LOC: Adaptation::Icap::TheConfig.service_failure_limit
+LOC: Adaptation::Icap::TheConfig
 DEFAULT: 10
 DOC_START
        The limit specifies the number of failures that Squid tolerates
        when establishing a new TCP connection with an ICAP service. If
        the number of failures exceeds the limit, the ICAP service is
        not used for new ICAP requests until it is time to refresh its
-       OPTIONS. The per-service failure counter is reset to zero each
-       time Squid fetches new service OPTIONS.
+       OPTIONS.
 
        A negative value disables the limit. Without the limit, an ICAP
        service will not be considered down due to connectivity failures
        between ICAP OPTIONS requests.
+
+       Squid forgets ICAP service failures older than the specified
+       value of memory-depth. The memory fading algorithm 
+       is approximate because Squid does not remember individual 
+       errors but groups them instead, splitting the option
+       value into ten time slots of equal length.
+
+       When memory-depth is 0 or not specified (the default), this
+       option has no effect on service failure expiration: failures
+       are not forgotten based on their age.
+
+       Squid always forgets failures when updating service settings
+       using an ICAP OPTIONS transaction, regardless of this option
+       setting.
+
+       For example,
+               # suspend service usage after 10 failures in 5 seconds:
+               icap_service_failure_limit 10 in 5 seconds
 DOC_END
 
 NAME: icap_service_revival_delay