/*
- * $Id: refresh.cc,v 1.5 1996/11/05 17:08:22 wessels Exp $
+ * $Id: refresh.cc,v 1.64 2004/11/06 22:20:47 hno Exp $
*
* DEBUG: section 22 Refresh Calculation
* AUTHOR: Harvest Derived
*
- * SQUID Internet Object Cache http://www.nlanr.net/Squid/
- * --------------------------------------------------------
+ * SQUID Web Proxy Cache http://www.squid-cache.org/
+ * ----------------------------------------------------------
*
- * Squid is the result of efforts by numerous individuals from the
- * Internet community. Development is led by Duane Wessels of the
- * National Laboratory for Applied Network Research and funded by
- * the National Science Foundation.
+ * Squid is the result of efforts by numerous individuals from
+ * the Internet community; see the CONTRIBUTORS file for full
+ * details. Many organizations have provided support for Squid's
+ * development; see the SPONSORS file for full details. Squid is
+ * Copyrighted (C) 2001 by the Regents of the University of
+ * California; see the COPYRIGHT file for full details. Squid
+ * incorporates software developed and/or copyrighted by other
+ * sources; see the CREDITS file for full details.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- *
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA.
+ *
*/
#ifndef USE_POSIX_REGEX
#endif
#include "squid.h"
+#include "Store.h"
+#include "MemObject.h"
+#include "HttpRequest.h"
+#include "HttpReply.h"
+
+typedef enum { /* index into refreshCounts[]: one slot per kind of refreshCheck() caller */
+ rcHTTP, /* counted by refreshCheckHTTP() */
+ rcICP, /* counted by refreshCheckICP() */
+#if USE_HTCP
+ rcHTCP, /* counted by refreshCheckHTCP() */
+#endif
+#if USE_CACHE_DIGESTS
+ rcCDigest, /* counted by refreshCheckDigest() */
+#endif
+ rcStore, /* counted by refreshIsCachable() */
+ rcCount /* number of slots; used as the length of refreshCounts[] */
+} refreshCountsEnum;
+
+/*
+ * Flags set by refreshStaleness() to record which rule produced its
+ * verdict.  refreshCheck() zeroes the whole structure before every
+ * calculation and inspects it afterwards to pick a reason code.
+ */
+typedef struct
+{
+    unsigned int expires: 1;   /* the entry carried an explicit expiration time */
+    unsigned int min: 1;       /* fresh because age <= the rule's minimum */
+    unsigned int lmfactor: 1;  /* decided by the last-modified factor */
+    unsigned int max;          /* stale because age > the rule's maximum (plain member, not a bit-field) */
+} stale_flags;
+
+/*
+ * Reason codes returned by refreshCheck().  The list assigns specific
+ * values, a la HTTP/FTP status codes: all fresh codes are in the range
+ * 100-199 and all stale codes are in the range 200-299.  We might want
+ * to use these codes in logging, so best to keep them consistent over
+ * time.
+ *
+ * The codes are also used directly as indices into
+ * RefreshCounts.status[], and the public wrappers treat any value
+ * below 200 as "fresh".
+ */
+enum {
+ FRESH_REQUEST_MAX_STALE_ALL = 100, /* request max-stale without a value */
+ FRESH_REQUEST_MAX_STALE_VALUE, /* staleness below the request's max-stale value */
+ FRESH_EXPIRES, /* expires time not reached */
+ FRESH_LMFACTOR_RULE, /* refresh_pattern last-mod factor percentage */
+ FRESH_MIN_RULE, /* refresh_pattern min value */
+ FRESH_OVERRIDE_EXPIRES, /* refresh_pattern override-expire */
+ FRESH_OVERRIDE_LASTMOD, /* refresh_pattern override-lastmod */
+ STALE_MUST_REVALIDATE = 200, /* stale entry flagged ENTRY_REVALIDATE */
+ STALE_RELOAD_INTO_IMS, /* client reload changed into an IMS query */
+ STALE_FORCED_RELOAD, /* client reload honoured */
+ STALE_EXCEEDS_REQUEST_MAX_AGE_VALUE, /* age exceeds the request's max-age value */
+ STALE_EXPIRES, /* expires time reached */
+ STALE_MAX_RULE, /* refresh_pattern max age rule */
+ STALE_LMFACTOR_RULE, /* refresh_pattern last-mod factor percentage */
+ STALE_DEFAULT = 299 /* stale by default; also the highest index of status[] */
+};
+
+/*
+ * Tallies of refreshCheck() outcomes, one element per refreshCountsEnum
+ * value.  status[] is indexed directly by the FRESH_ and STALE_ reason
+ * codes, hence its length of STALE_DEFAULT + 1.
+ */
+static struct RefreshCounts
+{
+    const char *proto;              /* label printed in the report; set by refreshInit() */
+    int total;                      /* number of checks counted for this category */
+    int status[STALE_DEFAULT + 1];  /* per-reason-code counters */
+} refreshCounts[rcCount];
/*
* Defaults:
* PCT 20%
* MAX 3 days
*/
-#define REFRESH_DEFAULT_MIN 0
-#define REFRESH_DEFAULT_PCT 20
-#define REFRESH_DEFAULT_MAX 259200
-
-typedef struct _refresh_t {
- char *pattern;
- regex_t compiled_pattern;
- time_t min;
- int pct;
- time_t max;
- struct _refresh_t *next;
-} refresh_t;
-
-static refresh_t *Refresh_tbl = NULL;
-static refresh_t *Refresh_tail = NULL;
+/*
+ * Fallback refresh_pattern parameters.  The cast expressions are fully
+ * parenthesised so the macros behave as single operands wherever they
+ * are expanded (for example as the operand of sizeof).
+ */
+#define REFRESH_DEFAULT_MIN ((time_t)0)
+#define REFRESH_DEFAULT_PCT 0.20                /* i.e. 20% */
+#define REFRESH_DEFAULT_MAX ((time_t)259200)    /* 3 days */
-static void
-refreshFreeList(refresh_t * t)
+static const refresh_t *refreshUncompiledPattern(const char *); /* look a rule up by its pattern text */
+static OBJH refreshStats; /* report handler registered with the cache manager in refreshInit() */
+static int refreshStaleness(const StoreEntry *, time_t, time_t, const refresh_t *, stale_flags *); /* -1 if fresh, else staleness */
+
+static refresh_t DefaultRefresh; /* rule used by refreshCheck() when no configured rule applies; filled in by refreshInit() */
+
+/*
+ * Walk the configured refresh rules (Config.Refresh) in order and
+ * return the first one whose compiled regular expression matches
+ * 'url'.  Returns NULL when no rule matches.
+ */
+const refresh_t *
+refreshLimits(const char *url)
{
- refresh_t *tnext;
+    const refresh_t *rule = Config.Refresh;
- for (; t; t = tnext) {
- tnext = t->next;
- safe_free(t->pattern);
- regfree(&t->compiled_pattern);
- safe_free(t);
+    while (rule != NULL) {
+        /* regexec() yields 0 when the pattern matches */
+        if (regexec(&(rule->compiled_pattern), url, 0, 0, 0) == 0)
+            return rule;
+
+        rule = rule->next;
 }
-}
-void
-refreshFreeMemory(void)
-{
- refreshFreeList(Refresh_tbl);
- Refresh_tail = Refresh_tbl = NULL;
+    return NULL;
}
-void
-refreshAddToList(const char *pattern, int opts, time_t min, int pct, time_t max)
+/*
+ * Find a configured refresh rule by the literal text of its pattern
+ * (string comparison, no regular expression matching).  Returns the
+ * first rule whose pattern equals 'pat', or NULL if there is none.
+ */
+static const refresh_t *
+refreshUncompiledPattern(const char *pat)
{
- refresh_t *t;
- regex_t comp;
- int flags = REG_EXTENDED;
- if (opts & REFRESH_ICASE)
- flags |= REG_ICASE;
- if (regcomp(&comp, pattern, flags) != REG_NOERROR) {
- debug(22, 0, "refreshAddToList: Invalid regular expression: %s\n",
- pattern);
- return;
+    const refresh_t *rule;
+
+    for (rule = Config.Refresh; rule != NULL; rule = rule->next) {
+        if (strcmp(rule->pattern, pat) != 0)
+            continue;
+
+        return rule;
 }
- pct = pct < 0 ? 0 : pct;
- max = max < 0 ? 0 : max;
- t = xcalloc(1, sizeof(refresh_t));
- t->pattern = (char *) xstrdup(pattern);
- t->compiled_pattern = comp;
- t->min = min;
- t->pct = pct;
- t->max = max;
- t->next = NULL;
- if (!Refresh_tbl)
- Refresh_tbl = t;
- if (Refresh_tail)
- Refresh_tail->next = t;
- Refresh_tail = t;
+
+    return NULL;
}
/*
- * refreshCheck():
- * return 1 if its time to revalidate this entry, 0 otherwise
+ * Calculate how stale the response is (or will be at the check_time).
+ * Staleness calculation is based on the following: (1) response
+ * expiration time, (2) age greater than configured maximum, (3)
+ * last-modified factor, and (4) age less than configured minimum.
+ *
+ * If the response is fresh, return -1. Otherwise return its
+ * staleness. NOTE return value of 0 means the response is stale.
+ *
+ * The 'stale_flags' structure is used to tell the calling function
+ * _why_ this response is fresh or stale. It's used, for example,
+ * when the admin wants to override expiration and last-modified
+ * times.
*/
-int
-refreshCheck(const StoreEntry * entry, const request_t * request_unused)
+/*
+ * entry      - cached object; its expires, lastmod and timestamp are read
+ * check_time - moment for which freshness is evaluated
+ * age        - age of the response at check_time; must be >= 0 whenever
+ *              the entry has no explicit expiration time
+ * R          - refresh rule supplying min, pct and max
+ * sf         - out: the flag for the rule that decided the result is set;
+ *              the caller is expected to have zeroed the structure
+ *
+ * Returns -1 when the response is fresh, otherwise its staleness (>= 0).
+ * The debug messages print the same comparison operator the code tests.
+ */
+static int
+refreshStaleness(const StoreEntry * entry, time_t check_time, time_t age, const refresh_t * R, stale_flags * sf)
{
- refresh_t *R;
- time_t min = REFRESH_DEFAULT_MIN;
- int pct = REFRESH_DEFAULT_PCT;
- time_t max = REFRESH_DEFAULT_MAX;
- const char *pattern = ".";
- time_t age;
- int factor;
- debug(22, 3, "refreshCheck: '%s'\n", entry->url);
- for (R = Refresh_tbl; R; R = R->next) {
- if (regexec(&(R->compiled_pattern), entry->url, 0, 0, 0) != 0)
- continue;
- min = R->min;
- pct = R->pct;
- max = R->max;
- pattern = R->pattern;
- break;
+    /*
+     * Check for an explicit expiration time.  When one is present it
+     * alone decides the outcome; the heuristics below are not consulted.
+     */
+
+    if (entry->expires > -1) {
+        sf->expires = 1;
+
+        if (entry->expires > check_time) {
+            debug(22, 3) ("FRESH: expires %d > check_time %d \n",
+                          (int) entry->expires, (int) check_time);
+            return -1;
+        } else {
+            /* expires == check_time counts as stale, with staleness 0 */
+            debug(22, 3) ("STALE: expires %d <= check_time %d \n",
+                          (int) entry->expires, (int) check_time);
+            return (check_time - entry->expires);
+        }
 }
- debug(22, 3, "refreshCheck: Matched '%s %d %d%% %d'\n",
- pattern, (int) min, pct, (int) max);
- age = squid_curtime - entry->timestamp;
- debug(22, 3, "refreshCheck: age = %d\n", (int) age);
- if (age <= min) {
- debug(22, 3, "refreshCheck: NO: age < min\n");
- return 0;
+
+    assert(age >= 0);
+    /*
+     * Use local heuristics to determine staleness. Start with the
+     * max age from the refresh_pattern rule.
+     */
+
+    if (age > R->max) {
+        debug(22, 3) ("STALE: age %d > max %d \n", (int) age, (int) R->max);
+        sf->max = 1;
+        return (age - R->max);
 }
- if (-1 < entry->expires) {
- if (entry->expires <= squid_curtime) {
- debug(22, 3, "refreshCheck: YES: expires <= curtime\n");
- return 1;
- } else {
- debug(22, 3, "refreshCheck: NO: expires > curtime\n");
- return 0;
- }
+
+    /*
+     * Try the last-modified factor algorithm.  It only applies when a
+     * last-modified time is known and lies before the entry timestamp.
+     */
+    if (entry->lastmod > -1 && entry->timestamp > entry->lastmod) {
+        /*
+         * stale_age is the Age of the response when it became/becomes
+         * stale according to the last-modified factor algorithm.
+         */
+        time_t stale_age = static_cast<time_t>((entry->timestamp - entry->lastmod) * R->pct);
+        sf->lmfactor = 1;
+
+        if (age >= stale_age) {
+            debug(22, 3) ("STALE: age %d >= stale_age %d\n",
+                          (int) age, (int) stale_age);
+            return (age - stale_age);
+        } else {
+            debug(22, 3) ("FRESH: age %d < stale_age %d\n",
+                          (int) age, (int) stale_age);
+            return -1;
+        }
 }
- if (age > max) {
- debug(22, 3, "refreshCheck: YES: age > max\n");
- return 1;
+
+    /*
+     * If we are here, staleness is determined by the refresh_pattern
+     * configured minimum age.
+     */
+    if (age <= R->min) {
+        debug(22, 3) ("FRESH: age %d <= min %d\n", (int) age, (int) R->min);
+        sf->min = 1;
+        return -1;
 }
- if (entry->timestamp <= entry->lastmod) {
- debug(22, 3, "refreshCheck: YES: lastvalid <= lastmod\n");
- return 1;
+
+    /* stale by default: none of the flags in *sf is set on this path */
+    debug(22, 3) ("STALE: age %d > min %d\n", (int) age, (int) R->min);
+    return (age - R->min);
+}
+
+/* Decide whether the entry is fresh or stale as of (squid_curtime + delta).
+ *
+ * Returns one of the FRESH_* (100-199) or STALE_* (200-299) reason codes,
+ * not a boolean; the public wrappers below map codes under 200 to
+ * "no revalidation needed".
+ *
+ * note: request may be NULL (e.g. for cache digests build); the
+ * request-specific checks are then skipped.
+ *
+ * Side effect: when HTTP_VIOLATIONS is enabled and a client reload is
+ * honoured, request->flags.nocache is set to 1.
+ */
+static int
+refreshCheck(const StoreEntry * entry, HttpRequest * request, time_t delta)
+{
+ const refresh_t *R;
+ const char *uri = NULL;
+ time_t age = 0; /* stays 0 unless check_time is later than the entry timestamp */
+ time_t check_time = squid_curtime + delta;
+ int staleness;
+ stale_flags sf;
+
+ if (entry->mem_obj)
+ uri = entry->mem_obj->url;
+ else if (request)
+ uri = urlCanonical(request);
+
+ debug(22, 3) ("refreshCheck: '%s'\n", uri ? uri : "<none>");
+
+ if (check_time > entry->timestamp)
+ age = check_time - entry->timestamp;
+
+ R = uri ? refreshLimits(uri) : refreshUncompiledPattern("."); /* without a URI, use the rule whose pattern text is literally "." */
+
+ if (NULL == R)
+ R = &DefaultRefresh;
+
+ memset(&sf, '\0', sizeof(sf));
+
+ staleness = refreshStaleness(entry, check_time, age, R, &sf); /* -1 means fresh; sf records which rule decided */
+
+ debug(22, 3) ("Staleness = %d\n", staleness);
+
+ debug(22, 3) ("refreshCheck: Matched '%s %d %d%% %d'\n",
+ R->pattern, (int) R->min, (int) (100.0 * R->pct), (int) R->max);
+
+ debug(22, 3) ("refreshCheck: age = %d\n", (int) age);
+
+ debug(22, 3) ("\tcheck_time:\t%s\n", mkrfc1123(check_time));
+
+ debug(22, 3) ("\tentry->timestamp:\t%s\n", mkrfc1123(entry->timestamp));
+
+ if (EBIT_TEST(entry->flags, ENTRY_REVALIDATE) && staleness > -1) { /* checked before any request override can declare the entry fresh */
+ debug(22, 3) ("refreshCheck: YES: Must revalidate stale response\n");
+ return STALE_MUST_REVALIDATE;
+ }
+
+ /* request-specific checks */
+ if (request) {
+ HttpHdrCc *cc = request->cache_control;
+#if HTTP_VIOLATIONS
+
+ if (!request->flags.nocache_hack) {
+ (void) 0; /* deliberate no-op: the branches below only apply when nocache_hack is set */
+ } else if (R->flags.ignore_reload) {
+ /* The client's no-cache header is ignored */
+ debug(22, 3) ("refreshCheck: MAYBE: ignore-reload\n");
+ } else if (R->flags.reload_into_ims || Config.onoff.reload_into_ims) {
+ /* The client's no-cache header is changed into an IMS query */
+ debug(22, 3) ("refreshCheck: YES: reload-into-ims\n");
+ return STALE_RELOAD_INTO_IMS;
+ } else {
+ /* The client's no-cache header is not overridden on this request */
+ debug(22, 3) ("refreshCheck: YES: client reload\n");
+ request->flags.nocache = 1; /* side effect on the caller's request */
+ return STALE_FORCED_RELOAD;
+ }
+
+#endif
+ if (NULL != cc) {
+ if (cc->max_age > -1) {
+#if HTTP_VIOLATIONS
+ if (R->flags.ignore_reload && cc->max_age == 0) {} else /* empty branch: skip the max-age test for max-age=0 under ignore_reload */
+#endif
+ {
+#if 0
+
+ if (cc->max_age == 0) {
+ debug (22,3) ("refreshCheck: YES: client-max-age = 0\n");
+ return STALE_EXCEEDS_REQUEST_MAX_AGE_VALUE;
+ }
+
+#endif
+ if (age > cc->max_age) {
+ debug(22, 3) ("refreshCheck: YES: age > client-max-age\n");
+ return STALE_EXCEEDS_REQUEST_MAX_AGE_VALUE;
+ }
+ }
+ }
+
+ if (EBIT_TEST(cc->mask, CC_MAX_STALE) && staleness > -1) { /* client accepts a stale response */
+ if (cc->max_stale < 0) {
+ /* max-stale directive without a value */
+ debug(22, 3) ("refreshCheck: NO: max-stale wildcard\n");
+ return FRESH_REQUEST_MAX_STALE_ALL;
+ } else if (staleness < cc->max_stale) {
+ debug(22, 3) ("refreshCheck: NO: staleness < max-stale\n");
+ return FRESH_REQUEST_MAX_STALE_VALUE;
+ }
+ }
+ }
+ }
+
+ if (-1 == staleness) { /* fresh: report which rule said so */
+ if (sf.expires)
+ return FRESH_EXPIRES;
+
+ assert(!sf.max); /* the max rule only ever yields a stale result */
+
+ if (sf.lmfactor)
+ return FRESH_LMFACTOR_RULE;
+
+ assert(sf.min); /* the only remaining way refreshStaleness() returns -1 */
+
+ return FRESH_MIN_RULE;
+ }
+
+ /*
+ * At this point the response is stale, unless one of
+ * the override options kicks in.
+ */
+ if (sf.expires) {
+#if HTTP_VIOLATIONS
+
+ if (R->flags.override_expire && age < R->min) {
+ debug(22, 3) ("refreshCheck: NO: age < min && override-expire\n");
+ return FRESH_OVERRIDE_EXPIRES;
+ }
+
+#endif
+ return STALE_EXPIRES;
 }
- factor = 100 * age / (entry->timestamp - entry->lastmod);
- debug(22, 3, "refreshCheck: factor = %d\n", factor);
- if (factor > pct) {
- debug(22, 3, "refreshCheck: YES: factor > pc\n");
- return 1;
+
+ if (sf.max)
+ return STALE_MAX_RULE;
+
+ if (sf.lmfactor) {
+#if HTTP_VIOLATIONS
+
+ if (R->flags.override_lastmod && age < R->min) {
+ debug(22, 3) ("refreshCheck: NO: age < min && override-lastmod\n");
+ return FRESH_OVERRIDE_LASTMOD;
+ }
+
+#endif
+ return STALE_LMFACTOR_RULE;
 }
- return 0;
+
+ return STALE_DEFAULT; /* stale with no flag set: age exceeded the rule's minimum */
+}
+
+/*
+ * Decide whether an object is worth storing.  Returns 1 if the entry
+ * is cachable, 0 if not.  The outcome of the underlying refreshCheck()
+ * call is tallied under the "On Store" counters.
+ */
+int
+refreshIsCachable(const StoreEntry * entry)
+{
+    /*
+     * The request is deliberately not consulted, to avoid no-cache and
+     * other nuisances; the object should have a mem_obj so the URL
+     * will be found there.
+     *
+     * A delta of 60 seconds avoids objects which expire almost
+     * immediately and which can't be refreshed.  For ESI a delta of 0
+     * is used instead, as ESI objects typically can be refreshed, but
+     * the expiry may be low to enforce regular checks.
+     */
+    const time_t delta = ESI ? 0 : 60;
+    const int verdict = refreshCheck(entry, NULL, delta);
+
+    refreshCounts[rcStore].total++;
+    refreshCounts[rcStore].status[verdict]++;
+
+    if (verdict < 200) {
+        /* Does not need refresh. This is certainly cachable */
+        return 1;
+    }
+
+    if (entry->lastmod < 0) {
+        /* Last modified is needed to do a refresh */
+        return 0;
+    }
+
+    /* no mem_obj, or no reply: nothing more to examine */
+    if (entry->mem_obj == NULL || entry->getReply() == NULL) {
+        return 1;
+    }
+
+    /*
+     * No use refreshing (caching?) 0 byte objects; anything else seems
+     * to be refreshable, so cache it.
+     */
+    return (entry->getReply()->content_length == 0) ? 0 : 1;
+}
+
+/* refreshCheck... functions below are protocol-specific wrappers around
+ * refreshCheck() function above */
+
+/*
+ * HTTP wrapper around refreshCheck(), evaluated with a delta of 0.
+ * Records the reason code under the HTTP counters and returns 1 when
+ * the code is in the stale range (200 and above), 0 otherwise.
+ */
+int
+refreshCheckHTTP(const StoreEntry * entry, HttpRequest * request)
+{
+    const int code = refreshCheck(entry, request, 0);
+    struct RefreshCounts *tally = &refreshCounts[rcHTTP];
+
+    tally->total++;
+    tally->status[code]++;
+
+    if (code < 200)
+        return 0;
+
+    return 1;
+}
+
+/*
+ * ICP wrapper around refreshCheck(), evaluated 30 seconds ahead of the
+ * current time.  Records the reason code under the ICP counters and
+ * returns 1 when the code is in the stale range (200 and above),
+ * 0 otherwise.
+ */
+int
+refreshCheckICP(const StoreEntry * entry, HttpRequest * request)
+{
+    const int code = refreshCheck(entry, request, 30);
+    struct RefreshCounts *tally = &refreshCounts[rcICP];
+
+    tally->total++;
+    tally->status[code]++;
+
+    if (code < 200)
+        return 0;
+
+    return 1;
+}
+
+#if USE_HTCP
+/*
+ * HTCP wrapper around refreshCheck(), evaluated 10 seconds ahead of
+ * the current time.  Records the reason code under the HTCP counters
+ * and returns 1 when the code is in the stale range (200 and above),
+ * 0 otherwise.
+ */
+int
+refreshCheckHTCP(const StoreEntry * entry, HttpRequest * request)
+{
+    const int code = refreshCheck(entry, request, 10);
+    struct RefreshCounts *tally = &refreshCounts[rcHTCP];
+
+    tally->total++;
+    tally->status[code]++;
+
+    if (code < 200)
+        return 0;
+
+    return 1;
+}
+
+#endif
+
+#if USE_CACHE_DIGESTS
+/*
+ * Cache-digest wrapper around refreshCheck().  The request is taken
+ * from the entry's mem_obj when there is one, otherwise NULL is
+ * passed; 'delta' is handed through unchanged.  Records the reason
+ * code under the Cache Digests counters and returns 1 when the code is
+ * in the stale range (200 and above), 0 otherwise.
+ */
+int
+refreshCheckDigest(const StoreEntry * entry, time_t delta)
+{
+    struct RefreshCounts *tally = &refreshCounts[rcCDigest];
+    int code;
+
+    if (entry->mem_obj)
+        code = refreshCheck(entry, entry->mem_obj->request, delta);
+    else
+        code = refreshCheck(entry, NULL, delta);
+
+    tally->total++;
+    tally->status[code]++;
+
+    if (code < 200)
+        return 0;
+
+    return 1;
+}
+
+#endif
+
+/*
+ * Return the maximum age of the first refresh rule matching 'url', or
+ * REFRESH_DEFAULT_MAX when no rule matches.
+ */
+time_t
+getMaxAge(const char *url)
+{
+    debug(22, 3) ("getMaxAge: '%s'\n", url);
+
+    const refresh_t *rule = refreshLimits(url);
+
+    if (rule == NULL)
+        return REFRESH_DEFAULT_MAX;
+
+    return rule->max;
+}
+
+/*
+ * Append the outcome histogram for one counter category to 'sentry':
+ * a heading naming rc->proto, one row per reason code giving its count
+ * and its share of rc->total, and a closing TOTAL row.
+ *
+ * sentry - store entry receiving the report text
+ * rc     - counters to print
+ */
+static void
+refreshCountsStats(StoreEntry * sentry, struct RefreshCounts *rc)
+{
+    int sum = 0;
+    int tot = rc->total;
+
+    storeAppendPrintf(sentry, "\n\n%s histogram:\n", rc->proto);
+    storeAppendPrintf(sentry, "Count\t%%Total\tCategory\n");
+
+    /*
+     * Print one histogram row and add its count to 'sum'.  Wrapped in
+     * do/while(0) so each use is a single statement, and undefined
+     * again below so the helper does not leak past this function.
+     */
+#define refreshCountsStatsEntry(code,desc) do { \
+    storeAppendPrintf(sentry, "%6d\t%6.2f\t%s\n", \
+                      rc->status[code], xpercent(rc->status[code], tot), desc); \
+    sum += rc->status[code]; \
+} while (0)
+
+    refreshCountsStatsEntry(FRESH_REQUEST_MAX_STALE_ALL,
+                            "Fresh: request max-stale wildcard");
+    refreshCountsStatsEntry(FRESH_REQUEST_MAX_STALE_VALUE,
+                            "Fresh: request max-stale value");
+    refreshCountsStatsEntry(FRESH_EXPIRES,
+                            "Fresh: expires time not reached");
+    refreshCountsStatsEntry(FRESH_LMFACTOR_RULE,
+                            "Fresh: refresh_pattern last-mod factor percentage");
+    refreshCountsStatsEntry(FRESH_MIN_RULE,
+                            "Fresh: refresh_pattern min value");
+    refreshCountsStatsEntry(FRESH_OVERRIDE_EXPIRES,
+                            "Fresh: refresh_pattern override expires");
+    refreshCountsStatsEntry(FRESH_OVERRIDE_LASTMOD,
+                            "Fresh: refresh_pattern override lastmod");
+    refreshCountsStatsEntry(STALE_MUST_REVALIDATE,
+                            "Stale: response has must-revalidate");
+    refreshCountsStatsEntry(STALE_RELOAD_INTO_IMS,
+                            "Stale: changed reload into IMS");
+    refreshCountsStatsEntry(STALE_FORCED_RELOAD,
+                            "Stale: request has no-cache directive");
+    refreshCountsStatsEntry(STALE_EXCEEDS_REQUEST_MAX_AGE_VALUE,
+                            "Stale: age exceeds request max-age value");
+    refreshCountsStatsEntry(STALE_EXPIRES,
+                            "Stale: expires time reached");
+    refreshCountsStatsEntry(STALE_MAX_RULE,
+                            "Stale: refresh_pattern max age rule");
+    refreshCountsStatsEntry(STALE_LMFACTOR_RULE,
+                            "Stale: refresh_pattern last-mod factor percentage");
+    refreshCountsStatsEntry(STALE_DEFAULT,
+                            "Stale: by default");
+
+#undef refreshCountsStatsEntry
+
+    tot = sum; /* paranoid: "total" line shows 100% if we forgot nothing */
+    storeAppendPrintf(sentry, "%6d\t%6.2f\tTOTAL\n",
+                      rc->total, xpercent(rc->total, tot));
+    storeAppendPrintf(sentry, "\n");
+}
+
+/*
+ * Cache manager report handler (registered in refreshInit()): prints
+ * how many checks each counter category received, then the outcome
+ * histogram of every category.
+ */
+static void
+refreshStats(StoreEntry * sentry)
+{
+    int grand_total = 0;
+    int slot;
+
+    /* add up the checks counted across all categories */
+    for (slot = 0; slot < rcCount; ++slot) {
+        grand_total += refreshCounts[slot].total;
+    }
+
+    /* usage table: one row per category */
+    storeAppendPrintf(sentry, "\nRefreshCheck calls per protocol\n\n");
+    storeAppendPrintf(sentry, "Protocol\t#Calls\t%%Calls\n");
+
+    for (slot = 0; slot < rcCount; ++slot) {
+        const struct RefreshCounts *row = &refreshCounts[slot];
+
+        storeAppendPrintf(sentry, "%10s\t%6d\t%6.2f\n",
+                          row->proto,
+                          row->total,
+                          xpercent(row->total, grand_total));
+    }
+
+    /* detailed histogram for each category */
+    storeAppendPrintf(sentry, "\n\nRefreshCheck histograms for various protocols\n");
+
+    for (slot = 0; slot < rcCount; ++slot) {
+        refreshCountsStats(sentry, &refreshCounts[slot]);
+    }
+}
+
+/*
+ * Module initialisation: clears and labels the counters, registers the
+ * "refresh" cache manager report, and fills in the fallback rule
+ * DefaultRefresh from the REFRESH_DEFAULT_* constants.
+ */
+void
+refreshInit(void)
+{
+    /* start from zeroed counters, then name each category */
+    memset(refreshCounts, 0, sizeof refreshCounts);
+
+    refreshCounts[rcHTTP].proto = "HTTP";
+    refreshCounts[rcICP].proto = "ICP";
+#if USE_HTCP
+    refreshCounts[rcHTCP].proto = "HTCP";
+#endif
+    refreshCounts[rcStore].proto = "On Store";
+#if USE_CACHE_DIGESTS
+    refreshCounts[rcCDigest].proto = "Cache Digests";
+#endif
+
+    cachemgrRegister("refresh", "Refresh Algorithm Statistics", refreshStats, 0, 1);
+
+    /* fallback rule used when no configured rule applies */
+    memset(&DefaultRefresh, 0, sizeof DefaultRefresh);
+    DefaultRefresh.pattern = "<none>";
+    DefaultRefresh.min = REFRESH_DEFAULT_MIN;
+    DefaultRefresh.pct = REFRESH_DEFAULT_PCT;
+    DefaultRefresh.max = REFRESH_DEFAULT_MAX;
}