]> git.ipfire.org Git - thirdparty/squid.git/blob - src/refresh.cc
Author: Henrik Nordstrom <hno@squid-cache.org>
[thirdparty/squid.git] / src / refresh.cc
1
2 /*
3 * $Id$
4 *
5 * DEBUG: section 22 Refresh Calculation
6 * AUTHOR: Harvest Derived
7 *
8 * SQUID Web Proxy Cache http://www.squid-cache.org/
9 * ----------------------------------------------------------
10 *
11 * Squid is the result of efforts by numerous individuals from
12 * the Internet community; see the CONTRIBUTORS file for full
13 * details. Many organizations have provided support for Squid's
14 * development; see the SPONSORS file for full details. Squid is
15 * Copyrighted (C) 2001 by the Regents of the University of
16 * California; see the COPYRIGHT file for full details. Squid
17 * incorporates software developed and/or copyrighted by other
18 * sources; see the CREDITS file for full details.
19 *
20 * This program is free software; you can redistribute it and/or modify
21 * it under the terms of the GNU General Public License as published by
22 * the Free Software Foundation; either version 2 of the License, or
23 * (at your option) any later version.
24 *
25 * This program is distributed in the hope that it will be useful,
26 * but WITHOUT ANY WARRANTY; without even the implied warranty of
27 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
28 * GNU General Public License for more details.
29 *
30 * You should have received a copy of the GNU General Public License
31 * along with this program; if not, write to the Free Software
32 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA.
33 *
34 */
35
36 #ifndef USE_POSIX_REGEX
37 #define USE_POSIX_REGEX /* put before includes; always use POSIX */
38 #endif
39
40 #include "squid.h"
41 #include "mgr/Registration.h"
42 #include "Store.h"
43 #include "MemObject.h"
44 #include "HttpRequest.h"
45 #include "HttpReply.h"
46 #include "SquidTime.h"
47
/*
 * Index into refreshCounts[]: one slot per kind of caller of refreshCheck().
 * rcCount is not a caller kind; it is the number of slots and is used to
 * size and iterate the refreshCounts[] array.
 */
typedef enum {
    rcHTTP,     /* counted by refreshCheckHTTP() */
    rcICP,      /* counted by refreshCheckICP() */
#if USE_HTCP
    rcHTCP,     /* counted by refreshCheckHTCP() */
#endif
#if USE_CACHE_DIGESTS
    rcCDigest,  /* counted by refreshCheckDigest() */
#endif
    rcStore,    /* counted by refreshIsCachable() */
    rcCount     /* number of entries above; keep last */
} refreshCountsEnum;
60
/*
 * Records which rule refreshStaleness() used to reach its verdict, so the
 * caller can map the result onto a FRESH_* / STALE_* reason code.
 * refreshCheck() zeroes the structure before each use.
 */
typedef struct {
    bool expires;   /* entry had an explicit expiry time and it decided the result */
    bool min;       /* entry was fresh because age < refresh_pattern min */
    bool lmfactor;  /* the last-modified factor (pct) rule decided the result */
    bool max;       /* entry was stale because age > refresh_pattern max */
} stale_flags;
67
/*
 * Reason codes returned by refreshCheck(). The list assigns specific
 * values, in the manner of HTTP/FTP status codes: all Fresh codes are in
 * the range 100-199 and all Stale codes are in the range 200-299. We might
 * want to use these codes in logging, so it is best to keep them consistent
 * over time.
 *
 * The protocol wrappers in this file treat any code below 200 as "fresh".
 * STALE_DEFAULT is the largest code and sizes RefreshCounts::status[].
 */
enum {
    FRESH_REQUEST_MAX_STALE_ALL = 100,
    FRESH_REQUEST_MAX_STALE_VALUE,
    FRESH_EXPIRES,
    FRESH_LMFACTOR_RULE,
    FRESH_MIN_RULE,
    FRESH_OVERRIDE_EXPIRES,
    FRESH_OVERRIDE_LASTMOD,
    STALE_MUST_REVALIDATE = 200,    /* first Stale code */
    STALE_RELOAD_INTO_IMS,
    STALE_FORCED_RELOAD,
    STALE_EXCEEDS_REQUEST_MAX_AGE_VALUE,
    STALE_EXPIRES,
    STALE_MAX_RULE,
    STALE_LMFACTOR_RULE,
    STALE_MAX_STALE,
    STALE_DEFAULT = 299             /* last Stale code; keep as the maximum */
};
92
/*
 * Per-caller statistics for refreshCheck(), indexed by refreshCountsEnum.
 * Zeroed and labelled in refreshInit(); reported by refreshStats().
 */
static struct RefreshCounts {
    const char *proto;              /* human-readable label used in reports */
    int total;                      /* number of refreshCheck() calls counted */
    int status[STALE_DEFAULT + 1];  /* histogram indexed directly by reason code */
}

refreshCounts[rcCount];
100
/*
 * Defaults copied into DefaultRefresh by refreshInit():
 *      MIN     NONE
 *      PCT     20%
 *      MAX     3 days
 */
#define REFRESH_DEFAULT_MIN     (time_t)0       /* no minimum age */
#define REFRESH_DEFAULT_PCT     0.20            /* last-modified factor, as a fraction */
#define REFRESH_DEFAULT_MAX     (time_t)259200  /* 3 * 24 * 60 * 60 seconds */
110
/* Forward declarations of file-local helpers defined below. */
static const refresh_t *refreshUncompiledPattern(const char *);
/* Report handler passed to Mgr::RegisterAction() in this file. */
static OBJH refreshStats;
static int refreshStaleness(const StoreEntry * entry, time_t check_time, const time_t age, const refresh_t * R, stale_flags * sf);

/* Fallback rule used by refreshCheck() when no configured rule is found; filled in by refreshInit(). */
static refresh_t DefaultRefresh;
116
117 const refresh_t *
118 refreshLimits(const char *url)
119 {
120 const refresh_t *R;
121
122 for (R = Config.Refresh; R; R = R->next) {
123 if (!regexec(&(R->compiled_pattern), url, 0, 0, 0))
124 return R;
125 }
126
127 return NULL;
128 }
129
130 static const refresh_t *
131 refreshUncompiledPattern(const char *pat)
132 {
133 const refresh_t *R;
134
135 for (R = Config.Refresh; R; R = R->next) {
136 if (0 == strcmp(R->pattern, pat))
137 return R;
138 }
139
140 return NULL;
141 }
142
143 /**
144 * Calculate how stale the response is (or will be at the check_time).
145 * Staleness calculation is based on the following: (1) response
146 * expiration time, (2) age greater than configured maximum, (3)
147 * last-modified factor, and (4) age less than configured minimum.
148 *
149 * \retval -1 If the response is fresh.
150 * \retval >0 Otherwise return it's staleness.
151 * \retval 0 NOTE return value of 0 means the response is stale.
152 *
153 * The 'stale_flags' structure is used to tell the calling function
154 * _why_ this response is fresh or stale. Its used, for example,
155 * when the admin wants to override expiration and last-modified
156 * times.
157 */
158 static int
159 refreshStaleness(const StoreEntry * entry, time_t check_time, const time_t age, const refresh_t * R, stale_flags * sf)
160 {
161 /** \par
162 * Check for an explicit expiration time (Expires: header).
163 */
164 if (entry->expires > -1) {
165 sf->expires = true;
166
167 if (entry->expires > check_time) {
168 debugs(22, 3, "FRESH: expires " << entry->expires <<
169 " >= check_time " << check_time << " ");
170
171 return -1;
172 } else {
173 debugs(22, 3, "STALE: expires " << entry->expires <<
174 " < check_time " << check_time << " ");
175
176 return (check_time - entry->expires);
177 }
178 }
179
180 /** \par
181 * Use local heuristics to determine staleness. Start with the
182 * max age from the refresh_pattern rule.
183 */
184 if (age > R->max) {
185 debugs(22, 3, "STALE: age " << age << " > max " << R->max << " ");
186 sf->max = true;
187 return (age - R->max);
188 }
189
190 /** \par
191 * Try the last-modified factor algorithm: refresh_pattern n% percentage of Last-Modified: age.
192 */
193 if (entry->lastmod > -1 && entry->timestamp > entry->lastmod) {
194 /*
195 * stale_age is the Age of the response when it became/becomes
196 * stale according to the last-modified factor algorithm.
197 */
198 time_t stale_age = static_cast<time_t>((entry->timestamp - entry->lastmod) * R->pct);
199 sf->lmfactor = true;
200
201 if (age >= stale_age) {
202 debugs(22, 3, "STALE: age " << age << " > stale_age " << stale_age);
203 return (age - stale_age);
204 } else {
205 debugs(22, 3, "FRESH: age " << age << " <= stale_age " << stale_age);
206 return -1;
207 }
208 }
209
210 /** \par
211 * Finally, if all else fails; staleness is determined by the refresh_pattern
212 * configured minimum age.
213 */
214 if (age < R->min) {
215 debugs(22, 3, "FRESH: age " << age << " < min " << R->min);
216 sf->min = true;
217 return -1;
218 }
219
220 debugs(22, 3, "STALE: age " << age << " >= min " << R->min);
221 return (age - R->min);
222 }
223
224 /**
225 * \retval 1 if the entry must be revalidated within delta seconds
226 * \retval 0 otherwise
227 *
228 * note: request maybe null (e.g. for cache digests build)
229 */
230 static int
231 refreshCheck(const StoreEntry * entry, HttpRequest * request, time_t delta)
232 {
233 const refresh_t *R;
234 const char *uri = NULL;
235 time_t age = 0;
236 time_t check_time = squid_curtime + delta;
237 int staleness;
238 stale_flags sf;
239
240 if (entry->mem_obj)
241 uri = entry->mem_obj->url;
242 else if (request)
243 uri = urlCanonical(request);
244
245 debugs(22, 3, "refreshCheck: '" << (uri ? uri : "<none>") << "'");
246
247 if (check_time > entry->timestamp)
248 age = check_time - entry->timestamp;
249
250 // FIXME: what to do when age < 0 or counter overflow?
251 assert(age >= 0);
252
253 R = uri ? refreshLimits(uri) : refreshUncompiledPattern(".");
254
255 if (NULL == R)
256 R = &DefaultRefresh;
257
258 debugs(22, 3, "refreshCheck: Matched '" << R->pattern << " " <<
259 (int) R->min << " " << (int) (100.0 * R->pct) << "%% " <<
260 (int) R->max << "'");
261
262 debugs(22, 3, "\tage:\t" << age);
263
264 debugs(22, 3, "\tcheck_time:\t" << mkrfc1123(check_time));
265
266 debugs(22, 3, "\tentry->timestamp:\t" << mkrfc1123(entry->timestamp));
267
268 if (request && !request->flags.ignore_cc) {
269 const HttpHdrCc *const cc = request->cache_control;
270 if (cc && cc->min_fresh > 0) {
271 debugs(22, 3, "\tage + min-fresh:\t" << age << " + " <<
272 cc->min_fresh << " = " << age + cc->min_fresh);
273 debugs(22, 3, "\tcheck_time + min-fresh:\t" << check_time << " + "
274 << cc->min_fresh << " = " <<
275 mkrfc1123(check_time + cc->min_fresh));
276 age += cc->min_fresh;
277 check_time += cc->min_fresh;
278 }
279 }
280
281 memset(&sf, '\0', sizeof(sf));
282
283 staleness = refreshStaleness(entry, check_time, age, R, &sf);
284
285 debugs(22, 3, "Staleness = " << staleness);
286
287 // stale-if-error requires any failure be passed thru when its period is over.
288 if (request && entry->mem_obj && entry->mem_obj->getReply() && entry->mem_obj->getReply()->cache_control &&
289 EBIT_TEST(entry->mem_obj->getReply()->cache_control->mask, CC_STALE_IF_ERROR) &&
290 entry->mem_obj->getReply()->cache_control->stale_if_error < staleness) {
291
292 debugs(22, 3, "refreshCheck: stale-if-error period expired.");
293 request->flags.fail_on_validation_err = 1;
294 }
295
296 if (EBIT_TEST(entry->flags, ENTRY_REVALIDATE) && staleness > -1
297 #if USE_HTTP_VIOLATIONS
298 && !R->flags.ignore_must_revalidate
299 #endif
300 ) {
301 debugs(22, 3, "refreshCheck: YES: Must revalidate stale response");
302 if (request)
303 request->flags.fail_on_validation_err = 1;
304 return STALE_MUST_REVALIDATE;
305 }
306
307 /* request-specific checks */
308 if (request && !request->flags.ignore_cc) {
309 HttpHdrCc *cc = request->cache_control;
310
311 if (request->flags.ims && (R->flags.refresh_ims || Config.onoff.refresh_all_ims)) {
312 /* The clients no-cache header is changed into a IMS query */
313 debugs(22, 3, "refreshCheck: YES: refresh-ims");
314 return STALE_FORCED_RELOAD;
315 }
316
317 #if USE_HTTP_VIOLATIONS
318
319 if (!request->flags.nocache_hack) {
320 (void) 0;
321 } else if (R->flags.ignore_reload) {
322 /* The clients no-cache header is ignored */
323 debugs(22, 3, "refreshCheck: MAYBE: ignore-reload");
324 } else if (R->flags.reload_into_ims || Config.onoff.reload_into_ims) {
325 /* The clients no-cache header is changed into a IMS query */
326 debugs(22, 3, "refreshCheck: YES: reload-into-ims");
327 return STALE_RELOAD_INTO_IMS;
328 } else {
329 /* The clients no-cache header is not overridden on this request */
330 debugs(22, 3, "refreshCheck: YES: client reload");
331 request->flags.nocache = 1;
332 return STALE_FORCED_RELOAD;
333 }
334
335 #endif
336 if (NULL != cc) {
337 if (cc->max_age > -1) {
338 #if USE_HTTP_VIOLATIONS
339 if (R->flags.ignore_reload && cc->max_age == 0) {
340 debugs(22, 3, "refreshCheck: MAYBE: client-max-age = 0 and ignore-reload");
341 } else
342 #endif
343 {
344 if (cc->max_age == 0) {
345 debugs(22, 3, "refreshCheck: YES: client-max-age = 0");
346 return STALE_EXCEEDS_REQUEST_MAX_AGE_VALUE;
347 }
348
349 if (age > cc->max_age) {
350 debugs(22, 3, "refreshCheck: YES: age > client-max-age");
351 return STALE_EXCEEDS_REQUEST_MAX_AGE_VALUE;
352 }
353 }
354 }
355
356 if (EBIT_TEST(cc->mask, CC_MAX_STALE) && staleness > -1) {
357 if (cc->max_stale < 0) {
358 /* max-stale directive without a value */
359 debugs(22, 3, "refreshCheck: NO: max-stale wildcard");
360 return FRESH_REQUEST_MAX_STALE_ALL;
361 } else if (staleness < cc->max_stale) {
362 debugs(22, 3, "refreshCheck: NO: staleness < max-stale");
363 return FRESH_REQUEST_MAX_STALE_VALUE;
364 }
365 }
366 }
367 }
368
369 if (-1 == staleness) {
370 debugs(22, 3, "refreshCheck: object isn't stale..");
371 if (sf.expires) {
372 debugs(22, 3, "refreshCheck: returning FRESH_EXPIRES");
373 return FRESH_EXPIRES;
374 }
375
376 assert(!sf.max);
377
378 if (sf.lmfactor) {
379 debugs(22, 3, "refreshCheck: returning FRESH_LMFACTOR_RULE");
380 return FRESH_LMFACTOR_RULE;
381 }
382
383 assert(sf.min);
384
385 debugs(22, 3, "refreshCheck: returning FRESH_MIN_RULE");
386 return FRESH_MIN_RULE;
387 }
388
389 /*
390 * At this point the response is stale, unless one of
391 * the override options kicks in.
392 * NOTE: max-stale config blocks the overrides.
393 */
394 int max_stale = (R->max_stale >= 0 ? R->max_stale : Config.maxStale);
395 if ( max_stale >= 0 && staleness < max_stale) {
396 debugs(22, 3, "refreshCheck: YES: max-stale limit");
397 if (request)
398 request->flags.fail_on_validation_err = 1;
399 return STALE_MAX_STALE;
400 }
401
402 if (sf.expires) {
403 #if USE_HTTP_VIOLATIONS
404
405 if (R->flags.override_expire && age < R->min) {
406 debugs(22, 3, "refreshCheck: NO: age < min && override-expire");
407 return FRESH_OVERRIDE_EXPIRES;
408 }
409
410 #endif
411 return STALE_EXPIRES;
412 }
413
414 if (sf.max)
415 return STALE_MAX_RULE;
416
417 if (sf.lmfactor) {
418 #if USE_HTTP_VIOLATIONS
419
420 if (R->flags.override_lastmod && age < R->min) {
421 debugs(22, 3, "refreshCheck: NO: age < min && override-lastmod");
422 return FRESH_OVERRIDE_LASTMOD;
423 }
424
425 #endif
426 return STALE_LMFACTOR_RULE;
427 }
428
429 debugs(22, 3, "refreshCheck: returning STALE_DEFAULT");
430 return STALE_DEFAULT;
431 }
432
433 int
434 refreshIsCachable(const StoreEntry * entry)
435 {
436 /*
437 * Don't look at the request to avoid no-cache and other nuisances.
438 * the object should have a mem_obj so the URL will be found there.
439 * minimum_expiry_time seconds delta (defaults to 60 seconds), to
440 * avoid objects which expire almost immediately, and which can't
441 * be refreshed.
442 */
443 int reason = refreshCheck(entry, NULL, Config.minimum_expiry_time);
444 refreshCounts[rcStore].total++;
445 refreshCounts[rcStore].status[reason]++;
446
447 if (reason < STALE_MUST_REVALIDATE)
448 /* Does not need refresh. This is certainly cachable */
449 return 1;
450
451 if (entry->lastmod < 0)
452 /* Last modified is needed to do a refresh */
453 return 0;
454
455 if (entry->mem_obj == NULL)
456 /* no mem_obj? */
457 return 1;
458
459 if (entry->getReply() == NULL)
460 /* no reply? */
461 return 1;
462
463 if (entry->getReply()->content_length == 0)
464 /* No use refreshing (caching?) 0 byte objects */
465 return 0;
466
467 /* This seems to be refreshable. Cache it */
468 return 1;
469 }
470
471 /// whether reply is stale if it is a hit
472 static bool
473 refreshIsStaleIfHit(const int reason)
474 {
475 switch (reason) {
476 case FRESH_MIN_RULE:
477 case FRESH_LMFACTOR_RULE:
478 case FRESH_EXPIRES:
479 return false;
480 default:
481 return true;
482 }
483 }
484
485 /* refreshCheck... functions below are protocol-specific wrappers around
486 * refreshCheck() function above */
487
488 int
489 refreshCheckHTTP(const StoreEntry * entry, HttpRequest * request)
490 {
491 int reason = refreshCheck(entry, request, 0);
492 refreshCounts[rcHTTP].total++;
493 refreshCounts[rcHTTP].status[reason]++;
494 request->flags.stale_if_hit = refreshIsStaleIfHit(reason);
495 return (Config.onoff.offline || reason < 200) ? 0 : 1;
496 }
497
498 int
499 refreshCheckICP(const StoreEntry * entry, HttpRequest * request)
500 {
501 int reason = refreshCheck(entry, request, 30);
502 refreshCounts[rcICP].total++;
503 refreshCounts[rcICP].status[reason]++;
504 return (reason < 200) ? 0 : 1;
505 }
506
#if USE_HTCP
/**
 * HTCP wrapper around refreshCheck(), looking 10 seconds ahead.
 * Updates the rcHTCP counters.
 *
 * \retval 0 the entry is fresh
 * \retval 1 the entry is stale
 */
int
refreshCheckHTCP(const StoreEntry * entry, HttpRequest * request)
{
    const int reason = refreshCheck(entry, request, 10);

    ++refreshCounts[rcHTCP].total;
    ++refreshCounts[rcHTCP].status[reason];

    /* codes from 200 upwards are the Stale reasons */
    if (reason >= 200) {
        return 1;
    }

    return 0;
}

#endif
518
#if USE_CACHE_DIGESTS
/**
 * Cache Digest wrapper around refreshCheck(), looking delta seconds
 * ahead. The request stored in the entry's mem_obj is used when there
 * is a mem_obj; otherwise the check runs without a request.
 * Updates the rcCDigest counters.
 *
 * \retval 0 the entry is fresh
 * \retval 1 the entry is stale
 */
int
refreshCheckDigest(const StoreEntry * entry, time_t delta)
{
    HttpRequest *storedRequest = NULL;

    if (entry->mem_obj) {
        storedRequest = entry->mem_obj->request;
    }

    const int reason = refreshCheck(entry, storedRequest, delta);

    ++refreshCounts[rcCDigest].total;
    ++refreshCounts[rcCDigest].status[reason];

    /* codes from 200 upwards are the Stale reasons */
    if (reason >= 200) {
        return 1;
    }

    return 0;
}

#endif
532
533 time_t
534 getMaxAge(const char *url)
535 {
536 const refresh_t *R;
537 debugs(22, 3, "getMaxAge: '" << url << "'");
538
539 if ((R = refreshLimits(url)))
540 return R->max;
541 else
542 return REFRESH_DEFAULT_MAX;
543 }
544
545 static void
546
547 refreshCountsStats(StoreEntry * sentry, struct RefreshCounts *rc)
548 {
549 int sum = 0;
550 int tot = rc->total;
551
552 storeAppendPrintf(sentry, "\n\n%s histogram:\n", rc->proto);
553 storeAppendPrintf(sentry, "Count\t%%Total\tCategory\n");
554
555 #define refreshCountsStatsEntry(code,desc) { \
556 storeAppendPrintf(sentry, "%6d\t%6.2f\t%s\n", \
557 rc->status[code], xpercent(rc->status[code], tot), desc); \
558 sum += rc->status[code]; \
559 }
560
561 refreshCountsStatsEntry(FRESH_REQUEST_MAX_STALE_ALL,
562 "Fresh: request max-stale wildcard");
563 refreshCountsStatsEntry(FRESH_REQUEST_MAX_STALE_VALUE,
564 "Fresh: request max-stale value");
565 refreshCountsStatsEntry(FRESH_EXPIRES,
566 "Fresh: expires time not reached");
567 refreshCountsStatsEntry(FRESH_LMFACTOR_RULE,
568 "Fresh: refresh_pattern last-mod factor percentage");
569 refreshCountsStatsEntry(FRESH_MIN_RULE,
570 "Fresh: refresh_pattern min value");
571 refreshCountsStatsEntry(FRESH_OVERRIDE_EXPIRES,
572 "Fresh: refresh_pattern override expires");
573 refreshCountsStatsEntry(FRESH_OVERRIDE_LASTMOD,
574 "Fresh: refresh_pattern override lastmod");
575 refreshCountsStatsEntry(STALE_MUST_REVALIDATE,
576 "Stale: response has must-revalidate");
577 refreshCountsStatsEntry(STALE_RELOAD_INTO_IMS,
578 "Stale: changed reload into IMS");
579 refreshCountsStatsEntry(STALE_FORCED_RELOAD,
580 "Stale: request has no-cache directive");
581 refreshCountsStatsEntry(STALE_EXCEEDS_REQUEST_MAX_AGE_VALUE,
582 "Stale: age exceeds request max-age value");
583 refreshCountsStatsEntry(STALE_EXPIRES,
584 "Stale: expires time reached");
585 refreshCountsStatsEntry(STALE_MAX_RULE,
586 "Stale: refresh_pattern max age rule");
587 refreshCountsStatsEntry(STALE_LMFACTOR_RULE,
588 "Stale: refresh_pattern last-mod factor percentage");
589 refreshCountsStatsEntry(STALE_DEFAULT,
590 "Stale: by default");
591
592 tot = sum; /* paranoid: "total" line shows 100% if we forgot nothing */
593 storeAppendPrintf(sentry, "%6d\t%6.2f\tTOTAL\n",
594 rc->total, xpercent(rc->total, tot));
595 \
596 storeAppendPrintf(sentry, "\n");
597 }
598
599 static void
600 refreshStats(StoreEntry * sentry)
601 {
602 int i;
603 int total = 0;
604
605 /* get total usage count */
606
607 for (i = 0; i < rcCount; ++i)
608 total += refreshCounts[i].total;
609
610 /* protocol usage histogram */
611 storeAppendPrintf(sentry, "\nRefreshCheck calls per protocol\n\n");
612
613 storeAppendPrintf(sentry, "Protocol\t#Calls\t%%Calls\n");
614
615 for (i = 0; i < rcCount; ++i)
616 storeAppendPrintf(sentry, "%10s\t%6d\t%6.2f\n",
617 refreshCounts[i].proto,
618 refreshCounts[i].total,
619 xpercent(refreshCounts[i].total, total));
620
621 /* per protocol histograms */
622 storeAppendPrintf(sentry, "\n\nRefreshCheck histograms for various protocols\n");
623
624 for (i = 0; i < rcCount; ++i)
625 refreshCountsStats(sentry, &refreshCounts[i]);
626 }
627
/**
 * Register the "refresh" action, served by refreshStats(), with the
 * cache manager. Called once from refreshInit().
 */
static void
refreshRegisterWithCacheManager(void)
{
    /* NOTE(review): the trailing 0 and 1 are presumably the password-required and atomic flags; confirm against Mgr::RegisterAction */
    Mgr::RegisterAction("refresh", "Refresh Algorithm Statistics", refreshStats, 0, 1);
}
633
634 void
635 refreshInit(void)
636 {
637 memset(refreshCounts, 0, sizeof(refreshCounts));
638 refreshCounts[rcHTTP].proto = "HTTP";
639 refreshCounts[rcICP].proto = "ICP";
640 #if USE_HTCP
641
642 refreshCounts[rcHTCP].proto = "HTCP";
643 #endif
644
645 refreshCounts[rcStore].proto = "On Store";
646 #if USE_CACHE_DIGESTS
647
648 refreshCounts[rcCDigest].proto = "Cache Digests";
649 #endif
650
651 memset(&DefaultRefresh, '\0', sizeof(DefaultRefresh));
652 DefaultRefresh.pattern = "<none>";
653 DefaultRefresh.min = REFRESH_DEFAULT_MIN;
654 DefaultRefresh.pct = REFRESH_DEFAULT_PCT;
655 DefaultRefresh.max = REFRESH_DEFAULT_MAX;
656
657 refreshRegisterWithCacheManager();
658 }