]> git.ipfire.org Git - thirdparty/squid.git/blob - src/refresh.cc
Added max-swap-rate=swaps/sec option to Rock cache_dir.
[thirdparty/squid.git] / src / refresh.cc
1
2 /*
3 * $Id$
4 *
5 * DEBUG: section 22 Refresh Calculation
6 * AUTHOR: Harvest Derived
7 *
8 * SQUID Web Proxy Cache http://www.squid-cache.org/
9 * ----------------------------------------------------------
10 *
11 * Squid is the result of efforts by numerous individuals from
12 * the Internet community; see the CONTRIBUTORS file for full
13 * details. Many organizations have provided support for Squid's
14 * development; see the SPONSORS file for full details. Squid is
15 * Copyrighted (C) 2001 by the Regents of the University of
16 * California; see the COPYRIGHT file for full details. Squid
17 * incorporates software developed and/or copyrighted by other
18 * sources; see the CREDITS file for full details.
19 *
20 * This program is free software; you can redistribute it and/or modify
21 * it under the terms of the GNU General Public License as published by
22 * the Free Software Foundation; either version 2 of the License, or
23 * (at your option) any later version.
24 *
25 * This program is distributed in the hope that it will be useful,
26 * but WITHOUT ANY WARRANTY; without even the implied warranty of
27 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
28 * GNU General Public License for more details.
29 *
30 * You should have received a copy of the GNU General Public License
31 * along with this program; if not, write to the Free Software
32 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA.
33 *
34 */
35
36 #ifndef USE_POSIX_REGEX
37 #define USE_POSIX_REGEX /* put before includes; always use POSIX */
38 #endif
39
40 #include "squid.h"
41 #include "HttpHdrCc.h"
42 #include "mgr/Registration.h"
43 #include "Store.h"
44 #include "MemObject.h"
45 #include "HttpRequest.h"
46 #include "HttpReply.h"
47 #include "SquidTime.h"
48
/// Index into refreshCounts[]: one slot per caller category of refreshCheck().
typedef enum {
    rcHTTP = 0,     ///< refreshCheckHTTP()
    rcICP,          ///< refreshCheckICP()
#if USE_HTCP
    rcHTCP,         ///< refreshCheckHTCP()
#endif
#if USE_CACHE_DIGESTS
    rcCDigest,      ///< refreshCheckDigest()
#endif
    rcStore,        ///< refreshIsCachable()
    rcCount         ///< number of slots; must stay last
} refreshCountsEnum;
61
/// Records which rule refreshStaleness() used to reach its verdict.
/// The caller is expected to zero the structure before the call.
typedef struct {
    bool expires;       ///< an explicit expiration time was present and decided
    bool min;           ///< fresh because age is below the rule's minimum
    bool lmfactor;      ///< decided by the last-modified factor algorithm
    bool max;           ///< stale because age exceeds the rule's maximum
} stale_flags;
68
/*
 * Reason codes returned by refreshCheck(), modelled on HTTP/FTP status
 * codes: every "fresh" code lies in 100-199 and every "stale" code in
 * 200-299.  The values may end up in logs, so each one is spelled out
 * explicitly and must not be renumbered.
 */
enum {
    /* fresh */
    FRESH_REQUEST_MAX_STALE_ALL = 100,
    FRESH_REQUEST_MAX_STALE_VALUE = 101,
    FRESH_EXPIRES = 102,
    FRESH_LMFACTOR_RULE = 103,
    FRESH_MIN_RULE = 104,
    FRESH_OVERRIDE_EXPIRES = 105,
    FRESH_OVERRIDE_LASTMOD = 106,

    /* stale */
    STALE_MUST_REVALIDATE = 200,
    STALE_RELOAD_INTO_IMS = 201,
    STALE_FORCED_RELOAD = 202,
    STALE_EXCEEDS_REQUEST_MAX_AGE_VALUE = 203,
    STALE_EXPIRES = 204,
    STALE_MAX_RULE = 205,
    STALE_LMFACTOR_RULE = 206,
    STALE_MAX_STALE = 207,
    STALE_DEFAULT = 299
};
93
94 static struct RefreshCounts {
95 const char *proto;
96 int total;
97 int status[STALE_DEFAULT + 1];
98 }
99
100 refreshCounts[rcCount];
101
/*
 * Values of the built-in fallback rule (see DefaultRefresh):
 *      MIN     NONE
 *      PCT     20%
 *      MAX     3 days
 *
 * The cast expressions are fully parenthesized so that each macro
 * behaves as a single operand wherever it is expanded
 * (e.g. "sizeof REFRESH_DEFAULT_MAX").
 */
#define REFRESH_DEFAULT_MIN     ((time_t)0)
#define REFRESH_DEFAULT_PCT     0.20
#define REFRESH_DEFAULT_MAX     ((time_t)259200)
111
112 static const refresh_t *refreshUncompiledPattern(const char *);
113 static OBJH refreshStats;
114 static int refreshStaleness(const StoreEntry * entry, time_t check_time, const time_t age, const refresh_t * R, stale_flags * sf);
115
116 static refresh_t DefaultRefresh;
117
118 const refresh_t *
119 refreshLimits(const char *url)
120 {
121 const refresh_t *R;
122
123 for (R = Config.Refresh; R; R = R->next) {
124 if (!regexec(&(R->compiled_pattern), url, 0, 0, 0))
125 return R;
126 }
127
128 return NULL;
129 }
130
131 static const refresh_t *
132 refreshUncompiledPattern(const char *pat)
133 {
134 const refresh_t *R;
135
136 for (R = Config.Refresh; R; R = R->next) {
137 if (0 == strcmp(R->pattern, pat))
138 return R;
139 }
140
141 return NULL;
142 }
143
144 /**
145 * Calculate how stale the response is (or will be at the check_time).
146 * Staleness calculation is based on the following: (1) response
147 * expiration time, (2) age greater than configured maximum, (3)
148 * last-modified factor, and (4) age less than configured minimum.
149 *
150 * \retval -1 If the response is fresh.
151 * \retval >0 Otherwise return it's staleness.
152 * \retval 0 NOTE return value of 0 means the response is stale.
153 *
154 * The 'stale_flags' structure is used to tell the calling function
155 * _why_ this response is fresh or stale. Its used, for example,
156 * when the admin wants to override expiration and last-modified
157 * times.
158 */
159 static int
160 refreshStaleness(const StoreEntry * entry, time_t check_time, const time_t age, const refresh_t * R, stale_flags * sf)
161 {
162 /** \par
163 * Check for an explicit expiration time (Expires: header).
164 */
165 if (entry->expires > -1) {
166 sf->expires = true;
167
168 if (entry->expires > check_time) {
169 debugs(22, 3, "FRESH: expires " << entry->expires <<
170 " >= check_time " << check_time << " ");
171
172 return -1;
173 } else {
174 debugs(22, 3, "STALE: expires " << entry->expires <<
175 " < check_time " << check_time << " ");
176
177 return (check_time - entry->expires);
178 }
179 }
180
181 /** \par
182 * Use local heuristics to determine staleness. Start with the
183 * max age from the refresh_pattern rule.
184 */
185 if (age > R->max) {
186 debugs(22, 3, "STALE: age " << age << " > max " << R->max << " ");
187 sf->max = true;
188 return (age - R->max);
189 }
190
191 /** \par
192 * Try the last-modified factor algorithm: refresh_pattern n% percentage of Last-Modified: age.
193 */
194 if (entry->lastmod > -1 && entry->timestamp > entry->lastmod) {
195 /*
196 * stale_age is the Age of the response when it became/becomes
197 * stale according to the last-modified factor algorithm.
198 */
199 time_t stale_age = static_cast<time_t>((entry->timestamp - entry->lastmod) * R->pct);
200 sf->lmfactor = true;
201
202 if (age >= stale_age) {
203 debugs(22, 3, "STALE: age " << age << " > stale_age " << stale_age);
204 return (age - stale_age);
205 } else {
206 debugs(22, 3, "FRESH: age " << age << " <= stale_age " << stale_age);
207 return -1;
208 }
209 }
210
211 /** \par
212 * Finally, if all else fails; staleness is determined by the refresh_pattern
213 * configured minimum age.
214 */
215 if (age < R->min) {
216 debugs(22, 3, "FRESH: age " << age << " < min " << R->min);
217 sf->min = true;
218 return -1;
219 }
220
221 debugs(22, 3, "STALE: age " << age << " >= min " << R->min);
222 return (age - R->min);
223 }
224
225 /**
226 * \retval 1 if the entry must be revalidated within delta seconds
227 * \retval 0 otherwise
228 *
229 * note: request maybe null (e.g. for cache digests build)
230 */
231 static int
232 refreshCheck(const StoreEntry * entry, HttpRequest * request, time_t delta)
233 {
234 const refresh_t *R;
235 const char *uri = NULL;
236 time_t age = 0;
237 time_t check_time = squid_curtime + delta;
238 int staleness;
239 stale_flags sf;
240
241 if (entry->mem_obj)
242 uri = entry->mem_obj->url;
243 else if (request)
244 uri = urlCanonical(request);
245
246 debugs(22, 3, "refreshCheck: '" << (uri ? uri : "<none>") << "'");
247
248 if (check_time > entry->timestamp)
249 age = check_time - entry->timestamp;
250
251 // FIXME: what to do when age < 0 or counter overflow?
252 assert(age >= 0);
253
254 R = uri ? refreshLimits(uri) : refreshUncompiledPattern(".");
255
256 if (NULL == R)
257 R = &DefaultRefresh;
258
259 debugs(22, 3, "refreshCheck: Matched '" << R->pattern << " " <<
260 (int) R->min << " " << (int) (100.0 * R->pct) << "%% " <<
261 (int) R->max << "'");
262
263 debugs(22, 3, "\tage:\t" << age);
264
265 debugs(22, 3, "\tcheck_time:\t" << mkrfc1123(check_time));
266
267 debugs(22, 3, "\tentry->timestamp:\t" << mkrfc1123(entry->timestamp));
268
269 if (request && !request->flags.ignore_cc) {
270 const HttpHdrCc *const cc = request->cache_control;
271 if (cc && cc->hasMinFresh()) {
272 const int32_t minFresh=cc->minFresh();
273 debugs(22, 3, "\tage + min-fresh:\t" << age << " + " <<
274 minFresh << " = " << age + minFresh);
275 debugs(22, 3, "\tcheck_time + min-fresh:\t" << check_time << " + "
276 << minFresh << " = " <<
277 mkrfc1123(check_time + minFresh));
278 age += minFresh;
279 check_time += minFresh;
280 }
281 }
282
283 memset(&sf, '\0', sizeof(sf));
284
285 staleness = refreshStaleness(entry, check_time, age, R, &sf);
286
287 debugs(22, 3, "Staleness = " << staleness);
288
289 // stale-if-error requires any failure be passed thru when its period is over.
290 if (request && entry->mem_obj && entry->mem_obj->getReply() && entry->mem_obj->getReply()->cache_control &&
291 entry->mem_obj->getReply()->cache_control->hasStaleIfError() &&
292 entry->mem_obj->getReply()->cache_control->staleIfError() < staleness) {
293
294 debugs(22, 3, "refreshCheck: stale-if-error period expired.");
295 request->flags.fail_on_validation_err = 1;
296 }
297
298 if (EBIT_TEST(entry->flags, ENTRY_REVALIDATE) && staleness > -1
299 #if USE_HTTP_VIOLATIONS
300 && !R->flags.ignore_must_revalidate
301 #endif
302 ) {
303 debugs(22, 3, "refreshCheck: YES: Must revalidate stale response");
304 if (request)
305 request->flags.fail_on_validation_err = 1;
306 return STALE_MUST_REVALIDATE;
307 }
308
309 /* request-specific checks */
310 if (request && !request->flags.ignore_cc) {
311 HttpHdrCc *cc = request->cache_control;
312
313 if (request->flags.ims && (R->flags.refresh_ims || Config.onoff.refresh_all_ims)) {
314 /* The clients no-cache header is changed into a IMS query */
315 debugs(22, 3, "refreshCheck: YES: refresh-ims");
316 return STALE_FORCED_RELOAD;
317 }
318
319 #if USE_HTTP_VIOLATIONS
320
321 if (!request->flags.nocache_hack) {
322 (void) 0;
323 } else if (R->flags.ignore_reload) {
324 /* The clients no-cache header is ignored */
325 debugs(22, 3, "refreshCheck: MAYBE: ignore-reload");
326 } else if (R->flags.reload_into_ims || Config.onoff.reload_into_ims) {
327 /* The clients no-cache header is changed into a IMS query */
328 debugs(22, 3, "refreshCheck: YES: reload-into-ims");
329 return STALE_RELOAD_INTO_IMS;
330 } else {
331 /* The clients no-cache header is not overridden on this request */
332 debugs(22, 3, "refreshCheck: YES: client reload");
333 request->flags.nocache = 1;
334 return STALE_FORCED_RELOAD;
335 }
336
337 #endif
338 if (NULL != cc) {
339 if (cc->hasMaxAge()) {
340 #if USE_HTTP_VIOLATIONS
341 if (R->flags.ignore_reload && cc->maxAge() == 0) {
342 debugs(22, 3, "refreshCheck: MAYBE: client-max-age = 0 and ignore-reload");
343 } else
344 #endif
345 {
346 if (cc->maxAge() == 0) {
347 debugs(22, 3, "refreshCheck: YES: client-max-age = 0");
348 return STALE_EXCEEDS_REQUEST_MAX_AGE_VALUE;
349 }
350
351 if (age > cc->maxAge()) {
352 debugs(22, 3, "refreshCheck: YES: age > client-max-age");
353 return STALE_EXCEEDS_REQUEST_MAX_AGE_VALUE;
354 }
355 }
356 }
357
358 if (cc->hasMaxStale() && staleness > -1) {
359 if (cc->maxStale()==HttpHdrCc::MAX_STALE_ANY) {
360 /* max-stale directive without a value */
361 debugs(22, 3, "refreshCheck: NO: max-stale wildcard");
362 return FRESH_REQUEST_MAX_STALE_ALL;
363 } else if (staleness < cc->maxStale()) {
364 debugs(22, 3, "refreshCheck: NO: staleness < max-stale");
365 return FRESH_REQUEST_MAX_STALE_VALUE;
366 }
367 }
368 }
369 }
370
371 if (-1 == staleness) {
372 debugs(22, 3, "refreshCheck: object isn't stale..");
373 if (sf.expires) {
374 debugs(22, 3, "refreshCheck: returning FRESH_EXPIRES");
375 return FRESH_EXPIRES;
376 }
377
378 assert(!sf.max);
379
380 if (sf.lmfactor) {
381 debugs(22, 3, "refreshCheck: returning FRESH_LMFACTOR_RULE");
382 return FRESH_LMFACTOR_RULE;
383 }
384
385 assert(sf.min);
386
387 debugs(22, 3, "refreshCheck: returning FRESH_MIN_RULE");
388 return FRESH_MIN_RULE;
389 }
390
391 /*
392 * At this point the response is stale, unless one of
393 * the override options kicks in.
394 * NOTE: max-stale config blocks the overrides.
395 */
396 int max_stale = (R->max_stale >= 0 ? R->max_stale : Config.maxStale);
397 if ( max_stale >= 0 && staleness > max_stale) {
398 debugs(22, 3, "refreshCheck: YES: max-stale limit");
399 if (request)
400 request->flags.fail_on_validation_err = 1;
401 return STALE_MAX_STALE;
402 }
403
404 if (sf.expires) {
405 #if USE_HTTP_VIOLATIONS
406
407 if (R->flags.override_expire && age < R->min) {
408 debugs(22, 3, "refreshCheck: NO: age < min && override-expire");
409 return FRESH_OVERRIDE_EXPIRES;
410 }
411
412 #endif
413 return STALE_EXPIRES;
414 }
415
416 if (sf.max)
417 return STALE_MAX_RULE;
418
419 if (sf.lmfactor) {
420 #if USE_HTTP_VIOLATIONS
421
422 if (R->flags.override_lastmod && age < R->min) {
423 debugs(22, 3, "refreshCheck: NO: age < min && override-lastmod");
424 return FRESH_OVERRIDE_LASTMOD;
425 }
426
427 #endif
428 return STALE_LMFACTOR_RULE;
429 }
430
431 debugs(22, 3, "refreshCheck: returning STALE_DEFAULT");
432 return STALE_DEFAULT;
433 }
434
435 int
436 refreshIsCachable(const StoreEntry * entry)
437 {
438 /*
439 * Don't look at the request to avoid no-cache and other nuisances.
440 * the object should have a mem_obj so the URL will be found there.
441 * minimum_expiry_time seconds delta (defaults to 60 seconds), to
442 * avoid objects which expire almost immediately, and which can't
443 * be refreshed.
444 */
445 int reason = refreshCheck(entry, NULL, Config.minimum_expiry_time);
446 refreshCounts[rcStore].total++;
447 refreshCounts[rcStore].status[reason]++;
448
449 if (reason < STALE_MUST_REVALIDATE)
450 /* Does not need refresh. This is certainly cachable */
451 return 1;
452
453 if (entry->lastmod < 0)
454 /* Last modified is needed to do a refresh */
455 return 0;
456
457 if (entry->mem_obj == NULL)
458 /* no mem_obj? */
459 return 1;
460
461 if (entry->getReply() == NULL)
462 /* no reply? */
463 return 1;
464
465 if (entry->getReply()->content_length == 0)
466 /* No use refreshing (caching?) 0 byte objects */
467 return 0;
468
469 /* This seems to be refreshable. Cache it */
470 return 1;
471 }
472
473 /// whether reply is stale if it is a hit
474 static bool
475 refreshIsStaleIfHit(const int reason)
476 {
477 switch (reason) {
478 case FRESH_MIN_RULE:
479 case FRESH_LMFACTOR_RULE:
480 case FRESH_EXPIRES:
481 return false;
482 default:
483 return true;
484 }
485 }
486
487 /* refreshCheck... functions below are protocol-specific wrappers around
488 * refreshCheck() function above */
489
490 int
491 refreshCheckHTTP(const StoreEntry * entry, HttpRequest * request)
492 {
493 int reason = refreshCheck(entry, request, 0);
494 refreshCounts[rcHTTP].total++;
495 refreshCounts[rcHTTP].status[reason]++;
496 request->flags.stale_if_hit = refreshIsStaleIfHit(reason);
497 return (Config.onoff.offline || reason < 200) ? 0 : 1;
498 }
499
500 int
501 refreshCheckICP(const StoreEntry * entry, HttpRequest * request)
502 {
503 int reason = refreshCheck(entry, request, 30);
504 refreshCounts[rcICP].total++;
505 refreshCounts[rcICP].status[reason]++;
506 return (reason < 200) ? 0 : 1;
507 }
508
#if USE_HTCP
/**
 * refreshCheck() wrapper for HTCP queries; looks 10 seconds ahead.
 *
 * \retval 0 fresh
 * \retval 1 stale
 */
int
refreshCheckHTCP(const StoreEntry * entry, HttpRequest * request)
{
    const int reason = refreshCheck(entry, request, 10);

    ++refreshCounts[rcHTCP].total;
    ++refreshCounts[rcHTCP].status[reason];

    if (reason < 200)
        return 0;

    return 1;
}

#endif
520
#if USE_CACHE_DIGESTS
/**
 * refreshCheck() wrapper for cache digest building.  The request is taken
 * from the entry's mem_obj when there is one, otherwise none is passed.
 *
 * \param delta  look-ahead in seconds, handed through to refreshCheck()
 * \retval 0 fresh
 * \retval 1 stale
 */
int
refreshCheckDigest(const StoreEntry * entry, time_t delta)
{
    const int reason = refreshCheck(entry,
                                    entry->mem_obj ? entry->mem_obj->request : NULL,
                                    delta);

    ++refreshCounts[rcCDigest].total;
    ++refreshCounts[rcCDigest].status[reason];

    if (reason < 200)
        return 0;

    return 1;
}

#endif
534
535 time_t
536 getMaxAge(const char *url)
537 {
538 const refresh_t *R;
539 debugs(22, 3, "getMaxAge: '" << url << "'");
540
541 if ((R = refreshLimits(url)))
542 return R->max;
543 else
544 return REFRESH_DEFAULT_MAX;
545 }
546
547 static void
548
549 refreshCountsStats(StoreEntry * sentry, struct RefreshCounts *rc)
550 {
551 int sum = 0;
552 int tot = rc->total;
553
554 storeAppendPrintf(sentry, "\n\n%s histogram:\n", rc->proto);
555 storeAppendPrintf(sentry, "Count\t%%Total\tCategory\n");
556
557 #define refreshCountsStatsEntry(code,desc) { \
558 storeAppendPrintf(sentry, "%6d\t%6.2f\t%s\n", \
559 rc->status[code], xpercent(rc->status[code], tot), desc); \
560 sum += rc->status[code]; \
561 }
562
563 refreshCountsStatsEntry(FRESH_REQUEST_MAX_STALE_ALL,
564 "Fresh: request max-stale wildcard");
565 refreshCountsStatsEntry(FRESH_REQUEST_MAX_STALE_VALUE,
566 "Fresh: request max-stale value");
567 refreshCountsStatsEntry(FRESH_EXPIRES,
568 "Fresh: expires time not reached");
569 refreshCountsStatsEntry(FRESH_LMFACTOR_RULE,
570 "Fresh: refresh_pattern last-mod factor percentage");
571 refreshCountsStatsEntry(FRESH_MIN_RULE,
572 "Fresh: refresh_pattern min value");
573 refreshCountsStatsEntry(FRESH_OVERRIDE_EXPIRES,
574 "Fresh: refresh_pattern override expires");
575 refreshCountsStatsEntry(FRESH_OVERRIDE_LASTMOD,
576 "Fresh: refresh_pattern override lastmod");
577 refreshCountsStatsEntry(STALE_MUST_REVALIDATE,
578 "Stale: response has must-revalidate");
579 refreshCountsStatsEntry(STALE_RELOAD_INTO_IMS,
580 "Stale: changed reload into IMS");
581 refreshCountsStatsEntry(STALE_FORCED_RELOAD,
582 "Stale: request has no-cache directive");
583 refreshCountsStatsEntry(STALE_EXCEEDS_REQUEST_MAX_AGE_VALUE,
584 "Stale: age exceeds request max-age value");
585 refreshCountsStatsEntry(STALE_EXPIRES,
586 "Stale: expires time reached");
587 refreshCountsStatsEntry(STALE_MAX_RULE,
588 "Stale: refresh_pattern max age rule");
589 refreshCountsStatsEntry(STALE_LMFACTOR_RULE,
590 "Stale: refresh_pattern last-mod factor percentage");
591 refreshCountsStatsEntry(STALE_DEFAULT,
592 "Stale: by default");
593
594 tot = sum; /* paranoid: "total" line shows 100% if we forgot nothing */
595 storeAppendPrintf(sentry, "%6d\t%6.2f\tTOTAL\n",
596 rc->total, xpercent(rc->total, tot));
597 \
598 storeAppendPrintf(sentry, "\n");
599 }
600
601 static void
602 refreshStats(StoreEntry * sentry)
603 {
604 int i;
605 int total = 0;
606
607 /* get total usage count */
608
609 for (i = 0; i < rcCount; ++i)
610 total += refreshCounts[i].total;
611
612 /* protocol usage histogram */
613 storeAppendPrintf(sentry, "\nRefreshCheck calls per protocol\n\n");
614
615 storeAppendPrintf(sentry, "Protocol\t#Calls\t%%Calls\n");
616
617 for (i = 0; i < rcCount; ++i)
618 storeAppendPrintf(sentry, "%10s\t%6d\t%6.2f\n",
619 refreshCounts[i].proto,
620 refreshCounts[i].total,
621 xpercent(refreshCounts[i].total, total));
622
623 /* per protocol histograms */
624 storeAppendPrintf(sentry, "\n\nRefreshCheck histograms for various protocols\n");
625
626 for (i = 0; i < rcCount; ++i)
627 refreshCountsStats(sentry, &refreshCounts[i]);
628 }
629
630 static void
631 refreshRegisterWithCacheManager(void)
632 {
633 Mgr::RegisterAction("refresh", "Refresh Algorithm Statistics", refreshStats, 0, 1);
634 }
635
636 void
637 refreshInit(void)
638 {
639 memset(refreshCounts, 0, sizeof(refreshCounts));
640 refreshCounts[rcHTTP].proto = "HTTP";
641 refreshCounts[rcICP].proto = "ICP";
642 #if USE_HTCP
643
644 refreshCounts[rcHTCP].proto = "HTCP";
645 #endif
646
647 refreshCounts[rcStore].proto = "On Store";
648 #if USE_CACHE_DIGESTS
649
650 refreshCounts[rcCDigest].proto = "Cache Digests";
651 #endif
652
653 memset(&DefaultRefresh, '\0', sizeof(DefaultRefresh));
654 DefaultRefresh.pattern = "<none>";
655 DefaultRefresh.min = REFRESH_DEFAULT_MIN;
656 DefaultRefresh.pct = REFRESH_DEFAULT_PCT;
657 DefaultRefresh.max = REFRESH_DEFAULT_MAX;
658
659 refreshRegisterWithCacheManager();
660 }