]> git.ipfire.org Git - thirdparty/squid.git/blob - src/refresh.cc
Removed CVS $ markers
[thirdparty/squid.git] / src / refresh.cc
1
2 /*
3 * DEBUG: section 22 Refresh Calculation
4 * AUTHOR: Harvest Derived
5 *
6 * SQUID Web Proxy Cache http://www.squid-cache.org/
7 * ----------------------------------------------------------
8 *
9 * Squid is the result of efforts by numerous individuals from
10 * the Internet community; see the CONTRIBUTORS file for full
11 * details. Many organizations have provided support for Squid's
12 * development; see the SPONSORS file for full details. Squid is
13 * Copyrighted (C) 2001 by the Regents of the University of
14 * California; see the COPYRIGHT file for full details. Squid
15 * incorporates software developed and/or copyrighted by other
16 * sources; see the CREDITS file for full details.
17 *
18 * This program is free software; you can redistribute it and/or modify
19 * it under the terms of the GNU General Public License as published by
20 * the Free Software Foundation; either version 2 of the License, or
21 * (at your option) any later version.
22 *
23 * This program is distributed in the hope that it will be useful,
24 * but WITHOUT ANY WARRANTY; without even the implied warranty of
25 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
26 * GNU General Public License for more details.
27 *
28 * You should have received a copy of the GNU General Public License
29 * along with this program; if not, write to the Free Software
30 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA.
31 *
32 */
33
34 #ifndef USE_POSIX_REGEX
35 #define USE_POSIX_REGEX /* put before includes; always use POSIX */
36 #endif
37
38 #include "squid.h"
39 #include "mgr/Registration.h"
40 #include "HttpHdrCc.h"
41 #include "HttpRequest.h"
42 #include "HttpReply.h"
43 #include "MemObject.h"
44 #include "SquidTime.h"
45 #include "Store.h"
46 #include "URL.h"
47
/// Selects one set of counters in refreshCounts[]: one value per kind of
/// refreshCheck() caller. rcCount is not a caller; it is the number of sets.
enum refreshCountsEnum {
    rcHTTP,
    rcICP,
#if USE_HTCP
    rcHTCP,
#endif
#if USE_CACHE_DIGESTS
    rcCDigest,
#endif
    rcStore,
    rcCount
};
60
/// Tells the caller of refreshStaleness() which check decided the verdict.
typedef struct stale_flags {
    bool expires;   ///< the entry carried an explicit expiration time
    bool min;       ///< fresh because age is below the rule's minimum
    bool lmfactor;  ///< decided by the last-modified factor algorithm
    bool max;       ///< stale because age exceeds the rule's maximum
} stale_flags;
67
/*
 * Reason codes returned by refreshCheck(), modelled on HTTP/FTP status
 * codes: every Fresh code lies in 100-199 and every Stale code in
 * 200-299. The codes may end up in logs, so each value is written out
 * explicitly to keep it stable when new codes are added.
 */
enum {
    FRESH_REQUEST_MAX_STALE_ALL = 100,
    FRESH_REQUEST_MAX_STALE_VALUE = 101,
    FRESH_EXPIRES = 102,
    FRESH_LMFACTOR_RULE = 103,
    FRESH_MIN_RULE = 104,
    FRESH_OVERRIDE_EXPIRES = 105,
    FRESH_OVERRIDE_LASTMOD = 106,

    STALE_MUST_REVALIDATE = 200,
    STALE_RELOAD_INTO_IMS = 201,
    STALE_FORCED_RELOAD = 202,
    STALE_EXCEEDS_REQUEST_MAX_AGE_VALUE = 203,
    STALE_EXPIRES = 204,
    STALE_MAX_RULE = 205,
    STALE_LMFACTOR_RULE = 206,
    STALE_MAX_STALE = 207,
    STALE_DEFAULT = 299
};
92
93 static struct RefreshCounts {
94 const char *proto;
95 int total;
96 int status[STALE_DEFAULT + 1];
97 }
98
99 refreshCounts[rcCount];
100
/*
 * Values of the built-in fallback rule:
 *      MIN     none
 *      PCT     20%
 *      MAX     3 days
 */
#define REFRESH_DEFAULT_MIN ((time_t)0)
#define REFRESH_DEFAULT_PCT (0.20)
#define REFRESH_DEFAULT_MAX ((time_t)(3 * 24 * 60 * 60))
110
111 static const refresh_t *refreshUncompiledPattern(const char *);
112 static OBJH refreshStats;
113 static int refreshStaleness(const StoreEntry * entry, time_t check_time, const time_t age, const refresh_t * R, stale_flags * sf);
114
115 static refresh_t DefaultRefresh;
116
117 const refresh_t *
118 refreshLimits(const char *url)
119 {
120 const refresh_t *R;
121
122 for (R = Config.Refresh; R; R = R->next) {
123 if (!regexec(&(R->compiled_pattern), url, 0, 0, 0))
124 return R;
125 }
126
127 return NULL;
128 }
129
130 static const refresh_t *
131 refreshUncompiledPattern(const char *pat)
132 {
133 const refresh_t *R;
134
135 for (R = Config.Refresh; R; R = R->next) {
136 if (0 == strcmp(R->pattern, pat))
137 return R;
138 }
139
140 return NULL;
141 }
142
143 /**
144 * Calculate how stale the response is (or will be at the check_time).
145 * Staleness calculation is based on the following: (1) response
146 * expiration time, (2) age greater than configured maximum, (3)
147 * last-modified factor, and (4) age less than configured minimum.
148 *
149 * \retval -1 If the response is fresh.
150 * \retval >0 Otherwise return it's staleness.
151 * \retval 0 NOTE return value of 0 means the response is stale.
152 *
153 * The 'stale_flags' structure is used to tell the calling function
154 * _why_ this response is fresh or stale. Its used, for example,
155 * when the admin wants to override expiration and last-modified
156 * times.
157 */
158 static int
159 refreshStaleness(const StoreEntry * entry, time_t check_time, const time_t age, const refresh_t * R, stale_flags * sf)
160 {
161 /** \par
162 * Check for an explicit expiration time (Expires: header).
163 */
164 if (entry->expires > -1) {
165 sf->expires = true;
166
167 if (entry->expires > check_time) {
168 debugs(22, 3, "FRESH: expires " << entry->expires <<
169 " >= check_time " << check_time << " ");
170
171 return -1;
172 } else {
173 debugs(22, 3, "STALE: expires " << entry->expires <<
174 " < check_time " << check_time << " ");
175
176 return (check_time - entry->expires);
177 }
178 }
179
180 /** \par
181 * Use local heuristics to determine staleness. Start with the
182 * max age from the refresh_pattern rule.
183 */
184 if (age > R->max) {
185 debugs(22, 3, "STALE: age " << age << " > max " << R->max << " ");
186 sf->max = true;
187 return (age - R->max);
188 }
189
190 /** \par
191 * Try the last-modified factor algorithm: refresh_pattern n% percentage of Last-Modified: age.
192 */
193 if (entry->lastmod > -1 && entry->timestamp > entry->lastmod) {
194 /*
195 * stale_age is the Age of the response when it became/becomes
196 * stale according to the last-modified factor algorithm.
197 */
198 time_t stale_age = static_cast<time_t>((entry->timestamp - entry->lastmod) * R->pct);
199 sf->lmfactor = true;
200
201 if (age >= stale_age) {
202 debugs(22, 3, "STALE: age " << age << " > stale_age " << stale_age);
203 return (age - stale_age);
204 } else {
205 debugs(22, 3, "FRESH: age " << age << " <= stale_age " << stale_age);
206 return -1;
207 }
208 }
209
210 /** \par
211 * Finally, if all else fails; staleness is determined by the refresh_pattern
212 * configured minimum age.
213 */
214 if (age < R->min) {
215 debugs(22, 3, "FRESH: age " << age << " < min " << R->min);
216 sf->min = true;
217 return -1;
218 }
219
220 debugs(22, 3, "STALE: age " << age << " >= min " << R->min);
221 return (age - R->min);
222 }
223
224 /**
225 * \retval 1 if the entry must be revalidated within delta seconds
226 * \retval 0 otherwise
227 *
228 * note: request maybe null (e.g. for cache digests build)
229 */
230 static int
231 refreshCheck(const StoreEntry * entry, HttpRequest * request, time_t delta)
232 {
233 const refresh_t *R;
234 const char *uri = NULL;
235 time_t age = 0;
236 time_t check_time = squid_curtime + delta;
237 int staleness;
238 stale_flags sf;
239
240 if (entry->mem_obj)
241 uri = entry->mem_obj->url;
242 else if (request)
243 uri = urlCanonical(request);
244
245 debugs(22, 3, "refreshCheck: '" << (uri ? uri : "<none>") << "'");
246
247 if (check_time > entry->timestamp)
248 age = check_time - entry->timestamp;
249
250 // FIXME: what to do when age < 0 or counter overflow?
251 assert(age >= 0);
252
253 R = uri ? refreshLimits(uri) : refreshUncompiledPattern(".");
254
255 if (NULL == R)
256 R = &DefaultRefresh;
257
258 debugs(22, 3, "refreshCheck: Matched '" << R->pattern << " " <<
259 (int) R->min << " " << (int) (100.0 * R->pct) << "%% " <<
260 (int) R->max << "'");
261
262 debugs(22, 3, "\tage:\t" << age);
263
264 debugs(22, 3, "\tcheck_time:\t" << mkrfc1123(check_time));
265
266 debugs(22, 3, "\tentry->timestamp:\t" << mkrfc1123(entry->timestamp));
267
268 if (request && !request->flags.ignore_cc) {
269 const HttpHdrCc *const cc = request->cache_control;
270 if (cc && cc->hasMinFresh()) {
271 const int32_t minFresh=cc->minFresh();
272 debugs(22, 3, "\tage + min-fresh:\t" << age << " + " <<
273 minFresh << " = " << age + minFresh);
274 debugs(22, 3, "\tcheck_time + min-fresh:\t" << check_time << " + "
275 << minFresh << " = " <<
276 mkrfc1123(check_time + minFresh));
277 age += minFresh;
278 check_time += minFresh;
279 }
280 }
281
282 memset(&sf, '\0', sizeof(sf));
283
284 staleness = refreshStaleness(entry, check_time, age, R, &sf);
285
286 debugs(22, 3, "Staleness = " << staleness);
287
288 // stale-if-error requires any failure be passed thru when its period is over.
289 if (request && entry->mem_obj && entry->mem_obj->getReply() && entry->mem_obj->getReply()->cache_control &&
290 entry->mem_obj->getReply()->cache_control->hasStaleIfError() &&
291 entry->mem_obj->getReply()->cache_control->staleIfError() < staleness) {
292
293 debugs(22, 3, "refreshCheck: stale-if-error period expired.");
294 request->flags.fail_on_validation_err = 1;
295 }
296
297 if (EBIT_TEST(entry->flags, ENTRY_REVALIDATE) && staleness > -1
298 #if USE_HTTP_VIOLATIONS
299 && !R->flags.ignore_must_revalidate
300 #endif
301 ) {
302 debugs(22, 3, "refreshCheck: YES: Must revalidate stale response");
303 if (request)
304 request->flags.fail_on_validation_err = 1;
305 return STALE_MUST_REVALIDATE;
306 }
307
308 /* request-specific checks */
309 if (request && !request->flags.ignore_cc) {
310 HttpHdrCc *cc = request->cache_control;
311
312 if (request->flags.ims && (R->flags.refresh_ims || Config.onoff.refresh_all_ims)) {
313 /* The clients no-cache header is changed into a IMS query */
314 debugs(22, 3, "refreshCheck: YES: refresh-ims");
315 return STALE_FORCED_RELOAD;
316 }
317
318 #if USE_HTTP_VIOLATIONS
319
320 if (!request->flags.nocache_hack) {
321 (void) 0;
322 } else if (R->flags.ignore_reload) {
323 /* The clients no-cache header is ignored */
324 debugs(22, 3, "refreshCheck: MAYBE: ignore-reload");
325 } else if (R->flags.reload_into_ims || Config.onoff.reload_into_ims) {
326 /* The clients no-cache header is changed into a IMS query */
327 debugs(22, 3, "refreshCheck: YES: reload-into-ims");
328 return STALE_RELOAD_INTO_IMS;
329 } else {
330 /* The clients no-cache header is not overridden on this request */
331 debugs(22, 3, "refreshCheck: YES: client reload");
332 request->flags.nocache = 1;
333 return STALE_FORCED_RELOAD;
334 }
335
336 #endif
337 if (NULL != cc) {
338 if (cc->hasMaxAge()) {
339 #if USE_HTTP_VIOLATIONS
340 if (R->flags.ignore_reload && cc->maxAge() == 0) {
341 debugs(22, 3, "refreshCheck: MAYBE: client-max-age = 0 and ignore-reload");
342 } else
343 #endif
344 {
345 if (cc->maxAge() == 0) {
346 debugs(22, 3, "refreshCheck: YES: client-max-age = 0");
347 return STALE_EXCEEDS_REQUEST_MAX_AGE_VALUE;
348 }
349
350 if (age > cc->maxAge()) {
351 debugs(22, 3, "refreshCheck: YES: age > client-max-age");
352 return STALE_EXCEEDS_REQUEST_MAX_AGE_VALUE;
353 }
354 }
355 }
356
357 if (cc->hasMaxStale() && staleness > -1) {
358 if (cc->maxStale()==HttpHdrCc::MAX_STALE_ANY) {
359 /* max-stale directive without a value */
360 debugs(22, 3, "refreshCheck: NO: max-stale wildcard");
361 return FRESH_REQUEST_MAX_STALE_ALL;
362 } else if (staleness < cc->maxStale()) {
363 debugs(22, 3, "refreshCheck: NO: staleness < max-stale");
364 return FRESH_REQUEST_MAX_STALE_VALUE;
365 }
366 }
367 }
368 }
369
370 if (-1 == staleness) {
371 debugs(22, 3, "refreshCheck: object isn't stale..");
372 if (sf.expires) {
373 debugs(22, 3, "refreshCheck: returning FRESH_EXPIRES");
374 return FRESH_EXPIRES;
375 }
376
377 assert(!sf.max);
378
379 if (sf.lmfactor) {
380 debugs(22, 3, "refreshCheck: returning FRESH_LMFACTOR_RULE");
381 return FRESH_LMFACTOR_RULE;
382 }
383
384 assert(sf.min);
385
386 debugs(22, 3, "refreshCheck: returning FRESH_MIN_RULE");
387 return FRESH_MIN_RULE;
388 }
389
390 /*
391 * At this point the response is stale, unless one of
392 * the override options kicks in.
393 * NOTE: max-stale config blocks the overrides.
394 */
395 int max_stale = (R->max_stale >= 0 ? R->max_stale : Config.maxStale);
396 if ( max_stale >= 0 && staleness > max_stale) {
397 debugs(22, 3, "refreshCheck: YES: max-stale limit");
398 if (request)
399 request->flags.fail_on_validation_err = 1;
400 return STALE_MAX_STALE;
401 }
402
403 if (sf.expires) {
404 #if USE_HTTP_VIOLATIONS
405
406 if (R->flags.override_expire && age < R->min) {
407 debugs(22, 3, "refreshCheck: NO: age < min && override-expire");
408 return FRESH_OVERRIDE_EXPIRES;
409 }
410
411 #endif
412 return STALE_EXPIRES;
413 }
414
415 if (sf.max)
416 return STALE_MAX_RULE;
417
418 if (sf.lmfactor) {
419 #if USE_HTTP_VIOLATIONS
420
421 if (R->flags.override_lastmod && age < R->min) {
422 debugs(22, 3, "refreshCheck: NO: age < min && override-lastmod");
423 return FRESH_OVERRIDE_LASTMOD;
424 }
425
426 #endif
427 return STALE_LMFACTOR_RULE;
428 }
429
430 debugs(22, 3, "refreshCheck: returning STALE_DEFAULT");
431 return STALE_DEFAULT;
432 }
433
434 int
435 refreshIsCachable(const StoreEntry * entry)
436 {
437 /*
438 * Don't look at the request to avoid no-cache and other nuisances.
439 * the object should have a mem_obj so the URL will be found there.
440 * minimum_expiry_time seconds delta (defaults to 60 seconds), to
441 * avoid objects which expire almost immediately, and which can't
442 * be refreshed.
443 */
444 int reason = refreshCheck(entry, NULL, Config.minimum_expiry_time);
445 ++ refreshCounts[rcStore].total;
446 ++ refreshCounts[rcStore].status[reason];
447
448 if (reason < STALE_MUST_REVALIDATE)
449 /* Does not need refresh. This is certainly cachable */
450 return 1;
451
452 if (entry->lastmod < 0)
453 /* Last modified is needed to do a refresh */
454 return 0;
455
456 if (entry->mem_obj == NULL)
457 /* no mem_obj? */
458 return 1;
459
460 if (entry->getReply() == NULL)
461 /* no reply? */
462 return 1;
463
464 if (entry->getReply()->content_length == 0)
465 /* No use refreshing (caching?) 0 byte objects */
466 return 0;
467
468 /* This seems to be refreshable. Cache it */
469 return 1;
470 }
471
472 /// whether reply is stale if it is a hit
473 static bool
474 refreshIsStaleIfHit(const int reason)
475 {
476 switch (reason) {
477 case FRESH_MIN_RULE:
478 case FRESH_LMFACTOR_RULE:
479 case FRESH_EXPIRES:
480 return false;
481 default:
482 return true;
483 }
484 }
485
486 /* refreshCheck... functions below are protocol-specific wrappers around
487 * refreshCheck() function above */
488
489 int
490 refreshCheckHTTP(const StoreEntry * entry, HttpRequest * request)
491 {
492 int reason = refreshCheck(entry, request, 0);
493 ++ refreshCounts[rcHTTP].total;
494 ++ refreshCounts[rcHTTP].status[reason];
495 request->flags.stale_if_hit = refreshIsStaleIfHit(reason);
496 return (Config.onoff.offline || reason < 200) ? 0 : 1;
497 }
498
499 int
500 refreshCheckICP(const StoreEntry * entry, HttpRequest * request)
501 {
502 int reason = refreshCheck(entry, request, 30);
503 ++ refreshCounts[rcICP].total;
504 ++ refreshCounts[rcICP].status[reason];
505 return (reason < 200) ? 0 : 1;
506 }
507
#if USE_HTCP
/**
 * refreshCheck() wrapper for HTCP; evaluates freshness 10 seconds ahead
 * of now and updates the HTCP statistics.
 *
 * \retval 0 the entry is fresh
 * \retval 1 the entry is stale
 */
int
refreshCheckHTCP(const StoreEntry * entry, HttpRequest * request)
{
    const int verdict = refreshCheck(entry, request, 10);
    refreshCounts[rcHTCP].total += 1;
    refreshCounts[rcHTCP].status[verdict] += 1;

    /* all fresh reason codes sort below the first stale code */
    if (verdict < STALE_MUST_REVALIDATE)
        return 0;

    return 1;
}

#endif
519
#if USE_CACHE_DIGESTS
/**
 * refreshCheck() wrapper for cache digests; evaluates freshness delta
 * ahead of now and updates the Cache Digests statistics. The request
 * is taken from the entry's mem_obj when there is one, otherwise no
 * request is passed.
 *
 * \retval 0 the entry is fresh
 * \retval 1 the entry is stale
 */
int
refreshCheckDigest(const StoreEntry * entry, time_t delta)
{
    HttpRequest *req = NULL;

    if (entry->mem_obj)
        req = entry->mem_obj->request;

    const int verdict = refreshCheck(entry, req, delta);
    refreshCounts[rcCDigest].total += 1;
    refreshCounts[rcCDigest].status[verdict] += 1;

    /* all fresh reason codes sort below the first stale code */
    if (verdict < STALE_MUST_REVALIDATE)
        return 0;

    return 1;
}

#endif
533
534 time_t
535 getMaxAge(const char *url)
536 {
537 const refresh_t *R;
538 debugs(22, 3, "getMaxAge: '" << url << "'");
539
540 if ((R = refreshLimits(url)))
541 return R->max;
542 else
543 return REFRESH_DEFAULT_MAX;
544 }
545
546 static void
547
548 refreshCountsStats(StoreEntry * sentry, struct RefreshCounts *rc)
549 {
550 int sum = 0;
551 int tot = rc->total;
552
553 storeAppendPrintf(sentry, "\n\n%s histogram:\n", rc->proto);
554 storeAppendPrintf(sentry, "Count\t%%Total\tCategory\n");
555
556 #define refreshCountsStatsEntry(code,desc) { \
557 storeAppendPrintf(sentry, "%6d\t%6.2f\t%s\n", \
558 rc->status[code], xpercent(rc->status[code], tot), desc); \
559 sum += rc->status[code]; \
560 }
561
562 refreshCountsStatsEntry(FRESH_REQUEST_MAX_STALE_ALL,
563 "Fresh: request max-stale wildcard");
564 refreshCountsStatsEntry(FRESH_REQUEST_MAX_STALE_VALUE,
565 "Fresh: request max-stale value");
566 refreshCountsStatsEntry(FRESH_EXPIRES,
567 "Fresh: expires time not reached");
568 refreshCountsStatsEntry(FRESH_LMFACTOR_RULE,
569 "Fresh: refresh_pattern last-mod factor percentage");
570 refreshCountsStatsEntry(FRESH_MIN_RULE,
571 "Fresh: refresh_pattern min value");
572 refreshCountsStatsEntry(FRESH_OVERRIDE_EXPIRES,
573 "Fresh: refresh_pattern override expires");
574 refreshCountsStatsEntry(FRESH_OVERRIDE_LASTMOD,
575 "Fresh: refresh_pattern override lastmod");
576 refreshCountsStatsEntry(STALE_MUST_REVALIDATE,
577 "Stale: response has must-revalidate");
578 refreshCountsStatsEntry(STALE_RELOAD_INTO_IMS,
579 "Stale: changed reload into IMS");
580 refreshCountsStatsEntry(STALE_FORCED_RELOAD,
581 "Stale: request has no-cache directive");
582 refreshCountsStatsEntry(STALE_EXCEEDS_REQUEST_MAX_AGE_VALUE,
583 "Stale: age exceeds request max-age value");
584 refreshCountsStatsEntry(STALE_EXPIRES,
585 "Stale: expires time reached");
586 refreshCountsStatsEntry(STALE_MAX_RULE,
587 "Stale: refresh_pattern max age rule");
588 refreshCountsStatsEntry(STALE_LMFACTOR_RULE,
589 "Stale: refresh_pattern last-mod factor percentage");
590 refreshCountsStatsEntry(STALE_DEFAULT,
591 "Stale: by default");
592
593 tot = sum; /* paranoid: "total" line shows 100% if we forgot nothing */
594 storeAppendPrintf(sentry, "%6d\t%6.2f\tTOTAL\n",
595 rc->total, xpercent(rc->total, tot));
596 \
597 storeAppendPrintf(sentry, "\n");
598 }
599
600 static void
601 refreshStats(StoreEntry * sentry)
602 {
603 int i;
604 int total = 0;
605
606 /* get total usage count */
607
608 for (i = 0; i < rcCount; ++i)
609 total += refreshCounts[i].total;
610
611 /* protocol usage histogram */
612 storeAppendPrintf(sentry, "\nRefreshCheck calls per protocol\n\n");
613
614 storeAppendPrintf(sentry, "Protocol\t#Calls\t%%Calls\n");
615
616 for (i = 0; i < rcCount; ++i)
617 storeAppendPrintf(sentry, "%10s\t%6d\t%6.2f\n",
618 refreshCounts[i].proto,
619 refreshCounts[i].total,
620 xpercent(refreshCounts[i].total, total));
621
622 /* per protocol histograms */
623 storeAppendPrintf(sentry, "\n\nRefreshCheck histograms for various protocols\n");
624
625 for (i = 0; i < rcCount; ++i)
626 refreshCountsStats(sentry, &refreshCounts[i]);
627 }
628
629 static void
630 refreshRegisterWithCacheManager(void)
631 {
632 Mgr::RegisterAction("refresh", "Refresh Algorithm Statistics", refreshStats, 0, 1);
633 }
634
635 void
636 refreshInit(void)
637 {
638 memset(refreshCounts, 0, sizeof(refreshCounts));
639 refreshCounts[rcHTTP].proto = "HTTP";
640 refreshCounts[rcICP].proto = "ICP";
641 #if USE_HTCP
642
643 refreshCounts[rcHTCP].proto = "HTCP";
644 #endif
645
646 refreshCounts[rcStore].proto = "On Store";
647 #if USE_CACHE_DIGESTS
648
649 refreshCounts[rcCDigest].proto = "Cache Digests";
650 #endif
651
652 memset(&DefaultRefresh, '\0', sizeof(DefaultRefresh));
653 DefaultRefresh.pattern = "<none>";
654 DefaultRefresh.min = REFRESH_DEFAULT_MIN;
655 DefaultRefresh.pct = REFRESH_DEFAULT_PCT;
656 DefaultRefresh.max = REFRESH_DEFAULT_MAX;
657
658 refreshRegisterWithCacheManager();
659 }