]> git.ipfire.org Git - thirdparty/squid.git/blob - src/refresh.cc
SourceFormat Enforcement
[thirdparty/squid.git] / src / refresh.cc
1
2 /*
3 * DEBUG: section 22 Refresh Calculation
4 * AUTHOR: Harvest Derived
5 *
6 * SQUID Web Proxy Cache http://www.squid-cache.org/
7 * ----------------------------------------------------------
8 *
9 * Squid is the result of efforts by numerous individuals from
10 * the Internet community; see the CONTRIBUTORS file for full
11 * details. Many organizations have provided support for Squid's
12 * development; see the SPONSORS file for full details. Squid is
13 * Copyrighted (C) 2001 by the Regents of the University of
14 * California; see the COPYRIGHT file for full details. Squid
15 * incorporates software developed and/or copyrighted by other
16 * sources; see the CREDITS file for full details.
17 *
18 * This program is free software; you can redistribute it and/or modify
19 * it under the terms of the GNU General Public License as published by
20 * the Free Software Foundation; either version 2 of the License, or
21 * (at your option) any later version.
22 *
23 * This program is distributed in the hope that it will be useful,
24 * but WITHOUT ANY WARRANTY; without even the implied warranty of
25 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
26 * GNU General Public License for more details.
27 *
28 * You should have received a copy of the GNU General Public License
29 * along with this program; if not, write to the Free Software
30 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA.
31 *
32 */
33
34 #ifndef USE_POSIX_REGEX
35 #define USE_POSIX_REGEX /* put before includes; always use POSIX */
36 #endif
37
38 #include "squid.h"
39 #include "HttpHdrCc.h"
40 #include "HttpReply.h"
41 #include "HttpRequest.h"
42 #include "MemObject.h"
43 #include "mgr/Registration.h"
44 #include "RefreshPattern.h"
45 #include "SquidConfig.h"
46 #include "SquidTime.h"
47 #include "Store.h"
48 #include "URL.h"
49
/*
 * Index into refreshCounts[]: one slot per caller category of
 * refreshCheck(). rcCount is not a category; it is the number of slots.
 */
typedef enum {
    rcHTTP = 0,     /* refreshCheckHTTP() */
    rcICP,          /* refreshCheckICP() */
#if USE_HTCP
    rcHTCP,         /* refreshCheckHTCP() */
#endif
#if USE_CACHE_DIGESTS
    rcCDigest,      /* refreshCheckDigest() */
#endif
    rcStore,        /* refreshIsCachable() */
    rcCount         /* number of categories; keep last */
} refreshCountsEnum;
62
/*
 * Records which rule refreshStaleness() used to reach its verdict.
 * refreshCheck() zeroes the structure before the call, so at most the
 * flag for the deciding rule is set afterwards.
 */
typedef struct {
    bool expires;   /* entry had an explicit expiration time */
    bool min;       /* age was below the refresh_pattern minimum */
    bool lmfactor;  /* last-modified factor algorithm was applied */
    bool max;       /* age exceeded the refresh_pattern maximum */
} stale_flags;
69
/*
 * Reason codes returned by refreshCheck(). The values are fixed, in the
 * manner of HTTP/FTP status codes: every "fresh" code lies in 100-199
 * and every "stale" code lies in 200-299. They may end up in logs, so
 * keep the numbering stable over time.
 */
enum {
    /* fresh: 100-199 */
    FRESH_REQUEST_MAX_STALE_ALL = 100,
    FRESH_REQUEST_MAX_STALE_VALUE = 101,
    FRESH_EXPIRES = 102,
    FRESH_LMFACTOR_RULE = 103,
    FRESH_MIN_RULE = 104,
    FRESH_OVERRIDE_EXPIRES = 105,
    FRESH_OVERRIDE_LASTMOD = 106,

    /* stale: 200-299 */
    STALE_MUST_REVALIDATE = 200,
    STALE_RELOAD_INTO_IMS = 201,
    STALE_FORCED_RELOAD = 202,
    STALE_EXCEEDS_REQUEST_MAX_AGE_VALUE = 203,
    STALE_EXPIRES = 204,
    STALE_MAX_RULE = 205,
    STALE_LMFACTOR_RULE = 206,
    STALE_MAX_STALE = 207,
    STALE_DEFAULT = 299
};
94
95 static struct RefreshCounts {
96 const char *proto;
97 int total;
98 int status[STALE_DEFAULT + 1];
99 }
100
101 refreshCounts[rcCount];
102
/*
 * Values loaded into DefaultRefresh by refreshInit():
 *      MIN     none (0 seconds)
 *      PCT     20%
 *      MAX     3 days
 */
#define REFRESH_DEFAULT_MIN ((time_t)0)
#define REFRESH_DEFAULT_PCT (0.20)
#define REFRESH_DEFAULT_MAX ((time_t)(3 * 24 * 60 * 60))
112
113 static const RefreshPattern *refreshUncompiledPattern(const char *);
114 static OBJH refreshStats;
115 static int refreshStaleness(const StoreEntry * entry, time_t check_time, const time_t age, const RefreshPattern * R, stale_flags * sf);
116
117 static RefreshPattern DefaultRefresh;
118
119 const RefreshPattern *
120 refreshLimits(const char *url)
121 {
122 const RefreshPattern *R;
123
124 for (R = Config.Refresh; R; R = R->next) {
125 if (!regexec(&(R->compiled_pattern), url, 0, 0, 0))
126 return R;
127 }
128
129 return NULL;
130 }
131
132 static const RefreshPattern *
133 refreshUncompiledPattern(const char *pat)
134 {
135 const RefreshPattern *R;
136
137 for (R = Config.Refresh; R; R = R->next) {
138 if (0 == strcmp(R->pattern, pat))
139 return R;
140 }
141
142 return NULL;
143 }
144
145 /**
146 * Calculate how stale the response is (or will be at the check_time).
147 * Staleness calculation is based on the following: (1) response
148 * expiration time, (2) age greater than configured maximum, (3)
149 * last-modified factor, and (4) age less than configured minimum.
150 *
151 * \retval -1 If the response is fresh.
152 * \retval >0 Otherwise return it's staleness.
153 * \retval 0 NOTE return value of 0 means the response is stale.
154 *
155 * The 'stale_flags' structure is used to tell the calling function
156 * _why_ this response is fresh or stale. Its used, for example,
157 * when the admin wants to override expiration and last-modified
158 * times.
159 */
160 static int
161 refreshStaleness(const StoreEntry * entry, time_t check_time, const time_t age, const RefreshPattern * R, stale_flags * sf)
162 {
163 /** \par
164 * Check for an explicit expiration time (Expires: header).
165 */
166 if (entry->expires > -1) {
167 sf->expires = true;
168
169 if (entry->expires > check_time) {
170 debugs(22, 3, "FRESH: expires " << entry->expires <<
171 " >= check_time " << check_time << " ");
172
173 return -1;
174 } else {
175 debugs(22, 3, "STALE: expires " << entry->expires <<
176 " < check_time " << check_time << " ");
177
178 return (check_time - entry->expires);
179 }
180 }
181
182 /** \par
183 * Use local heuristics to determine staleness. Start with the
184 * max age from the refresh_pattern rule.
185 */
186 if (age > R->max) {
187 debugs(22, 3, "STALE: age " << age << " > max " << R->max << " ");
188 sf->max = true;
189 return (age - R->max);
190 }
191
192 /** \par
193 * Try the last-modified factor algorithm: refresh_pattern n% percentage of Last-Modified: age.
194 */
195 if (entry->lastmod > -1 && entry->timestamp > entry->lastmod) {
196 /*
197 * stale_age is the Age of the response when it became/becomes
198 * stale according to the last-modified factor algorithm.
199 */
200 time_t stale_age = static_cast<time_t>((entry->timestamp - entry->lastmod) * R->pct);
201 sf->lmfactor = true;
202
203 if (age >= stale_age) {
204 debugs(22, 3, "STALE: age " << age << " > stale_age " << stale_age);
205 return (age - stale_age);
206 } else {
207 debugs(22, 3, "FRESH: age " << age << " <= stale_age " << stale_age);
208 return -1;
209 }
210 }
211
212 /** \par
213 * Finally, if all else fails; staleness is determined by the refresh_pattern
214 * configured minimum age.
215 */
216 if (age < R->min) {
217 debugs(22, 3, "FRESH: age " << age << " < min " << R->min);
218 sf->min = true;
219 return -1;
220 }
221
222 debugs(22, 3, "STALE: age " << age << " >= min " << R->min);
223 return (age - R->min);
224 }
225
226 /**
227 * \retval 1 if the entry must be revalidated within delta seconds
228 * \retval 0 otherwise
229 *
230 * note: request maybe null (e.g. for cache digests build)
231 */
232 static int
233 refreshCheck(const StoreEntry * entry, HttpRequest * request, time_t delta)
234 {
235 const RefreshPattern *R;
236 const char *uri = NULL;
237 time_t age = 0;
238 time_t check_time = squid_curtime + delta;
239 int staleness;
240 stale_flags sf;
241
242 if (entry->mem_obj)
243 uri = entry->mem_obj->url;
244 else if (request)
245 uri = urlCanonical(request);
246
247 debugs(22, 3, "refreshCheck: '" << (uri ? uri : "<none>") << "'");
248
249 if (check_time > entry->timestamp)
250 age = check_time - entry->timestamp;
251
252 // FIXME: what to do when age < 0 or counter overflow?
253 assert(age >= 0);
254
255 R = uri ? refreshLimits(uri) : refreshUncompiledPattern(".");
256
257 if (NULL == R)
258 R = &DefaultRefresh;
259
260 debugs(22, 3, "refreshCheck: Matched '" << R->pattern << " " <<
261 (int) R->min << " " << (int) (100.0 * R->pct) << "%% " <<
262 (int) R->max << "'");
263
264 debugs(22, 3, "\tage:\t" << age);
265
266 debugs(22, 3, "\tcheck_time:\t" << mkrfc1123(check_time));
267
268 debugs(22, 3, "\tentry->timestamp:\t" << mkrfc1123(entry->timestamp));
269
270 if (request && !request->flags.ignoreCc) {
271 const HttpHdrCc *const cc = request->cache_control;
272 if (cc && cc->hasMinFresh()) {
273 const int32_t minFresh=cc->minFresh();
274 debugs(22, 3, "\tage + min-fresh:\t" << age << " + " <<
275 minFresh << " = " << age + minFresh);
276 debugs(22, 3, "\tcheck_time + min-fresh:\t" << check_time << " + "
277 << minFresh << " = " <<
278 mkrfc1123(check_time + minFresh));
279 age += minFresh;
280 check_time += minFresh;
281 }
282 }
283
284 memset(&sf, '\0', sizeof(sf));
285
286 staleness = refreshStaleness(entry, check_time, age, R, &sf);
287
288 debugs(22, 3, "Staleness = " << staleness);
289
290 // stale-if-error requires any failure be passed thru when its period is over.
291 if (request && entry->mem_obj && entry->mem_obj->getReply() && entry->mem_obj->getReply()->cache_control &&
292 entry->mem_obj->getReply()->cache_control->hasStaleIfError() &&
293 entry->mem_obj->getReply()->cache_control->staleIfError() < staleness) {
294
295 debugs(22, 3, "refreshCheck: stale-if-error period expired.");
296 request->flags.failOnValidationError = true;
297 }
298
299 if (EBIT_TEST(entry->flags, ENTRY_REVALIDATE) && staleness > -1
300 #if USE_HTTP_VIOLATIONS
301 && !R->flags.ignore_must_revalidate
302 #endif
303 ) {
304 debugs(22, 3, "refreshCheck: YES: Must revalidate stale response");
305 if (request)
306 request->flags.failOnValidationError = true;
307 return STALE_MUST_REVALIDATE;
308 }
309
310 /* request-specific checks */
311 if (request && !request->flags.ignoreCc) {
312 HttpHdrCc *cc = request->cache_control;
313
314 if (request->flags.ims && (R->flags.refresh_ims || Config.onoff.refresh_all_ims)) {
315 /* The clients no-cache header is changed into a IMS query */
316 debugs(22, 3, "refreshCheck: YES: refresh-ims");
317 return STALE_FORCED_RELOAD;
318 }
319
320 #if USE_HTTP_VIOLATIONS
321
322 if (!request->flags.noCacheHack()) {
323 (void) 0;
324 } else if (R->flags.ignore_reload) {
325 /* The clients no-cache header is ignored */
326 debugs(22, 3, "refreshCheck: MAYBE: ignore-reload");
327 } else if (R->flags.reload_into_ims || Config.onoff.reload_into_ims) {
328 /* The clients no-cache header is changed into a IMS query */
329 debugs(22, 3, "refreshCheck: YES: reload-into-ims");
330 return STALE_RELOAD_INTO_IMS;
331 } else {
332 /* The clients no-cache header is not overridden on this request */
333 debugs(22, 3, "refreshCheck: YES: client reload");
334 request->flags.noCache = true;
335 return STALE_FORCED_RELOAD;
336 }
337
338 #endif
339 if (NULL != cc) {
340 if (cc->hasMaxAge()) {
341 #if USE_HTTP_VIOLATIONS
342 if (R->flags.ignore_reload && cc->maxAge() == 0) {
343 debugs(22, 3, "refreshCheck: MAYBE: client-max-age = 0 and ignore-reload");
344 } else
345 #endif
346 {
347 if (cc->maxAge() == 0) {
348 debugs(22, 3, "refreshCheck: YES: client-max-age = 0");
349 return STALE_EXCEEDS_REQUEST_MAX_AGE_VALUE;
350 }
351
352 if (age > cc->maxAge()) {
353 debugs(22, 3, "refreshCheck: YES: age > client-max-age");
354 return STALE_EXCEEDS_REQUEST_MAX_AGE_VALUE;
355 }
356 }
357 }
358
359 if (cc->hasMaxStale() && staleness > -1) {
360 if (cc->maxStale()==HttpHdrCc::MAX_STALE_ANY) {
361 /* max-stale directive without a value */
362 debugs(22, 3, "refreshCheck: NO: max-stale wildcard");
363 return FRESH_REQUEST_MAX_STALE_ALL;
364 } else if (staleness < cc->maxStale()) {
365 debugs(22, 3, "refreshCheck: NO: staleness < max-stale");
366 return FRESH_REQUEST_MAX_STALE_VALUE;
367 }
368 }
369 }
370 }
371
372 if (-1 == staleness) {
373 debugs(22, 3, "refreshCheck: object isn't stale..");
374 if (sf.expires) {
375 debugs(22, 3, "refreshCheck: returning FRESH_EXPIRES");
376 return FRESH_EXPIRES;
377 }
378
379 assert(!sf.max);
380
381 if (sf.lmfactor) {
382 debugs(22, 3, "refreshCheck: returning FRESH_LMFACTOR_RULE");
383 return FRESH_LMFACTOR_RULE;
384 }
385
386 assert(sf.min);
387
388 debugs(22, 3, "refreshCheck: returning FRESH_MIN_RULE");
389 return FRESH_MIN_RULE;
390 }
391
392 /*
393 * At this point the response is stale, unless one of
394 * the override options kicks in.
395 * NOTE: max-stale config blocks the overrides.
396 */
397 int max_stale = (R->max_stale >= 0 ? R->max_stale : Config.maxStale);
398 if ( max_stale >= 0 && staleness > max_stale) {
399 debugs(22, 3, "refreshCheck: YES: max-stale limit");
400 if (request)
401 request->flags.failOnValidationError = true;
402 return STALE_MAX_STALE;
403 }
404
405 if (sf.expires) {
406 #if USE_HTTP_VIOLATIONS
407
408 if (R->flags.override_expire && age < R->min) {
409 debugs(22, 3, "refreshCheck: NO: age < min && override-expire");
410 return FRESH_OVERRIDE_EXPIRES;
411 }
412
413 #endif
414 return STALE_EXPIRES;
415 }
416
417 if (sf.max)
418 return STALE_MAX_RULE;
419
420 if (sf.lmfactor) {
421 #if USE_HTTP_VIOLATIONS
422
423 if (R->flags.override_lastmod && age < R->min) {
424 debugs(22, 3, "refreshCheck: NO: age < min && override-lastmod");
425 return FRESH_OVERRIDE_LASTMOD;
426 }
427
428 #endif
429 return STALE_LMFACTOR_RULE;
430 }
431
432 debugs(22, 3, "refreshCheck: returning STALE_DEFAULT");
433 return STALE_DEFAULT;
434 }
435
436 int
437 refreshIsCachable(const StoreEntry * entry)
438 {
439 /*
440 * Don't look at the request to avoid no-cache and other nuisances.
441 * the object should have a mem_obj so the URL will be found there.
442 * minimum_expiry_time seconds delta (defaults to 60 seconds), to
443 * avoid objects which expire almost immediately, and which can't
444 * be refreshed.
445 */
446 int reason = refreshCheck(entry, NULL, Config.minimum_expiry_time);
447 ++ refreshCounts[rcStore].total;
448 ++ refreshCounts[rcStore].status[reason];
449
450 if (reason < STALE_MUST_REVALIDATE)
451 /* Does not need refresh. This is certainly cachable */
452 return 1;
453
454 if (entry->lastmod < 0)
455 /* Last modified is needed to do a refresh */
456 return 0;
457
458 if (entry->mem_obj == NULL)
459 /* no mem_obj? */
460 return 1;
461
462 if (entry->getReply() == NULL)
463 /* no reply? */
464 return 1;
465
466 if (entry->getReply()->content_length == 0)
467 /* No use refreshing (caching?) 0 byte objects */
468 return 0;
469
470 /* This seems to be refreshable. Cache it */
471 return 1;
472 }
473
474 /// whether reply is stale if it is a hit
475 static bool
476 refreshIsStaleIfHit(const int reason)
477 {
478 switch (reason) {
479 case FRESH_MIN_RULE:
480 case FRESH_LMFACTOR_RULE:
481 case FRESH_EXPIRES:
482 return false;
483 default:
484 return true;
485 }
486 }
487
488 /* refreshCheck... functions below are protocol-specific wrappers around
489 * refreshCheck() function above */
490
491 int
492 refreshCheckHTTP(const StoreEntry * entry, HttpRequest * request)
493 {
494 int reason = refreshCheck(entry, request, 0);
495 ++ refreshCounts[rcHTTP].total;
496 ++ refreshCounts[rcHTTP].status[reason];
497 request->flags.staleIfHit = refreshIsStaleIfHit(reason);
498 return (Config.onoff.offline || reason < 200) ? 0 : 1;
499 }
500
501 int
502 refreshCheckICP(const StoreEntry * entry, HttpRequest * request)
503 {
504 int reason = refreshCheck(entry, request, 30);
505 ++ refreshCounts[rcICP].total;
506 ++ refreshCounts[rcICP].status[reason];
507 return (reason < 200) ? 0 : 1;
508 }
509
510 #if USE_HTCP
511 int
512 refreshCheckHTCP(const StoreEntry * entry, HttpRequest * request)
513 {
514 int reason = refreshCheck(entry, request, 10);
515 ++ refreshCounts[rcHTCP].total;
516 ++ refreshCounts[rcHTCP].status[reason];
517 return (reason < 200) ? 0 : 1;
518 }
519
520 #endif
521
522 #if USE_CACHE_DIGESTS
523 int
524 refreshCheckDigest(const StoreEntry * entry, time_t delta)
525 {
526 int reason = refreshCheck(entry,
527 entry->mem_obj ? entry->mem_obj->request : NULL,
528 delta);
529 ++ refreshCounts[rcCDigest].total;
530 ++ refreshCounts[rcCDigest].status[reason];
531 return (reason < 200) ? 0 : 1;
532 }
533
534 #endif
535
536 time_t
537 getMaxAge(const char *url)
538 {
539 const RefreshPattern *R;
540 debugs(22, 3, "getMaxAge: '" << url << "'");
541
542 if ((R = refreshLimits(url)))
543 return R->max;
544 else
545 return REFRESH_DEFAULT_MAX;
546 }
547
548 static void
549
550 refreshCountsStats(StoreEntry * sentry, struct RefreshCounts *rc)
551 {
552 int sum = 0;
553 int tot = rc->total;
554
555 storeAppendPrintf(sentry, "\n\n%s histogram:\n", rc->proto);
556 storeAppendPrintf(sentry, "Count\t%%Total\tCategory\n");
557
558 #define refreshCountsStatsEntry(code,desc) { \
559 storeAppendPrintf(sentry, "%6d\t%6.2f\t%s\n", \
560 rc->status[code], xpercent(rc->status[code], tot), desc); \
561 sum += rc->status[code]; \
562 }
563
564 refreshCountsStatsEntry(FRESH_REQUEST_MAX_STALE_ALL,
565 "Fresh: request max-stale wildcard");
566 refreshCountsStatsEntry(FRESH_REQUEST_MAX_STALE_VALUE,
567 "Fresh: request max-stale value");
568 refreshCountsStatsEntry(FRESH_EXPIRES,
569 "Fresh: expires time not reached");
570 refreshCountsStatsEntry(FRESH_LMFACTOR_RULE,
571 "Fresh: refresh_pattern last-mod factor percentage");
572 refreshCountsStatsEntry(FRESH_MIN_RULE,
573 "Fresh: refresh_pattern min value");
574 refreshCountsStatsEntry(FRESH_OVERRIDE_EXPIRES,
575 "Fresh: refresh_pattern override expires");
576 refreshCountsStatsEntry(FRESH_OVERRIDE_LASTMOD,
577 "Fresh: refresh_pattern override lastmod");
578 refreshCountsStatsEntry(STALE_MUST_REVALIDATE,
579 "Stale: response has must-revalidate");
580 refreshCountsStatsEntry(STALE_RELOAD_INTO_IMS,
581 "Stale: changed reload into IMS");
582 refreshCountsStatsEntry(STALE_FORCED_RELOAD,
583 "Stale: request has no-cache directive");
584 refreshCountsStatsEntry(STALE_EXCEEDS_REQUEST_MAX_AGE_VALUE,
585 "Stale: age exceeds request max-age value");
586 refreshCountsStatsEntry(STALE_EXPIRES,
587 "Stale: expires time reached");
588 refreshCountsStatsEntry(STALE_MAX_RULE,
589 "Stale: refresh_pattern max age rule");
590 refreshCountsStatsEntry(STALE_LMFACTOR_RULE,
591 "Stale: refresh_pattern last-mod factor percentage");
592 refreshCountsStatsEntry(STALE_DEFAULT,
593 "Stale: by default");
594
595 tot = sum; /* paranoid: "total" line shows 100% if we forgot nothing */
596 storeAppendPrintf(sentry, "%6d\t%6.2f\tTOTAL\n",
597 rc->total, xpercent(rc->total, tot));
598 \
599 storeAppendPrintf(sentry, "\n");
600 }
601
602 static void
603 refreshStats(StoreEntry * sentry)
604 {
605 int i;
606 int total = 0;
607
608 /* get total usage count */
609
610 for (i = 0; i < rcCount; ++i)
611 total += refreshCounts[i].total;
612
613 /* protocol usage histogram */
614 storeAppendPrintf(sentry, "\nRefreshCheck calls per protocol\n\n");
615
616 storeAppendPrintf(sentry, "Protocol\t#Calls\t%%Calls\n");
617
618 for (i = 0; i < rcCount; ++i)
619 storeAppendPrintf(sentry, "%10s\t%6d\t%6.2f\n",
620 refreshCounts[i].proto,
621 refreshCounts[i].total,
622 xpercent(refreshCounts[i].total, total));
623
624 /* per protocol histograms */
625 storeAppendPrintf(sentry, "\n\nRefreshCheck histograms for various protocols\n");
626
627 for (i = 0; i < rcCount; ++i)
628 refreshCountsStats(sentry, &refreshCounts[i]);
629 }
630
631 static void
632 refreshRegisterWithCacheManager(void)
633 {
634 Mgr::RegisterAction("refresh", "Refresh Algorithm Statistics", refreshStats, 0, 1);
635 }
636
637 void
638 refreshInit(void)
639 {
640 memset(refreshCounts, 0, sizeof(refreshCounts));
641 refreshCounts[rcHTTP].proto = "HTTP";
642 refreshCounts[rcICP].proto = "ICP";
643 #if USE_HTCP
644
645 refreshCounts[rcHTCP].proto = "HTCP";
646 #endif
647
648 refreshCounts[rcStore].proto = "On Store";
649 #if USE_CACHE_DIGESTS
650
651 refreshCounts[rcCDigest].proto = "Cache Digests";
652 #endif
653
654 memset(&DefaultRefresh, '\0', sizeof(DefaultRefresh));
655 DefaultRefresh.pattern = "<none>";
656 DefaultRefresh.min = REFRESH_DEFAULT_MIN;
657 DefaultRefresh.pct = REFRESH_DEFAULT_PCT;
658 DefaultRefresh.max = REFRESH_DEFAULT_MAX;
659
660 refreshRegisterWithCacheManager();
661 }