]> git.ipfire.org Git - thirdparty/squid.git/blob - src/refresh.cc
Docs: update refresh_pattern internal documentation
[thirdparty/squid.git] / src / refresh.cc
1
2 /*
3 * DEBUG: section 22 Refresh Calculation
4 * AUTHOR: Harvest Derived
5 *
6 * SQUID Web Proxy Cache http://www.squid-cache.org/
7 * ----------------------------------------------------------
8 *
9 * Squid is the result of efforts by numerous individuals from
10 * the Internet community; see the CONTRIBUTORS file for full
11 * details. Many organizations have provided support for Squid's
12 * development; see the SPONSORS file for full details. Squid is
13 * Copyrighted (C) 2001 by the Regents of the University of
14 * California; see the COPYRIGHT file for full details. Squid
15 * incorporates software developed and/or copyrighted by other
16 * sources; see the CREDITS file for full details.
17 *
18 * This program is free software; you can redistribute it and/or modify
19 * it under the terms of the GNU General Public License as published by
20 * the Free Software Foundation; either version 2 of the License, or
21 * (at your option) any later version.
22 *
23 * This program is distributed in the hope that it will be useful,
24 * but WITHOUT ANY WARRANTY; without even the implied warranty of
25 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
26 * GNU General Public License for more details.
27 *
28 * You should have received a copy of the GNU General Public License
29 * along with this program; if not, write to the Free Software
30 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA.
31 *
32 */
33
34 #ifndef USE_POSIX_REGEX
35 #define USE_POSIX_REGEX /* put before includes; always use POSIX */
36 #endif
37
38 #include "squid.h"
39 #include "HttpHdrCc.h"
40 #include "HttpReply.h"
41 #include "HttpRequest.h"
42 #include "MemObject.h"
43 #include "mgr/Registration.h"
44 #include "RefreshPattern.h"
45 #include "SquidConfig.h"
46 #include "SquidTime.h"
47 #include "Store.h"
48 #include "URL.h"
49
/// Index into refreshCounts[]: one slot per kind of caller of refreshCheck().
typedef enum {
    rcHTTP,     ///< checks made on behalf of HTTP requests
    rcICP,      ///< checks made on behalf of ICP queries
#if USE_HTCP
    rcHTCP,     ///< checks made on behalf of HTCP queries
#endif
#if USE_CACHE_DIGESTS
    rcCDigest,  ///< checks made for cache digests
#endif
    rcStore,    ///< checks made by refreshIsCachable()
    rcCount     ///< number of slots; must remain the last enumerator
} refreshCountsEnum;
62
/// Records which rule refreshStaleness() based its verdict on.
typedef struct {
    bool expires;   ///< an explicit expiration time was used
    bool min;       ///< the refresh_pattern minimum age was used
    bool lmfactor;  ///< the last-modified factor was used
    bool max;       ///< the refresh_pattern maximum age was used
} stale_flags;
69
/*
 * Reason codes returned by refreshCheck(), numbered in the manner of
 * HTTP/FTP status codes: all "fresh" codes lie in the range 100-199
 * and all "stale" codes in 200-299. These codes may end up in logs,
 * so every value is spelled out explicitly to keep it stable over time.
 */
enum {
    FRESH_REQUEST_MAX_STALE_ALL = 100,
    FRESH_REQUEST_MAX_STALE_VALUE = 101,
    FRESH_EXPIRES = 102,
    FRESH_LMFACTOR_RULE = 103,
    FRESH_MIN_RULE = 104,
    FRESH_OVERRIDE_EXPIRES = 105,
    FRESH_OVERRIDE_LASTMOD = 106,

    STALE_MUST_REVALIDATE = 200,
    STALE_RELOAD_INTO_IMS = 201,
    STALE_FORCED_RELOAD = 202,
    STALE_EXCEEDS_REQUEST_MAX_AGE_VALUE = 203,
    STALE_EXPIRES = 204,
    STALE_MAX_RULE = 205,
    STALE_LMFACTOR_RULE = 206,
    STALE_MAX_STALE = 207,
    STALE_DEFAULT = 299
};
94
95 static struct RefreshCounts {
96 const char *proto;
97 int total;
98 int status[STALE_DEFAULT + 1];
99 }
100
101 refreshCounts[rcCount];
102
/*
 * Defaults used when no refresh_pattern rule applies:
 *      MIN     NONE
 *      PCT     20%
 *      MAX     3 days (259200 seconds)
 *
 * The cast expressions are fully parenthesized so that the macros
 * behave as a single operand wherever they are expanded
 * (e.g. as the operand of sizeof).
 */
#define REFRESH_DEFAULT_MIN     ((time_t)0)
#define REFRESH_DEFAULT_PCT     0.20
#define REFRESH_DEFAULT_MAX     ((time_t)259200)
112
113 static const RefreshPattern *refreshUncompiledPattern(const char *);
114 static OBJH refreshStats;
115 static int refreshStaleness(const StoreEntry * entry, time_t check_time, const time_t age, const RefreshPattern * R, stale_flags * sf);
116
117 static RefreshPattern DefaultRefresh;
118
119 /** Locate the first refresh_pattern rule that matches the given URL by regex.
120 *
121 * \note regexec() returns 0 if matched, and REG_NOMATCH otherwise
122 *
123 * \return A pointer to the refresh_pattern parameters to use, or NULL if there is no match.
124 */
125 const RefreshPattern *
126 refreshLimits(const char *url)
127 {
128 const RefreshPattern *R;
129
130 for (R = Config.Refresh; R; R = R->next) {
131 if (!regexec(&(R->compiled_pattern), url, 0, 0, 0))
132 return R;
133 }
134
135 return NULL;
136 }
137
138 /** Locate the first refresh_pattern rule that has the given uncompiled regex.
139 *
140 * \note There is only one reference to this function, below. It always passes "." as the pattern.
141 * This function is only ever called if there is no URI. Because a regex match is impossible, Squid
142 * forces the "." rule to apply (if it exists)
143 *
144 * \return A pointer to the refresh_pattern parameters to use, or NULL if there is no match.
145 */
146 static const RefreshPattern *
147 refreshUncompiledPattern(const char *pat)
148 {
149 const RefreshPattern *R;
150
151 for (R = Config.Refresh; R; R = R->next) {
152 if (0 == strcmp(R->pattern, pat))
153 return R;
154 }
155
156 return NULL;
157 }
158
159 /**
160 * Calculate how stale the response is (or will be at the check_time).
161 * Staleness calculation is based on the following: (1) response
162 * expiration time, (2) age greater than configured maximum, (3)
163 * last-modified factor, and (4) age less than configured minimum.
164 *
165 * \retval -1 If the response is fresh.
166 * \retval >0 Otherwise return it's staleness.
167 * \retval 0 NOTE return value of 0 means the response is stale.
168 *
169 * The 'stale_flags' structure is used to tell the calling function
170 * _why_ this response is fresh or stale. Its used, for example,
171 * when the admin wants to override expiration and last-modified
172 * times.
173 */
174 static int
175 refreshStaleness(const StoreEntry * entry, time_t check_time, const time_t age, const RefreshPattern * R, stale_flags * sf)
176 {
177 /** \par
178 * Check for an explicit expiration time (Expires: header).
179 */
180 if (entry->expires > -1) {
181 sf->expires = true;
182
183 if (entry->expires > check_time) {
184 debugs(22, 3, "FRESH: expires " << entry->expires <<
185 " >= check_time " << check_time << " ");
186
187 return -1;
188 } else {
189 debugs(22, 3, "STALE: expires " << entry->expires <<
190 " < check_time " << check_time << " ");
191
192 return (check_time - entry->expires);
193 }
194 }
195
196 /** \par
197 * Use local heuristics to determine staleness. Start with the
198 * max age from the refresh_pattern rule.
199 */
200 if (age > R->max) {
201 debugs(22, 3, "STALE: age " << age << " > max " << R->max << " ");
202 sf->max = true;
203 return (age - R->max);
204 }
205
206 /** \par
207 * Try the last-modified factor algorithm: refresh_pattern n% percentage of Last-Modified: age.
208 */
209 if (entry->lastmod > -1 && entry->timestamp > entry->lastmod) {
210 /*
211 * stale_age is the Age of the response when it became/becomes
212 * stale according to the last-modified factor algorithm.
213 */
214 time_t stale_age = static_cast<time_t>((entry->timestamp - entry->lastmod) * R->pct);
215 sf->lmfactor = true;
216
217 if (age >= stale_age) {
218 debugs(22, 3, "STALE: age " << age << " > stale_age " << stale_age);
219 return (age - stale_age);
220 } else {
221 debugs(22, 3, "FRESH: age " << age << " <= stale_age " << stale_age);
222 return -1;
223 }
224 }
225
226 /** \par
227 * Finally, if all else fails; staleness is determined by the refresh_pattern
228 * configured minimum age.
229 */
230 if (age < R->min) {
231 debugs(22, 3, "FRESH: age " << age << " < min " << R->min);
232 sf->min = true;
233 return -1;
234 }
235
236 debugs(22, 3, "STALE: age " << age << " >= min " << R->min);
237 return (age - R->min);
238 }
239
240 /**
241 * \retval 1 if the entry must be revalidated within delta seconds
242 * \retval 0 otherwise
243 *
244 * note: request maybe null (e.g. for cache digests build)
245 */
246 static int
247 refreshCheck(const StoreEntry * entry, HttpRequest * request, time_t delta)
248 {
249 const RefreshPattern *R;
250 const char *uri = NULL;
251 time_t age = 0;
252 time_t check_time = squid_curtime + delta;
253 int staleness;
254 stale_flags sf;
255
256 if (entry->mem_obj)
257 uri = entry->mem_obj->storeId();
258 else if (request)
259 uri = urlCanonical(request);
260
261 debugs(22, 3, "refreshCheck: '" << (uri ? uri : "<none>") << "'");
262
263 if (check_time > entry->timestamp)
264 age = check_time - entry->timestamp;
265
266 // FIXME: what to do when age < 0 or counter overflow?
267 assert(age >= 0);
268
269 R = uri ? refreshLimits(uri) : refreshUncompiledPattern(".");
270
271 if (NULL == R)
272 R = &DefaultRefresh;
273
274 debugs(22, 3, "refreshCheck: Matched '" << R->pattern << " " <<
275 (int) R->min << " " << (int) (100.0 * R->pct) << "%% " <<
276 (int) R->max << "'");
277
278 debugs(22, 3, "\tage:\t" << age);
279
280 debugs(22, 3, "\tcheck_time:\t" << mkrfc1123(check_time));
281
282 debugs(22, 3, "\tentry->timestamp:\t" << mkrfc1123(entry->timestamp));
283
284 if (request && !request->flags.ignoreCc) {
285 const HttpHdrCc *const cc = request->cache_control;
286 if (cc && cc->hasMinFresh()) {
287 const int32_t minFresh=cc->minFresh();
288 debugs(22, 3, "\tage + min-fresh:\t" << age << " + " <<
289 minFresh << " = " << age + minFresh);
290 debugs(22, 3, "\tcheck_time + min-fresh:\t" << check_time << " + "
291 << minFresh << " = " <<
292 mkrfc1123(check_time + minFresh));
293 age += minFresh;
294 check_time += minFresh;
295 }
296 }
297
298 memset(&sf, '\0', sizeof(sf));
299
300 staleness = refreshStaleness(entry, check_time, age, R, &sf);
301
302 debugs(22, 3, "Staleness = " << staleness);
303
304 // stale-if-error requires any failure be passed thru when its period is over.
305 if (request && entry->mem_obj && entry->mem_obj->getReply() && entry->mem_obj->getReply()->cache_control &&
306 entry->mem_obj->getReply()->cache_control->hasStaleIfError() &&
307 entry->mem_obj->getReply()->cache_control->staleIfError() < staleness) {
308
309 debugs(22, 3, "refreshCheck: stale-if-error period expired.");
310 request->flags.failOnValidationError = true;
311 }
312
313 if (EBIT_TEST(entry->flags, ENTRY_REVALIDATE) && staleness > -1
314 #if USE_HTTP_VIOLATIONS
315 && !R->flags.ignore_must_revalidate
316 #endif
317 ) {
318 debugs(22, 3, "refreshCheck: YES: Must revalidate stale response");
319 if (request)
320 request->flags.failOnValidationError = true;
321 return STALE_MUST_REVALIDATE;
322 }
323
324 /* request-specific checks */
325 if (request && !request->flags.ignoreCc) {
326 HttpHdrCc *cc = request->cache_control;
327
328 if (request->flags.ims && (R->flags.refresh_ims || Config.onoff.refresh_all_ims)) {
329 /* The clients no-cache header is changed into a IMS query */
330 debugs(22, 3, "refreshCheck: YES: refresh-ims");
331 return STALE_FORCED_RELOAD;
332 }
333
334 #if USE_HTTP_VIOLATIONS
335
336 if (!request->flags.noCacheHack()) {
337 (void) 0;
338 } else if (R->flags.ignore_reload) {
339 /* The clients no-cache header is ignored */
340 debugs(22, 3, "refreshCheck: MAYBE: ignore-reload");
341 } else if (R->flags.reload_into_ims || Config.onoff.reload_into_ims) {
342 /* The clients no-cache header is changed into a IMS query */
343 debugs(22, 3, "refreshCheck: YES: reload-into-ims");
344 return STALE_RELOAD_INTO_IMS;
345 } else {
346 /* The clients no-cache header is not overridden on this request */
347 debugs(22, 3, "refreshCheck: YES: client reload");
348 request->flags.noCache = true;
349 return STALE_FORCED_RELOAD;
350 }
351
352 #endif
353 if (NULL != cc) {
354 if (cc->hasMaxAge()) {
355 #if USE_HTTP_VIOLATIONS
356 if (R->flags.ignore_reload && cc->maxAge() == 0) {
357 debugs(22, 3, "refreshCheck: MAYBE: client-max-age = 0 and ignore-reload");
358 } else
359 #endif
360 {
361 if (cc->maxAge() == 0) {
362 debugs(22, 3, "refreshCheck: YES: client-max-age = 0");
363 return STALE_EXCEEDS_REQUEST_MAX_AGE_VALUE;
364 }
365
366 if (age > cc->maxAge()) {
367 debugs(22, 3, "refreshCheck: YES: age > client-max-age");
368 return STALE_EXCEEDS_REQUEST_MAX_AGE_VALUE;
369 }
370 }
371 }
372
373 if (cc->hasMaxStale() && staleness > -1) {
374 if (cc->maxStale()==HttpHdrCc::MAX_STALE_ANY) {
375 /* max-stale directive without a value */
376 debugs(22, 3, "refreshCheck: NO: max-stale wildcard");
377 return FRESH_REQUEST_MAX_STALE_ALL;
378 } else if (staleness < cc->maxStale()) {
379 debugs(22, 3, "refreshCheck: NO: staleness < max-stale");
380 return FRESH_REQUEST_MAX_STALE_VALUE;
381 }
382 }
383 }
384 }
385
386 if (-1 == staleness) {
387 debugs(22, 3, "refreshCheck: object isn't stale..");
388 if (sf.expires) {
389 debugs(22, 3, "refreshCheck: returning FRESH_EXPIRES");
390 return FRESH_EXPIRES;
391 }
392
393 assert(!sf.max);
394
395 if (sf.lmfactor) {
396 debugs(22, 3, "refreshCheck: returning FRESH_LMFACTOR_RULE");
397 return FRESH_LMFACTOR_RULE;
398 }
399
400 assert(sf.min);
401
402 debugs(22, 3, "refreshCheck: returning FRESH_MIN_RULE");
403 return FRESH_MIN_RULE;
404 }
405
406 /*
407 * At this point the response is stale, unless one of
408 * the override options kicks in.
409 * NOTE: max-stale config blocks the overrides.
410 */
411 int max_stale = (R->max_stale >= 0 ? R->max_stale : Config.maxStale);
412 if ( max_stale >= 0 && staleness > max_stale) {
413 debugs(22, 3, "refreshCheck: YES: max-stale limit");
414 if (request)
415 request->flags.failOnValidationError = true;
416 return STALE_MAX_STALE;
417 }
418
419 if (sf.expires) {
420 #if USE_HTTP_VIOLATIONS
421
422 if (R->flags.override_expire && age < R->min) {
423 debugs(22, 3, "refreshCheck: NO: age < min && override-expire");
424 return FRESH_OVERRIDE_EXPIRES;
425 }
426
427 #endif
428 return STALE_EXPIRES;
429 }
430
431 if (sf.max)
432 return STALE_MAX_RULE;
433
434 if (sf.lmfactor) {
435 #if USE_HTTP_VIOLATIONS
436
437 if (R->flags.override_lastmod && age < R->min) {
438 debugs(22, 3, "refreshCheck: NO: age < min && override-lastmod");
439 return FRESH_OVERRIDE_LASTMOD;
440 }
441
442 #endif
443 return STALE_LMFACTOR_RULE;
444 }
445
446 debugs(22, 3, "refreshCheck: returning STALE_DEFAULT");
447 return STALE_DEFAULT;
448 }
449
450 int
451 refreshIsCachable(const StoreEntry * entry)
452 {
453 /*
454 * Don't look at the request to avoid no-cache and other nuisances.
455 * the object should have a mem_obj so the URL will be found there.
456 * minimum_expiry_time seconds delta (defaults to 60 seconds), to
457 * avoid objects which expire almost immediately, and which can't
458 * be refreshed.
459 */
460 int reason = refreshCheck(entry, NULL, Config.minimum_expiry_time);
461 ++ refreshCounts[rcStore].total;
462 ++ refreshCounts[rcStore].status[reason];
463
464 if (reason < STALE_MUST_REVALIDATE)
465 /* Does not need refresh. This is certainly cachable */
466 return 1;
467
468 if (entry->lastmod < 0)
469 /* Last modified is needed to do a refresh */
470 return 0;
471
472 if (entry->mem_obj == NULL)
473 /* no mem_obj? */
474 return 1;
475
476 if (entry->getReply() == NULL)
477 /* no reply? */
478 return 1;
479
480 if (entry->getReply()->content_length == 0)
481 /* No use refreshing (caching?) 0 byte objects */
482 return 0;
483
484 /* This seems to be refreshable. Cache it */
485 return 1;
486 }
487
488 /// whether reply is stale if it is a hit
489 static bool
490 refreshIsStaleIfHit(const int reason)
491 {
492 switch (reason) {
493 case FRESH_MIN_RULE:
494 case FRESH_LMFACTOR_RULE:
495 case FRESH_EXPIRES:
496 return false;
497 default:
498 return true;
499 }
500 }
501
502 /* refreshCheck... functions below are protocol-specific wrappers around
503 * refreshCheck() function above */
504
505 int
506 refreshCheckHTTP(const StoreEntry * entry, HttpRequest * request)
507 {
508 int reason = refreshCheck(entry, request, 0);
509 ++ refreshCounts[rcHTTP].total;
510 ++ refreshCounts[rcHTTP].status[reason];
511 request->flags.staleIfHit = refreshIsStaleIfHit(reason);
512 return (Config.onoff.offline || reason < 200) ? 0 : 1;
513 }
514
515 int
516 refreshCheckICP(const StoreEntry * entry, HttpRequest * request)
517 {
518 int reason = refreshCheck(entry, request, 30);
519 ++ refreshCounts[rcICP].total;
520 ++ refreshCounts[rcICP].status[reason];
521 return (reason < 200) ? 0 : 1;
522 }
523
#if USE_HTCP
/**
 * Freshness check for an HTCP query, evaluated 10 seconds ahead of the
 * current time. Updates the HTCP counters.
 *
 * \retval 0 the entry is fresh
 * \retval 1 the entry is stale
 */
int
refreshCheckHTCP(const StoreEntry * entry, HttpRequest * request)
{
    const int reason = refreshCheck(entry, request, 10);

    ++ refreshCounts[rcHTCP].total;
    ++ refreshCounts[rcHTCP].status[reason];

    /* STALE_MUST_REVALIDATE is the lowest stale reason code */
    if (reason < STALE_MUST_REVALIDATE)
        return 0;

    return 1;
}

#endif
535
#if USE_CACHE_DIGESTS
/**
 * Freshness check for cache digests, evaluated delta seconds ahead of
 * the current time. The request stored with the entry's mem_obj is
 * used when there is a mem_obj; otherwise no request is supplied.
 * Updates the cache digest counters.
 *
 * \retval 0 the entry is fresh
 * \retval 1 the entry is stale
 */
int
refreshCheckDigest(const StoreEntry * entry, time_t delta)
{
    const int reason = refreshCheck(entry,
                                    entry->mem_obj ? entry->mem_obj->request : NULL,
                                    delta);

    ++ refreshCounts[rcCDigest].total;
    ++ refreshCounts[rcCDigest].status[reason];

    /* STALE_MUST_REVALIDATE is the lowest stale reason code */
    if (reason < STALE_MUST_REVALIDATE)
        return 0;

    return 1;
}

#endif
549
550 time_t
551 getMaxAge(const char *url)
552 {
553 const RefreshPattern *R;
554 debugs(22, 3, "getMaxAge: '" << url << "'");
555
556 if ((R = refreshLimits(url)))
557 return R->max;
558 else
559 return REFRESH_DEFAULT_MAX;
560 }
561
562 static void
563
564 refreshCountsStats(StoreEntry * sentry, struct RefreshCounts *rc)
565 {
566 int sum = 0;
567 int tot = rc->total;
568
569 storeAppendPrintf(sentry, "\n\n%s histogram:\n", rc->proto);
570 storeAppendPrintf(sentry, "Count\t%%Total\tCategory\n");
571
572 #define refreshCountsStatsEntry(code,desc) { \
573 storeAppendPrintf(sentry, "%6d\t%6.2f\t%s\n", \
574 rc->status[code], xpercent(rc->status[code], tot), desc); \
575 sum += rc->status[code]; \
576 }
577
578 refreshCountsStatsEntry(FRESH_REQUEST_MAX_STALE_ALL,
579 "Fresh: request max-stale wildcard");
580 refreshCountsStatsEntry(FRESH_REQUEST_MAX_STALE_VALUE,
581 "Fresh: request max-stale value");
582 refreshCountsStatsEntry(FRESH_EXPIRES,
583 "Fresh: expires time not reached");
584 refreshCountsStatsEntry(FRESH_LMFACTOR_RULE,
585 "Fresh: refresh_pattern last-mod factor percentage");
586 refreshCountsStatsEntry(FRESH_MIN_RULE,
587 "Fresh: refresh_pattern min value");
588 refreshCountsStatsEntry(FRESH_OVERRIDE_EXPIRES,
589 "Fresh: refresh_pattern override expires");
590 refreshCountsStatsEntry(FRESH_OVERRIDE_LASTMOD,
591 "Fresh: refresh_pattern override lastmod");
592 refreshCountsStatsEntry(STALE_MUST_REVALIDATE,
593 "Stale: response has must-revalidate");
594 refreshCountsStatsEntry(STALE_RELOAD_INTO_IMS,
595 "Stale: changed reload into IMS");
596 refreshCountsStatsEntry(STALE_FORCED_RELOAD,
597 "Stale: request has no-cache directive");
598 refreshCountsStatsEntry(STALE_EXCEEDS_REQUEST_MAX_AGE_VALUE,
599 "Stale: age exceeds request max-age value");
600 refreshCountsStatsEntry(STALE_EXPIRES,
601 "Stale: expires time reached");
602 refreshCountsStatsEntry(STALE_MAX_RULE,
603 "Stale: refresh_pattern max age rule");
604 refreshCountsStatsEntry(STALE_LMFACTOR_RULE,
605 "Stale: refresh_pattern last-mod factor percentage");
606 refreshCountsStatsEntry(STALE_DEFAULT,
607 "Stale: by default");
608
609 tot = sum; /* paranoid: "total" line shows 100% if we forgot nothing */
610 storeAppendPrintf(sentry, "%6d\t%6.2f\tTOTAL\n",
611 rc->total, xpercent(rc->total, tot));
612 \
613 storeAppendPrintf(sentry, "\n");
614 }
615
616 static void
617 refreshStats(StoreEntry * sentry)
618 {
619 int i;
620 int total = 0;
621
622 /* get total usage count */
623
624 for (i = 0; i < rcCount; ++i)
625 total += refreshCounts[i].total;
626
627 /* protocol usage histogram */
628 storeAppendPrintf(sentry, "\nRefreshCheck calls per protocol\n\n");
629
630 storeAppendPrintf(sentry, "Protocol\t#Calls\t%%Calls\n");
631
632 for (i = 0; i < rcCount; ++i)
633 storeAppendPrintf(sentry, "%10s\t%6d\t%6.2f\n",
634 refreshCounts[i].proto,
635 refreshCounts[i].total,
636 xpercent(refreshCounts[i].total, total));
637
638 /* per protocol histograms */
639 storeAppendPrintf(sentry, "\n\nRefreshCheck histograms for various protocols\n");
640
641 for (i = 0; i < rcCount; ++i)
642 refreshCountsStats(sentry, &refreshCounts[i]);
643 }
644
645 static void
646 refreshRegisterWithCacheManager(void)
647 {
648 Mgr::RegisterAction("refresh", "Refresh Algorithm Statistics", refreshStats, 0, 1);
649 }
650
651 void
652 refreshInit(void)
653 {
654 memset(refreshCounts, 0, sizeof(refreshCounts));
655 refreshCounts[rcHTTP].proto = "HTTP";
656 refreshCounts[rcICP].proto = "ICP";
657 #if USE_HTCP
658
659 refreshCounts[rcHTCP].proto = "HTCP";
660 #endif
661
662 refreshCounts[rcStore].proto = "On Store";
663 #if USE_CACHE_DIGESTS
664
665 refreshCounts[rcCDigest].proto = "Cache Digests";
666 #endif
667
668 memset(&DefaultRefresh, '\0', sizeof(DefaultRefresh));
669 DefaultRefresh.pattern = "<none>";
670 DefaultRefresh.min = REFRESH_DEFAULT_MIN;
671 DefaultRefresh.pct = REFRESH_DEFAULT_PCT;
672 DefaultRefresh.max = REFRESH_DEFAULT_MAX;
673
674 refreshRegisterWithCacheManager();
675 }