]> git.ipfire.org Git - thirdparty/squid.git/blob - src/refresh.cc
Merge from trunk
[thirdparty/squid.git] / src / refresh.cc
1
2 /*
3 * $Id: refresh.cc,v 1.76 2007/05/24 01:45:03 hno Exp $
4 *
5 * DEBUG: section 22 Refresh Calculation
6 * AUTHOR: Harvest Derived
7 *
8 * SQUID Web Proxy Cache http://www.squid-cache.org/
9 * ----------------------------------------------------------
10 *
11 * Squid is the result of efforts by numerous individuals from
12 * the Internet community; see the CONTRIBUTORS file for full
13 * details. Many organizations have provided support for Squid's
14 * development; see the SPONSORS file for full details. Squid is
15 * Copyrighted (C) 2001 by the Regents of the University of
16 * California; see the COPYRIGHT file for full details. Squid
17 * incorporates software developed and/or copyrighted by other
18 * sources; see the CREDITS file for full details.
19 *
20 * This program is free software; you can redistribute it and/or modify
21 * it under the terms of the GNU General Public License as published by
22 * the Free Software Foundation; either version 2 of the License, or
23 * (at your option) any later version.
24 *
25 * This program is distributed in the hope that it will be useful,
26 * but WITHOUT ANY WARRANTY; without even the implied warranty of
27 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
28 * GNU General Public License for more details.
29 *
30 * You should have received a copy of the GNU General Public License
31 * along with this program; if not, write to the Free Software
32 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA.
33 *
34 */
35
36 #ifndef USE_POSIX_REGEX
37 #define USE_POSIX_REGEX /* put before includes; always use POSIX */
38 #endif
39
40 #include "squid.h"
41 #include "CacheManager.h"
42 #include "Store.h"
43 #include "MemObject.h"
44 #include "HttpRequest.h"
45 #include "HttpReply.h"
46 #include "SquidTime.h"
47
/*
 * Index into refreshCounts[]: one slot per caller category of
 * refreshCheck(). rcCount is not a category; it is the number of slots.
 */
typedef enum {
    rcHTTP = 0,     /* refreshCheckHTTP() */
    rcICP,          /* refreshCheckICP() */
#if USE_HTCP
    rcHTCP,         /* refreshCheckHTCP() */
#endif
#if USE_CACHE_DIGESTS
    rcCDigest,      /* refreshCheckDigest() */
#endif
    rcStore,        /* refreshIsCachable() */
    rcCount         /* must stay last */
} refreshCountsEnum;
60
/*
 * Records which rule refreshStaleness() used to reach its verdict, so
 * the caller can pick the matching FRESH_xxx / STALE_xxx reason code.
 */
typedef struct
{
    bool expires;   /* the entry carried an explicit expiration time */
    bool min;       /* fresh because age is below the rule's minimum */
    bool lmfactor;  /* decided by the last-modified factor algorithm */
    bool max;       /* stale because age is above the rule's maximum */
} stale_flags;
68
69 /*
70 * This enumerated list assigns specific values, ala HTTP/FTP status
71 * codes. All Fresh codes are in the range 100-199 and all stale
72 * codes are 200-299. We might want to use these codes in logging,
73 * so best to keep them consistent over time.
74 */
/*
 * Reason codes returned by refreshCheck(). Every value is spelled out
 * so that inserting a new member can never renumber an existing code.
 */
enum {
    /* fresh: 100-199 */
    FRESH_REQUEST_MAX_STALE_ALL = 100,
    FRESH_REQUEST_MAX_STALE_VALUE = 101,
    FRESH_EXPIRES = 102,
    FRESH_LMFACTOR_RULE = 103,
    FRESH_MIN_RULE = 104,
    FRESH_OVERRIDE_EXPIRES = 105,
    FRESH_OVERRIDE_LASTMOD = 106,

    /* stale: 200-299 */
    STALE_MUST_REVALIDATE = 200,
    STALE_RELOAD_INTO_IMS = 201,
    STALE_FORCED_RELOAD = 202,
    STALE_EXCEEDS_REQUEST_MAX_AGE_VALUE = 203,
    STALE_EXPIRES = 204,
    STALE_MAX_RULE = 205,
    STALE_LMFACTOR_RULE = 206,
    STALE_DEFAULT = 299
};
92
93 static struct RefreshCounts
94 {
95 const char *proto;
96 int total;
97 int status[STALE_DEFAULT + 1];
98 }
99
100 refreshCounts[rcCount];
101
102 /*
103 * Defaults:
104 * MIN NONE
105 * PCT 20%
106 * MAX 3 days
107 */
108 #define REFRESH_DEFAULT_MIN (time_t)0
109 #define REFRESH_DEFAULT_PCT 0.20
110 #define REFRESH_DEFAULT_MAX (time_t)259200
111
112 static const refresh_t *refreshUncompiledPattern(const char *);
113 static OBJH refreshStats;
114 static int refreshStaleness(const StoreEntry *, time_t, time_t, const refresh_t *, stale_flags *);
115
116 static refresh_t DefaultRefresh;
117
118 const refresh_t *
119 refreshLimits(const char *url)
120 {
121 const refresh_t *R;
122
123 for (R = Config.Refresh; R; R = R->next) {
124 if (!regexec(&(R->compiled_pattern), url, 0, 0, 0))
125 return R;
126 }
127
128 return NULL;
129 }
130
131 static const refresh_t *
132 refreshUncompiledPattern(const char *pat)
133 {
134 const refresh_t *R;
135
136 for (R = Config.Refresh; R; R = R->next) {
137 if (0 == strcmp(R->pattern, pat))
138 return R;
139 }
140
141 return NULL;
142 }
143
144 /*
145 * Calculate how stale the response is (or will be at the check_time).
146 * Staleness calculation is based on the following: (1) response
147 * expiration time, (2) age greater than configured maximum, (3)
148 * last-modified factor, and (4) age less than configured minimum.
149 *
150 * If the response is fresh, return -1. Otherwise return its
151 * staleness. NOTE return value of 0 means the response is stale.
152 *
153 * The 'stale_flags' structure is used to tell the calling function
154 * _why_ this response is fresh or stale. Its used, for example,
155 * when the admin wants to override expiration and last-modified
156 * times.
157 */
158 static int
159 refreshStaleness(const StoreEntry * entry, time_t check_time, time_t age, const refresh_t * R, stale_flags * sf)
160 {
161 /*
162 * Check for an explicit expiration time.
163 */
164
165 if (entry->expires > -1) {
166 sf->expires = true;
167
168 if (entry->expires > check_time) {
169 debugs(22, 3, "FRESH: expires " << entry->expires <<
170 " >= check_time " << check_time << " ");
171
172 return -1;
173 } else {
174 debugs(22, 3, "STALE: expires " << entry->expires <<
175 " < check_time " << check_time << " ");
176
177 return (check_time - entry->expires);
178 }
179 }
180
181 assert(age >= 0);
182 /*
183 * Use local heuristics to determine staleness. Start with the
184 * max age from the refresh_pattern rule.
185 */
186
187 if (age > R->max) {
188 debugs(22, 3, "STALE: age " << age << " > max " << R->max << " ");
189 sf->max = true;
190 return (age - R->max);
191 }
192
193 /*
194 * Try the last-modified factor algorithm.
195 */
196 if (entry->lastmod > -1 && entry->timestamp > entry->lastmod) {
197 /*
198 * stale_age is the Age of the response when it became/becomes
199 * stale according to the last-modified factor algorithm.
200 */
201 time_t stale_age = static_cast<time_t>((entry->timestamp - entry->lastmod) * R->pct);
202 sf->lmfactor = true;
203
204 if (age >= stale_age) {
205 debugs(22, 3, "STALE: age " << age << " > stale_age " << stale_age);
206 return (age - stale_age);
207 } else {
208 debugs(22, 3, "FRESH: age " << age << " <= stale_age " << stale_age);
209 return -1;
210 }
211 }
212
213 /*
214 * If we are here, staleness is determined by the refresh_pattern
215 * configured minimum age.
216 */
217 if (age < R->min) {
218 debugs(22, 3, "FRESH: age " << age << " < min " << R->min);
219 sf->min = true;
220 return -1;
221 }
222
223 debugs(22, 3, "STALE: age " << age << " >= min " << R->min);
224 return (age - R->min);
225 }
226
227 /* return 1 if the entry must be revalidated within delta seconds
228 * 0 otherwise
229 *
230 * note: request maybe null (e.g. for cache digests build)
231 */
232 static int
233 refreshCheck(const StoreEntry * entry, HttpRequest * request, time_t delta)
234 {
235 const refresh_t *R;
236 const char *uri = NULL;
237 time_t age = 0;
238 time_t check_time = squid_curtime + delta;
239 int staleness;
240 stale_flags sf;
241
242 if (entry->mem_obj)
243 uri = entry->mem_obj->url;
244 else if (request)
245 uri = urlCanonical(request);
246
247 debugs(22, 3, "refreshCheck: '" << (uri ? uri : "<none>") << "'");
248
249 if (check_time > entry->timestamp)
250 age = check_time - entry->timestamp;
251
252 R = uri ? refreshLimits(uri) : refreshUncompiledPattern(".");
253
254 if (NULL == R)
255 R = &DefaultRefresh;
256
257 memset(&sf, '\0', sizeof(sf));
258
259 staleness = refreshStaleness(entry, check_time, age, R, &sf);
260
261 debugs(22, 3, "Staleness = " << staleness);
262
263 debugs(22, 3, "refreshCheck: Matched '" << R->pattern << " " <<
264 (int) R->min << " " << (int) (100.0 * R->pct) << "%% " <<
265 (int) R->max << "'");
266
267
268 debugs(22, 3, "refreshCheck: age = " << age);
269
270 debugs(22, 3, "\tcheck_time:\t" << mkrfc1123(check_time));
271
272 debugs(22, 3, "\tentry->timestamp:\t" << mkrfc1123(entry->timestamp));
273
274 if (EBIT_TEST(entry->flags, ENTRY_REVALIDATE) && staleness > -1) {
275 debugs(22, 3, "refreshCheck: YES: Must revalidate stale response");
276 return STALE_MUST_REVALIDATE;
277 }
278
279 /* request-specific checks */
280 if (request) {
281 HttpHdrCc *cc = request->cache_control;
282
283 if (request->flags.ims && (R->flags.refresh_ims || Config.onoff.refresh_all_ims)) {
284 /* The clients no-cache header is changed into a IMS query */
285 debugs(22, 3, "refreshCheck: YES: refresh-ims");
286 return STALE_FORCED_RELOAD;
287 }
288
289 #if HTTP_VIOLATIONS
290
291 if (!request->flags.nocache_hack) {
292 (void) 0;
293 } else if (R->flags.ignore_reload) {
294 /* The clients no-cache header is ignored */
295 debugs(22, 3, "refreshCheck: MAYBE: ignore-reload");
296 } else if (R->flags.reload_into_ims || Config.onoff.reload_into_ims) {
297 /* The clients no-cache header is changed into a IMS query */
298 debugs(22, 3, "refreshCheck: YES: reload-into-ims");
299 return STALE_RELOAD_INTO_IMS;
300 } else {
301 /* The clients no-cache header is not overridden on this request */
302 debugs(22, 3, "refreshCheck: YES: client reload");
303 request->flags.nocache = 1;
304 return STALE_FORCED_RELOAD;
305 }
306
307 #endif
308 if (NULL != cc) {
309 if (cc->max_age > -1) {
310 #if HTTP_VIOLATIONS
311 if (R->flags.ignore_reload && cc->max_age == 0) {} else
312 #endif
313 {
314 #if 0
315
316 if (cc->max_age == 0) {
317 debugs(22, 3, "refreshCheck: YES: client-max-age = 0");
318 return STALE_EXCEEDS_REQUEST_MAX_AGE_VALUE;
319 }
320
321 #endif
322 if (age > cc->max_age) {
323 debugs(22, 3, "refreshCheck: YES: age > client-max-age");
324 return STALE_EXCEEDS_REQUEST_MAX_AGE_VALUE;
325 }
326 }
327 }
328
329 if (EBIT_TEST(cc->mask, CC_MAX_STALE) && staleness > -1) {
330 if (cc->max_stale < 0) {
331 /* max-stale directive without a value */
332 debugs(22, 3, "refreshCheck: NO: max-stale wildcard");
333 return FRESH_REQUEST_MAX_STALE_ALL;
334 } else if (staleness < cc->max_stale) {
335 debugs(22, 3, "refreshCheck: NO: staleness < max-stale");
336 return FRESH_REQUEST_MAX_STALE_VALUE;
337 }
338 }
339 }
340 }
341
342 if (-1 == staleness) {
343 debugs(22, 3, "refreshCheck: object isn't stale..");
344 if (sf.expires) {
345 debugs(22, 3, "refreshCheck: returning FRESH_EXPIRES");
346 return FRESH_EXPIRES;
347 }
348
349 assert(!sf.max);
350
351 if (sf.lmfactor) {
352 debugs(22, 3, "refreshCheck: returning FRESH_LMFACTOR_RULE");
353 return FRESH_LMFACTOR_RULE;
354 }
355
356 assert(sf.min);
357
358 debugs(22, 3, "refreshCheck: returning FRESH_MIN_RULE");
359 return FRESH_MIN_RULE;
360 }
361
362 /*
363 * At this point the response is stale, unless one of
364 * the override options kicks in.
365 */
366 if (sf.expires) {
367 #if HTTP_VIOLATIONS
368
369 if (R->flags.override_expire && age < R->min) {
370 debugs(22, 3, "refreshCheck: NO: age < min && override-expire");
371 return FRESH_OVERRIDE_EXPIRES;
372 }
373
374 #endif
375 return STALE_EXPIRES;
376 }
377
378 if (sf.max)
379 return STALE_MAX_RULE;
380
381 if (sf.lmfactor) {
382 #if HTTP_VIOLATIONS
383
384 if (R->flags.override_lastmod && age < R->min) {
385 debugs(22, 3, "refreshCheck: NO: age < min && override-lastmod");
386 return FRESH_OVERRIDE_LASTMOD;
387 }
388
389 #endif
390 return STALE_LMFACTOR_RULE;
391 }
392
393 debugs(22, 3, "refreshCheck: returning STALE_DEFAULT");
394 return STALE_DEFAULT;
395 }
396
397 int
398 refreshIsCachable(const StoreEntry * entry)
399 {
400 /*
401 * Don't look at the request to avoid no-cache and other nuisances.
402 * the object should have a mem_obj so the URL will be found there.
403 * minimum_expiry_time seconds delta (defaults to 60 seconds), to
404 * avoid objects which expire almost immediately, and which can't
405 * be refreshed.
406 */
407 int reason = refreshCheck(entry, NULL, Config.minimum_expiry_time);
408 refreshCounts[rcStore].total++;
409 refreshCounts[rcStore].status[reason]++;
410
411 if (reason < STALE_MUST_REVALIDATE)
412 /* Does not need refresh. This is certainly cachable */
413 return 1;
414
415 if (entry->lastmod < 0)
416 /* Last modified is needed to do a refresh */
417 return 0;
418
419 if (entry->mem_obj == NULL)
420 /* no mem_obj? */
421 return 1;
422
423 if (entry->getReply() == NULL)
424 /* no reply? */
425 return 1;
426
427 if (entry->getReply()->content_length == 0)
428 /* No use refreshing (caching?) 0 byte objects */
429 return 0;
430
431 /* This seems to be refreshable. Cache it */
432 return 1;
433 }
434
435 /* refreshCheck... functions below are protocol-specific wrappers around
436 * refreshCheck() function above */
437
438 int
439 refreshCheckHTTP(const StoreEntry * entry, HttpRequest * request)
440 {
441 int reason = refreshCheck(entry, request, 0);
442 refreshCounts[rcHTTP].total++;
443 refreshCounts[rcHTTP].status[reason]++;
444 return (reason < 200) ? 0 : 1;
445 }
446
447 int
448 refreshCheckICP(const StoreEntry * entry, HttpRequest * request)
449 {
450 int reason = refreshCheck(entry, request, 30);
451 refreshCounts[rcICP].total++;
452 refreshCounts[rcICP].status[reason]++;
453 return (reason < 200) ? 0 : 1;
454 }
455
456 #if USE_HTCP
457 int
458 refreshCheckHTCP(const StoreEntry * entry, HttpRequest * request)
459 {
460 int reason = refreshCheck(entry, request, 10);
461 refreshCounts[rcHTCP].total++;
462 refreshCounts[rcHTCP].status[reason]++;
463 return (reason < 200) ? 0 : 1;
464 }
465
466 #endif
467
468 #if USE_CACHE_DIGESTS
469 int
470 refreshCheckDigest(const StoreEntry * entry, time_t delta)
471 {
472 int reason = refreshCheck(entry,
473 entry->mem_obj ? entry->mem_obj->request : NULL,
474 delta);
475 refreshCounts[rcCDigest].total++;
476 refreshCounts[rcCDigest].status[reason]++;
477 return (reason < 200) ? 0 : 1;
478 }
479
480 #endif
481
482 time_t
483 getMaxAge(const char *url)
484 {
485 const refresh_t *R;
486 debugs(22, 3, "getMaxAge: '" << url << "'");
487
488 if ((R = refreshLimits(url)))
489 return R->max;
490 else
491 return REFRESH_DEFAULT_MAX;
492 }
493
494 static void
495
496 refreshCountsStats(StoreEntry * sentry, struct RefreshCounts *rc)
497 {
498 int sum = 0;
499 int tot = rc->total;
500
501 storeAppendPrintf(sentry, "\n\n%s histogram:\n", rc->proto);
502 storeAppendPrintf(sentry, "Count\t%%Total\tCategory\n");
503
504 #define refreshCountsStatsEntry(code,desc) { \
505 storeAppendPrintf(sentry, "%6d\t%6.2f\t%s\n", \
506 rc->status[code], xpercent(rc->status[code], tot), desc); \
507 sum += rc->status[code]; \
508 }
509
510 refreshCountsStatsEntry(FRESH_REQUEST_MAX_STALE_ALL,
511 "Fresh: request max-stale wildcard");
512 refreshCountsStatsEntry(FRESH_REQUEST_MAX_STALE_VALUE,
513 "Fresh: request max-stale value");
514 refreshCountsStatsEntry(FRESH_EXPIRES,
515 "Fresh: expires time not reached");
516 refreshCountsStatsEntry(FRESH_LMFACTOR_RULE,
517 "Fresh: refresh_pattern last-mod factor percentage");
518 refreshCountsStatsEntry(FRESH_MIN_RULE,
519 "Fresh: refresh_pattern min value");
520 refreshCountsStatsEntry(FRESH_OVERRIDE_EXPIRES,
521 "Fresh: refresh_pattern override expires");
522 refreshCountsStatsEntry(FRESH_OVERRIDE_LASTMOD,
523 "Fresh: refresh_pattern override lastmod");
524 refreshCountsStatsEntry(STALE_MUST_REVALIDATE,
525 "Stale: response has must-revalidate");
526 refreshCountsStatsEntry(STALE_RELOAD_INTO_IMS,
527 "Stale: changed reload into IMS");
528 refreshCountsStatsEntry(STALE_FORCED_RELOAD,
529 "Stale: request has no-cache directive");
530 refreshCountsStatsEntry(STALE_EXCEEDS_REQUEST_MAX_AGE_VALUE,
531 "Stale: age exceeds request max-age value");
532 refreshCountsStatsEntry(STALE_EXPIRES,
533 "Stale: expires time reached");
534 refreshCountsStatsEntry(STALE_MAX_RULE,
535 "Stale: refresh_pattern max age rule");
536 refreshCountsStatsEntry(STALE_LMFACTOR_RULE,
537 "Stale: refresh_pattern last-mod factor percentage");
538 refreshCountsStatsEntry(STALE_DEFAULT,
539 "Stale: by default");
540
541 tot = sum; /* paranoid: "total" line shows 100% if we forgot nothing */
542 storeAppendPrintf(sentry, "%6d\t%6.2f\tTOTAL\n",
543 rc->total, xpercent(rc->total, tot));
544 \
545 storeAppendPrintf(sentry, "\n");
546 }
547
548 static void
549 refreshStats(StoreEntry * sentry)
550 {
551 int i;
552 int total = 0;
553
554 /* get total usage count */
555
556 for (i = 0; i < rcCount; ++i)
557 total += refreshCounts[i].total;
558
559 /* protocol usage histogram */
560 storeAppendPrintf(sentry, "\nRefreshCheck calls per protocol\n\n");
561
562 storeAppendPrintf(sentry, "Protocol\t#Calls\t%%Calls\n");
563
564 for (i = 0; i < rcCount; ++i)
565 storeAppendPrintf(sentry, "%10s\t%6d\t%6.2f\n",
566 refreshCounts[i].proto,
567 refreshCounts[i].total,
568 xpercent(refreshCounts[i].total, total));
569
570 /* per protocol histograms */
571 storeAppendPrintf(sentry, "\n\nRefreshCheck histograms for various protocols\n");
572
573 for (i = 0; i < rcCount; ++i)
574 refreshCountsStats(sentry, &refreshCounts[i]);
575 }
576
577 static void
578 refreshRegisterWithCacheManager(void)
579 {
580 CacheManager::GetInstance()->
581 registerAction("refresh", "Refresh Algorithm Statistics", refreshStats, 0, 1);
582 }
583
584 void
585 refreshInit(void)
586 {
587 memset(refreshCounts, 0, sizeof(refreshCounts));
588 refreshCounts[rcHTTP].proto = "HTTP";
589 refreshCounts[rcICP].proto = "ICP";
590 #if USE_HTCP
591
592 refreshCounts[rcHTCP].proto = "HTCP";
593 #endif
594
595 refreshCounts[rcStore].proto = "On Store";
596 #if USE_CACHE_DIGESTS
597
598 refreshCounts[rcCDigest].proto = "Cache Digests";
599 #endif
600
601 memset(&DefaultRefresh, '\0', sizeof(DefaultRefresh));
602 DefaultRefresh.pattern = "<none>";
603 DefaultRefresh.min = REFRESH_DEFAULT_MIN;
604 DefaultRefresh.pct = REFRESH_DEFAULT_PCT;
605 DefaultRefresh.max = REFRESH_DEFAULT_MAX;
606
607 refreshRegisterWithCacheManager();
608 }