]> git.ipfire.org Git - thirdparty/squid.git/blob - src/refresh.cc
Merged from trunk
[thirdparty/squid.git] / src / refresh.cc
1
2 /*
3 * $Id$
4 *
5 * DEBUG: section 22 Refresh Calculation
6 * AUTHOR: Harvest Derived
7 *
8 * SQUID Web Proxy Cache http://www.squid-cache.org/
9 * ----------------------------------------------------------
10 *
11 * Squid is the result of efforts by numerous individuals from
12 * the Internet community; see the CONTRIBUTORS file for full
13 * details. Many organizations have provided support for Squid's
14 * development; see the SPONSORS file for full details. Squid is
15 * Copyrighted (C) 2001 by the Regents of the University of
16 * California; see the COPYRIGHT file for full details. Squid
17 * incorporates software developed and/or copyrighted by other
18 * sources; see the CREDITS file for full details.
19 *
20 * This program is free software; you can redistribute it and/or modify
21 * it under the terms of the GNU General Public License as published by
22 * the Free Software Foundation; either version 2 of the License, or
23 * (at your option) any later version.
24 *
25 * This program is distributed in the hope that it will be useful,
26 * but WITHOUT ANY WARRANTY; without even the implied warranty of
27 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
28 * GNU General Public License for more details.
29 *
30 * You should have received a copy of the GNU General Public License
31 * along with this program; if not, write to the Free Software
32 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA.
33 *
34 */
35
36 #ifndef USE_POSIX_REGEX
37 #define USE_POSIX_REGEX /* put before includes; always use POSIX */
38 #endif
39
40 #include "squid.h"
41 #include "CacheManager.h"
42 #include "Store.h"
43 #include "MemObject.h"
44 #include "HttpRequest.h"
45 #include "HttpReply.h"
46 #include "SquidTime.h"
47
/*
 * Slot indices for the refreshCounts[] table: one slot per category
 * of caller whose refresh checks are tallied.  rcCount is not a
 * category of its own; it is the number of slots and must stay last.
 */
typedef enum refreshCountsEnum {
    rcHTTP = 0,
    rcICP,
#if USE_HTCP
    rcHTCP,             /* present only when HTCP support is compiled in */
#endif
#if USE_CACHE_DIGESTS
    rcCDigest,          /* present only when cache digests are compiled in */
#endif
    rcStore,
    rcCount
} refreshCountsEnum;
60
/*
 * Records which rule refreshStaleness() used to reach its verdict, so
 * that the caller can tell _why_ an entry was judged fresh or stale.
 * Callers zero the structure before use; the structure must stay
 * plain data because it is cleared with memset().
 */
typedef struct stale_flags {
    bool expires;       /* the entry had an explicit expiry time */
    bool min;           /* age was below the rule's minimum (fresh) */
    bool lmfactor;      /* the last-modified factor heuristic decided */
    bool max;           /* age exceeded the rule's maximum (stale) */
} stale_flags;
67
/*
 * Reason codes returned by refreshCheck(), modelled on HTTP/FTP
 * status codes.  All fresh codes are in the range 100-199 and all
 * stale codes are in the range 200-299.  The codes double as indices
 * into RefreshCounts::status[] and may end up in logs, so every value
 * is spelled out explicitly: inserting a new code must never renumber
 * an existing one.
 */
enum {
    /* fresh: 100-199 */
    FRESH_REQUEST_MAX_STALE_ALL = 100,
    FRESH_REQUEST_MAX_STALE_VALUE = 101,
    FRESH_EXPIRES = 102,
    FRESH_LMFACTOR_RULE = 103,
    FRESH_MIN_RULE = 104,
    FRESH_OVERRIDE_EXPIRES = 105,
    FRESH_OVERRIDE_LASTMOD = 106,

    /* stale: 200-299 */
    STALE_MUST_REVALIDATE = 200,
    STALE_RELOAD_INTO_IMS = 201,
    STALE_FORCED_RELOAD = 202,
    STALE_EXCEEDS_REQUEST_MAX_AGE_VALUE = 203,
    STALE_EXPIRES = 204,
    STALE_MAX_RULE = 205,
    STALE_LMFACTOR_RULE = 206,
    STALE_DEFAULT = 299     /* highest code; sizes the status[] histogram */
};
91
92 static struct RefreshCounts {
93 const char *proto;
94 int total;
95 int status[STALE_DEFAULT + 1];
96 }
97
98 refreshCounts[rcCount];
99
/*
 * Built-in refresh rule, used when no configured rule applies:
 *      MIN     none
 *      PCT     20%
 *      MAX     3 days (259200 seconds)
 *
 * The cast expressions are fully parenthesized so that each macro
 * expands to a single primary expression in every context
 * (e.g. "sizeof REFRESH_DEFAULT_MAX").
 */
#define REFRESH_DEFAULT_MIN     ((time_t)0)
#define REFRESH_DEFAULT_PCT     0.20
#define REFRESH_DEFAULT_MAX     ((time_t)259200)
109
110 static const refresh_t *refreshUncompiledPattern(const char *);
111 static OBJH refreshStats;
112 static int refreshStaleness(const StoreEntry * entry, time_t check_time, const time_t age, const refresh_t * R, stale_flags * sf);
113
114 static refresh_t DefaultRefresh;
115
116 const refresh_t *
117 refreshLimits(const char *url)
118 {
119 const refresh_t *R;
120
121 for (R = Config.Refresh; R; R = R->next) {
122 if (!regexec(&(R->compiled_pattern), url, 0, 0, 0))
123 return R;
124 }
125
126 return NULL;
127 }
128
129 static const refresh_t *
130 refreshUncompiledPattern(const char *pat)
131 {
132 const refresh_t *R;
133
134 for (R = Config.Refresh; R; R = R->next) {
135 if (0 == strcmp(R->pattern, pat))
136 return R;
137 }
138
139 return NULL;
140 }
141
142 /**
143 * Calculate how stale the response is (or will be at the check_time).
144 * Staleness calculation is based on the following: (1) response
145 * expiration time, (2) age greater than configured maximum, (3)
146 * last-modified factor, and (4) age less than configured minimum.
147 *
148 * \retval -1 If the response is fresh.
149 * \retval >0 Otherwise return it's staleness.
150 * \retval 0 NOTE return value of 0 means the response is stale.
151 *
152 * The 'stale_flags' structure is used to tell the calling function
153 * _why_ this response is fresh or stale. Its used, for example,
154 * when the admin wants to override expiration and last-modified
155 * times.
156 */
157 static int
158 refreshStaleness(const StoreEntry * entry, time_t check_time, const time_t age, const refresh_t * R, stale_flags * sf)
159 {
160 /** \par
161 * Check for an explicit expiration time (Expires: header).
162 */
163 if (entry->expires > -1) {
164 sf->expires = true;
165
166 if (entry->expires > check_time) {
167 debugs(22, 3, "FRESH: expires " << entry->expires <<
168 " >= check_time " << check_time << " ");
169
170 return -1;
171 } else {
172 debugs(22, 3, "STALE: expires " << entry->expires <<
173 " < check_time " << check_time << " ");
174
175 return (check_time - entry->expires);
176 }
177 }
178
179 /** \par
180 * Use local heuristics to determine staleness. Start with the
181 * max age from the refresh_pattern rule.
182 */
183 if (age > R->max) {
184 debugs(22, 3, "STALE: age " << age << " > max " << R->max << " ");
185 sf->max = true;
186 return (age - R->max);
187 }
188
189 /** \par
190 * Try the last-modified factor algorithm: refresh_pattern n% percentage of Last-Modified: age.
191 */
192 if (entry->lastmod > -1 && entry->timestamp > entry->lastmod) {
193 /*
194 * stale_age is the Age of the response when it became/becomes
195 * stale according to the last-modified factor algorithm.
196 */
197 time_t stale_age = static_cast<time_t>((entry->timestamp - entry->lastmod) * R->pct);
198 sf->lmfactor = true;
199
200 if (age >= stale_age) {
201 debugs(22, 3, "STALE: age " << age << " > stale_age " << stale_age);
202 return (age - stale_age);
203 } else {
204 debugs(22, 3, "FRESH: age " << age << " <= stale_age " << stale_age);
205 return -1;
206 }
207 }
208
209 /** \par
210 * Finally, if all else fails; staleness is determined by the refresh_pattern
211 * configured minimum age.
212 */
213 if (age < R->min) {
214 debugs(22, 3, "FRESH: age " << age << " < min " << R->min);
215 sf->min = true;
216 return -1;
217 }
218
219 debugs(22, 3, "STALE: age " << age << " >= min " << R->min);
220 return (age - R->min);
221 }
222
223 /**
224 * \retval 1 if the entry must be revalidated within delta seconds
225 * \retval 0 otherwise
226 *
227 * note: request maybe null (e.g. for cache digests build)
228 */
229 static int
230 refreshCheck(const StoreEntry * entry, HttpRequest * request, time_t delta)
231 {
232 const refresh_t *R;
233 const char *uri = NULL;
234 time_t age = 0;
235 time_t check_time = squid_curtime + delta;
236 int staleness;
237 stale_flags sf;
238
239 if (entry->mem_obj)
240 uri = entry->mem_obj->url;
241 else if (request)
242 uri = urlCanonical(request);
243
244 debugs(22, 3, "refreshCheck: '" << (uri ? uri : "<none>") << "'");
245
246 if (check_time > entry->timestamp)
247 age = check_time - entry->timestamp;
248
249 // FIXME: what to do when age < 0 or counter overflow?
250 assert(age >= 0);
251
252 R = uri ? refreshLimits(uri) : refreshUncompiledPattern(".");
253
254 if (NULL == R)
255 R = &DefaultRefresh;
256
257 memset(&sf, '\0', sizeof(sf));
258
259 staleness = refreshStaleness(entry, check_time, age, R, &sf);
260
261 debugs(22, 3, "Staleness = " << staleness);
262
263 debugs(22, 3, "refreshCheck: Matched '" << R->pattern << " " <<
264 (int) R->min << " " << (int) (100.0 * R->pct) << "%% " <<
265 (int) R->max << "'");
266
267
268 debugs(22, 3, "refreshCheck: age = " << age);
269
270 debugs(22, 3, "\tcheck_time:\t" << mkrfc1123(check_time));
271
272 debugs(22, 3, "\tentry->timestamp:\t" << mkrfc1123(entry->timestamp));
273
274 if (EBIT_TEST(entry->flags, ENTRY_REVALIDATE) && staleness > -1
275 #if USE_HTTP_VIOLATIONS
276 && !R->flags.ignore_must_revalidate
277 #endif
278 ) {
279 debugs(22, 3, "refreshCheck: YES: Must revalidate stale response");
280 return STALE_MUST_REVALIDATE;
281 }
282
283 /* request-specific checks */
284 if (request && !request->flags.ignore_cc) {
285 HttpHdrCc *cc = request->cache_control;
286
287 if (request->flags.ims && (R->flags.refresh_ims || Config.onoff.refresh_all_ims)) {
288 /* The clients no-cache header is changed into a IMS query */
289 debugs(22, 3, "refreshCheck: YES: refresh-ims");
290 return STALE_FORCED_RELOAD;
291 }
292
293 #if USE_HTTP_VIOLATIONS
294
295 if (!request->flags.nocache_hack) {
296 (void) 0;
297 } else if (R->flags.ignore_reload) {
298 /* The clients no-cache header is ignored */
299 debugs(22, 3, "refreshCheck: MAYBE: ignore-reload");
300 } else if (R->flags.reload_into_ims || Config.onoff.reload_into_ims) {
301 /* The clients no-cache header is changed into a IMS query */
302 debugs(22, 3, "refreshCheck: YES: reload-into-ims");
303 return STALE_RELOAD_INTO_IMS;
304 } else {
305 /* The clients no-cache header is not overridden on this request */
306 debugs(22, 3, "refreshCheck: YES: client reload");
307 request->flags.nocache = 1;
308 return STALE_FORCED_RELOAD;
309 }
310
311 #endif
312 if (NULL != cc) {
313 if (cc->max_age > -1) {
314 #if USE_HTTP_VIOLATIONS
315 if (R->flags.ignore_reload && cc->max_age == 0) {} else
316 #endif
317 {
318 #if 0
319
320 if (cc->max_age == 0) {
321 debugs(22, 3, "refreshCheck: YES: client-max-age = 0");
322 return STALE_EXCEEDS_REQUEST_MAX_AGE_VALUE;
323 }
324
325 #endif
326 if (age > cc->max_age) {
327 debugs(22, 3, "refreshCheck: YES: age > client-max-age");
328 return STALE_EXCEEDS_REQUEST_MAX_AGE_VALUE;
329 }
330 }
331 }
332
333 if (EBIT_TEST(cc->mask, CC_MAX_STALE) && staleness > -1) {
334 if (cc->max_stale < 0) {
335 /* max-stale directive without a value */
336 debugs(22, 3, "refreshCheck: NO: max-stale wildcard");
337 return FRESH_REQUEST_MAX_STALE_ALL;
338 } else if (staleness < cc->max_stale) {
339 debugs(22, 3, "refreshCheck: NO: staleness < max-stale");
340 return FRESH_REQUEST_MAX_STALE_VALUE;
341 }
342 }
343 }
344 }
345
346 if (-1 == staleness) {
347 debugs(22, 3, "refreshCheck: object isn't stale..");
348 if (sf.expires) {
349 debugs(22, 3, "refreshCheck: returning FRESH_EXPIRES");
350 return FRESH_EXPIRES;
351 }
352
353 assert(!sf.max);
354
355 if (sf.lmfactor) {
356 debugs(22, 3, "refreshCheck: returning FRESH_LMFACTOR_RULE");
357 return FRESH_LMFACTOR_RULE;
358 }
359
360 assert(sf.min);
361
362 debugs(22, 3, "refreshCheck: returning FRESH_MIN_RULE");
363 return FRESH_MIN_RULE;
364 }
365
366 /*
367 * At this point the response is stale, unless one of
368 * the override options kicks in.
369 */
370 if (sf.expires) {
371 #if USE_HTTP_VIOLATIONS
372
373 if (R->flags.override_expire && age < R->min) {
374 debugs(22, 3, "refreshCheck: NO: age < min && override-expire");
375 return FRESH_OVERRIDE_EXPIRES;
376 }
377
378 #endif
379 return STALE_EXPIRES;
380 }
381
382 if (sf.max)
383 return STALE_MAX_RULE;
384
385 if (sf.lmfactor) {
386 #if USE_HTTP_VIOLATIONS
387
388 if (R->flags.override_lastmod && age < R->min) {
389 debugs(22, 3, "refreshCheck: NO: age < min && override-lastmod");
390 return FRESH_OVERRIDE_LASTMOD;
391 }
392
393 #endif
394 return STALE_LMFACTOR_RULE;
395 }
396
397 debugs(22, 3, "refreshCheck: returning STALE_DEFAULT");
398 return STALE_DEFAULT;
399 }
400
401 int
402 refreshIsCachable(const StoreEntry * entry)
403 {
404 /*
405 * Don't look at the request to avoid no-cache and other nuisances.
406 * the object should have a mem_obj so the URL will be found there.
407 * minimum_expiry_time seconds delta (defaults to 60 seconds), to
408 * avoid objects which expire almost immediately, and which can't
409 * be refreshed.
410 */
411 int reason = refreshCheck(entry, NULL, Config.minimum_expiry_time);
412 refreshCounts[rcStore].total++;
413 refreshCounts[rcStore].status[reason]++;
414
415 if (reason < STALE_MUST_REVALIDATE)
416 /* Does not need refresh. This is certainly cachable */
417 return 1;
418
419 if (entry->lastmod < 0)
420 /* Last modified is needed to do a refresh */
421 return 0;
422
423 if (entry->mem_obj == NULL)
424 /* no mem_obj? */
425 return 1;
426
427 if (entry->getReply() == NULL)
428 /* no reply? */
429 return 1;
430
431 if (entry->getReply()->content_length == 0)
432 /* No use refreshing (caching?) 0 byte objects */
433 return 0;
434
435 /* This seems to be refreshable. Cache it */
436 return 1;
437 }
438
439 /* refreshCheck... functions below are protocol-specific wrappers around
440 * refreshCheck() function above */
441
442 int
443 refreshCheckHTTP(const StoreEntry * entry, HttpRequest * request)
444 {
445 int reason = refreshCheck(entry, request, 0);
446 refreshCounts[rcHTTP].total++;
447 refreshCounts[rcHTTP].status[reason]++;
448 return (reason < 200) ? 0 : 1;
449 }
450
451 int
452 refreshCheckICP(const StoreEntry * entry, HttpRequest * request)
453 {
454 int reason = refreshCheck(entry, request, 30);
455 refreshCounts[rcICP].total++;
456 refreshCounts[rcICP].status[reason]++;
457 return (reason < 200) ? 0 : 1;
458 }
459
#if USE_HTCP
/**
 * Refresh check on behalf of an HTCP query, evaluated 10 seconds ahead
 * of the current time. The outcome is tallied under rcHTCP.
 *
 * \retval 1 the entry is stale (reason code in the STALE_* range)
 * \retval 0 the entry is fresh
 */
int
refreshCheckHTCP(const StoreEntry * entry, HttpRequest * request)
{
    const int reason = refreshCheck(entry, request, 10);

    ++refreshCounts[rcHTCP].total;
    ++refreshCounts[rcHTCP].status[reason];

    return (reason >= STALE_MUST_REVALIDATE) ? 1 : 0;
}

#endif
471
#if USE_CACHE_DIGESTS
/**
 * Refresh check used for cache digests, evaluated delta seconds ahead
 * of the current time. The request stored with the entry's mem_obj is
 * used when there is a mem_obj; otherwise the check runs without a
 * request. The outcome is tallied under rcCDigest.
 *
 * \retval 1 the entry is stale (reason code in the STALE_* range)
 * \retval 0 the entry is fresh
 */
int
refreshCheckDigest(const StoreEntry * entry, time_t delta)
{
    const int reason = refreshCheck(entry,
                                    entry->mem_obj ? entry->mem_obj->request : NULL,
                                    delta);

    ++refreshCounts[rcCDigest].total;
    ++refreshCounts[rcCDigest].status[reason];

    return (reason >= STALE_MUST_REVALIDATE) ? 1 : 0;
}

#endif
485
486 time_t
487 getMaxAge(const char *url)
488 {
489 const refresh_t *R;
490 debugs(22, 3, "getMaxAge: '" << url << "'");
491
492 if ((R = refreshLimits(url)))
493 return R->max;
494 else
495 return REFRESH_DEFAULT_MAX;
496 }
497
498 static void
499
500 refreshCountsStats(StoreEntry * sentry, struct RefreshCounts *rc)
501 {
502 int sum = 0;
503 int tot = rc->total;
504
505 storeAppendPrintf(sentry, "\n\n%s histogram:\n", rc->proto);
506 storeAppendPrintf(sentry, "Count\t%%Total\tCategory\n");
507
508 #define refreshCountsStatsEntry(code,desc) { \
509 storeAppendPrintf(sentry, "%6d\t%6.2f\t%s\n", \
510 rc->status[code], xpercent(rc->status[code], tot), desc); \
511 sum += rc->status[code]; \
512 }
513
514 refreshCountsStatsEntry(FRESH_REQUEST_MAX_STALE_ALL,
515 "Fresh: request max-stale wildcard");
516 refreshCountsStatsEntry(FRESH_REQUEST_MAX_STALE_VALUE,
517 "Fresh: request max-stale value");
518 refreshCountsStatsEntry(FRESH_EXPIRES,
519 "Fresh: expires time not reached");
520 refreshCountsStatsEntry(FRESH_LMFACTOR_RULE,
521 "Fresh: refresh_pattern last-mod factor percentage");
522 refreshCountsStatsEntry(FRESH_MIN_RULE,
523 "Fresh: refresh_pattern min value");
524 refreshCountsStatsEntry(FRESH_OVERRIDE_EXPIRES,
525 "Fresh: refresh_pattern override expires");
526 refreshCountsStatsEntry(FRESH_OVERRIDE_LASTMOD,
527 "Fresh: refresh_pattern override lastmod");
528 refreshCountsStatsEntry(STALE_MUST_REVALIDATE,
529 "Stale: response has must-revalidate");
530 refreshCountsStatsEntry(STALE_RELOAD_INTO_IMS,
531 "Stale: changed reload into IMS");
532 refreshCountsStatsEntry(STALE_FORCED_RELOAD,
533 "Stale: request has no-cache directive");
534 refreshCountsStatsEntry(STALE_EXCEEDS_REQUEST_MAX_AGE_VALUE,
535 "Stale: age exceeds request max-age value");
536 refreshCountsStatsEntry(STALE_EXPIRES,
537 "Stale: expires time reached");
538 refreshCountsStatsEntry(STALE_MAX_RULE,
539 "Stale: refresh_pattern max age rule");
540 refreshCountsStatsEntry(STALE_LMFACTOR_RULE,
541 "Stale: refresh_pattern last-mod factor percentage");
542 refreshCountsStatsEntry(STALE_DEFAULT,
543 "Stale: by default");
544
545 tot = sum; /* paranoid: "total" line shows 100% if we forgot nothing */
546 storeAppendPrintf(sentry, "%6d\t%6.2f\tTOTAL\n",
547 rc->total, xpercent(rc->total, tot));
548 \
549 storeAppendPrintf(sentry, "\n");
550 }
551
552 static void
553 refreshStats(StoreEntry * sentry)
554 {
555 int i;
556 int total = 0;
557
558 /* get total usage count */
559
560 for (i = 0; i < rcCount; ++i)
561 total += refreshCounts[i].total;
562
563 /* protocol usage histogram */
564 storeAppendPrintf(sentry, "\nRefreshCheck calls per protocol\n\n");
565
566 storeAppendPrintf(sentry, "Protocol\t#Calls\t%%Calls\n");
567
568 for (i = 0; i < rcCount; ++i)
569 storeAppendPrintf(sentry, "%10s\t%6d\t%6.2f\n",
570 refreshCounts[i].proto,
571 refreshCounts[i].total,
572 xpercent(refreshCounts[i].total, total));
573
574 /* per protocol histograms */
575 storeAppendPrintf(sentry, "\n\nRefreshCheck histograms for various protocols\n");
576
577 for (i = 0; i < rcCount; ++i)
578 refreshCountsStats(sentry, &refreshCounts[i]);
579 }
580
581 static void
582 refreshRegisterWithCacheManager(void)
583 {
584 CacheManager::GetInstance()->
585 registerAction("refresh", "Refresh Algorithm Statistics", refreshStats, 0, 1);
586 }
587
588 void
589 refreshInit(void)
590 {
591 memset(refreshCounts, 0, sizeof(refreshCounts));
592 refreshCounts[rcHTTP].proto = "HTTP";
593 refreshCounts[rcICP].proto = "ICP";
594 #if USE_HTCP
595
596 refreshCounts[rcHTCP].proto = "HTCP";
597 #endif
598
599 refreshCounts[rcStore].proto = "On Store";
600 #if USE_CACHE_DIGESTS
601
602 refreshCounts[rcCDigest].proto = "Cache Digests";
603 #endif
604
605 memset(&DefaultRefresh, '\0', sizeof(DefaultRefresh));
606 DefaultRefresh.pattern = "<none>";
607 DefaultRefresh.min = REFRESH_DEFAULT_MIN;
608 DefaultRefresh.pct = REFRESH_DEFAULT_PCT;
609 DefaultRefresh.max = REFRESH_DEFAULT_MAX;
610
611 refreshRegisterWithCacheManager();
612 }