]> git.ipfire.org Git - thirdparty/squid.git/blob - src/test_cache_digest.cc
merged from trunk
[thirdparty/squid.git] / src / test_cache_digest.cc
1
2 /*
3 * $Id: test_cache_digest.cc,v 1.35 2008/01/20 08:54:28 amosjeffries Exp $
4 *
5 * AUTHOR: Alex Rousskov
6 *
7 * SQUID Web Proxy Cache http://www.squid-cache.org/
8 * ----------------------------------------------------------
9 *
10 * Squid is the result of efforts by numerous individuals from
11 * the Internet community; see the CONTRIBUTORS file for full
12 * details. Many organizations have provided support for Squid's
13 * development; see the SPONSORS file for full details. Squid is
14 * Copyrighted (C) 2001 by the Regents of the University of
15 * California; see the COPYRIGHT file for full details. Squid
16 * incorporates software developed and/or copyrighted by other
17 * sources; see the CREDITS file for full details.
18 *
19 * This program is free software; you can redistribute it and/or modify
20 * it under the terms of the GNU General Public License as published by
21 * the Free Software Foundation; either version 2 of the License, or
22 * (at your option) any later version.
23 *
24 * This program is distributed in the hope that it will be useful,
25 * but WITHOUT ANY WARRANTY; without even the implied warranty of
26 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
27 * GNU General Public License for more details.
28 *
29 * You should have received a copy of the GNU General Public License
30 * along with this program; if not, write to the Free Software
31 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA.
32 *
33 */
34
35 /*
36 * Test-suite for playing with cache digests
37 */
38
39 #include "squid.h"
40
41 typedef struct
42 {
43 int query_count;
44 int true_hit_count;
45 int true_miss_count;
46 int false_hit_count;
47 int false_miss_count;
48 } CacheQueryStats;
49
50 typedef struct _Cache Cache;
51
52 struct _Cache
53 {
54 const char *name;
55 hash_table *hash;
56 CacheDigest *digest;
57 Cache *peer;
58 CacheQueryStats qstats;
59 int count; /* #currently cached entries */
60 int req_count; /* #requests to this cache */
61 int bad_add_count; /* #duplicate adds */
62 int bad_del_count; /* #dels with no prior add */
63 };
64
65
66 typedef struct _CacheEntry
67 {
68 const cache_key *key;
69
70 struct _CacheEntry *next;
71 unsigned char key_arr[SQUID_MD5_DIGEST_LENGTH];
72 /* storeSwapLogData s; */
73 } CacheEntry;
74
75 /* parsed access log entry */
76
77 typedef struct
78 {
79 cache_key key[SQUID_MD5_DIGEST_LENGTH];
80 time_t timestamp;
81 short int use_icp; /* true/false */
82 } RawAccessLogEntry;
83
84 typedef enum {
85 frError = -2, frMore = -1, frEof = 0, frOk = 1
86 } fr_result;
87
88 typedef struct _FileIterator FileIterator;
89 typedef fr_result(*FI_READER) (FileIterator * fi);
90
91 struct _FileIterator
92 {
93 const char *fname;
94 FILE *file;
95 time_t inner_time; /* timestamp of the current entry */
96 time_t time_offset; /* to adjust time set by reader */
97 int line_count; /* number of lines scanned */
98 int bad_line_count; /* number of parsing errors */
99 int time_warp_count; /* number of out-of-order entries in the file */
100 FI_READER reader; /* reads next entry and updates inner_time */
101 void *entry; /* buffer for the current entry, freed with xfree() */
102 };
103
104 /* globals */
105 static time_t cur_time = -1; /* timestamp of the current log entry */
106
107 /* copied from url.c */
108 const char *RequestMethodStr[] =
109 {
110 "NONE",
111 "GET",
112 "POST",
113 "PUT",
114 "HEAD",
115 "CONNECT",
116 "TRACE",
117 "PURGE"
118 };
119
120 /* copied from url.c */
121 static HttpRequestMethod
122 methodStrToId(const char *s)
123 {
124 if (strcasecmp(s, "GET") == 0) {
125 return METHOD_GET;
126 } else if (strcasecmp(s, "POST") == 0) {
127 return METHOD_POST;
128 } else if (strcasecmp(s, "PUT") == 0) {
129 return METHOD_PUT;
130 } else if (strcasecmp(s, "HEAD") == 0) {
131 return METHOD_HEAD;
132 } else if (strcasecmp(s, "CONNECT") == 0) {
133 return METHOD_CONNECT;
134 } else if (strcasecmp(s, "TRACE") == 0) {
135 return METHOD_TRACE;
136 } else if (strcasecmp(s, "PURGE") == 0) {
137 return METHOD_PURGE;
138 }
139
140 return METHOD_NONE;
141 }
142
143 /* FileIterator */
144
145 static void fileIteratorAdvance(FileIterator * fi);
146
147 static FileIterator *
148 fileIteratorCreate(const char *fname, FI_READER reader)
149 {
150 FileIterator *fi = (FileIterator *)xcalloc(1, sizeof(FileIterator));
151 assert(fname && reader);
152 fi->fname = fname;
153 fi->reader = reader;
154 fi->file = fopen(fname, "r");
155
156 if (!fi->file) {
157 fprintf(stderr, "cannot open %s: %s\n", fname, strerror(errno));
158 return NULL;
159 } else
160 fprintf(stderr, "opened %s\n", fname);
161
162 fileIteratorAdvance(fi);
163
164 return fi;
165 }
166
167 static void
168 fileIteratorDestroy(FileIterator * fi)
169 {
170 assert(fi);
171
172 if (fi->file) {
173 fclose(fi->file);
174 fprintf(stderr, "closed %s\n", fi->fname);
175 }
176
177 xfree(fi->entry);
178 xfree(fi);
179 }
180
181 static void
182 fileIteratorSetCurTime(FileIterator * fi, time_t ct)
183 {
184 assert(fi);
185 assert(fi->inner_time > 0);
186 fi->time_offset = ct - fi->inner_time;
187 }
188
189 static void
190 fileIteratorAdvance(FileIterator * fi)
191 {
192 int res;
193 assert(fi);
194
195 do {
196 const time_t last_time = fi->inner_time;
197 fi->inner_time = -1;
198 res = fi->reader(fi);
199 fi->line_count++;
200
201 if (fi->inner_time < 0)
202 fi->inner_time = last_time;
203 else
204 fi->inner_time += fi->time_offset;
205
206 if (res == frError)
207 fi->bad_line_count++;
208 else if (res == frEof) {
209 fprintf(stderr, "exhausted %s (%d entries) at %s",
210 fi->fname, fi->line_count, ctime(&fi->inner_time));
211 fi->inner_time = -1;
212 } else if (fi->inner_time < last_time) {
213 assert(last_time >= 0);
214 fi->time_warp_count++;
215 fi->inner_time = last_time;
216 }
217
218 /* report progress */
219 if (!(fi->line_count % 50000))
220 fprintf(stderr, "%s scanned %d K entries (%d bad) at %s",
221 fi->fname, fi->line_count / 1000, fi->bad_line_count,
222 ctime(&fi->inner_time));
223 } while (res < 0);
224 }
225
226 /* CacheEntry */
227
228 static CacheEntry *
229 cacheEntryCreate(const storeSwapLogData * s)
230 {
231 CacheEntry *e = (CacheEntry *)xcalloc(1, sizeof(CacheEntry));
232 assert(s);
233 /* e->s = *s; */
234 xmemcpy(e->key_arr, s->key, SQUID_MD5_DIGEST_LENGTH);
235 e->key = &e->key_arr[0];
236 return e;
237 }
238
239 static void
240 cacheEntryDestroy(CacheEntry * e)
241 {
242 assert(e);
243 xfree(e);
244 }
245
246
247 /* Cache */
248
249 static Cache *
250 cacheCreate(const char *name)
251 {
252 Cache *c;
253 assert(name && strlen(name));
254 c = (Cache *)xcalloc(1, sizeof(Cache));
255 c->name = name;
256 c->hash = hash_create(storeKeyHashCmp, (int)2e6, storeKeyHashHash);
257 return c;
258 }
259
260 static void
261 cacheDestroy(Cache * cache)
262 {
263 CacheEntry *e = NULL;
264 hash_table *hash;
265 assert(cache);
266 hash = cache->hash;
267 /* destroy hash table contents */
268 hash_first(hash);
269
270 while ((e = (CacheEntry *)hash_next(hash))) {
271 hash_remove_link(hash, (hash_link *) e);
272 cacheEntryDestroy(e);
273 }
274
275 /* destroy the hash table itself */
276 hashFreeMemory(hash);
277
278 if (cache->digest)
279 cacheDigestDestroy(cache->digest);
280
281 xfree(cache);
282 }
283
284 /* re-digests currently hashed entries */
285 static void
286 cacheResetDigest(Cache * cache)
287 {
288 CacheEntry *e = NULL;
289 hash_table *hash;
290
291 struct timeval t_start, t_end;
292
293 assert(cache);
294 fprintf(stderr, "%s: init-ing digest with %d entries\n", cache->name, cache->count);
295
296 if (cache->digest)
297 cacheDigestDestroy(cache->digest);
298
299 hash = cache->hash;
300
301 cache->digest = cacheDigestCreate(cache->count + 1, 6);
302
303 if (!cache->count)
304 return;
305
306 gettimeofday(&t_start, NULL);
307
308 hash_first(hash);
309
310 while ((e = (CacheEntry *)hash_next(hash))) {
311 cacheDigestAdd(cache->digest, e->key);
312 }
313
314 gettimeofday(&t_end, NULL);
315 assert(cache->digest->count == cache->count);
316 fprintf(stderr, "%s: init-ed digest with %d entries\n",
317 cache->name, cache->digest->count);
318 fprintf(stderr, "%s: init took: %f sec, %f sec/M\n",
319 cache->name,
320 tvSubDsec(t_start, t_end),
321 (double) 1e6 * tvSubDsec(t_start, t_end) / cache->count);
322 /* check how long it takes to traverse the hash */
323 gettimeofday(&t_start, NULL);
324 hash_first(hash);
325
326 for (e = (CacheEntry *)hash_next(hash); e; e = (CacheEntry *)hash_next(hash)) {}
327
328 gettimeofday(&t_end, NULL);
329 fprintf(stderr, "%s: hash scan took: %f sec, %f sec/M\n",
330 cache->name,
331 tvSubDsec(t_start, t_end),
332 (double) 1e6 * tvSubDsec(t_start, t_end) / cache->count);
333 }
334
335 static void
336 cacheQueryPeer(Cache * cache, const cache_key * key)
337 {
338 const int peer_has_it = hash_lookup(cache->peer->hash, key) != NULL;
339 const int we_think_we_have_it = cacheDigestTest(cache->digest, key);
340
341 cache->qstats.query_count++;
342
343 if (peer_has_it) {
344 if (we_think_we_have_it)
345 cache->qstats.true_hit_count++;
346 else
347 cache->qstats.false_miss_count++;
348 } else {
349 if (we_think_we_have_it)
350 cache->qstats.false_hit_count++;
351 else
352 cache->qstats.true_miss_count++;
353 }
354 }
355
356 static void
357 cacheQueryReport(Cache * cache, CacheQueryStats * stats)
358 {
359 fprintf(stdout, "%s: peer queries: %d (%d%%)\n",
360 cache->name,
361 stats->query_count, xpercentInt(stats->query_count, cache->req_count)
362 );
363 fprintf(stdout, "%s: t-hit: %d (%d%%) t-miss: %d (%d%%) t-*: %d (%d%%)\n",
364 cache->name,
365 stats->true_hit_count, xpercentInt(stats->true_hit_count, stats->query_count),
366 stats->true_miss_count, xpercentInt(stats->true_miss_count, stats->query_count),
367 stats->true_hit_count + stats->true_miss_count,
368 xpercentInt(stats->true_hit_count + stats->true_miss_count, stats->query_count)
369 );
370 fprintf(stdout, "%s: f-hit: %d (%d%%) f-miss: %d (%d%%) f-*: %d (%d%%)\n",
371 cache->name,
372 stats->false_hit_count, xpercentInt(stats->false_hit_count, stats->query_count),
373 stats->false_miss_count, xpercentInt(stats->false_miss_count, stats->query_count),
374 stats->false_hit_count + stats->false_miss_count,
375 xpercentInt(stats->false_hit_count + stats->false_miss_count, stats->query_count)
376 );
377 }
378
379 static void
380 cacheReport(Cache * cache)
381 {
382 fprintf(stdout, "%s: entries: %d reqs: %d bad-add: %d bad-del: %d\n",
383 cache->name, cache->count, cache->req_count,
384 cache->bad_add_count, cache->bad_del_count);
385
386 }
387
388 static void
389 cacheFetch(Cache * cache, const RawAccessLogEntry * e)
390 {
391 assert(e);
392 cache->req_count++;
393
394 if (e->use_icp)
395 cacheQueryPeer(cache, e->key);
396 }
397
398 static fr_result
399 swapStateReader(FileIterator * fi)
400 {
401 storeSwapLogData *entry;
402
403 if (!fi->entry)
404 fi->entry = xcalloc(1, sizeof(storeSwapLogData));
405
406 entry = (storeSwapLogData *)fi->entry;
407
408 if (fread(entry, sizeof(*entry), 1, fi->file) != 1)
409 return frEof;
410
411 fi->inner_time = entry->lastref;
412
413 if (entry->op != SWAP_LOG_ADD && entry->op != SWAP_LOG_DEL) {
414 fprintf(stderr, "%s:%d: unknown swap log action\n", fi->fname, fi->line_count);
415 exit(-3);
416 }
417
418 return frOk;
419 }
420
421 static fr_result
422 accessLogReader(FileIterator * fi)
423 {
424 static char buf[4096];
425 RawAccessLogEntry *entry;
426 char *url;
427 char *method;
428 HttpRequestMethod method_id = METHOD_NONE;
429 char *hier = NULL;
430
431 assert(fi);
432
433 if (!fi->entry)
434 fi->entry = xcalloc(1, sizeof(RawAccessLogEntry));
435 else
436 memset(fi->entry, 0, sizeof(RawAccessLogEntry));
437
438 entry = (RawAccessLogEntry*)fi->entry;
439
440 if (!fgets(buf, sizeof(buf), fi->file))
441 return frEof; /* eof */
442
443 entry->timestamp = fi->inner_time = (time_t) atoi(buf);
444
445 url = strstr(buf, "://");
446
447 hier = url ? strstr(url, " - ") : NULL;
448
449 if (!url || !hier) {
450 /*fprintf(stderr, "%s:%d: strange access log entry '%s'\n",
451 * fname, scanned_count, buf); */
452 return frError;
453 }
454
455 method = url;
456
457 while (!xisdigit(*method)) {
458 if (*method == ' ')
459 *method = '\0';
460
461 --method;
462 }
463
464 method += 2;
465 method_id = methodStrToId(method);
466
467 if (method_id == METHOD_NONE) {
468 /*fprintf(stderr, "%s:%d: invalid method %s in '%s'\n",
469 * fname, scanned_count, method, buf); */
470 return frError;
471 }
472
473 while (*url)
474 url--;
475
476 url++;
477
478 *hier = '\0';
479
480 hier += 3;
481
482 *strchr(hier, '/') = '\0';
483
484 /*fprintf(stdout, "%s:%d: %s %s %s\n",
485 * fname, count, method, url, hier); */
486 entry->use_icp = strcmp(hier, "NONE");
487
488 /* no ICP lookup for these status codes */
489 /* strcmp(hier, "NONE") &&
490 * strcmp(hier, "DIRECT") &&
491 * strcmp(hier, "FIREWALL_IP_DIRECT") &&
492 * strcmp(hier, "LOCAL_IP_DIRECT") &&
493 * strcmp(hier, "NO_DIRECT_FAIL") &&
494 * strcmp(hier, "NO_PARENT_DIRECT") &&
495 * strcmp(hier, "SINGLE_PARENT") &&
496 * strcmp(hier, "PASSTHROUGH_PARENT") &&
497 * strcmp(hier, "SSL_PARENT_MISS") &&
498 * strcmp(hier, "DEFAULT_PARENT");
499 */
500 xmemcpy(entry->key, storeKeyPublic(url, method_id), sizeof(entry->key));
501
502 /*fprintf(stdout, "%s:%d: %s %s %s %s\n",
503 * fname, count, method, storeKeyText(entry->key), url, hier); */
504 return frOk;
505 }
506
507
508 static void
509 cachePurge(Cache * cache, storeSwapLogData * s, int update_digest)
510 {
511 CacheEntry *olde = (CacheEntry *) hash_lookup(cache->hash, s->key);
512
513 if (!olde) {
514 cache->bad_del_count++;
515 } else {
516 assert(cache->count);
517 hash_remove_link(cache->hash, (hash_link *) olde);
518
519 if (update_digest)
520 cacheDigestDel(cache->digest, s->key);
521
522 cacheEntryDestroy(olde);
523
524 cache->count--;
525 }
526 }
527
528 static void
529 cacheStore(Cache * cache, storeSwapLogData * s, int update_digest)
530 {
531 CacheEntry *olde = (CacheEntry *) hash_lookup(cache->hash, s->key);
532
533 if (olde) {
534 cache->bad_add_count++;
535 } else {
536 CacheEntry *e = cacheEntryCreate(s);
537 hash_join(cache->hash, (hash_link *)&e->key);
538 cache->count++;
539
540 if (update_digest)
541 cacheDigestAdd(cache->digest, e->key);
542 }
543 }
544
545 static void
546 cacheUpdateStore(Cache * cache, storeSwapLogData * s, int update_digest)
547 {
548 switch (s->op) {
549
550 case SWAP_LOG_ADD:
551 cacheStore(cache, s, update_digest);
552 break;
553
554 case SWAP_LOG_DEL:
555 cachePurge(cache, s, update_digest);
556 break;
557
558 default:
559 assert(0);
560 }
561 }
562
563 static int
564 usage(const char *prg_name)
565 {
566 fprintf(stderr, "usage: %s <access_log> <swap_state> ...\n",
567 prg_name);
568 return -1;
569 }
570
571 int
572 main(int argc, char *argv[])
573 {
574 FileIterator **fis = NULL;
575 const int fi_count = argc - 1;
576 int active_fi_count = 0;
577 time_t ready_time;
578 Cache *them, *us;
579 int i;
580
581 if (argc < 3)
582 return usage(argv[0]);
583
584 them = cacheCreate("them");
585
586 us = cacheCreate("us");
587
588 them->peer = us;
589
590 us->peer = them;
591
592 fis = (FileIterator **)xcalloc(fi_count, sizeof(FileIterator *));
593
594 /* init iterators with files */
595 fis[0] = fileIteratorCreate(argv[1], accessLogReader);
596
597 for (i = 2; i < argc; ++i)
598 fis[i - 1] = fileIteratorCreate(argv[i], swapStateReader);
599
600 /* check that all files were found */
601 for (i = 0; i < fi_count; ++i)
602 if (!fis[i])
603 return -2;
604
605 /* read prefix to get start-up contents of the peer cache */
606 ready_time = -1;
607
608 for (i = 1; i < fi_count; ++i) {
609 FileIterator *fi = fis[i];
610
611 while (fi->inner_time > 0) {
612 if (((storeSwapLogData *) fi->entry)->op == SWAP_LOG_DEL) {
613 cachePurge(them, (storeSwapLogData *)fi->entry, 0);
614
615 if (ready_time < 0)
616 ready_time = fi->inner_time;
617 } else {
618 if (ready_time > 0 && fi->inner_time > ready_time)
619 break;
620
621 cacheStore(them, (storeSwapLogData *)fi->entry, 0);
622 }
623
624 fileIteratorAdvance(fi);
625 }
626 }
627
628 /* digest peer cache content */
629 cacheResetDigest(them);
630
631 us->digest = cacheDigestClone(them->digest); /* @netw@ */
632
633 /* shift the time in access log to match ready_time */
634 fileIteratorSetCurTime(fis[0], ready_time);
635
636 /* iterate, use the iterator with the smallest positive inner_time */
637 cur_time = -1;
638
639 do {
640 int next_i = -1;
641 time_t next_time = -1;
642 active_fi_count = 0;
643
644 for (i = 0; i < fi_count; ++i) {
645 if (fis[i]->inner_time >= 0) {
646 if (!active_fi_count || fis[i]->inner_time < next_time) {
647 next_i = i;
648 next_time = fis[i]->inner_time;
649 }
650
651 active_fi_count++;
652 }
653 }
654
655 if (next_i >= 0) {
656 cur_time = next_time;
657 /*fprintf(stderr, "%2d time: %d %s", next_i, (int)cur_time, ctime(&cur_time)); */
658
659 if (next_i == 0)
660 cacheFetch(us, (RawAccessLogEntry *)fis[next_i]->entry);
661 else
662 cacheUpdateStore(them, (storeSwapLogData *)fis[next_i]->entry, 1);
663
664 fileIteratorAdvance(fis[next_i]);
665 }
666 } while (active_fi_count);
667
668 /* report */
669 cacheReport(them);
670
671 cacheReport(us);
672
673 cacheQueryReport(us, &us->qstats);
674
675 /* clean */
676 for (i = 0; i < argc - 1; ++i) {
677 fileIteratorDestroy(fis[i]);
678 }
679
680 xfree(fis);
681 cacheDestroy(them);
682 cacheDestroy(us);
683 return 0;
684 }