]> git.ipfire.org Git - thirdparty/squid.git/blob - src/test_cache_digest.cc
SourceFormat Enforcement
[thirdparty/squid.git] / src / test_cache_digest.cc
1
2 /*
3 * $Id$
4 *
5 * AUTHOR: Alex Rousskov
6 *
7 * SQUID Web Proxy Cache http://www.squid-cache.org/
8 * ----------------------------------------------------------
9 *
10 * Squid is the result of efforts by numerous individuals from
11 * the Internet community; see the CONTRIBUTORS file for full
12 * details. Many organizations have provided support for Squid's
13 * development; see the SPONSORS file for full details. Squid is
14 * Copyrighted (C) 2001 by the Regents of the University of
15 * California; see the COPYRIGHT file for full details. Squid
16 * incorporates software developed and/or copyrighted by other
17 * sources; see the CREDITS file for full details.
18 *
19 * This program is free software; you can redistribute it and/or modify
20 * it under the terms of the GNU General Public License as published by
21 * the Free Software Foundation; either version 2 of the License, or
22 * (at your option) any later version.
23 *
24 * This program is distributed in the hope that it will be useful,
25 * but WITHOUT ANY WARRANTY; without even the implied warranty of
26 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
27 * GNU General Public License for more details.
28 *
29 * You should have received a copy of the GNU General Public License
30 * along with this program; if not, write to the Free Software
31 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA.
32 *
33 */
34
35 /*
36 * Test-suite for playing with cache digests
37 */
38
39 #include "squid.h"
40
41 #if HAVE_ERRNO_H
42 #include <errno.h>
43 #endif
44
45 typedef struct {
46 int query_count;
47 int true_hit_count;
48 int true_miss_count;
49 int false_hit_count;
50 int false_miss_count;
51 } CacheQueryStats;
52
53 typedef struct _Cache Cache;
54
55 struct _Cache {
56 const char *name;
57 hash_table *hash;
58 CacheDigest *digest;
59 Cache *peer;
60 CacheQueryStats qstats;
61 int count; /* #currently cached entries */
62 int req_count; /* #requests to this cache */
63 int bad_add_count; /* #duplicate adds */
64 int bad_del_count; /* #dels with no prior add */
65 };
66
67 typedef struct _CacheEntry {
68 const cache_key *key;
69
70 struct _CacheEntry *next;
71 unsigned char key_arr[SQUID_MD5_DIGEST_LENGTH];
72 /* storeSwapLogData s; */
73 } CacheEntry;
74
75 /* parsed access log entry */
76
77 typedef struct {
78 cache_key key[SQUID_MD5_DIGEST_LENGTH];
79 time_t timestamp;
80 short int use_icp; /* true/false */
81 } RawAccessLogEntry;
82
83 typedef enum {
84 frError = -2, frMore = -1, frEof = 0, frOk = 1
85 } fr_result;
86
87 typedef struct _FileIterator FileIterator;
88 typedef fr_result(*FI_READER) (FileIterator * fi);
89
90 struct _FileIterator {
91 const char *fname;
92 FILE *file;
93 time_t inner_time; /* timestamp of the current entry */
94 time_t time_offset; /* to adjust time set by reader */
95 int line_count; /* number of lines scanned */
96 int bad_line_count; /* number of parsing errors */
97 int time_warp_count; /* number of out-of-order entries in the file */
98 FI_READER reader; /* reads next entry and updates inner_time */
99 void *entry; /* buffer for the current entry, freed with xfree() */
100 };
101
102 /* globals */
103 static time_t cur_time = -1; /* timestamp of the current log entry */
104
105 /* copied from url.c */
106 const char *RequestMethodStr[] = {
107 "NONE",
108 "GET",
109 "POST",
110 "PUT",
111 "HEAD",
112 "CONNECT",
113 "TRACE",
114 "PURGE"
115 };
116
117 /* copied from url.c */
118 static HttpRequestMethod
119 methodStrToId(const char *s)
120 {
121 if (strcasecmp(s, "GET") == 0) {
122 return METHOD_GET;
123 } else if (strcasecmp(s, "POST") == 0) {
124 return METHOD_POST;
125 } else if (strcasecmp(s, "PUT") == 0) {
126 return METHOD_PUT;
127 } else if (strcasecmp(s, "HEAD") == 0) {
128 return METHOD_HEAD;
129 } else if (strcasecmp(s, "CONNECT") == 0) {
130 return METHOD_CONNECT;
131 } else if (strcasecmp(s, "TRACE") == 0) {
132 return METHOD_TRACE;
133 } else if (strcasecmp(s, "PURGE") == 0) {
134 return METHOD_PURGE;
135 }
136
137 return METHOD_NONE;
138 }
139
140 /* FileIterator */
141
142 static void fileIteratorAdvance(FileIterator * fi);
143
144 static FileIterator *
145 fileIteratorCreate(const char *fname, FI_READER reader)
146 {
147 FileIterator *fi = (FileIterator *)xcalloc(1, sizeof(FileIterator));
148 assert(fname && reader);
149 fi->fname = fname;
150 fi->reader = reader;
151 fi->file = fopen(fname, "r");
152
153 if (!fi->file) {
154 fprintf(stderr, "cannot open %s: %s\n", fname, strerror(errno));
155 return NULL;
156 } else
157 fprintf(stderr, "opened %s\n", fname);
158
159 fileIteratorAdvance(fi);
160
161 return fi;
162 }
163
164 static void
165 fileIteratorDestroy(FileIterator * fi)
166 {
167 assert(fi);
168
169 if (fi->file) {
170 fclose(fi->file);
171 fprintf(stderr, "closed %s\n", fi->fname);
172 }
173
174 xfree(fi->entry);
175 xfree(fi);
176 }
177
178 static void
179 fileIteratorSetCurTime(FileIterator * fi, time_t ct)
180 {
181 assert(fi);
182 assert(fi->inner_time > 0);
183 fi->time_offset = ct - fi->inner_time;
184 }
185
186 static void
187 fileIteratorAdvance(FileIterator * fi)
188 {
189 int res;
190 assert(fi);
191
192 do {
193 const time_t last_time = fi->inner_time;
194 fi->inner_time = -1;
195 res = fi->reader(fi);
196 ++ fi->line_count;
197
198 if (fi->inner_time < 0)
199 fi->inner_time = last_time;
200 else
201 fi->inner_time += fi->time_offset;
202
203 if (res == frError)
204 ++ fi->bad_line_count;
205 else if (res == frEof) {
206 fprintf(stderr, "exhausted %s (%d entries) at %s",
207 fi->fname, fi->line_count, ctime(&fi->inner_time));
208 fi->inner_time = -1;
209 } else if (fi->inner_time < last_time) {
210 assert(last_time >= 0);
211 ++ fi->time_warp_count;
212 fi->inner_time = last_time;
213 }
214
215 /* report progress */
216 if (!(fi->line_count % 50000))
217 fprintf(stderr, "%s scanned %d K entries (%d bad) at %s",
218 fi->fname, fi->line_count / 1000, fi->bad_line_count,
219 ctime(&fi->inner_time));
220 } while (res < 0);
221 }
222
223 /* CacheEntry */
224
225 static CacheEntry *
226 cacheEntryCreate(const storeSwapLogData * s)
227 {
228 CacheEntry *e = (CacheEntry *)xcalloc(1, sizeof(CacheEntry));
229 assert(s);
230 /* e->s = *s; */
231 memcpy(e->key_arr, s->key, SQUID_MD5_DIGEST_LENGTH);
232 e->key = &e->key_arr[0];
233 return e;
234 }
235
236 static void
237 cacheEntryDestroy(CacheEntry * e)
238 {
239 assert(e);
240 xfree(e);
241 }
242
243 /* Cache */
244
245 static Cache *
246 cacheCreate(const char *name)
247 {
248 Cache *c;
249 assert(name && strlen(name));
250 c = (Cache *)xcalloc(1, sizeof(Cache));
251 c->name = name;
252 c->hash = hash_create(storeKeyHashCmp, (int)2e6, storeKeyHashHash);
253 return c;
254 }
255
256 static void
257 cacheDestroy(Cache * cache)
258 {
259 CacheEntry *e = NULL;
260 hash_table *hash;
261 assert(cache);
262 hash = cache->hash;
263 /* destroy hash table contents */
264 hash_first(hash);
265
266 while ((e = (CacheEntry *)hash_next(hash))) {
267 hash_remove_link(hash, (hash_link *) e);
268 cacheEntryDestroy(e);
269 }
270
271 /* destroy the hash table itself */
272 hashFreeMemory(hash);
273
274 if (cache->digest)
275 cacheDigestDestroy(cache->digest);
276
277 xfree(cache);
278 }
279
280 /* re-digests currently hashed entries */
281 static void
282 cacheResetDigest(Cache * cache)
283 {
284 CacheEntry *e = NULL;
285 hash_table *hash;
286
287 struct timeval t_start, t_end;
288
289 assert(cache);
290 fprintf(stderr, "%s: init-ing digest with %d entries\n", cache->name, cache->count);
291
292 if (cache->digest)
293 cacheDigestDestroy(cache->digest);
294
295 hash = cache->hash;
296
297 cache->digest = cacheDigestCreate(cache->count + 1, 6);
298
299 if (!cache->count)
300 return;
301
302 gettimeofday(&t_start, NULL);
303
304 hash_first(hash);
305
306 while ((e = (CacheEntry *)hash_next(hash))) {
307 cacheDigestAdd(cache->digest, e->key);
308 }
309
310 gettimeofday(&t_end, NULL);
311 assert(cache->digest->count == cache->count);
312 fprintf(stderr, "%s: init-ed digest with %d entries\n",
313 cache->name, cache->digest->count);
314 fprintf(stderr, "%s: init took: %f sec, %f sec/M\n",
315 cache->name,
316 tvSubDsec(t_start, t_end),
317 (double) 1e6 * tvSubDsec(t_start, t_end) / cache->count);
318 /* check how long it takes to traverse the hash */
319 gettimeofday(&t_start, NULL);
320 hash_first(hash);
321
322 for (e = (CacheEntry *)hash_next(hash); e; e = (CacheEntry *)hash_next(hash)) {}
323
324 gettimeofday(&t_end, NULL);
325 fprintf(stderr, "%s: hash scan took: %f sec, %f sec/M\n",
326 cache->name,
327 tvSubDsec(t_start, t_end),
328 (double) 1e6 * tvSubDsec(t_start, t_end) / cache->count);
329 }
330
331 static void
332 cacheQueryPeer(Cache * cache, const cache_key * key)
333 {
334 const int peer_has_it = hash_lookup(cache->peer->hash, key) != NULL;
335 const int we_think_we_have_it = cacheDigestTest(cache->digest, key);
336
337 ++ cache->qstats.query_count;
338
339 if (peer_has_it) {
340 if (we_think_we_have_it)
341 ++ cache->qstats.true_hit_count;
342 else
343 ++ cache->qstats.false_miss_count;
344 } else {
345 if (we_think_we_have_it)
346 ++ cache->qstats.false_hit_count;
347 else
348 ++ cache->qstats.true_miss_count;
349 }
350 }
351
352 static void
353 cacheQueryReport(Cache * cache, CacheQueryStats * stats)
354 {
355 fprintf(stdout, "%s: peer queries: %d (%d%%)\n",
356 cache->name,
357 stats->query_count, xpercentInt(stats->query_count, cache->req_count)
358 );
359 fprintf(stdout, "%s: t-hit: %d (%d%%) t-miss: %d (%d%%) t-*: %d (%d%%)\n",
360 cache->name,
361 stats->true_hit_count, xpercentInt(stats->true_hit_count, stats->query_count),
362 stats->true_miss_count, xpercentInt(stats->true_miss_count, stats->query_count),
363 stats->true_hit_count + stats->true_miss_count,
364 xpercentInt(stats->true_hit_count + stats->true_miss_count, stats->query_count)
365 );
366 fprintf(stdout, "%s: f-hit: %d (%d%%) f-miss: %d (%d%%) f-*: %d (%d%%)\n",
367 cache->name,
368 stats->false_hit_count, xpercentInt(stats->false_hit_count, stats->query_count),
369 stats->false_miss_count, xpercentInt(stats->false_miss_count, stats->query_count),
370 stats->false_hit_count + stats->false_miss_count,
371 xpercentInt(stats->false_hit_count + stats->false_miss_count, stats->query_count)
372 );
373 }
374
375 static void
376 cacheReport(Cache * cache)
377 {
378 fprintf(stdout, "%s: entries: %d reqs: %d bad-add: %d bad-del: %d\n",
379 cache->name, cache->count, cache->req_count,
380 cache->bad_add_count, cache->bad_del_count);
381
382 }
383
384 static void
385 cacheFetch(Cache * cache, const RawAccessLogEntry * e)
386 {
387 assert(e);
388 ++ cache->req_count;
389
390 if (e->use_icp)
391 cacheQueryPeer(cache, e->key);
392 }
393
394 static fr_result
395 swapStateReader(FileIterator * fi)
396 {
397 storeSwapLogData *entry;
398
399 if (!fi->entry)
400 fi->entry = xcalloc(1, sizeof(storeSwapLogData));
401
402 entry = (storeSwapLogData *)fi->entry;
403
404 if (fread(entry, sizeof(*entry), 1, fi->file) != 1)
405 return frEof;
406
407 fi->inner_time = entry->lastref;
408
409 if (entry->op != SWAP_LOG_ADD && entry->op != SWAP_LOG_DEL) {
410 fprintf(stderr, "%s:%d: unknown swap log action\n", fi->fname, fi->line_count);
411 exit(-3);
412 }
413
414 return frOk;
415 }
416
417 static fr_result
418 accessLogReader(FileIterator * fi)
419 {
420 static char buf[4096];
421 RawAccessLogEntry *entry;
422 char *url;
423 char *method;
424 HttpRequestMethod method_id = METHOD_NONE;
425 char *hier = NULL;
426
427 assert(fi);
428
429 if (!fi->entry)
430 fi->entry = xcalloc(1, sizeof(RawAccessLogEntry));
431 else
432 memset(fi->entry, 0, sizeof(RawAccessLogEntry));
433
434 entry = (RawAccessLogEntry*)fi->entry;
435
436 if (!fgets(buf, sizeof(buf), fi->file))
437 return frEof; /* eof */
438
439 entry->timestamp = fi->inner_time = (time_t) atoi(buf);
440
441 url = strstr(buf, "://");
442
443 hier = url ? strstr(url, " - ") : NULL;
444
445 if (!url || !hier) {
446 /*fprintf(stderr, "%s:%d: strange access log entry '%s'\n",
447 * fname, scanned_count, buf); */
448 return frError;
449 }
450
451 method = url;
452
453 while (!xisdigit(*method)) {
454 if (*method == ' ')
455 *method = '\0';
456
457 --method;
458 }
459
460 method += 2;
461 method_id = methodStrToId(method);
462
463 if (method_id == METHOD_NONE) {
464 /*fprintf(stderr, "%s:%d: invalid method %s in '%s'\n",
465 * fname, scanned_count, method, buf); */
466 return frError;
467 }
468
469 while (*url)
470 --url;
471
472 ++url;
473
474 *hier = '\0';
475
476 hier += 3;
477
478 *strchr(hier, '/') = '\0';
479
480 /*fprintf(stdout, "%s:%d: %s %s %s\n",
481 * fname, count, method, url, hier); */
482 entry->use_icp = strcmp(hier, "NONE");
483
484 /* no ICP lookup for these status codes */
485 /* strcmp(hier, "NONE") &&
486 * strcmp(hier, "DIRECT") &&
487 * strcmp(hier, "FIREWALL_IP_DIRECT") &&
488 * strcmp(hier, "LOCAL_IP_DIRECT") &&
489 * strcmp(hier, "NO_DIRECT_FAIL") &&
490 * strcmp(hier, "NO_PARENT_DIRECT") &&
491 * strcmp(hier, "SINGLE_PARENT") &&
492 * strcmp(hier, "PASSTHROUGH_PARENT") &&
493 * strcmp(hier, "SSL_PARENT_MISS") &&
494 * strcmp(hier, "DEFAULT_PARENT");
495 */
496 memcpy(entry->key, storeKeyPublic(url, method_id), sizeof(entry->key));
497
498 /*fprintf(stdout, "%s:%d: %s %s %s %s\n",
499 * fname, count, method, storeKeyText(entry->key), url, hier); */
500 return frOk;
501 }
502
503 static void
504 cachePurge(Cache * cache, storeSwapLogData * s, int update_digest)
505 {
506 CacheEntry *olde = (CacheEntry *) hash_lookup(cache->hash, s->key);
507
508 if (!olde) {
509 ++ cache->bad_del_count;
510 } else {
511 assert(cache->count);
512 hash_remove_link(cache->hash, (hash_link *) olde);
513
514 if (update_digest)
515 cacheDigestDel(cache->digest, s->key);
516
517 cacheEntryDestroy(olde);
518
519 -- cache->count;
520 }
521 }
522
523 static void
524 cacheStore(Cache * cache, storeSwapLogData * s, int update_digest)
525 {
526 CacheEntry *olde = (CacheEntry *) hash_lookup(cache->hash, s->key);
527
528 if (olde) {
529 ++ cache->bad_add_count;
530 } else {
531 CacheEntry *e = cacheEntryCreate(s);
532 hash_join(cache->hash, (hash_link *)&e->key);
533 ++ cache->count;
534
535 if (update_digest)
536 cacheDigestAdd(cache->digest, e->key);
537 }
538 }
539
540 static void
541 cacheUpdateStore(Cache * cache, storeSwapLogData * s, int update_digest)
542 {
543 switch (s->op) {
544
545 case SWAP_LOG_ADD:
546 cacheStore(cache, s, update_digest);
547 break;
548
549 case SWAP_LOG_DEL:
550 cachePurge(cache, s, update_digest);
551 break;
552
553 default:
554 assert(0);
555 }
556 }
557
558 static int
559 usage(const char *prg_name)
560 {
561 fprintf(stderr, "usage: %s <access_log> <swap_state> ...\n",
562 prg_name);
563 return -1;
564 }
565
566 int
567 main(int argc, char *argv[])
568 {
569 FileIterator **fis = NULL;
570 const int fi_count = argc - 1;
571 int active_fi_count = 0;
572 time_t ready_time;
573 Cache *them, *us;
574 int i;
575
576 if (argc < 3)
577 return usage(argv[0]);
578
579 them = cacheCreate("them");
580
581 us = cacheCreate("us");
582
583 them->peer = us;
584
585 us->peer = them;
586
587 fis = (FileIterator **)xcalloc(fi_count, sizeof(FileIterator *));
588
589 /* init iterators with files */
590 fis[0] = fileIteratorCreate(argv[1], accessLogReader);
591
592 for (i = 2; i < argc; ++i)
593 fis[i - 1] = fileIteratorCreate(argv[i], swapStateReader);
594
595 /* check that all files were found */
596 for (i = 0; i < fi_count; ++i)
597 if (!fis[i])
598 return -2;
599
600 /* read prefix to get start-up contents of the peer cache */
601 ready_time = -1;
602
603 for (i = 1; i < fi_count; ++i) {
604 FileIterator *fi = fis[i];
605
606 while (fi->inner_time > 0) {
607 if (((storeSwapLogData *) fi->entry)->op == SWAP_LOG_DEL) {
608 cachePurge(them, (storeSwapLogData *)fi->entry, 0);
609
610 if (ready_time < 0)
611 ready_time = fi->inner_time;
612 } else {
613 if (ready_time > 0 && fi->inner_time > ready_time)
614 break;
615
616 cacheStore(them, (storeSwapLogData *)fi->entry, 0);
617 }
618
619 fileIteratorAdvance(fi);
620 }
621 }
622
623 /* digest peer cache content */
624 cacheResetDigest(them);
625
626 us->digest = cacheDigestClone(them->digest); /* @netw@ */
627
628 /* shift the time in access log to match ready_time */
629 fileIteratorSetCurTime(fis[0], ready_time);
630
631 /* iterate, use the iterator with the smallest positive inner_time */
632 cur_time = -1;
633
634 do {
635 int next_i = -1;
636 time_t next_time = -1;
637 active_fi_count = 0;
638
639 for (i = 0; i < fi_count; ++i) {
640 if (fis[i]->inner_time >= 0) {
641 if (!active_fi_count || fis[i]->inner_time < next_time) {
642 next_i = i;
643 next_time = fis[i]->inner_time;
644 }
645
646 ++active_fi_count;
647 }
648 }
649
650 if (next_i >= 0) {
651 cur_time = next_time;
652 /*fprintf(stderr, "%2d time: %d %s", next_i, (int)cur_time, ctime(&cur_time)); */
653
654 if (next_i == 0)
655 cacheFetch(us, (RawAccessLogEntry *)fis[next_i]->entry);
656 else
657 cacheUpdateStore(them, (storeSwapLogData *)fis[next_i]->entry, 1);
658
659 fileIteratorAdvance(fis[next_i]);
660 }
661 } while (active_fi_count);
662
663 /* report */
664 cacheReport(them);
665
666 cacheReport(us);
667
668 cacheQueryReport(us, &us->qstats);
669
670 /* clean */
671 for (i = 0; i < argc - 1; ++i) {
672 fileIteratorDestroy(fis[i]);
673 }
674
675 xfree(fis);
676 cacheDestroy(them);
677 cacheDestroy(us);
678 return 0;
679 }