]> git.ipfire.org Git - thirdparty/squid.git/blob - src/test_cache_digest.cc
protos.h refactoring, part one.
[thirdparty/squid.git] / src / test_cache_digest.cc
1
2 /*
3 * $Id$
4 *
5 * AUTHOR: Alex Rousskov
6 *
7 * SQUID Web Proxy Cache http://www.squid-cache.org/
8 * ----------------------------------------------------------
9 *
10 * Squid is the result of efforts by numerous individuals from
11 * the Internet community; see the CONTRIBUTORS file for full
12 * details. Many organizations have provided support for Squid's
13 * development; see the SPONSORS file for full details. Squid is
14 * Copyrighted (C) 2001 by the Regents of the University of
15 * California; see the COPYRIGHT file for full details. Squid
16 * incorporates software developed and/or copyrighted by other
17 * sources; see the CREDITS file for full details.
18 *
19 * This program is free software; you can redistribute it and/or modify
20 * it under the terms of the GNU General Public License as published by
21 * the Free Software Foundation; either version 2 of the License, or
22 * (at your option) any later version.
23 *
24 * This program is distributed in the hope that it will be useful,
25 * but WITHOUT ANY WARRANTY; without even the implied warranty of
26 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
27 * GNU General Public License for more details.
28 *
29 * You should have received a copy of the GNU General Public License
30 * along with this program; if not, write to the Free Software
31 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA.
32 *
33 */
34
35 /*
36 * Test-suite for playing with cache digests
37 */
38
39 #include "squid.h"
40 #include "CacheDigest.h"
41 #include "store_key_md5.h"
42
43 #if HAVE_ERRNO_H
44 #include <errno.h>
45 #endif
46
47 typedef struct {
48 int query_count;
49 int true_hit_count;
50 int true_miss_count;
51 int false_hit_count;
52 int false_miss_count;
53 } CacheQueryStats;
54
55 typedef struct _Cache Cache;
56
57 struct _Cache {
58 const char *name;
59 hash_table *hash;
60 CacheDigest *digest;
61 Cache *peer;
62 CacheQueryStats qstats;
63 int count; /* #currently cached entries */
64 int req_count; /* #requests to this cache */
65 int bad_add_count; /* #duplicate adds */
66 int bad_del_count; /* #dels with no prior add */
67 };
68
69 typedef struct _CacheEntry {
70 const cache_key *key;
71
72 struct _CacheEntry *next;
73 unsigned char key_arr[SQUID_MD5_DIGEST_LENGTH];
74 /* storeSwapLogData s; */
75 } CacheEntry;
76
77 /* parsed access log entry */
78
79 typedef struct {
80 cache_key key[SQUID_MD5_DIGEST_LENGTH];
81 time_t timestamp;
82 short int use_icp; /* true/false */
83 } RawAccessLogEntry;
84
85 typedef enum {
86 frError = -2, frMore = -1, frEof = 0, frOk = 1
87 } fr_result;
88
89 typedef struct _FileIterator FileIterator;
90 typedef fr_result(*FI_READER) (FileIterator * fi);
91
92 struct _FileIterator {
93 const char *fname;
94 FILE *file;
95 time_t inner_time; /* timestamp of the current entry */
96 time_t time_offset; /* to adjust time set by reader */
97 int line_count; /* number of lines scanned */
98 int bad_line_count; /* number of parsing errors */
99 int time_warp_count; /* number of out-of-order entries in the file */
100 FI_READER reader; /* reads next entry and updates inner_time */
101 void *entry; /* buffer for the current entry, freed with xfree() */
102 };
103
104 /* globals */
105 static time_t cur_time = -1; /* timestamp of the current log entry */
106
107 /* copied from url.c */
108 const char *RequestMethodStr[] = {
109 "NONE",
110 "GET",
111 "POST",
112 "PUT",
113 "HEAD",
114 "CONNECT",
115 "TRACE",
116 "PURGE"
117 };
118
119 /* copied from url.c */
120 static HttpRequestMethod
121 methodStrToId(const char *s)
122 {
123 if (strcasecmp(s, "GET") == 0) {
124 return METHOD_GET;
125 } else if (strcasecmp(s, "POST") == 0) {
126 return METHOD_POST;
127 } else if (strcasecmp(s, "PUT") == 0) {
128 return METHOD_PUT;
129 } else if (strcasecmp(s, "HEAD") == 0) {
130 return METHOD_HEAD;
131 } else if (strcasecmp(s, "CONNECT") == 0) {
132 return METHOD_CONNECT;
133 } else if (strcasecmp(s, "TRACE") == 0) {
134 return METHOD_TRACE;
135 } else if (strcasecmp(s, "PURGE") == 0) {
136 return METHOD_PURGE;
137 }
138
139 return METHOD_NONE;
140 }
141
142 /* FileIterator */
143
144 static void fileIteratorAdvance(FileIterator * fi);
145
146 static FileIterator *
147 fileIteratorCreate(const char *fname, FI_READER reader)
148 {
149 FileIterator *fi = (FileIterator *)xcalloc(1, sizeof(FileIterator));
150 assert(fname && reader);
151 fi->fname = fname;
152 fi->reader = reader;
153 fi->file = fopen(fname, "r");
154
155 if (!fi->file) {
156 fprintf(stderr, "cannot open %s: %s\n", fname, strerror(errno));
157 return NULL;
158 } else
159 fprintf(stderr, "opened %s\n", fname);
160
161 fileIteratorAdvance(fi);
162
163 return fi;
164 }
165
166 static void
167 fileIteratorDestroy(FileIterator * fi)
168 {
169 assert(fi);
170
171 if (fi->file) {
172 fclose(fi->file);
173 fprintf(stderr, "closed %s\n", fi->fname);
174 }
175
176 xfree(fi->entry);
177 xfree(fi);
178 }
179
180 static void
181 fileIteratorSetCurTime(FileIterator * fi, time_t ct)
182 {
183 assert(fi);
184 assert(fi->inner_time > 0);
185 fi->time_offset = ct - fi->inner_time;
186 }
187
188 static void
189 fileIteratorAdvance(FileIterator * fi)
190 {
191 int res;
192 assert(fi);
193
194 do {
195 const time_t last_time = fi->inner_time;
196 fi->inner_time = -1;
197 res = fi->reader(fi);
198 ++ fi->line_count;
199
200 if (fi->inner_time < 0)
201 fi->inner_time = last_time;
202 else
203 fi->inner_time += fi->time_offset;
204
205 if (res == frError)
206 ++ fi->bad_line_count;
207 else if (res == frEof) {
208 fprintf(stderr, "exhausted %s (%d entries) at %s",
209 fi->fname, fi->line_count, ctime(&fi->inner_time));
210 fi->inner_time = -1;
211 } else if (fi->inner_time < last_time) {
212 assert(last_time >= 0);
213 ++ fi->time_warp_count;
214 fi->inner_time = last_time;
215 }
216
217 /* report progress */
218 if (!(fi->line_count % 50000))
219 fprintf(stderr, "%s scanned %d K entries (%d bad) at %s",
220 fi->fname, fi->line_count / 1000, fi->bad_line_count,
221 ctime(&fi->inner_time));
222 } while (res < 0);
223 }
224
225 /* CacheEntry */
226
227 static CacheEntry *
228 cacheEntryCreate(const storeSwapLogData * s)
229 {
230 CacheEntry *e = (CacheEntry *)xcalloc(1, sizeof(CacheEntry));
231 assert(s);
232 /* e->s = *s; */
233 memcpy(e->key_arr, s->key, SQUID_MD5_DIGEST_LENGTH);
234 e->key = &e->key_arr[0];
235 return e;
236 }
237
238 static void
239 cacheEntryDestroy(CacheEntry * e)
240 {
241 assert(e);
242 xfree(e);
243 }
244
245 /* Cache */
246
247 static Cache *
248 cacheCreate(const char *name)
249 {
250 Cache *c;
251 assert(name && strlen(name));
252 c = (Cache *)xcalloc(1, sizeof(Cache));
253 c->name = name;
254 c->hash = hash_create(storeKeyHashCmp, (int)2e6, storeKeyHashHash);
255 return c;
256 }
257
258 static void
259 cacheDestroy(Cache * cache)
260 {
261 CacheEntry *e = NULL;
262 hash_table *hash;
263 assert(cache);
264 hash = cache->hash;
265 /* destroy hash table contents */
266 hash_first(hash);
267
268 while ((e = (CacheEntry *)hash_next(hash))) {
269 hash_remove_link(hash, (hash_link *) e);
270 cacheEntryDestroy(e);
271 }
272
273 /* destroy the hash table itself */
274 hashFreeMemory(hash);
275
276 if (cache->digest)
277 cacheDigestDestroy(cache->digest);
278
279 xfree(cache);
280 }
281
282 /* re-digests currently hashed entries */
283 static void
284 cacheResetDigest(Cache * cache)
285 {
286 CacheEntry *e = NULL;
287 hash_table *hash;
288
289 struct timeval t_start, t_end;
290
291 assert(cache);
292 fprintf(stderr, "%s: init-ing digest with %d entries\n", cache->name, cache->count);
293
294 if (cache->digest)
295 cacheDigestDestroy(cache->digest);
296
297 hash = cache->hash;
298
299 cache->digest = cacheDigestCreate(cache->count + 1, 6);
300
301 if (!cache->count)
302 return;
303
304 gettimeofday(&t_start, NULL);
305
306 hash_first(hash);
307
308 while ((e = (CacheEntry *)hash_next(hash))) {
309 cacheDigestAdd(cache->digest, e->key);
310 }
311
312 gettimeofday(&t_end, NULL);
313 assert(cache->digest->count == cache->count);
314 fprintf(stderr, "%s: init-ed digest with %d entries\n",
315 cache->name, cache->digest->count);
316 fprintf(stderr, "%s: init took: %f sec, %f sec/M\n",
317 cache->name,
318 tvSubDsec(t_start, t_end),
319 (double) 1e6 * tvSubDsec(t_start, t_end) / cache->count);
320 /* check how long it takes to traverse the hash */
321 gettimeofday(&t_start, NULL);
322 hash_first(hash);
323
324 for (e = (CacheEntry *)hash_next(hash); e; e = (CacheEntry *)hash_next(hash)) {}
325
326 gettimeofday(&t_end, NULL);
327 fprintf(stderr, "%s: hash scan took: %f sec, %f sec/M\n",
328 cache->name,
329 tvSubDsec(t_start, t_end),
330 (double) 1e6 * tvSubDsec(t_start, t_end) / cache->count);
331 }
332
333 static void
334 cacheQueryPeer(Cache * cache, const cache_key * key)
335 {
336 const int peer_has_it = hash_lookup(cache->peer->hash, key) != NULL;
337 const int we_think_we_have_it = cacheDigestTest(cache->digest, key);
338
339 ++ cache->qstats.query_count;
340
341 if (peer_has_it) {
342 if (we_think_we_have_it)
343 ++ cache->qstats.true_hit_count;
344 else
345 ++ cache->qstats.false_miss_count;
346 } else {
347 if (we_think_we_have_it)
348 ++ cache->qstats.false_hit_count;
349 else
350 ++ cache->qstats.true_miss_count;
351 }
352 }
353
354 static void
355 cacheQueryReport(Cache * cache, CacheQueryStats * stats)
356 {
357 fprintf(stdout, "%s: peer queries: %d (%d%%)\n",
358 cache->name,
359 stats->query_count, xpercentInt(stats->query_count, cache->req_count)
360 );
361 fprintf(stdout, "%s: t-hit: %d (%d%%) t-miss: %d (%d%%) t-*: %d (%d%%)\n",
362 cache->name,
363 stats->true_hit_count, xpercentInt(stats->true_hit_count, stats->query_count),
364 stats->true_miss_count, xpercentInt(stats->true_miss_count, stats->query_count),
365 stats->true_hit_count + stats->true_miss_count,
366 xpercentInt(stats->true_hit_count + stats->true_miss_count, stats->query_count)
367 );
368 fprintf(stdout, "%s: f-hit: %d (%d%%) f-miss: %d (%d%%) f-*: %d (%d%%)\n",
369 cache->name,
370 stats->false_hit_count, xpercentInt(stats->false_hit_count, stats->query_count),
371 stats->false_miss_count, xpercentInt(stats->false_miss_count, stats->query_count),
372 stats->false_hit_count + stats->false_miss_count,
373 xpercentInt(stats->false_hit_count + stats->false_miss_count, stats->query_count)
374 );
375 }
376
377 static void
378 cacheReport(Cache * cache)
379 {
380 fprintf(stdout, "%s: entries: %d reqs: %d bad-add: %d bad-del: %d\n",
381 cache->name, cache->count, cache->req_count,
382 cache->bad_add_count, cache->bad_del_count);
383
384 }
385
386 static void
387 cacheFetch(Cache * cache, const RawAccessLogEntry * e)
388 {
389 assert(e);
390 ++ cache->req_count;
391
392 if (e->use_icp)
393 cacheQueryPeer(cache, e->key);
394 }
395
396 static fr_result
397 swapStateReader(FileIterator * fi)
398 {
399 storeSwapLogData *entry;
400
401 if (!fi->entry)
402 fi->entry = xcalloc(1, sizeof(storeSwapLogData));
403
404 entry = (storeSwapLogData *)fi->entry;
405
406 if (fread(entry, sizeof(*entry), 1, fi->file) != 1)
407 return frEof;
408
409 fi->inner_time = entry->lastref;
410
411 if (entry->op != SWAP_LOG_ADD && entry->op != SWAP_LOG_DEL) {
412 fprintf(stderr, "%s:%d: unknown swap log action\n", fi->fname, fi->line_count);
413 exit(-3);
414 }
415
416 return frOk;
417 }
418
419 static fr_result
420 accessLogReader(FileIterator * fi)
421 {
422 static char buf[4096];
423 RawAccessLogEntry *entry;
424 char *url;
425 char *method;
426 HttpRequestMethod method_id = METHOD_NONE;
427 char *hier = NULL;
428
429 assert(fi);
430
431 if (!fi->entry)
432 fi->entry = xcalloc(1, sizeof(RawAccessLogEntry));
433 else
434 memset(fi->entry, 0, sizeof(RawAccessLogEntry));
435
436 entry = (RawAccessLogEntry*)fi->entry;
437
438 if (!fgets(buf, sizeof(buf), fi->file))
439 return frEof; /* eof */
440
441 entry->timestamp = fi->inner_time = (time_t) atoi(buf);
442
443 url = strstr(buf, "://");
444
445 hier = url ? strstr(url, " - ") : NULL;
446
447 if (!url || !hier) {
448 /*fprintf(stderr, "%s:%d: strange access log entry '%s'\n",
449 * fname, scanned_count, buf); */
450 return frError;
451 }
452
453 method = url;
454
455 while (!xisdigit(*method)) {
456 if (*method == ' ')
457 *method = '\0';
458
459 --method;
460 }
461
462 method += 2;
463 method_id = methodStrToId(method);
464
465 if (method_id == METHOD_NONE) {
466 /*fprintf(stderr, "%s:%d: invalid method %s in '%s'\n",
467 * fname, scanned_count, method, buf); */
468 return frError;
469 }
470
471 while (*url)
472 --url;
473
474 ++url;
475
476 *hier = '\0';
477
478 hier += 3;
479
480 *strchr(hier, '/') = '\0';
481
482 /*fprintf(stdout, "%s:%d: %s %s %s\n",
483 * fname, count, method, url, hier); */
484 entry->use_icp = strcmp(hier, "NONE");
485
486 /* no ICP lookup for these status codes */
487 /* strcmp(hier, "NONE") &&
488 * strcmp(hier, "DIRECT") &&
489 * strcmp(hier, "FIREWALL_IP_DIRECT") &&
490 * strcmp(hier, "LOCAL_IP_DIRECT") &&
491 * strcmp(hier, "NO_DIRECT_FAIL") &&
492 * strcmp(hier, "NO_PARENT_DIRECT") &&
493 * strcmp(hier, "SINGLE_PARENT") &&
494 * strcmp(hier, "PASSTHROUGH_PARENT") &&
495 * strcmp(hier, "SSL_PARENT_MISS") &&
496 * strcmp(hier, "DEFAULT_PARENT");
497 */
498 memcpy(entry->key, storeKeyPublic(url, method_id), sizeof(entry->key));
499
500 /*fprintf(stdout, "%s:%d: %s %s %s %s\n",
501 * fname, count, method, storeKeyText(entry->key), url, hier); */
502 return frOk;
503 }
504
505 static void
506 cachePurge(Cache * cache, storeSwapLogData * s, int update_digest)
507 {
508 CacheEntry *olde = (CacheEntry *) hash_lookup(cache->hash, s->key);
509
510 if (!olde) {
511 ++ cache->bad_del_count;
512 } else {
513 assert(cache->count);
514 hash_remove_link(cache->hash, (hash_link *) olde);
515
516 if (update_digest)
517 cacheDigestDel(cache->digest, s->key);
518
519 cacheEntryDestroy(olde);
520
521 -- cache->count;
522 }
523 }
524
525 static void
526 cacheStore(Cache * cache, storeSwapLogData * s, int update_digest)
527 {
528 CacheEntry *olde = (CacheEntry *) hash_lookup(cache->hash, s->key);
529
530 if (olde) {
531 ++ cache->bad_add_count;
532 } else {
533 CacheEntry *e = cacheEntryCreate(s);
534 hash_join(cache->hash, (hash_link *)&e->key);
535 ++ cache->count;
536
537 if (update_digest)
538 cacheDigestAdd(cache->digest, e->key);
539 }
540 }
541
542 static void
543 cacheUpdateStore(Cache * cache, storeSwapLogData * s, int update_digest)
544 {
545 switch (s->op) {
546
547 case SWAP_LOG_ADD:
548 cacheStore(cache, s, update_digest);
549 break;
550
551 case SWAP_LOG_DEL:
552 cachePurge(cache, s, update_digest);
553 break;
554
555 default:
556 assert(0);
557 }
558 }
559
560 static int
561 usage(const char *prg_name)
562 {
563 fprintf(stderr, "usage: %s <access_log> <swap_state> ...\n",
564 prg_name);
565 return -1;
566 }
567
568 int
569 main(int argc, char *argv[])
570 {
571 FileIterator **fis = NULL;
572 const int fi_count = argc - 1;
573 int active_fi_count = 0;
574 time_t ready_time;
575 Cache *them, *us;
576 int i;
577
578 if (argc < 3)
579 return usage(argv[0]);
580
581 them = cacheCreate("them");
582
583 us = cacheCreate("us");
584
585 them->peer = us;
586
587 us->peer = them;
588
589 fis = (FileIterator **)xcalloc(fi_count, sizeof(FileIterator *));
590
591 /* init iterators with files */
592 fis[0] = fileIteratorCreate(argv[1], accessLogReader);
593
594 for (i = 2; i < argc; ++i)
595 fis[i - 1] = fileIteratorCreate(argv[i], swapStateReader);
596
597 /* check that all files were found */
598 for (i = 0; i < fi_count; ++i)
599 if (!fis[i])
600 return -2;
601
602 /* read prefix to get start-up contents of the peer cache */
603 ready_time = -1;
604
605 for (i = 1; i < fi_count; ++i) {
606 FileIterator *fi = fis[i];
607
608 while (fi->inner_time > 0) {
609 if (((storeSwapLogData *) fi->entry)->op == SWAP_LOG_DEL) {
610 cachePurge(them, (storeSwapLogData *)fi->entry, 0);
611
612 if (ready_time < 0)
613 ready_time = fi->inner_time;
614 } else {
615 if (ready_time > 0 && fi->inner_time > ready_time)
616 break;
617
618 cacheStore(them, (storeSwapLogData *)fi->entry, 0);
619 }
620
621 fileIteratorAdvance(fi);
622 }
623 }
624
625 /* digest peer cache content */
626 cacheResetDigest(them);
627
628 us->digest = cacheDigestClone(them->digest); /* @netw@ */
629
630 /* shift the time in access log to match ready_time */
631 fileIteratorSetCurTime(fis[0], ready_time);
632
633 /* iterate, use the iterator with the smallest positive inner_time */
634 cur_time = -1;
635
636 do {
637 int next_i = -1;
638 time_t next_time = -1;
639 active_fi_count = 0;
640
641 for (i = 0; i < fi_count; ++i) {
642 if (fis[i]->inner_time >= 0) {
643 if (!active_fi_count || fis[i]->inner_time < next_time) {
644 next_i = i;
645 next_time = fis[i]->inner_time;
646 }
647
648 ++active_fi_count;
649 }
650 }
651
652 if (next_i >= 0) {
653 cur_time = next_time;
654 /*fprintf(stderr, "%2d time: %d %s", next_i, (int)cur_time, ctime(&cur_time)); */
655
656 if (next_i == 0)
657 cacheFetch(us, (RawAccessLogEntry *)fis[next_i]->entry);
658 else
659 cacheUpdateStore(them, (storeSwapLogData *)fis[next_i]->entry, 1);
660
661 fileIteratorAdvance(fis[next_i]);
662 }
663 } while (active_fi_count);
664
665 /* report */
666 cacheReport(them);
667
668 cacheReport(us);
669
670 cacheQueryReport(us, &us->qstats);
671
672 /* clean */
673 for (i = 0; i < argc - 1; ++i) {
674 fileIteratorDestroy(fis[i]);
675 }
676
677 xfree(fis);
678 cacheDestroy(them);
679 cacheDestroy(us);
680 return 0;
681 }