]> git.ipfire.org Git - thirdparty/squid.git/blob - src/test_cache_digest.cc
Merged from trunk
[thirdparty/squid.git] / src / test_cache_digest.cc
1 /*
2 * Copyright (C) 1996-2014 The Squid Software Foundation and contributors
3 *
4 * Squid software is distributed under GPLv2+ license and includes
5 * contributions from numerous individuals and organizations.
6 * Please see the COPYING and CONTRIBUTORS files for details.
7 */
8
9 /*
10 * Test-suite for playing with cache digests
11 */
12
13 #include "squid.h"
14 #include "CacheDigest.h"
15 #include "store_key_md5.h"
16
17 #include <cerrno>
18
19 typedef struct {
20 int query_count;
21 int true_hit_count;
22 int true_miss_count;
23 int false_hit_count;
24 int false_miss_count;
25 } CacheQueryStats;
26
27 typedef struct _Cache Cache;
28
29 struct _Cache {
30 const char *name;
31 hash_table *hash;
32 CacheDigest *digest;
33 Cache *peer;
34 CacheQueryStats qstats;
35 int count; /* #currently cached entries */
36 int req_count; /* #requests to this cache */
37 int bad_add_count; /* #duplicate adds */
38 int bad_del_count; /* #dels with no prior add */
39 };
40
41 typedef struct _CacheEntry {
42 const cache_key *key;
43
44 struct _CacheEntry *next;
45 unsigned char key_arr[SQUID_MD5_DIGEST_LENGTH];
46 /* storeSwapLogData s; */
47 } CacheEntry;
48
49 /* parsed access log entry */
50
51 typedef struct {
52 cache_key key[SQUID_MD5_DIGEST_LENGTH];
53 time_t timestamp;
54 short int use_icp; /* true/false */
55 } RawAccessLogEntry;
56
57 typedef enum {
58 frError = -2, frMore = -1, frEof = 0, frOk = 1
59 } fr_result;
60
61 typedef struct _FileIterator FileIterator;
62 typedef fr_result(*FI_READER) (FileIterator * fi);
63
64 struct _FileIterator {
65 const char *fname;
66 FILE *file;
67 time_t inner_time; /* timestamp of the current entry */
68 time_t time_offset; /* to adjust time set by reader */
69 int line_count; /* number of lines scanned */
70 int bad_line_count; /* number of parsing errors */
71 int time_warp_count; /* number of out-of-order entries in the file */
72 FI_READER reader; /* reads next entry and updates inner_time */
73 void *entry; /* buffer for the current entry, freed with xfree() */
74 };
75
76 /* globals */
77 static time_t cur_time = -1; /* timestamp of the current log entry */
78
79 /* copied from url.c */
80 static HttpRequestMethod
81 methodStrToId(const char *s)
82 {
83 if (strcasecmp(s, "GET") == 0) {
84 return METHOD_GET;
85 } else if (strcasecmp(s, "POST") == 0) {
86 return METHOD_POST;
87 } else if (strcasecmp(s, "PUT") == 0) {
88 return METHOD_PUT;
89 } else if (strcasecmp(s, "HEAD") == 0) {
90 return METHOD_HEAD;
91 } else if (strcasecmp(s, "CONNECT") == 0) {
92 return METHOD_CONNECT;
93 } else if (strcasecmp(s, "TRACE") == 0) {
94 return METHOD_TRACE;
95 } else if (strcasecmp(s, "PURGE") == 0) {
96 return METHOD_PURGE;
97 }
98
99 return METHOD_NONE;
100 }
101
102 /* FileIterator */
103
104 static void fileIteratorAdvance(FileIterator * fi);
105
106 static FileIterator *
107 fileIteratorCreate(const char *fname, FI_READER reader)
108 {
109 FileIterator *fi = (FileIterator *)xcalloc(1, sizeof(FileIterator));
110 assert(fname && reader);
111 fi->fname = fname;
112 fi->reader = reader;
113 fi->file = fopen(fname, "r");
114
115 if (!fi->file) {
116 fprintf(stderr, "cannot open %s: %s\n", fname, strerror(errno));
117 return NULL;
118 } else
119 fprintf(stderr, "opened %s\n", fname);
120
121 fileIteratorAdvance(fi);
122
123 return fi;
124 }
125
126 static void
127 fileIteratorDestroy(FileIterator * fi)
128 {
129 assert(fi);
130
131 if (fi->file) {
132 fclose(fi->file);
133 fprintf(stderr, "closed %s\n", fi->fname);
134 }
135
136 xfree(fi->entry);
137 xfree(fi);
138 }
139
140 static void
141 fileIteratorSetCurTime(FileIterator * fi, time_t ct)
142 {
143 assert(fi);
144 assert(fi->inner_time > 0);
145 fi->time_offset = ct - fi->inner_time;
146 }
147
148 static void
149 fileIteratorAdvance(FileIterator * fi)
150 {
151 int res;
152 assert(fi);
153
154 do {
155 const time_t last_time = fi->inner_time;
156 fi->inner_time = -1;
157 res = fi->reader(fi);
158 ++ fi->line_count;
159
160 if (fi->inner_time < 0)
161 fi->inner_time = last_time;
162 else
163 fi->inner_time += fi->time_offset;
164
165 if (res == frError)
166 ++ fi->bad_line_count;
167 else if (res == frEof) {
168 fprintf(stderr, "exhausted %s (%d entries) at %s",
169 fi->fname, fi->line_count, ctime(&fi->inner_time));
170 fi->inner_time = -1;
171 } else if (fi->inner_time < last_time) {
172 assert(last_time >= 0);
173 ++ fi->time_warp_count;
174 fi->inner_time = last_time;
175 }
176
177 /* report progress */
178 if (!(fi->line_count % 50000))
179 fprintf(stderr, "%s scanned %d K entries (%d bad) at %s",
180 fi->fname, fi->line_count / 1000, fi->bad_line_count,
181 ctime(&fi->inner_time));
182 } while (res < 0);
183 }
184
185 /* CacheEntry */
186
187 static CacheEntry *
188 cacheEntryCreate(const storeSwapLogData * s)
189 {
190 CacheEntry *e = (CacheEntry *)xcalloc(1, sizeof(CacheEntry));
191 assert(s);
192 /* e->s = *s; */
193 memcpy(e->key_arr, s->key, SQUID_MD5_DIGEST_LENGTH);
194 e->key = &e->key_arr[0];
195 return e;
196 }
197
198 static void
199 cacheEntryDestroy(CacheEntry * e)
200 {
201 assert(e);
202 xfree(e);
203 }
204
205 /* Cache */
206
207 static Cache *
208 cacheCreate(const char *name)
209 {
210 Cache *c;
211 assert(name && strlen(name));
212 c = (Cache *)xcalloc(1, sizeof(Cache));
213 c->name = name;
214 c->hash = hash_create(storeKeyHashCmp, (int)2e6, storeKeyHashHash);
215 return c;
216 }
217
218 static void
219 cacheDestroy(Cache * cache)
220 {
221 CacheEntry *e = NULL;
222 hash_table *hash;
223 assert(cache);
224 hash = cache->hash;
225 /* destroy hash table contents */
226 hash_first(hash);
227
228 while ((e = (CacheEntry *)hash_next(hash))) {
229 hash_remove_link(hash, (hash_link *) e);
230 cacheEntryDestroy(e);
231 }
232
233 /* destroy the hash table itself */
234 hashFreeMemory(hash);
235
236 if (cache->digest)
237 cacheDigestDestroy(cache->digest);
238
239 xfree(cache);
240 }
241
242 /* re-digests currently hashed entries */
243 static void
244 cacheResetDigest(Cache * cache)
245 {
246 CacheEntry *e = NULL;
247 hash_table *hash;
248
249 struct timeval t_start, t_end;
250
251 assert(cache);
252 fprintf(stderr, "%s: init-ing digest with %d entries\n", cache->name, cache->count);
253
254 if (cache->digest)
255 cacheDigestDestroy(cache->digest);
256
257 hash = cache->hash;
258
259 cache->digest = cacheDigestCreate(cache->count + 1, 6);
260
261 if (!cache->count)
262 return;
263
264 gettimeofday(&t_start, NULL);
265
266 hash_first(hash);
267
268 while ((e = (CacheEntry *)hash_next(hash))) {
269 cacheDigestAdd(cache->digest, e->key);
270 }
271
272 gettimeofday(&t_end, NULL);
273 assert(cache->digest->count == cache->count);
274 fprintf(stderr, "%s: init-ed digest with %d entries\n",
275 cache->name, cache->digest->count);
276 fprintf(stderr, "%s: init took: %f sec, %f sec/M\n",
277 cache->name,
278 tvSubDsec(t_start, t_end),
279 (double) 1e6 * tvSubDsec(t_start, t_end) / cache->count);
280 /* check how long it takes to traverse the hash */
281 gettimeofday(&t_start, NULL);
282 hash_first(hash);
283
284 for (e = (CacheEntry *)hash_next(hash); e; e = (CacheEntry *)hash_next(hash)) {}
285
286 gettimeofday(&t_end, NULL);
287 fprintf(stderr, "%s: hash scan took: %f sec, %f sec/M\n",
288 cache->name,
289 tvSubDsec(t_start, t_end),
290 (double) 1e6 * tvSubDsec(t_start, t_end) / cache->count);
291 }
292
293 static void
294 cacheQueryPeer(Cache * cache, const cache_key * key)
295 {
296 const int peer_has_it = hash_lookup(cache->peer->hash, key) != NULL;
297 const int we_think_we_have_it = cacheDigestTest(cache->digest, key);
298
299 ++ cache->qstats.query_count;
300
301 if (peer_has_it) {
302 if (we_think_we_have_it)
303 ++ cache->qstats.true_hit_count;
304 else
305 ++ cache->qstats.false_miss_count;
306 } else {
307 if (we_think_we_have_it)
308 ++ cache->qstats.false_hit_count;
309 else
310 ++ cache->qstats.true_miss_count;
311 }
312 }
313
314 static void
315 cacheQueryReport(Cache * cache, CacheQueryStats * stats)
316 {
317 fprintf(stdout, "%s: peer queries: %d (%d%%)\n",
318 cache->name,
319 stats->query_count, xpercentInt(stats->query_count, cache->req_count)
320 );
321 fprintf(stdout, "%s: t-hit: %d (%d%%) t-miss: %d (%d%%) t-*: %d (%d%%)\n",
322 cache->name,
323 stats->true_hit_count, xpercentInt(stats->true_hit_count, stats->query_count),
324 stats->true_miss_count, xpercentInt(stats->true_miss_count, stats->query_count),
325 stats->true_hit_count + stats->true_miss_count,
326 xpercentInt(stats->true_hit_count + stats->true_miss_count, stats->query_count)
327 );
328 fprintf(stdout, "%s: f-hit: %d (%d%%) f-miss: %d (%d%%) f-*: %d (%d%%)\n",
329 cache->name,
330 stats->false_hit_count, xpercentInt(stats->false_hit_count, stats->query_count),
331 stats->false_miss_count, xpercentInt(stats->false_miss_count, stats->query_count),
332 stats->false_hit_count + stats->false_miss_count,
333 xpercentInt(stats->false_hit_count + stats->false_miss_count, stats->query_count)
334 );
335 }
336
337 static void
338 cacheReport(Cache * cache)
339 {
340 fprintf(stdout, "%s: entries: %d reqs: %d bad-add: %d bad-del: %d\n",
341 cache->name, cache->count, cache->req_count,
342 cache->bad_add_count, cache->bad_del_count);
343
344 }
345
346 static void
347 cacheFetch(Cache * cache, const RawAccessLogEntry * e)
348 {
349 assert(e);
350 ++ cache->req_count;
351
352 if (e->use_icp)
353 cacheQueryPeer(cache, e->key);
354 }
355
356 static fr_result
357 swapStateReader(FileIterator * fi)
358 {
359 storeSwapLogData *entry;
360
361 if (!fi->entry)
362 fi->entry = xcalloc(1, sizeof(storeSwapLogData));
363
364 entry = (storeSwapLogData *)fi->entry;
365
366 if (fread(entry, sizeof(*entry), 1, fi->file) != 1)
367 return frEof;
368
369 fi->inner_time = entry->lastref;
370
371 if (entry->op != SWAP_LOG_ADD && entry->op != SWAP_LOG_DEL) {
372 fprintf(stderr, "%s:%d: unknown swap log action\n", fi->fname, fi->line_count);
373 exit(-3);
374 }
375
376 return frOk;
377 }
378
379 static fr_result
380 accessLogReader(FileIterator * fi)
381 {
382 static char buf[4096];
383 RawAccessLogEntry *entry;
384 char *url;
385 char *method;
386 HttpRequestMethod method_id = METHOD_NONE;
387 char *hier = NULL;
388
389 assert(fi);
390
391 if (!fi->entry)
392 fi->entry = xcalloc(1, sizeof(RawAccessLogEntry));
393 else
394 memset(fi->entry, 0, sizeof(RawAccessLogEntry));
395
396 entry = (RawAccessLogEntry*)fi->entry;
397
398 if (!fgets(buf, sizeof(buf), fi->file))
399 return frEof; /* eof */
400
401 entry->timestamp = fi->inner_time = (time_t) atoi(buf);
402
403 url = strstr(buf, "://");
404
405 hier = url ? strstr(url, " - ") : NULL;
406
407 if (!url || !hier) {
408 /*fprintf(stderr, "%s:%d: strange access log entry '%s'\n",
409 * fname, scanned_count, buf); */
410 return frError;
411 }
412
413 method = url;
414
415 while (!xisdigit(*method)) {
416 if (*method == ' ')
417 *method = '\0';
418
419 --method;
420 }
421
422 method += 2;
423 method_id = methodStrToId(method);
424
425 if (method_id == METHOD_NONE) {
426 /*fprintf(stderr, "%s:%d: invalid method %s in '%s'\n",
427 * fname, scanned_count, method, buf); */
428 return frError;
429 }
430
431 while (*url)
432 --url;
433
434 ++url;
435
436 *hier = '\0';
437
438 hier += 3;
439
440 *strchr(hier, '/') = '\0';
441
442 /*fprintf(stdout, "%s:%d: %s %s %s\n",
443 * fname, count, method, url, hier); */
444 entry->use_icp = strcmp(hier, "NONE");
445
446 /* no ICP lookup for these status codes */
447 /* strcmp(hier, "NONE") &&
448 * strcmp(hier, "DIRECT") &&
449 * strcmp(hier, "FIREWALL_IP_DIRECT") &&
450 * strcmp(hier, "LOCAL_IP_DIRECT") &&
451 * strcmp(hier, "NO_DIRECT_FAIL") &&
452 * strcmp(hier, "NO_PARENT_DIRECT") &&
453 * strcmp(hier, "SINGLE_PARENT") &&
454 * strcmp(hier, "PASSTHROUGH_PARENT") &&
455 * strcmp(hier, "SSL_PARENT_MISS") &&
456 * strcmp(hier, "DEFAULT_PARENT");
457 */
458 memcpy(entry->key, storeKeyPublic(url, method_id), sizeof(entry->key));
459
460 /*fprintf(stdout, "%s:%d: %s %s %s %s\n",
461 * fname, count, method, storeKeyText(entry->key), url, hier); */
462 return frOk;
463 }
464
465 static void
466 cachePurge(Cache * cache, storeSwapLogData * s, int update_digest)
467 {
468 CacheEntry *olde = (CacheEntry *) hash_lookup(cache->hash, s->key);
469
470 if (!olde) {
471 ++ cache->bad_del_count;
472 } else {
473 assert(cache->count);
474 hash_remove_link(cache->hash, (hash_link *) olde);
475
476 if (update_digest)
477 cacheDigestDel(cache->digest, s->key);
478
479 cacheEntryDestroy(olde);
480
481 -- cache->count;
482 }
483 }
484
485 static void
486 cacheStore(Cache * cache, storeSwapLogData * s, int update_digest)
487 {
488 CacheEntry *olde = (CacheEntry *) hash_lookup(cache->hash, s->key);
489
490 if (olde) {
491 ++ cache->bad_add_count;
492 } else {
493 CacheEntry *e = cacheEntryCreate(s);
494 hash_join(cache->hash, (hash_link *)&e->key);
495 ++ cache->count;
496
497 if (update_digest)
498 cacheDigestAdd(cache->digest, e->key);
499 }
500 }
501
502 static void
503 cacheUpdateStore(Cache * cache, storeSwapLogData * s, int update_digest)
504 {
505 switch (s->op) {
506
507 case SWAP_LOG_ADD:
508 cacheStore(cache, s, update_digest);
509 break;
510
511 case SWAP_LOG_DEL:
512 cachePurge(cache, s, update_digest);
513 break;
514
515 default:
516 assert(0);
517 }
518 }
519
520 static int
521 usage(const char *prg_name)
522 {
523 fprintf(stderr, "usage: %s <access_log> <swap_state> ...\n",
524 prg_name);
525 return -1;
526 }
527
528 int
529 main(int argc, char *argv[])
530 {
531 FileIterator **fis = NULL;
532 const int fi_count = argc - 1;
533 int active_fi_count = 0;
534 time_t ready_time;
535 Cache *them, *us;
536 int i;
537
538 if (argc < 3)
539 return usage(argv[0]);
540
541 them = cacheCreate("them");
542
543 us = cacheCreate("us");
544
545 them->peer = us;
546
547 us->peer = them;
548
549 fis = (FileIterator **)xcalloc(fi_count, sizeof(FileIterator *));
550
551 /* init iterators with files */
552 fis[0] = fileIteratorCreate(argv[1], accessLogReader);
553
554 for (i = 2; i < argc; ++i)
555 fis[i - 1] = fileIteratorCreate(argv[i], swapStateReader);
556
557 /* check that all files were found */
558 for (i = 0; i < fi_count; ++i)
559 if (!fis[i])
560 return -2;
561
562 /* read prefix to get start-up contents of the peer cache */
563 ready_time = -1;
564
565 for (i = 1; i < fi_count; ++i) {
566 FileIterator *fi = fis[i];
567
568 while (fi->inner_time > 0) {
569 if (((storeSwapLogData *) fi->entry)->op == SWAP_LOG_DEL) {
570 cachePurge(them, (storeSwapLogData *)fi->entry, 0);
571
572 if (ready_time < 0)
573 ready_time = fi->inner_time;
574 } else {
575 if (ready_time > 0 && fi->inner_time > ready_time)
576 break;
577
578 cacheStore(them, (storeSwapLogData *)fi->entry, 0);
579 }
580
581 fileIteratorAdvance(fi);
582 }
583 }
584
585 /* digest peer cache content */
586 cacheResetDigest(them);
587
588 us->digest = cacheDigestClone(them->digest); /* @netw@ */
589
590 /* shift the time in access log to match ready_time */
591 fileIteratorSetCurTime(fis[0], ready_time);
592
593 /* iterate, use the iterator with the smallest positive inner_time */
594 cur_time = -1;
595
596 do {
597 int next_i = -1;
598 time_t next_time = -1;
599 active_fi_count = 0;
600
601 for (i = 0; i < fi_count; ++i) {
602 if (fis[i]->inner_time >= 0) {
603 if (!active_fi_count || fis[i]->inner_time < next_time) {
604 next_i = i;
605 next_time = fis[i]->inner_time;
606 }
607
608 ++active_fi_count;
609 }
610 }
611
612 if (next_i >= 0) {
613 cur_time = next_time;
614 /*fprintf(stderr, "%2d time: %d %s", next_i, (int)cur_time, ctime(&cur_time)); */
615
616 if (next_i == 0)
617 cacheFetch(us, (RawAccessLogEntry *)fis[next_i]->entry);
618 else
619 cacheUpdateStore(them, (storeSwapLogData *)fis[next_i]->entry, 1);
620
621 fileIteratorAdvance(fis[next_i]);
622 }
623 } while (active_fi_count);
624
625 /* report */
626 cacheReport(them);
627
628 cacheReport(us);
629
630 cacheQueryReport(us, &us->qstats);
631
632 /* clean */
633 for (i = 0; i < argc - 1; ++i) {
634 fileIteratorDestroy(fis[i]);
635 }
636
637 xfree(fis);
638 cacheDestroy(them);
639 cacheDestroy(us);
640 return 0;
641 }
642