]> git.ipfire.org Git - thirdparty/squid.git/blob - src/test_cache_digest.cc
- tmp modified icp detection algorithm: access log entries with hierarchy
[thirdparty/squid.git] / src / test_cache_digest.cc
1
2 /*
3 * $Id: test_cache_digest.cc,v 1.14 1998/04/01 17:53:59 rousskov Exp $
4 *
5 * AUTHOR: Alex Rousskov
6 *
7 * SQUID Internet Object Cache http://squid.nlanr.net/Squid/
8 * --------------------------------------------------------
9 *
10 * Squid is the result of efforts by numerous individuals from the
11 * Internet community. Development is led by Duane Wessels of the
12 * National Laboratory for Applied Network Research and funded by
13 * the National Science Foundation.
14 *
15 * This program is free software; you can redistribute it and/or modify
16 * it under the terms of the GNU General Public License as published by
17 * the Free Software Foundation; either version 2 of the License, or
18 * (at your option) any later version.
19 *
20 * This program is distributed in the hope that it will be useful,
21 * but WITHOUT ANY WARRANTY; without even the implied warranty of
22 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23 * GNU General Public License for more details.
24 *
25 * You should have received a copy of the GNU General Public License
26 * along with this program; if not, write to the Free Software
27 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
28 *
29 */
30
31 /*
32 * Test-suite for playing with cache digests
33 */
34
35 #include "squid.h"
36
37 typedef struct {
38 int query_count;
39 int true_hit_count;
40 int true_miss_count;
41 int false_hit_count;
42 int false_miss_count;
43 } CacheQueryStats;
44
45 typedef struct _Cache Cache;
46 struct _Cache {
47 const char *name;
48 hash_table *hash;
49 CacheDigest *digest;
50 Cache *peer;
51 CacheQueryStats qstats;
52 int count; /* #currently cached entries */
53 int req_count; /* #requests to this cache */
54 int bad_add_count; /* #duplicate adds */
55 int bad_del_count; /* #dels with no prior add */
56 };
57
58
59 typedef struct _CacheEntry {
60 const cache_key *key;
61 struct _CacheEntry *next;
62 unsigned char key_arr[MD5_DIGEST_CHARS];
63 /* storeSwapLogData s; */
64 } CacheEntry;
65
66 /* parsed access log entry */
67 typedef struct {
68 cache_key key[MD5_DIGEST_CHARS];
69 time_t timestamp;
70 short int use_icp; /* true/false */
71 } RawAccessLogEntry;
72
73 typedef enum { frError = -2, frMore = -1, frEof = 0, frOk = 1 } fr_result;
74 typedef struct _FileIterator FileIterator;
75 typedef fr_result (*FI_READER)(FileIterator *fi);
76
77 struct _FileIterator {
78 const char *fname;
79 FILE *file;
80 time_t inner_time; /* timestamp of the current entry */
81 time_t time_offset; /* to adjust time set by reader */
82 int line_count; /* number of lines scanned */
83 int bad_line_count; /* number of parsing errors */
84 int time_warp_count;/* number of out-of-order entries in the file */
85 FI_READER reader; /* reads next entry and updates inner_time */
86 void *entry; /* buffer for the current entry, freed with xfree() */
87 };
88
89 /* globals */
90 static time_t cur_time = -1; /* timestamp of the current log entry */
91
92 #if 0
93
94 static int cacheIndexScanCleanPrefix(CacheIndex * idx, const char *fname, FILE * file);
95 static int cacheIndexScanAccessLog(CacheIndex * idx, const char *fname, FILE * file);
96
97 #endif
98
99 /* copied from url.c */
100 const char *RequestMethodStr[] =
101 {
102 "NONE",
103 "GET",
104 "POST",
105 "PUT",
106 "HEAD",
107 "CONNECT",
108 "TRACE",
109 "PURGE"
110 };
111 /* copied from url.c */
112 static method_t
113 methodStrToId(const char *s)
114 {
115 if (strcasecmp(s, "GET") == 0) {
116 return METHOD_GET;
117 } else if (strcasecmp(s, "POST") == 0) {
118 return METHOD_POST;
119 } else if (strcasecmp(s, "PUT") == 0) {
120 return METHOD_PUT;
121 } else if (strcasecmp(s, "HEAD") == 0) {
122 return METHOD_HEAD;
123 } else if (strcasecmp(s, "CONNECT") == 0) {
124 return METHOD_CONNECT;
125 } else if (strcasecmp(s, "TRACE") == 0) {
126 return METHOD_TRACE;
127 } else if (strcasecmp(s, "PURGE") == 0) {
128 return METHOD_PURGE;
129 }
130 return METHOD_NONE;
131 }
132
133 /* FileIterator */
134
135 static void fileIteratorAdvance(FileIterator *fi);
136
137 static FileIterator *
138 fileIteratorCreate(const char *fname, FI_READER reader)
139 {
140 FileIterator *fi = xcalloc(1, sizeof(FileIterator));
141 assert(fname && reader);
142 fi->fname = fname;
143 fi->reader = reader;
144 fi->file = fopen(fname, "r");
145 if (!fi->file) {
146 fprintf(stderr, "cannot open %s: %s\n", fname, strerror(errno));
147 return NULL;
148 } else
149 fprintf(stderr, "opened %s\n", fname);
150 fileIteratorAdvance(fi);
151 return fi;
152 }
153
154 static void
155 fileIteratorDestroy(FileIterator *fi)
156 {
157 assert(fi);
158 if (fi->file) {
159 fclose(fi->file);
160 fprintf(stderr, "closed %s\n", fi->fname);
161 }
162 xfree(fi->entry);
163 xfree(fi);
164 }
165
166 static void
167 fileIteratorSetCurTime(FileIterator *fi, time_t ct)
168 {
169 assert(fi);
170 assert(fi->inner_time > 0);
171 fi->time_offset = ct - fi->inner_time;
172 }
173
174 static void
175 fileIteratorAdvance(FileIterator *fi)
176 {
177 int res;
178 assert(fi);
179 do {
180 const time_t last_time = fi->inner_time;
181 fi->inner_time = -1;
182 res = fi->reader(fi);
183 fi->line_count++;
184 if (fi->inner_time < 0)
185 fi->inner_time = last_time;
186 else
187 fi->inner_time += fi->time_offset;
188 if (res == frError)
189 fi->bad_line_count++;
190 else
191 if (res == frEof) {
192 fprintf(stderr, "exhausted %s (%d entries) at %s",
193 fi->fname, fi->line_count, ctime(&fi->inner_time));
194 fi->inner_time = -1;
195 } else
196 if (fi->inner_time < last_time) {
197 assert(last_time >= 0);
198 fi->time_warp_count++;
199 fi->inner_time = last_time;
200 }
201 /* report progress */
202 if (!(fi->line_count % 50000))
203 fprintf(stderr, "%s scanned %d K entries (%d bad) at %s",
204 fi->fname, fi->line_count / 1000, fi->bad_line_count,
205 ctime(&fi->inner_time));
206 } while (res < 0);
207 }
208
209 /* CacheEntry */
210
211 static CacheEntry *
212 cacheEntryCreate(const storeSwapLogData * s)
213 {
214 CacheEntry *e = xcalloc(1, sizeof(CacheEntry));
215 assert(s);
216 /* e->s = *s; */
217 xmemcpy(e->key_arr, s->key, MD5_DIGEST_CHARS);
218 e->key = &e->key_arr[0];
219 return e;
220 }
221
222 static void
223 cacheEntryDestroy(CacheEntry * e)
224 {
225 assert(e);
226 xfree(e);
227 }
228
229
230 /* Cache */
231
232 static Cache *
233 cacheCreate(const char *name)
234 {
235 Cache *c;
236 assert(name && strlen(name));
237 c = xcalloc(1, sizeof(Cache));
238 c->name = name;
239 c->hash = hash_create(storeKeyHashCmp, 2e6, storeKeyHashHash);
240 return c;
241 }
242
243 static void
244 cacheDestroy(Cache * cache)
245 {
246 CacheEntry *e = NULL;
247 hash_table *hash;
248 assert(cache);
249 hash = cache->hash;
250 /* destroy hash table contents */
251 for (e = hash_first(hash); e; e = hash_next(hash)) {
252 hash_remove_link(hash, (hash_link*)e);
253 cacheEntryDestroy(e);
254 }
255 /* destroy the hash table itself */
256 hashFreeMemory(hash);
257 if (cache->digest)
258 cacheDigestDestroy(cache->digest);
259 xfree(cache);
260 }
261
262 /* re-digests currently hashed entries */
263 static void
264 cacheResetDigest(Cache * cache)
265 {
266 CacheEntry *e = NULL;
267 hash_table *hash;
268 struct timeval t_start, t_end;
269
270 assert(cache);
271 fprintf(stderr, "%s: init-ing digest with %d entries\n", cache->name, cache->count);
272 if (cache->digest)
273 cacheDigestDestroy(cache->digest);
274 hash = cache->hash;
275 cache->digest = cacheDigestCreate(2 * cache->count + 1); /* 50% utilization */
276 if (!cache->count)
277 return;
278 gettimeofday(&t_start, NULL);
279 for (e = hash_first(hash); e; e = hash_next(hash)) {
280 cacheDigestAdd(cache->digest, e->key);
281 }
282 gettimeofday(&t_end, NULL);
283 assert(cache->digest->count == cache->count);
284 fprintf(stderr, "%s: init-ed digest with %d entries\n",
285 cache->name, cache->digest->count);
286 fprintf(stderr, "%s: init took: %f sec, %f sec/M\n",
287 cache->name,
288 tvSubDsec(t_start, t_end),
289 (double) 1e6 * tvSubDsec(t_start, t_end) / cache->count);
290 /* check how long it takes to traverse the hash */
291 gettimeofday(&t_start, NULL);
292 for (e = hash_first(hash); e; e = hash_next(hash)) {
293 }
294 gettimeofday(&t_end, NULL);
295 fprintf(stderr, "%s: hash scan took: %f sec, %f sec/M\n",
296 cache->name,
297 tvSubDsec(t_start, t_end),
298 (double) 1e6 * tvSubDsec(t_start, t_end) / cache->count);
299 }
300
301 static void
302 cacheQueryPeer(Cache * cache, const cache_key * key)
303 {
304 const int peer_has_it = hash_lookup(cache->peer->hash, key) != NULL;
305 const int we_think_we_have_it = cacheDigestTest(cache->digest, key);
306
307 cache->qstats.query_count++;
308 if (peer_has_it) {
309 if (we_think_we_have_it)
310 cache->qstats.true_hit_count++;
311 else
312 cache->qstats.false_miss_count++;
313 } else {
314 if (we_think_we_have_it)
315 cache->qstats.false_hit_count++;
316 else
317 cache->qstats.true_miss_count++;
318 }
319 }
320
321 static void
322 cacheQueryReport(Cache * cache, CacheQueryStats *stats)
323 {
324 fprintf(stdout, "%s: peer queries: %d (%d%%)\n",
325 cache->name,
326 stats->query_count, xpercentInt(stats->query_count, cache->req_count)
327 );
328 fprintf(stdout, "%s: t-hit: %d (%d%%) t-miss: %d (%d%%) t-*: %d (%d%%)\n",
329 cache->name,
330 stats->true_hit_count, xpercentInt(stats->true_hit_count, stats->query_count),
331 stats->true_miss_count, xpercentInt(stats->true_miss_count, stats->query_count),
332 stats->true_hit_count + stats->true_miss_count,
333 xpercentInt(stats->true_hit_count + stats->true_miss_count, stats->query_count)
334 );
335 fprintf(stdout, "%s: f-hit: %d (%d%%) f-miss: %d (%d%%) f-*: %d (%d%%)\n",
336 cache->name,
337 stats->false_hit_count, xpercentInt(stats->false_hit_count, stats->query_count),
338 stats->false_miss_count, xpercentInt(stats->false_miss_count, stats->query_count),
339 stats->false_hit_count + stats->false_miss_count,
340 xpercentInt(stats->false_hit_count + stats->false_miss_count, stats->query_count)
341 );
342 }
343
344 static void
345 cacheReport(Cache * cache)
346 {
347 fprintf(stdout, "%s: entries: %d reqs: %d bad-add: %d bad-del: %d\n",
348 cache->name, cache->count, cache->req_count,
349 cache->bad_add_count, cache->bad_del_count);
350
351 if (cache->digest) {
352 int bit_count, on_count;
353 cacheDigestUtil(cache->digest, &bit_count, &on_count);
354 fprintf(stdout, "%s: digest entries: cnt: %d (-=%d) cap: %d util: %d%% size: %d b\n",
355 cache->name,
356 cache->digest->count, cache->digest->del_count,
357 cache->digest->capacity,
358 xpercentInt(cache->digest->count, cache->digest->capacity),
359 bit_count/8
360 );
361 fprintf(stdout, "%s: digest bits: on: %d cap: %d util: %d%%\n",
362 cache->name,
363 on_count, bit_count,
364 xpercentInt(on_count, bit_count)
365 );
366 }
367 }
368
369 static void
370 cacheFetch(Cache *cache, const RawAccessLogEntry *e)
371 {
372 assert(e);
373 cache->req_count++;
374 if (e->use_icp)
375 cacheQueryPeer(cache, e->key);
376 }
377
378 static fr_result
379 swapStateReader(FileIterator * fi)
380 {
381 storeSwapLogData *entry;
382 if (!fi->entry)
383 fi->entry = xcalloc(1, sizeof(storeSwapLogData));
384 entry = fi->entry;
385 if (fread(entry, sizeof(*entry), 1, fi->file) != 1)
386 return frEof;
387 fi->inner_time = entry->lastref;
388 if (entry->op != SWAP_LOG_ADD && entry->op != SWAP_LOG_DEL) {
389 fprintf(stderr, "%s:%d: unknown swap log action\n", fi->fname, fi->line_count);
390 exit(-3);
391 }
392 return frOk;
393 }
394
395 static fr_result
396 accessLogReader(FileIterator * fi)
397 {
398 static char buf[4096];
399 RawAccessLogEntry *entry;
400 char *url;
401 char *method;
402 int method_id = METHOD_NONE;
403 char *hier = NULL;
404
405 assert(fi);
406 if (!fi->entry)
407 fi->entry = xcalloc(1, sizeof(RawAccessLogEntry));
408 else
409 memset(fi->entry, 0, sizeof(RawAccessLogEntry));
410 entry = fi->entry;
411 if (!fgets(buf, sizeof(buf), fi->file))
412 return frEof; /* eof */
413 entry->timestamp = fi->inner_time = (time_t)atoi(buf);
414 url = strstr(buf, "://");
415 hier = url ? strstr(url, " - ") : NULL;
416
417 if (!url || !hier) {
418 /*fprintf(stderr, "%s:%d: strange access log entry '%s'\n",
419 * fname, scanned_count, buf); */
420 return frError;
421 }
422 method = url;
423 while (!isdigit(*method)) {
424 if (*method == ' ')
425 *method = '\0';
426 --method;
427 }
428 method += 2;
429 method_id = methodStrToId(method);
430 if (method_id == METHOD_NONE) {
431 /*fprintf(stderr, "%s:%d: invalid method %s in '%s'\n",
432 * fname, scanned_count, method, buf); */
433 return frError;
434 }
435 while (*url) url--;
436 url++;
437 *hier = '\0';
438 hier += 3;
439 *strchr(hier, '/') = '\0';
440 /*fprintf(stdout, "%s:%d: %s %s %s\n",
441 * fname, count, method, url, hier); */
442 entry->use_icp = strcmp(hier, "NONE");
443 /* no ICP lookup for these status codes */
444 /* strcmp(hier, "NONE") &&
445 strcmp(hier, "DIRECT") &&
446 strcmp(hier, "FIREWALL_IP_DIRECT") &&
447 strcmp(hier, "LOCAL_IP_DIRECT") &&
448 strcmp(hier, "NO_DIRECT_FAIL") &&
449 strcmp(hier, "NO_PARENT_DIRECT") &&
450 strcmp(hier, "SINGLE_PARENT") &&
451 strcmp(hier, "PASSTHROUGH_PARENT") &&
452 strcmp(hier, "SSL_PARENT_MISS") &&
453 strcmp(hier, "DEFAULT_PARENT");
454 */
455 memcpy(entry->key, storeKeyPublic(url, method_id), sizeof(entry->key));
456 /*fprintf(stdout, "%s:%d: %s %s %s %s\n",
457 fname, count, method, storeKeyText(entry->key), url, hier); */
458 return frOk;
459 }
460
461
462 static void
463 cachePurge(Cache *cache, storeSwapLogData *s, int update_digest)
464 {
465 CacheEntry *olde = (CacheEntry *) hash_lookup(cache->hash, s->key);
466 if (!olde) {
467 cache->bad_del_count++;
468 } else {
469 assert(cache->count);
470 hash_remove_link(cache->hash, (hash_link *) olde);
471 if (update_digest)
472 cacheDigestDel(cache->digest, s->key);
473 cacheEntryDestroy(olde);
474 cache->count--;
475 }
476 }
477
478 static void
479 cacheStore(Cache *cache, storeSwapLogData *s, int update_digest)
480 {
481 CacheEntry *olde = (CacheEntry *) hash_lookup(cache->hash, s->key);
482 if (olde) {
483 cache->bad_add_count++;
484 } else {
485 CacheEntry *e = cacheEntryCreate(s);
486 hash_join(cache->hash, (hash_link *) e);
487 cache->count++;
488 if (update_digest)
489 cacheDigestAdd(cache->digest, e->key);
490 }
491 }
492
493 static void
494 cacheUpdateStore(Cache *cache, storeSwapLogData *s, int update_digest)
495 {
496 switch (s->op) {
497 case SWAP_LOG_ADD:
498 cacheStore(cache, s, update_digest);
499 break;
500 case SWAP_LOG_DEL:
501 cachePurge(cache, s, update_digest);
502 break;
503 default:
504 assert(0);
505 }
506 }
507
508 static int
509 usage(const char *prg_name)
510 {
511 fprintf(stderr, "usage: %s <access_log> <swap_state> ...\n",
512 prg_name);
513 return -1;
514 }
515
516 int
517 main(int argc, char *argv[])
518 {
519 FileIterator **fis = NULL;
520 const int fi_count = argc-1;
521 int active_fi_count = 0;
522 time_t ready_time;
523 Cache *them, *us;
524 int i;
525
526 if (argc < 3)
527 return usage(argv[0]);
528
529 them = cacheCreate("them");
530 us = cacheCreate("us");
531 them->peer = us;
532 us->peer = them;
533
534 fis = xcalloc(fi_count, sizeof(FileIterator *));
535 /* init iterators with files */
536 fis[0] = fileIteratorCreate(argv[1], accessLogReader);
537 for (i = 2; i < argc; ++i)
538 fis[i-1] = fileIteratorCreate(argv[i], swapStateReader);
539 /* check that all files were found */
540 for (i = 0; i < fi_count; ++i)
541 if (fis[i]) return -2;
542 /* read prefix to get start-up contents of the peer cache */
543 ready_time = -1;
544 for (i = 1; i < fi_count; ++i) {
545 FileIterator *fi = fis[i];
546 while (fi->inner_time > 0) {
547 if (((storeSwapLogData*)fi->entry)->op == SWAP_LOG_DEL) {
548 cachePurge(them, fi->entry, 0);
549 if (ready_time < 0)
550 ready_time = fi->inner_time;
551 } else {
552 if (ready_time > 0 && fi->inner_time > ready_time)
553 break;
554 cacheStore(them, fi->entry, 0);
555 }
556 fileIteratorAdvance(fi);
557 }
558 }
559 /* digest peer cache content */
560 cacheResetDigest(them);
561 us->digest = cacheDigestClone(them->digest); /* @netw@ */
562
563 /* shift the time in access log to match ready_time */
564 fileIteratorSetCurTime(fis[0], ready_time);
565
566 /* iterate, use the iterator with the smallest positive inner_time */
567 cur_time = -1;
568 do {
569 int next_i = -1;
570 time_t next_time = -1;
571 active_fi_count = 0;
572 for (i = 0; i < fi_count; ++i) {
573 if (fis[i]->inner_time >= 0) {
574 if (!active_fi_count || fis[i]->inner_time < next_time) {
575 next_i = i;
576 next_time = fis[i]->inner_time;
577 }
578 active_fi_count++;
579 }
580 }
581 if (next_i >= 0) {
582 cur_time = next_time;
583 /*fprintf(stderr, "%2d time: %d %s", next_i, (int)cur_time, ctime(&cur_time));*/
584 if (next_i == 0)
585 cacheFetch(us, fis[next_i]->entry);
586 else
587 cacheUpdateStore(them, fis[next_i]->entry, 1);
588 fileIteratorAdvance(fis[next_i]);
589 }
590 } while (active_fi_count);
591
592 /* report */
593 cacheReport(them);
594 cacheReport(us);
595 cacheQueryReport(us, &us->qstats);
596
597 /* clean */
598 for (i = 0; i < argc-1; ++i) {
599 fileIteratorDestroy(fis[i]);
600 }
601 xfree(fis);
602 cacheDestroy(them);
603 cacheDestroy(us);
604 return 0;
605 }