3 * $Id: test_cache_digest.cc,v 1.14 1998/04/01 17:53:59 rousskov Exp $
5 * AUTHOR: Alex Rousskov
7 * SQUID Internet Object Cache http://squid.nlanr.net/Squid/
8 * --------------------------------------------------------
10 * Squid is the result of efforts by numerous individuals from the
11 * Internet community. Development is led by Duane Wessels of the
12 * National Laboratory for Applied Network Research and funded by
13 * the National Science Foundation.
15 * This program is free software; you can redistribute it and/or modify
16 * it under the terms of the GNU General Public License as published by
17 * the Free Software Foundation; either version 2 of the License, or
18 * (at your option) any later version.
20 * This program is distributed in the hope that it will be useful,
21 * but WITHOUT ANY WARRANTY; without even the implied warranty of
22 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23 * GNU General Public License for more details.
25 * You should have received a copy of the GNU General Public License
26 * along with this program; if not, write to the Free Software
27 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
32 * Test-suite for playing with cache digests
45 typedef struct _Cache Cache
;
51 CacheQueryStats qstats
;
52 int count
; /* #currently cached entries */
53 int req_count
; /* #requests to this cache */
54 int bad_add_count
; /* #duplicate adds */
55 int bad_del_count
; /* #dels with no prior add */
59 typedef struct _CacheEntry
{
61 struct _CacheEntry
*next
;
62 unsigned char key_arr
[MD5_DIGEST_CHARS
];
63 /* storeSwapLogData s; */
66 /* parsed access log entry */
68 cache_key key
[MD5_DIGEST_CHARS
];
70 short int use_icp
; /* true/false */
73 typedef enum { frError
= -2, frMore
= -1, frEof
= 0, frOk
= 1 } fr_result
;
74 typedef struct _FileIterator FileIterator
;
75 typedef fr_result (*FI_READER
)(FileIterator
*fi
);
77 struct _FileIterator
{
80 time_t inner_time
; /* timestamp of the current entry */
81 time_t time_offset
; /* to adjust time set by reader */
82 int line_count
; /* number of lines scanned */
83 int bad_line_count
; /* number of parsing errors */
84 int time_warp_count
;/* number of out-of-order entries in the file */
85 FI_READER reader
; /* reads next entry and updates inner_time */
86 void *entry
; /* buffer for the current entry, freed with xfree() */
90 static time_t cur_time
= -1; /* timestamp of the current log entry */
94 static int cacheIndexScanCleanPrefix(CacheIndex
* idx
, const char *fname
, FILE * file
);
95 static int cacheIndexScanAccessLog(CacheIndex
* idx
, const char *fname
, FILE * file
);
99 /* copied from url.c */
100 const char *RequestMethodStr
[] =
111 /* copied from url.c */
113 methodStrToId(const char *s
)
115 if (strcasecmp(s
, "GET") == 0) {
117 } else if (strcasecmp(s
, "POST") == 0) {
119 } else if (strcasecmp(s
, "PUT") == 0) {
121 } else if (strcasecmp(s
, "HEAD") == 0) {
123 } else if (strcasecmp(s
, "CONNECT") == 0) {
124 return METHOD_CONNECT
;
125 } else if (strcasecmp(s
, "TRACE") == 0) {
127 } else if (strcasecmp(s
, "PURGE") == 0) {
135 static void fileIteratorAdvance(FileIterator
*fi
);
137 static FileIterator
*
138 fileIteratorCreate(const char *fname
, FI_READER reader
)
140 FileIterator
*fi
= xcalloc(1, sizeof(FileIterator
));
141 assert(fname
&& reader
);
144 fi
->file
= fopen(fname
, "r");
146 fprintf(stderr
, "cannot open %s: %s\n", fname
, strerror(errno
));
149 fprintf(stderr
, "opened %s\n", fname
);
150 fileIteratorAdvance(fi
);
155 fileIteratorDestroy(FileIterator
*fi
)
160 fprintf(stderr
, "closed %s\n", fi
->fname
);
167 fileIteratorSetCurTime(FileIterator
*fi
, time_t ct
)
170 assert(fi
->inner_time
> 0);
171 fi
->time_offset
= ct
- fi
->inner_time
;
175 fileIteratorAdvance(FileIterator
*fi
)
180 const time_t last_time
= fi
->inner_time
;
182 res
= fi
->reader(fi
);
184 if (fi
->inner_time
< 0)
185 fi
->inner_time
= last_time
;
187 fi
->inner_time
+= fi
->time_offset
;
189 fi
->bad_line_count
++;
192 fprintf(stderr
, "exhausted %s (%d entries) at %s",
193 fi
->fname
, fi
->line_count
, ctime(&fi
->inner_time
));
196 if (fi
->inner_time
< last_time
) {
197 assert(last_time
>= 0);
198 fi
->time_warp_count
++;
199 fi
->inner_time
= last_time
;
201 /* report progress */
202 if (!(fi
->line_count
% 50000))
203 fprintf(stderr
, "%s scanned %d K entries (%d bad) at %s",
204 fi
->fname
, fi
->line_count
/ 1000, fi
->bad_line_count
,
205 ctime(&fi
->inner_time
));
212 cacheEntryCreate(const storeSwapLogData
* s
)
214 CacheEntry
*e
= xcalloc(1, sizeof(CacheEntry
));
217 xmemcpy(e
->key_arr
, s
->key
, MD5_DIGEST_CHARS
);
218 e
->key
= &e
->key_arr
[0];
223 cacheEntryDestroy(CacheEntry
* e
)
233 cacheCreate(const char *name
)
236 assert(name
&& strlen(name
));
237 c
= xcalloc(1, sizeof(Cache
));
239 c
->hash
= hash_create(storeKeyHashCmp
, 2e6
, storeKeyHashHash
);
244 cacheDestroy(Cache
* cache
)
246 CacheEntry
*e
= NULL
;
250 /* destroy hash table contents */
251 for (e
= hash_first(hash
); e
; e
= hash_next(hash
)) {
252 hash_remove_link(hash
, (hash_link
*)e
);
253 cacheEntryDestroy(e
);
255 /* destroy the hash table itself */
256 hashFreeMemory(hash
);
258 cacheDigestDestroy(cache
->digest
);
262 /* re-digests currently hashed entries */
264 cacheResetDigest(Cache
* cache
)
266 CacheEntry
*e
= NULL
;
268 struct timeval t_start
, t_end
;
271 fprintf(stderr
, "%s: init-ing digest with %d entries\n", cache
->name
, cache
->count
);
273 cacheDigestDestroy(cache
->digest
);
275 cache
->digest
= cacheDigestCreate(2 * cache
->count
+ 1); /* 50% utilization */
278 gettimeofday(&t_start
, NULL
);
279 for (e
= hash_first(hash
); e
; e
= hash_next(hash
)) {
280 cacheDigestAdd(cache
->digest
, e
->key
);
282 gettimeofday(&t_end
, NULL
);
283 assert(cache
->digest
->count
== cache
->count
);
284 fprintf(stderr
, "%s: init-ed digest with %d entries\n",
285 cache
->name
, cache
->digest
->count
);
286 fprintf(stderr
, "%s: init took: %f sec, %f sec/M\n",
288 tvSubDsec(t_start
, t_end
),
289 (double) 1e6
* tvSubDsec(t_start
, t_end
) / cache
->count
);
290 /* check how long it takes to traverse the hash */
291 gettimeofday(&t_start
, NULL
);
292 for (e
= hash_first(hash
); e
; e
= hash_next(hash
)) {
294 gettimeofday(&t_end
, NULL
);
295 fprintf(stderr
, "%s: hash scan took: %f sec, %f sec/M\n",
297 tvSubDsec(t_start
, t_end
),
298 (double) 1e6
* tvSubDsec(t_start
, t_end
) / cache
->count
);
302 cacheQueryPeer(Cache
* cache
, const cache_key
* key
)
304 const int peer_has_it
= hash_lookup(cache
->peer
->hash
, key
) != NULL
;
305 const int we_think_we_have_it
= cacheDigestTest(cache
->digest
, key
);
307 cache
->qstats
.query_count
++;
309 if (we_think_we_have_it
)
310 cache
->qstats
.true_hit_count
++;
312 cache
->qstats
.false_miss_count
++;
314 if (we_think_we_have_it
)
315 cache
->qstats
.false_hit_count
++;
317 cache
->qstats
.true_miss_count
++;
322 cacheQueryReport(Cache
* cache
, CacheQueryStats
*stats
)
324 fprintf(stdout
, "%s: peer queries: %d (%d%%)\n",
326 stats
->query_count
, xpercentInt(stats
->query_count
, cache
->req_count
)
328 fprintf(stdout
, "%s: t-hit: %d (%d%%) t-miss: %d (%d%%) t-*: %d (%d%%)\n",
330 stats
->true_hit_count
, xpercentInt(stats
->true_hit_count
, stats
->query_count
),
331 stats
->true_miss_count
, xpercentInt(stats
->true_miss_count
, stats
->query_count
),
332 stats
->true_hit_count
+ stats
->true_miss_count
,
333 xpercentInt(stats
->true_hit_count
+ stats
->true_miss_count
, stats
->query_count
)
335 fprintf(stdout
, "%s: f-hit: %d (%d%%) f-miss: %d (%d%%) f-*: %d (%d%%)\n",
337 stats
->false_hit_count
, xpercentInt(stats
->false_hit_count
, stats
->query_count
),
338 stats
->false_miss_count
, xpercentInt(stats
->false_miss_count
, stats
->query_count
),
339 stats
->false_hit_count
+ stats
->false_miss_count
,
340 xpercentInt(stats
->false_hit_count
+ stats
->false_miss_count
, stats
->query_count
)
345 cacheReport(Cache
* cache
)
347 fprintf(stdout
, "%s: entries: %d reqs: %d bad-add: %d bad-del: %d\n",
348 cache
->name
, cache
->count
, cache
->req_count
,
349 cache
->bad_add_count
, cache
->bad_del_count
);
352 int bit_count
, on_count
;
353 cacheDigestUtil(cache
->digest
, &bit_count
, &on_count
);
354 fprintf(stdout
, "%s: digest entries: cnt: %d (-=%d) cap: %d util: %d%% size: %d b\n",
356 cache
->digest
->count
, cache
->digest
->del_count
,
357 cache
->digest
->capacity
,
358 xpercentInt(cache
->digest
->count
, cache
->digest
->capacity
),
361 fprintf(stdout
, "%s: digest bits: on: %d cap: %d util: %d%%\n",
364 xpercentInt(on_count
, bit_count
)
370 cacheFetch(Cache
*cache
, const RawAccessLogEntry
*e
)
375 cacheQueryPeer(cache
, e
->key
);
379 swapStateReader(FileIterator
* fi
)
381 storeSwapLogData
*entry
;
383 fi
->entry
= xcalloc(1, sizeof(storeSwapLogData
));
385 if (fread(entry
, sizeof(*entry
), 1, fi
->file
) != 1)
387 fi
->inner_time
= entry
->lastref
;
388 if (entry
->op
!= SWAP_LOG_ADD
&& entry
->op
!= SWAP_LOG_DEL
) {
389 fprintf(stderr
, "%s:%d: unknown swap log action\n", fi
->fname
, fi
->line_count
);
396 accessLogReader(FileIterator
* fi
)
398 static char buf
[4096];
399 RawAccessLogEntry
*entry
;
402 int method_id
= METHOD_NONE
;
407 fi
->entry
= xcalloc(1, sizeof(RawAccessLogEntry
));
409 memset(fi
->entry
, 0, sizeof(RawAccessLogEntry
));
411 if (!fgets(buf
, sizeof(buf
), fi
->file
))
412 return frEof
; /* eof */
413 entry
->timestamp
= fi
->inner_time
= (time_t)atoi(buf
);
414 url
= strstr(buf
, "://");
415 hier
= url
? strstr(url
, " - ") : NULL
;
418 /*fprintf(stderr, "%s:%d: strange access log entry '%s'\n",
419 * fname, scanned_count, buf); */
423 while (!isdigit(*method
)) {
429 method_id
= methodStrToId(method
);
430 if (method_id
== METHOD_NONE
) {
431 /*fprintf(stderr, "%s:%d: invalid method %s in '%s'\n",
432 * fname, scanned_count, method, buf); */
439 *strchr(hier
, '/') = '\0';
440 /*fprintf(stdout, "%s:%d: %s %s %s\n",
441 * fname, count, method, url, hier); */
442 entry
->use_icp
= strcmp(hier
, "NONE");
443 /* no ICP lookup for these status codes */
444 /* strcmp(hier, "NONE") &&
445 strcmp(hier, "DIRECT") &&
446 strcmp(hier, "FIREWALL_IP_DIRECT") &&
447 strcmp(hier, "LOCAL_IP_DIRECT") &&
448 strcmp(hier, "NO_DIRECT_FAIL") &&
449 strcmp(hier, "NO_PARENT_DIRECT") &&
450 strcmp(hier, "SINGLE_PARENT") &&
451 strcmp(hier, "PASSTHROUGH_PARENT") &&
452 strcmp(hier, "SSL_PARENT_MISS") &&
453 strcmp(hier, "DEFAULT_PARENT");
455 memcpy(entry
->key
, storeKeyPublic(url
, method_id
), sizeof(entry
->key
));
456 /*fprintf(stdout, "%s:%d: %s %s %s %s\n",
457 fname, count, method, storeKeyText(entry->key), url, hier); */
463 cachePurge(Cache
*cache
, storeSwapLogData
*s
, int update_digest
)
465 CacheEntry
*olde
= (CacheEntry
*) hash_lookup(cache
->hash
, s
->key
);
467 cache
->bad_del_count
++;
469 assert(cache
->count
);
470 hash_remove_link(cache
->hash
, (hash_link
*) olde
);
472 cacheDigestDel(cache
->digest
, s
->key
);
473 cacheEntryDestroy(olde
);
479 cacheStore(Cache
*cache
, storeSwapLogData
*s
, int update_digest
)
481 CacheEntry
*olde
= (CacheEntry
*) hash_lookup(cache
->hash
, s
->key
);
483 cache
->bad_add_count
++;
485 CacheEntry
*e
= cacheEntryCreate(s
);
486 hash_join(cache
->hash
, (hash_link
*) e
);
489 cacheDigestAdd(cache
->digest
, e
->key
);
494 cacheUpdateStore(Cache
*cache
, storeSwapLogData
*s
, int update_digest
)
498 cacheStore(cache
, s
, update_digest
);
501 cachePurge(cache
, s
, update_digest
);
509 usage(const char *prg_name
)
511 fprintf(stderr
, "usage: %s <access_log> <swap_state> ...\n",
517 main(int argc
, char *argv
[])
519 FileIterator
**fis
= NULL
;
520 const int fi_count
= argc
-1;
521 int active_fi_count
= 0;
527 return usage(argv
[0]);
529 them
= cacheCreate("them");
530 us
= cacheCreate("us");
534 fis
= xcalloc(fi_count
, sizeof(FileIterator
*));
535 /* init iterators with files */
536 fis
[0] = fileIteratorCreate(argv
[1], accessLogReader
);
537 for (i
= 2; i
< argc
; ++i
)
538 fis
[i
-1] = fileIteratorCreate(argv
[i
], swapStateReader
);
539 /* check that all files were found */
540 for (i
= 0; i
< fi_count
; ++i
)
541 if (fis
[i
]) return -2;
542 /* read prefix to get start-up contents of the peer cache */
544 for (i
= 1; i
< fi_count
; ++i
) {
545 FileIterator
*fi
= fis
[i
];
546 while (fi
->inner_time
> 0) {
547 if (((storeSwapLogData
*)fi
->entry
)->op
== SWAP_LOG_DEL
) {
548 cachePurge(them
, fi
->entry
, 0);
550 ready_time
= fi
->inner_time
;
552 if (ready_time
> 0 && fi
->inner_time
> ready_time
)
554 cacheStore(them
, fi
->entry
, 0);
556 fileIteratorAdvance(fi
);
559 /* digest peer cache content */
560 cacheResetDigest(them
);
561 us
->digest
= cacheDigestClone(them
->digest
); /* @netw@ */
563 /* shift the time in access log to match ready_time */
564 fileIteratorSetCurTime(fis
[0], ready_time
);
566 /* iterate, use the iterator with the smallest positive inner_time */
570 time_t next_time
= -1;
572 for (i
= 0; i
< fi_count
; ++i
) {
573 if (fis
[i
]->inner_time
>= 0) {
574 if (!active_fi_count
|| fis
[i
]->inner_time
< next_time
) {
576 next_time
= fis
[i
]->inner_time
;
582 cur_time
= next_time
;
583 /*fprintf(stderr, "%2d time: %d %s", next_i, (int)cur_time, ctime(&cur_time));*/
585 cacheFetch(us
, fis
[next_i
]->entry
);
587 cacheUpdateStore(them
, fis
[next_i
]->entry
, 1);
588 fileIteratorAdvance(fis
[next_i
]);
590 } while (active_fi_count
);
595 cacheQueryReport(us
, &us
->qstats
);
598 for (i
= 0; i
< argc
-1; ++i
) {
599 fileIteratorDestroy(fis
[i
]);