+
/*
- * $Id: test_cache_digest.cc,v 1.3 1998/03/31 00:03:55 rousskov Exp $
+ * $Id: test_cache_digest.cc,v 1.29 2003/01/23 00:37:27 robertc Exp $
*
* AUTHOR: Alex Rousskov
*
- * SQUID Internet Object Cache http://squid.nlanr.net/Squid/
- * --------------------------------------------------------
+ * SQUID Web Proxy Cache http://www.squid-cache.org/
+ * ----------------------------------------------------------
*
- * Squid is the result of efforts by numerous individuals from the
- * Internet community. Development is led by Duane Wessels of the
- * National Laboratory for Applied Network Research and funded by
- * the National Science Foundation.
+ * Squid is the result of efforts by numerous individuals from
+ * the Internet community; see the CONTRIBUTORS file for full
+ * details. Many organizations have provided support for Squid's
+ * development; see the SPONSORS file for full details. Squid is
+ * Copyrighted (C) 2001 by the Regents of the University of
+ * California; see the COPYRIGHT file for full details. Squid
+ * incorporates software developed and/or copyrighted by other
+ * sources; see the CREDITS file for full details.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- *
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA.
+ *
*/
/*
#include "squid.h"
typedef struct {
+ int query_count; /* #peer queries made (bumped once per cacheQueryPeer call) */
+ int true_hit_count; /* digest said "have it" and the peer's hash really has the key */
+ int true_miss_count; /* digest said "no" and the peer's hash does not have the key */
+ int false_hit_count; /* digest said "have it" but the peer's hash does not have the key */
+ int false_miss_count; /* digest said "no" but the peer's hash has the key */
+} CacheQueryStats;
+
+typedef struct _Cache Cache;
+struct _Cache {
const char *name;
hash_table *hash;
CacheDigest *digest;
- int count; /* #currently cached entries */
- int scanned_count; /* #scanned entries */
- int bad_add_count; /* #duplicate adds */
- int bad_del_count; /* #dels with no prior add */
-} CacheIndex;
+ Cache *peer; /* the other cache; its hash is the ground truth in cacheQueryPeer */
+ CacheQueryStats qstats; /* outcome counters for peer queries issued by this cache */
+ int count; /* #currently cached entries */
+ int req_count; /* #requests to this cache */
+ int bad_add_count; /* #duplicate adds */
+ int bad_del_count; /* #dels with no prior add */
+};
typedef struct _CacheEntry {
const cache_key *key;
struct _CacheEntry *next;
- /* storeSwapLogData s; */
unsigned char key_arr[MD5_DIGEST_CHARS];
+ /* storeSwapLogData s; -- the full swap log record is not stored; cacheEntryCreate copies only the key bytes into key_arr */
} CacheEntry;
+/* parsed access log entry (filled in by accessLogReader) */
+typedef struct {
+ cache_key key[MD5_DIGEST_CHARS]; /* copy of storeKeyPublic(url, method) for the logged request */
+ time_t timestamp; /* leading number of the log line, converted with atoi() */
+ short int use_icp; /* true/false; non-zero when the hierarchy code is not "NONE" */
+} RawAccessLogEntry;
+
+typedef enum {
+ frError = -2, frMore = -1, frEof = 0, frOk = 1 /* reader status: negative values make
+ * fileIteratorAdvance call the reader again
+ * (frError is also counted as a bad line);
+ * frEof and frOk end the advance */
+} fr_result;
+typedef struct _FileIterator FileIterator;
+typedef fr_result(*FI_READER) (FileIterator * fi); /* parses one entry from fi->file into fi->entry */
+
+struct _FileIterator {
+ const char *fname; /* name passed to fileIteratorCreate; stored by pointer, not copied */
+ FILE *file; /* stream opened for reading by fileIteratorCreate */
+ time_t inner_time; /* timestamp of the current entry; set to -1 once the file is exhausted */
+ time_t time_offset; /* to adjust time set by reader (added to every timestamp the reader reports) */
+ int line_count; /* number of lines scanned (one per reader call) */
+ int bad_line_count; /* number of parsing errors (reader returned frError) */
+ int time_warp_count; /* number of out-of-order entries in the file */
+ FI_READER reader; /* reads next entry and updates inner_time */
+ void *entry; /* buffer for the current entry, freed with xfree() */
+};
+
+/* globals */
+static time_t cur_time = -1; /* timestamp of the current log entry */
/* copied from url.c */
const char *RequestMethodStr[] =
"TRACE",
"PURGE"
};
-
-
-static CacheIndex *Peer = NULL;
-
-static int cacheIndexScanCleanPrefix(CacheIndex *idx, const char *fname, FILE *file);
-static int cacheIndexScanAccessLog(CacheIndex *idx, const char *fname, FILE *file);
-
/* copied from url.c */
static method_t
-cacheIndexParseMethod(const char *s)
+methodStrToId(const char *s) /* case-insensitive method name -> method_t; METHOD_NONE when not recognized */
{
if (strcasecmp(s, "GET") == 0) {
- return METHOD_GET;
+ return METHOD_GET;
} else if (strcasecmp(s, "POST") == 0) {
- return METHOD_POST;
+ return METHOD_POST;
} else if (strcasecmp(s, "PUT") == 0) {
- return METHOD_PUT;
+ return METHOD_PUT;
} else if (strcasecmp(s, "HEAD") == 0) {
- return METHOD_HEAD;
+ return METHOD_HEAD;
} else if (strcasecmp(s, "CONNECT") == 0) {
- return METHOD_CONNECT;
+ return METHOD_CONNECT;
} else if (strcasecmp(s, "TRACE") == 0) {
- return METHOD_TRACE;
+ return METHOD_TRACE;
} else if (strcasecmp(s, "PURGE") == 0) {
- return METHOD_PURGE;
+ return METHOD_PURGE;
}
return METHOD_NONE;
}
+/* FileIterator */
+
+static void fileIteratorAdvance(FileIterator * fi);
+
+/*
+ * Creates an iterator over the file `fname`, parsed entry by entry with
+ * `reader`, and positions it on the first usable entry.
+ *
+ * fname  - path to open for reading; the pointer is stored, not copied,
+ *          so it must stay valid for the lifetime of the iterator
+ * reader - callback that parses one entry and sets inner_time
+ *
+ * Returns the new iterator (release it with fileIteratorDestroy), or NULL
+ * when the file cannot be opened; in that case a message is printed on
+ * stderr and nothing is left allocated.
+ */
+static FileIterator *
+fileIteratorCreate(const char *fname, FI_READER reader)
+{
+    FileIterator *fi;
+    assert(fname && reader);
+    fi = (FileIterator *)xcalloc(1, sizeof(FileIterator));
+    fi->fname = fname;
+    fi->reader = reader;
+    fi->file = fopen(fname, "r");
+    if (!fi->file) {
+        fprintf(stderr, "cannot open %s: %s\n", fname, strerror(errno));
+        xfree(fi);              /* was leaked on this path before */
+        return NULL;
+    }
+    fprintf(stderr, "opened %s\n", fname);
+    /* load the first entry so that inner_time is meaningful to the caller */
+    fileIteratorAdvance(fi);
+    return fi;
+}
+
+/*
+ * Releases an iterator: closes its stream if one is open (logging the
+ * fact on stderr), then frees the entry buffer and the iterator itself.
+ */
+static void
+fileIteratorDestroy(FileIterator * fi)
+{
+    FILE *stream;
+    assert(fi);
+    stream = fi->file;
+    if (stream != NULL) {
+        fclose(stream);
+        fprintf(stderr, "closed %s\n", fi->fname);
+    }
+    xfree(fi->entry);
+    xfree(fi);
+}
+
+/*
+ * Shifts the iterator's clock so that its current entry appears to occur
+ * at time `ct`; the resulting offset is applied to later entries by
+ * fileIteratorAdvance.  Requires a current entry with a positive timestamp.
+ */
+static void
+fileIteratorSetCurTime(FileIterator * fi, time_t ct)
+{
+    time_t shift;
+    assert(fi);
+    assert(fi->inner_time > 0);
+    shift = ct - fi->inner_time;
+    fi->time_offset = shift;
+}
+/*
+ * Moves the iterator to its next usable entry.  The reader is called
+ * repeatedly until it returns a non-negative result (frOk or frEof);
+ * negative results (frError, frMore) cause another read.  On end of
+ * file inner_time is left at -1, which callers use as the "exhausted"
+ * marker.  Timestamps never go backwards: an entry older than its
+ * predecessor is counted as a time warp and clamped to the previous time.
+ */
+static void
+fileIteratorAdvance(FileIterator * fi)
+{
+ int res;
+ assert(fi);
+ do {
+ const time_t last_time = fi->inner_time;
+ fi->inner_time = -1; /* sentinel: lets us detect whether the reader set a time */
+ res = fi->reader(fi);
+ fi->line_count++;
+ if (fi->inner_time < 0)
+ fi->inner_time = last_time; /* reader reported no time; keep the previous one */
+ else
+ fi->inner_time += fi->time_offset; /* apply the shift set by fileIteratorSetCurTime */
+ if (res == frError)
+ fi->bad_line_count++;
+ else if (res == frEof) {
+ fprintf(stderr, "exhausted %s (%d entries) at %s",
+ fi->fname, fi->line_count, ctime(&fi->inner_time));
+ fi->inner_time = -1; /* mark the iterator as exhausted */
+ } else if (fi->inner_time < last_time) {
+ assert(last_time >= 0);
+ fi->time_warp_count++;
+ fi->inner_time = last_time; /* clamp out-of-order entry to the previous time */
+ }
+ /* report progress */
+ if (!(fi->line_count % 50000))
+ fprintf(stderr, "%s scanned %d K entries (%d bad) at %s",
+ fi->fname, fi->line_count / 1000, fi->bad_line_count,
+ ctime(&fi->inner_time));
+ } while (res < 0); /* frError / frMore: try the next line */
+}
+
+/* CacheEntry */
static CacheEntry *
-cacheEntryCreate(const storeSwapLogData *s)
+cacheEntryCreate(const storeSwapLogData * s)
{
- CacheEntry *e = xcalloc(1, sizeof(CacheEntry));
+ CacheEntry *e = (CacheEntry *)xcalloc(1, sizeof(CacheEntry));
assert(s);
/* e->s = *s; */
xmemcpy(e->key_arr, s->key, MD5_DIGEST_CHARS);
}
static void
-cacheEntryDestroy(CacheEntry *e)
+cacheEntryDestroy(CacheEntry * e) /* frees one entry; e must be non-NULL and is not unlinked from any hash here */
{
assert(e);
xfree(e);
}
-static CacheIndex *
-cacheIndexCreate(const char *name)
-{
- CacheIndex *idx;
- if (!name || !strlen(name))
- return NULL;
- idx = xcalloc(1, sizeof(CacheIndex));
- idx->name = name;
- idx->hash = hash_create(storeKeyHashCmp, 2e6, storeKeyHashHash);
+/* Cache */
- return idx;
+static Cache *
+cacheCreate(const char *name) /* allocates a zeroed Cache with an empty hash; name must be a non-empty string */
+{
+ Cache *c;
+ assert(name && strlen(name));
+ c = (Cache *)xcalloc(1, sizeof(Cache));
+ c->name = name; /* pointer is stored, not copied */
+ c->hash = hash_create(storeKeyHashCmp, (int)2e6, storeKeyHashHash); /* NOTE(review): 2e6 is presumably the hash table size -- confirm against hash_create */
+ return c; /* digest and peer stay NULL until the caller sets them */
}
static void
-cacheIndexDestroy(CacheIndex *idx)
+cacheDestroy(Cache * cache) /* frees every hashed entry, the hash table, the digest (if any) and the cache itself */
{
- hash_link *hashr = NULL;
- if (idx) {
- /* destroy hash list contents */
- for (hashr = hash_first(idx->hash); hashr; hashr = hash_next(idx->hash)) {
- hash_remove_link(idx->hash, hashr);
- cacheEntryDestroy((CacheEntry*)hashr);
- }
- /* destroy the hash table itself */
- hashFreeMemory(idx->hash);
- if (idx->digest)
- cacheDigestDestroy(idx->digest);
- xfree(idx);
+ CacheEntry *e = NULL;
+ hash_table *hash;
+ assert(cache);
+ hash = cache->hash;
+ /* destroy hash table contents */
+ hash_first(hash);
+ while ((e = (CacheEntry *)hash_next(hash))) {
+ hash_remove_link(hash, (hash_link *) e); /* NOTE(review): assumes removing the link just returned by hash_next is safe mid-walk -- confirm */
+ cacheEntryDestroy(e);
}
+ /* destroy the hash table itself */
+ hashFreeMemory(hash);
+ if (cache->digest)
+ cacheDigestDestroy(cache->digest);
+ xfree(cache);
}
-/* makes digest based on currently hashed entries */
+/* re-digests currently hashed entries: drops any existing digest, builds a new one from the hash table, and reports build and hash-scan timings on stderr */
static void
-cacheIndexInitDigest(CacheIndex *idx)
+cacheResetDigest(Cache * cache)
{
- hash_link *hashr = NULL;
+ CacheEntry *e = NULL;
+ hash_table *hash;
struct timeval t_start, t_end;
- assert(idx && !idx->digest);
- fprintf(stderr, "%s: init-ing digest with %d entries\n", idx->name, idx->count);
- idx->digest = cacheDigestCreate(2*idx->count); /* 50% utilization */
+
+ assert(cache);
+ fprintf(stderr, "%s: init-ing digest with %d entries\n", cache->name, cache->count);
+ if (cache->digest)
+ cacheDigestDestroy(cache->digest);
+ hash = cache->hash;
+ cache->digest = cacheDigestCreate(cache->count + 1, 6); /* NOTE(review): 6 is presumably bits per entry -- confirm against cacheDigestCreate */
+ if (!cache->count)
+ return; /* nothing to add; also keeps the per-entry timing divisions below away from a zero count */
gettimeofday(&t_start, NULL);
- for (hashr = hash_first(idx->hash); hashr; hashr = hash_next(idx->hash)) {
- cacheDigestAdd(idx->digest, hashr->key);
+ hash_first(hash);
+ while ((e = (CacheEntry *)hash_next(hash))) {
+ cacheDigestAdd(cache->digest, e->key);
}
gettimeofday(&t_end, NULL);
- assert(idx->digest->count == idx->count);
- fprintf(stderr, "%s: init-ed digest with %d entries\n",
- idx->name, idx->digest->count);
+ assert(cache->digest->count == cache->count); /* every hashed entry must have been added exactly once */
+ fprintf(stderr, "%s: init-ed digest with %d entries\n",
+ cache->name, cache->digest->count);
fprintf(stderr, "%s: init took: %f sec, %f sec/M\n",
- idx->name,
+ cache->name,
tvSubDsec(t_start, t_end),
- (double)1e6*tvSubDsec(t_start, t_end)/idx->count);
+ (double) 1e6 * tvSubDsec(t_start, t_end) / cache->count);
/* check how long it takes to traverse the hash */
gettimeofday(&t_start, NULL);
- for (hashr = hash_first(idx->hash); hashr; hashr = hash_next(idx->hash)) {
+ hash_first(hash);
+ for (e = (CacheEntry *)hash_next(hash); e; e = (CacheEntry *)hash_next(hash)) { /* empty body: only the walk itself is timed */
}
gettimeofday(&t_end, NULL);
fprintf(stderr, "%s: hash scan took: %f sec, %f sec/M\n",
- idx->name,
+ cache->name,
tvSubDsec(t_start, t_end),
- (double)1e6*tvSubDsec(t_start, t_end)/idx->count);
+ (double) 1e6 * tvSubDsec(t_start, t_end) / cache->count);
}
-static int
-cacheIndexAddLog(CacheIndex *idx, const char *fname)
+static void
+cacheQueryPeer(Cache * cache, const cache_key * key) /* classifies one lookup of key: this cache's digest prediction vs. the peer's actual hash contents */
{
- FILE *file;
- int scanned_count = 0;
- assert(idx);
- assert(fname && strlen(fname));
+ const int peer_has_it = hash_lookup(cache->peer->hash, key) != NULL; /* ground truth */
+ const int we_think_we_have_it = cacheDigestTest(cache->digest, key); /* prediction from cache->digest */
- file = fopen(fname, "r");
- if (!file) {
- fprintf(stderr, "cannot open %s: %s\n", fname, strerror(errno));
- return 0;
+ cache->qstats.query_count++;
+ if (peer_has_it) {
+ if (we_think_we_have_it)
+ cache->qstats.true_hit_count++;
+ else
+ cache->qstats.false_miss_count++;
+ } else {
+ if (we_think_we_have_it)
+ cache->qstats.false_hit_count++;
+ else
+ cache->qstats.true_miss_count++;
}
- scanned_count = cacheIndexScanCleanPrefix(idx, fname, file);
- fclose(file);
- return scanned_count;
}
static void
-cacheIndexInitReport(CacheIndex *idx)
+cacheQueryReport(Cache * cache, CacheQueryStats * stats) /* prints three stdout lines: query total, true hit/miss counts, false hit/miss counts, each with a percentage */
{
- assert(idx);
- fprintf(stderr, "%s: bad swap_add: %d\n",
- idx->name, idx->bad_add_count);
- fprintf(stderr, "%s: bad swap_del: %d\n",
- idx->name, idx->bad_del_count);
- fprintf(stderr, "%s: scanned lines: %d\n",
- idx->name, idx->scanned_count);
+ fprintf(stdout, "%s: peer queries: %d (%d%%)\n", /* percentage is relative to all requests seen by this cache */
+ cache->name,
+ stats->query_count, xpercentInt(stats->query_count, cache->req_count)
+ );
+ fprintf(stdout, "%s: t-hit: %d (%d%%) t-miss: %d (%d%%) t-*: %d (%d%%)\n", /* percentages are relative to query_count */
+ cache->name,
+ stats->true_hit_count, xpercentInt(stats->true_hit_count, stats->query_count),
+ stats->true_miss_count, xpercentInt(stats->true_miss_count, stats->query_count),
+ stats->true_hit_count + stats->true_miss_count,
+ xpercentInt(stats->true_hit_count + stats->true_miss_count, stats->query_count)
+ );
+ fprintf(stdout, "%s: f-hit: %d (%d%%) f-miss: %d (%d%%) f-*: %d (%d%%)\n", /* percentages are relative to query_count */
+ cache->name,
+ stats->false_hit_count, xpercentInt(stats->false_hit_count, stats->query_count),
+ stats->false_miss_count, xpercentInt(stats->false_miss_count, stats->query_count),
+ stats->false_hit_count + stats->false_miss_count,
+ xpercentInt(stats->false_hit_count + stats->false_miss_count, stats->query_count)
+ );
}
-#if 0
-static int
-cacheIndexGetLogEntry(CacheIndex *idx, storeSwapLogData *s)
+static void
+cacheReport(Cache * cache) /* prints one stdout line with the cache's entry, request, bad-add and bad-del counters */
{
- if (!idx->has_log_entry)
- cacheIndexStepLogEntry();
- if (idx->has_log_entry) {
- *s = idx->log_entry_buf;
- return 1;
- }
- return 0;
+ fprintf(stdout, "%s: entries: %d reqs: %d bad-add: %d bad-del: %d\n",
+ cache->name, cache->count, cache->req_count,
+ cache->bad_add_count, cache->bad_del_count);
+
}
-static int
-cacheIndexStepLogEntry(CacheIndex *idx)
+static void
+cacheFetch(Cache * cache, const RawAccessLogEntry * e) /* accounts for one logged request against cache */
{
- if (fread(&idx->log_entry_buf, sizeof(idx->log_entry_buf), 1, idx->log) == 1) {
- int op = (int) idx->log_entry_buf.op;
- idx->scanned_count++;
- idx->has_log_entry = 1;
- if (op != SWAP_LOG_ADD && op != SWAP_LOG_DEL) {
- fprintf(stderr, "%s:%d: unknown swap log action %d\n", idx->log_fname, idx->scanned_count, op);
- exit(-3);
- }
- } else
- idx->has_log_entry = 0;
+ assert(e);
+ cache->req_count++; /* every request counts, whether or not the peer is queried */
+ if (e->use_icp)
+ cacheQueryPeer(cache, e->key); /* only requests flagged use_icp are scored against the digest */
}
-static int
-cacheIndexScan(CacheIndex *idx, const char *fname, FILE *file)
+static fr_result
+swapStateReader(FileIterator * fi) /* FI_READER: reads one binary storeSwapLogData record into fi->entry */
{
- int count = 0;
- int del_count = 0;
- storeSwapLogData s;
- fprintf(stderr, "%s scanning\n", fname);
- while (fread(&s, sizeof(s), 1, file) == 1) {
- count++;
- idx->scanned_count++;
- if (s.op == SWAP_LOG_ADD) {
- CacheEntry *olde = (CacheEntry *) hash_lookup(idx->hash, s.key);
- if (olde) {
- idx->bad_add_count++;
- } else {
- CacheEntry *e = cacheEntryCreate(&s);
- hash_join(idx->hash, (hash_link*) e);
- idx->count++;
- }
- } else
- if (s.op == SWAP_LOG_DEL) {
- CacheEntry *olde = (CacheEntry *) hash_lookup(idx->hash, s.key);
- if (!olde)
- idx->bad_del_count++;
- else {
- assert(idx->count);
- hash_remove_link(idx->hash, (hash_link*) olde);
- cacheEntryDestroy(olde);
- idx->count--;
- }
- del_count++;
- } else {
- fprintf(stderr, "%s:%d: unknown swap log action\n", fname, count);
- exit(-3);
- }
+ storeSwapLogData *entry;
+ if (!fi->entry)
+ fi->entry = xcalloc(1, sizeof(storeSwapLogData)); /* buffer is allocated on first use and reused afterwards */
+ entry = (storeSwapLogData *)fi->entry;
+ if (fread(entry, sizeof(*entry), 1, fi->file) != 1)
+ return frEof; /* a short or failed read is treated as end of file */
+ fi->inner_time = entry->lastref; /* the record's lastref serves as the entry timestamp */
+ if (entry->op != SWAP_LOG_ADD && entry->op != SWAP_LOG_DEL) {
+ fprintf(stderr, "%s:%d: unknown swap log action\n", fi->fname, fi->line_count);
+ exit(-3); /* an unknown action aborts the whole program */
}
- fprintf(stderr, "%s scanned %d entries, alloc: %d bytes\n",
- fname, count,
- (int)(count*sizeof(CacheEntry)));
- return count;
+ return frOk;
}
-#endif
-static int
-cacheIndexScanCleanPrefix(CacheIndex *idx, const char *fname, FILE *file)
+/*
+ * FI_READER for text access logs: parses the next line of fi->file into
+ * fi->entry (a RawAccessLogEntry, allocated on first use and zeroed on
+ * every later call).
+ *
+ * The line must start with a numeric timestamp and contain "<METHOD> <url>"
+ * followed by " - <HIER>/...".  The line buffer is edited in place while
+ * the pieces are cut out.
+ *
+ * Returns frEof when no more lines can be read, frError when the line
+ * cannot be parsed (no URL, no hierarchy field, no digit before the
+ * method, unknown method, or no '/' after the hierarchy code), and frOk
+ * with key, timestamp and use_icp filled in otherwise.
+ */
+static fr_result
+accessLogReader(FileIterator * fi)
{
- int count = 0;
- storeSwapLogData s;
- fprintf(stderr, "%s scanning\n", fname);
- while (fread(&s, sizeof(s), 1, file) == 1) {
- count++;
- idx->scanned_count++;
- if (s.op == SWAP_LOG_ADD) {
- CacheEntry *olde = (CacheEntry *) hash_lookup(idx->hash, s.key);
- if (olde) {
- idx->bad_add_count++;
- } else {
- CacheEntry *e = cacheEntryCreate(&s);
- hash_join(idx->hash, (hash_link*) e);
- idx->count++;
- }
- } else
- if (s.op == SWAP_LOG_DEL) {
- break;
- } else {
- fprintf(stderr, "%s:%d: unknown swap log action\n", fname, count);
- exit(-3);
- }
+    static char buf[4096];
+    RawAccessLogEntry *entry;
+    char *url;
+    char *method;
+    char *slash;
+    method_t method_id = METHOD_NONE;
+    char *hier = NULL;
+
+    assert(fi);
+    if (!fi->entry)
+        fi->entry = xcalloc(1, sizeof(RawAccessLogEntry));
+    else
+        memset(fi->entry, 0, sizeof(RawAccessLogEntry));
+    entry = (RawAccessLogEntry *) fi->entry;
+    if (!fgets(buf, sizeof(buf), fi->file))
+        return frEof;           /* eof */
+    entry->timestamp = fi->inner_time = (time_t) atoi(buf);
+    url = strstr(buf, "://");
+    hier = url ? strstr(url, " - ") : NULL;
+
+    if (!url || !hier) {
+        /*fprintf(stderr, "%s:%d: strange access log entry '%s'\n",
+         * fname, scanned_count, buf); */
+        return frError;
}
- fprintf(stderr, "%s scanned %d entries, alloc: %d bytes\n",
- fname, count,
- (int)(count*sizeof(CacheEntry)));
- return count;
+    /*
+     * Walk back from the URL to the closest digit, turning the spaces on
+     * the way into terminators so that the method and the URL become
+     * separate strings.  Never step before the start of the buffer.
+     */
+    method = url;
+    while (method > buf && !isdigit((unsigned char) *method)) {
+        if (*method == ' ')
+            *method = '\0';
+        --method;
+    }
+    if (!isdigit((unsigned char) *method))
+        return frError;         /* no digit anywhere before the URL */
+    method += 2;                /* skip the digit and the terminator after it */
+    method_id = methodStrToId(method);
+    if (method_id == METHOD_NONE) {
+        /*fprintf(stderr, "%s:%d: invalid method %s in '%s'\n",
+         * fname, scanned_count, method, buf); */
+        return frError;
+    }
+    /*
+     * A recognized method is a terminated string that ends before the
+     * "://", so this backward scan always stops at that terminator.
+     */
+    while (*url)
+        url--;
+    url++;
+    *hier = '\0';               /* terminate the URL */
+    hier += 3;                  /* skip " - " */
+    slash = strchr(hier, '/');
+    if (!slash)
+        return frError;         /* malformed hierarchy field; was a NULL dereference */
+    *slash = '\0';
+    /*fprintf(stdout, "%s:%d: %s %s %s\n",
+     * fname, count, method, url, hier); */
+    entry->use_icp = strcmp(hier, "NONE");
+    /* no ICP lookup for these status codes */
+/* strcmp(hier, "NONE") &&
+ * strcmp(hier, "DIRECT") &&
+ * strcmp(hier, "FIREWALL_IP_DIRECT") &&
+ * strcmp(hier, "LOCAL_IP_DIRECT") &&
+ * strcmp(hier, "NO_DIRECT_FAIL") &&
+ * strcmp(hier, "NO_PARENT_DIRECT") &&
+ * strcmp(hier, "SINGLE_PARENT") &&
+ * strcmp(hier, "PASSTHROUGH_PARENT") &&
+ * strcmp(hier, "SSL_PARENT_MISS") &&
+ * strcmp(hier, "DEFAULT_PARENT");
+ */
+    xmemcpy(entry->key, storeKeyPublic(url, method_id), sizeof(entry->key));
+    /*fprintf(stdout, "%s:%d: %s %s %s %s\n",
+     * fname, count, method, storeKeyText(entry->key), url, hier); */
+    return frOk;
}
-/* Us */
-
-static int we_icp_query_count = 0;
-static int we_true_hit_count = 0;
-static int we_true_miss_count = 0;
-static int we_false_hit_count = 0;
-static int we_false_miss_count = 0;
static void
-cacheIndexQueryPeer(CacheIndex *idx, const cache_key *key)
+cachePurge(Cache * cache, storeSwapLogData * s, int update_digest) /* removes the entry keyed by s->key; a missing entry is counted in bad_del_count */
{
- const int peer_has_it = hash_lookup(Peer->hash, key) != NULL;
- const int we_think_we_have_it = cacheDigestTest(Peer->digest, key);
-
- we_icp_query_count++;
- if (peer_has_it)
- if (we_think_we_have_it)
- we_true_hit_count++;
- else
- we_false_miss_count++;
- else
- if (we_think_we_have_it)
- we_false_hit_count++;
- else
- we_true_miss_count++;
+ CacheEntry *olde = (CacheEntry *) hash_lookup(cache->hash, s->key);
+ if (!olde) {
+ cache->bad_del_count++;
+ } else {
+ assert(cache->count);
+ hash_remove_link(cache->hash, (hash_link *) olde);
+ if (update_digest)
+ cacheDigestDel(cache->digest, s->key); /* keep the digest in step with the hash when asked to */
+ cacheEntryDestroy(olde);
+ cache->count--;
+ }
}
static void
-cacheIndexIcpReport(CacheIndex *idx)
+cacheStore(Cache * cache, storeSwapLogData * s, int update_digest) /* adds an entry for s->key; a key that is already hashed is counted in bad_add_count */
{
- fprintf(stdout, "we: icp: %d\n", we_icp_query_count);
- fprintf(stdout, "we: t-hit: %d (%d%%) t-miss: %d (%d%%) t-*: %d (%d%%)\n",
- we_true_hit_count, xpercentInt(we_true_hit_count, we_icp_query_count),
- we_true_miss_count, xpercentInt(we_true_miss_count, we_icp_query_count),
- we_true_hit_count+we_true_miss_count,
- xpercentInt(we_true_hit_count+we_true_miss_count, we_icp_query_count)
- );
- fprintf(stdout, "we: f-hit: %d (%d%%) f-miss: %d (%d%%) f-*: %d (%d%%)\n",
- we_false_hit_count, xpercentInt(we_false_hit_count, we_icp_query_count),
- we_false_miss_count, xpercentInt(we_false_miss_count, we_icp_query_count),
- we_false_hit_count+we_false_miss_count,
- xpercentInt(we_false_hit_count+we_false_miss_count, we_icp_query_count)
- );
-}
-
-static int
-cacheIndexAddAccessLog(CacheIndex *idx, const char *fname)
-{
- FILE *file;
- int scanned_count = 0;
- assert(!idx);
- assert(fname && strlen(fname));
-
- file = fopen(fname, "r");
- if (!file) {
- fprintf(stderr, "cannot open %s: %s\n", fname, strerror(errno));
- return 0;
+ CacheEntry *olde = (CacheEntry *) hash_lookup(cache->hash, s->key);
+ if (olde) {
+ cache->bad_add_count++;
+ } else {
+ CacheEntry *e = cacheEntryCreate(s);
+ hash_join(cache->hash, (hash_link *)&e->key); /* key is the first member of CacheEntry, so this is the address of the entry itself */
+ cache->count++;
+ if (update_digest)
+ cacheDigestAdd(cache->digest, e->key); /* keep the digest in step with the hash when asked to */
}
- scanned_count = cacheIndexScanAccessLog(idx, fname, file);
- fclose(file);
- return scanned_count;
}
-static int
-cacheIndexScanAccessLog(CacheIndex *idx, const char *fname, FILE *file)
+static void
+cacheUpdateStore(Cache * cache, storeSwapLogData * s, int update_digest) /* applies one swap log record to cache: add or delete, chosen by s->op */
{
- static char buf[4096];
- int count = 0;
- int scanned_count = 0;
- int icp_count = 0;
- assert(!idx);
- fprintf(stderr, "%s scanning\n", fname);
- while (fgets(buf, sizeof(buf), file)) {
- char *url = strstr(buf, "://");
- char *method;
- int method_id = METHOD_NONE;
- char *hier = url ? strstr(url, " - ") : NULL;
- const cache_key *key = NULL;
-
- scanned_count++;
- if (!(scanned_count % 50000))
- fprintf(stderr, "%s scanned %d K entries (%d bad)\n",
- fname, scanned_count/1000, scanned_count-count-1);
- if (!url || !hier) {
- fprintf(stderr, "%s:%d: strange access log entry '%s'\n",
- fname, scanned_count, buf);
- continue;
- }
- method = url;
- while (!isdigit(*method)) {
- if (*method == ' ')
- *method = '\0';
- --method;
- }
- method += 2;
- method_id = cacheIndexParseMethod(method);
- if (method_id == METHOD_NONE) {
- fprintf(stderr, "%s:%d: invalid method %s in '%s'\n",
- fname, scanned_count, method, buf);
- continue;
- }
- while (*url) url--;
- url++;
- *hier = '\0';
- hier += 3;
- *strchr(hier, '/') = '\0';
- /*fprintf(stdout, "%s:%d: %s %s %s\n",
- fname, count, method, url, hier);*/
- count++;
- /* no ICP lookup for these status codes */
- if (!strcmp(hier, "NONE") ||
- !strcmp(hier, "DIRECT") ||
- !strcmp(hier, "FIREWALL_IP_DIRECT") ||
- !strcmp(hier, "LOCAL_IP_DIRECT") ||
- !strcmp(hier, "NO_DIRECT_FAIL") ||
- !strcmp(hier, "NO_PARENT_DIRECT") ||
- !strcmp(hier, "SINGLE_PARENT") ||
- !strcmp(hier, "PASSTHROUGH_PARENT") ||
- !strcmp(hier, "SSL_PARENT_MISS") ||
- !strcmp(hier, "DEFAULT_PARENT"))
- continue;
- key = storeKeyPublic(url, method_id);
- /*fprintf(stdout, "%s:%d: %s %s %s %s\n",
- fname, count, method, storeKeyText(key), url, hier);*/
- cacheIndexQueryPeer(idx, key);
- icp_count++;
+ switch (s->op) {
+ case SWAP_LOG_ADD:
+ cacheStore(cache, s, update_digest);
+ break;
+ case SWAP_LOG_DEL:
+ cachePurge(cache, s, update_digest);
+ break;
+ default:
+ assert(0); /* swapStateReader exits on any other op, so none should reach here */
}
- fprintf(stderr, "%s: scanned %d access log entries; bad: %d\n",
- fname, scanned_count, scanned_count-count);
- fprintf(stderr, "%s: icp: %d (%d%%)\n",
- fname, icp_count, xpercentInt(icp_count, count));
- return count;
}
static int
int
main(int argc, char *argv[])
{
- CacheIndex *they = NULL;
+ FileIterator **fis = NULL; /* fis[0]: access log; fis[1..]: swap state logs */
+ const int fi_count = argc - 1; /* one iterator per file named on the command line */
+ int active_fi_count = 0;
+ time_t ready_time; /* time of the first delete seen in the swap logs; -1 until one is found */
+ Cache *them, *us;
int i;
if (argc < 3)
return usage(argv[0]);
- they = Peer = cacheIndexCreate("they");
- for (i = 2; i < argc; ++i) {
- cacheIndexAddLog(they, argv[i]);
+ them = cacheCreate("them");
+ us = cacheCreate("us");
+ them->peer = us;
+ us->peer = them;
+
+ fis = (FileIterator **)xcalloc(fi_count, sizeof(FileIterator *));
+ /* init iterators with files: argv[1] is the access log, argv[2..] are swap state logs */
+ fis[0] = fileIteratorCreate(argv[1], accessLogReader);
+ for (i = 2; i < argc; ++i)
+ fis[i - 1] = fileIteratorCreate(argv[i], swapStateReader);
+ /* check that all files were found */
+ for (i = 0; i < fi_count; ++i)
+ if (!fis[i])
+ return -2; /* exits without releasing the iterators and caches created so far */
+ /* read prefix to get start-up contents of the peer cache */
+ ready_time = -1;
+ for (i = 1; i < fi_count; ++i) {
+ FileIterator *fi = fis[i];
+ while (fi->inner_time > 0) {
+ if (((storeSwapLogData *) fi->entry)->op == SWAP_LOG_DEL) {
+ cachePurge(them, (storeSwapLogData *)fi->entry, 0);
+ if (ready_time < 0)
+ ready_time = fi->inner_time; /* the first delete fixes the end of the start-up prefix */
+ } else {
+ if (ready_time > 0 && fi->inner_time > ready_time)
+ break; /* adds later than ready_time are left for the main loop below */
+ cacheStore(them, (storeSwapLogData *)fi->entry, 0);
+ }
+ fileIteratorAdvance(fi);
+ }
}
- cacheIndexInitDigest(they);
- cacheIndexInitReport(they);
-
- if (!cacheIndexAddAccessLog(NULL, argv[1]))
- return 1;
- cacheIndexIcpReport(NULL);
+ /* digest peer cache content */
+ cacheResetDigest(them);
+ us->digest = cacheDigestClone(them->digest); /* @netw@ */
+
+ /* shift the time in access log to match ready_time */
+ fileIteratorSetCurTime(fis[0], ready_time); /* asserts that the access log has a current entry with a positive time */
+
+ /* iterate, use the iterator with the smallest positive inner_time */
+ cur_time = -1;
+ do {
+ int next_i = -1;
+ time_t next_time = -1;
+ active_fi_count = 0;
+ for (i = 0; i < fi_count; ++i) {
+ if (fis[i]->inner_time >= 0) { /* exhausted iterators have inner_time == -1 */
+ if (!active_fi_count || fis[i]->inner_time < next_time) {
+ next_i = i;
+ next_time = fis[i]->inner_time;
+ }
+ active_fi_count++;
+ }
+ }
+ if (next_i >= 0) {
+ cur_time = next_time;
+ /*fprintf(stderr, "%2d time: %d %s", next_i, (int)cur_time, ctime(&cur_time)); */
+ if (next_i == 0)
+ cacheFetch(us, (RawAccessLogEntry *)fis[next_i]->entry); /* access log entry: a request made to "us" */
+ else
+ cacheUpdateStore(them, (storeSwapLogData *)fis[next_i]->entry, 1); /* swap log entry: update "them" and its digest */
+ fileIteratorAdvance(fis[next_i]);
+ }
+ } while (active_fi_count);
- cacheIndexDestroy(they);
+ /* report */
+ cacheReport(them);
+ cacheReport(us);
+ cacheQueryReport(us, &us->qstats);
+ /* clean */
+ for (i = 0; i < argc - 1; ++i) {
+ fileIteratorDestroy(fis[i]);
+ }
+ xfree(fis);
+ cacheDestroy(them);
+ cacheDestroy(us);
return 0;
}