]> git.ipfire.org Git - thirdparty/squid.git/blob - src/cache_diff.cc
From Guido (with additions by me):
[thirdparty/squid.git] / src / cache_diff.cc
1
2 /*
3 * $Id: cache_diff.cc,v 1.19 2003/04/24 06:35:08 hno Exp $
4 *
5 * AUTHOR: Alex Rousskov
6 *
7 * SQUID Web Proxy Cache http://www.squid-cache.org/
8 * ----------------------------------------------------------
9 *
10 * Squid is the result of efforts by numerous individuals from
11 * the Internet community; see the CONTRIBUTORS file for full
12 * details. Many organizations have provided support for Squid's
13 * development; see the SPONSORS file for full details. Squid is
14 * Copyrighted (C) 2001 by the Regents of the University of
15 * California; see the COPYRIGHT file for full details. Squid
16 * incorporates software developed and/or copyrighted by other
17 * sources; see the CREDITS file for full details.
18 *
19 * This program is free software; you can redistribute it and/or modify
20 * it under the terms of the GNU General Public License as published by
21 * the Free Software Foundation; either version 2 of the License, or
22 * (at your option) any later version.
23 *
24 * This program is distributed in the hope that it will be useful,
25 * but WITHOUT ANY WARRANTY; without even the implied warranty of
26 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
27 * GNU General Public License for more details.
28 *
29 * You should have received a copy of the GNU General Public License
30 * along with this program; if not, write to the Free Software
31 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA.
32 *
33 */
34
35 /*
36 * Computes the difference between the contents of two caches
37 * using swap logs
38 * Reports the percentage of common files and other stats
39 */
40
41 #include "squid.h"
42
43 typedef struct
44 {
45 const char *name;
46 hash_table *hash;
47 int count; /* #currently cached entries */
48 int scanned_count; /* #scanned entries */
49 int bad_add_count; /* #duplicate adds */
50 int bad_del_count; /* #dels with no prior add */
51 }
52
53 CacheIndex;
54
55
56 typedef struct _CacheEntry
57 {
58 const cache_key *key;
59
60 struct _CacheEntry *next;
61 /* storeSwapLogData s; */
62 unsigned char key_arr[MD5_DIGEST_CHARS];
63 }
64
65 CacheEntry;
66
67
/* Request method names, in table order; copied from url.c */
const char *RequestMethodStr[] = {
    "NONE", "GET", "POST", "PUT",
    "HEAD", "CONNECT", "TRACE", "PURGE"
};
80
81
82 static int cacheIndexScan(CacheIndex * idx, const char *fname, FILE * file);
83
84
85 static CacheEntry *
86 cacheEntryCreate(const storeSwapLogData * s)
87 {
88 CacheEntry *e = xcalloc(1, sizeof(CacheEntry));
89 assert(s);
90 /* e->s = *s; */
91 xmemcpy(e->key_arr, s->key, MD5_DIGEST_CHARS);
92 e->key = &e->key_arr[0];
93 return e;
94 }
95
96 static void
97 cacheEntryDestroy(CacheEntry * e)
98 {
99 assert(e);
100 xfree(e);
101 }
102
103 static CacheIndex *
104 cacheIndexCreate(const char *name)
105 {
106 CacheIndex *idx;
107
108 if (!name || !strlen(name))
109 return NULL;
110
111 idx = xcalloc(1, sizeof(CacheIndex));
112
113 idx->name = name;
114
115 idx->hash = hash_create(storeKeyHashCmp, 2e6, storeKeyHashHash);
116
117 return idx;
118 }
119
120 static void
121 cacheIndexDestroy(CacheIndex * idx)
122 {
123 hash_link *hashr = NULL;
124
125 if (idx) {
126 /* destroy hash list contents */
127 hash_first(idx->hash);
128
129 while (hashr = hash_next(idx->hash)) {
130 hash_remove_link(idx->hash, hashr);
131 cacheEntryDestroy((CacheEntry *) hashr);
132 }
133
134 /* destroy the hash table itself */
135 hashFreeMemory(idx->hash);
136
137 xfree(idx);
138 }
139 }
140
141 static int
142 cacheIndexAddLog(CacheIndex * idx, const char *fname)
143 {
144 FILE *file;
145 int scanned_count = 0;
146 assert(idx);
147 assert(fname && strlen(fname));
148
149 file = fopen(fname, "r");
150
151 if (!file) {
152 fprintf(stderr, "cannot open %s: %s\n", fname, strerror(errno));
153 return 0;
154 }
155
156 #ifdef _SQUID_WIN32_
157 setmode(fileno(file), O_BINARY);
158
159 #endif
160
161 scanned_count = cacheIndexScan(idx, fname, file);
162
163 fclose(file);
164
165 return scanned_count;
166 }
167
168 static void
169 cacheIndexInitReport(CacheIndex * idx)
170 {
171 assert(idx);
172 fprintf(stderr, "%s: bad swap_add: %d\n",
173 idx->name, idx->bad_add_count);
174 fprintf(stderr, "%s: bad swap_del: %d\n",
175 idx->name, idx->bad_del_count);
176 fprintf(stderr, "%s: scanned lines: %d\n",
177 idx->name, idx->scanned_count);
178 }
179
180 static int
181 cacheIndexScan(CacheIndex * idx, const char *fname, FILE * file)
182 {
183 int count = 0;
184 storeSwapLogData s;
185 fprintf(stderr, "%s scanning\n", fname);
186
187 while (fread(&s, sizeof(s), 1, file) == 1) {
188 count++;
189 idx->scanned_count++;
190 /* if (s.op <= SWAP_LOG_NOP || s.op >= SWAP_LOG_MAX)
191 * continue; */
192
193 if (s.op == SWAP_LOG_ADD) {
194 CacheEntry *olde = (CacheEntry *) hash_lookup(idx->hash, s.key);
195
196 if (olde) {
197 idx->bad_add_count++;
198 } else {
199 CacheEntry *e = cacheEntryCreate(&s);
200 hash_join(idx->hash, &e->hash);
201 idx->count++;
202 }
203 } else if (s.op == SWAP_LOG_DEL) {
204 CacheEntry *olde = (CacheEntry *) hash_lookup(idx->hash, s.key);
205
206 if (!olde)
207 idx->bad_del_count++;
208 else {
209 assert(idx->count);
210 hash_remove_link(idx->hash, (hash_link *) olde);
211 cacheEntryDestroy(olde);
212 idx->count--;
213 }
214 } else {
215 fprintf(stderr, "%s:%d: unknown swap log action\n", fname, count);
216 exit(-3);
217 }
218 }
219
220 fprintf(stderr, "%s:%d: scanned (size: %d bytes)\n",
221 fname, count, (int) (count * sizeof(CacheEntry)));
222 return count;
223 }
224
225 static void
226 cacheIndexCmpReport(CacheIndex * idx, int shared_count)
227 {
228 assert(idx && shared_count <= idx->count);
229
230 printf("%s:\t %7d = %7d + %7d (%7.2f%% + %7.2f%%)\n",
231 idx->name,
232 idx->count,
233 idx->count - shared_count,
234 shared_count,
235 xpercent(idx->count - shared_count, idx->count),
236 xpercent(shared_count, idx->count));
237 }
238
239 static void
240 cacheIndexCmp(CacheIndex * idx1, CacheIndex * idx2)
241 {
242 int shared_count = 0;
243 int hashed_count = 0;
244 hash_link *hashr = NULL;
245 CacheIndex *small_idx = idx1;
246 CacheIndex *large_idx = idx2;
247 assert(idx1 && idx2);
248
249 /* check our guess */
250
251 if (idx1->count > idx2->count) {
252 small_idx = idx2;
253 large_idx = idx1;
254 }
255
256 /* find shared_count */
257 hash_first(small_idx->hash);
258
259 for (hashr = hash_next(small_idx->hash)) {
260 hashed_count++;
261
262 if (hash_lookup(large_idx->hash, hashr->key))
263 shared_count++;
264 }
265
266 assert(hashed_count == small_idx->count);
267
268 cacheIndexCmpReport(idx1, shared_count);
269 cacheIndexCmpReport(idx2, shared_count);
270 }
271
272
/*
 * Writes the command line synopsis to stderr.
 *
 * prg_name: program name to show in the synopsis.
 *
 * Always returns -1, so callers can write `return usage(...)`.
 */
static int
usage(const char *prg_name)
{
    const int status = -1;

    fprintf(stderr,
            "usage: %s <label1>: <swap_state>... <label2>: <swap_state>...\n",
            prg_name);

    return status;
}
280
281 int
282 main(int argc, char *argv[])
283 {
284 CacheIndex *CacheIdx[2];
285 CacheIndex *idx = NULL;
286 int idxCount = 0;
287 int i;
288
289 if (argc < 5)
290 return usage(argv[0]);
291
292 for (i = 1; i < argc; ++i) {
293 const int len = strlen(argv[i]);
294
295 if (!len)
296 return usage(argv[0]);
297
298 if (argv[i][len - 1] == ':') {
299 idxCount++;
300
301 if (len < 2 || idxCount > 2)
302 return usage(argv[0]);
303
304 idx = cacheIndexCreate(argv[i]);
305
306 CacheIdx[idxCount - 1] = idx;
307 } else {
308 if (!idx)
309 return usage(argv[0]);
310
311 cacheIndexAddLog(idx, argv[i]);
312 }
313 }
314
315 if (idxCount != 2)
316 return usage(argv[0]);
317
318 cacheIndexInitReport(CacheIdx[0]);
319
320 cacheIndexInitReport(CacheIdx[1]);
321
322 cacheIndexCmp(CacheIdx[0], CacheIdx[1]);
323
324 cacheIndexDestroy(CacheIdx[0]);
325
326 cacheIndexDestroy(CacheIdx[1]);
327
328 return 1;
329 }