]> git.ipfire.org Git - thirdparty/squid.git/blob - src/cache_diff.cc
Merge from trunk
[thirdparty/squid.git] / src / cache_diff.cc
1
2 /*
3 * $Id: cache_diff.cc,v 1.21 2007/11/15 16:47:35 wessels Exp $
4 *
5 * AUTHOR: Alex Rousskov
6 *
7 * SQUID Web Proxy Cache http://www.squid-cache.org/
8 * ----------------------------------------------------------
9 *
10 * Squid is the result of efforts by numerous individuals from
11 * the Internet community; see the CONTRIBUTORS file for full
12 * details. Many organizations have provided support for Squid's
13 * development; see the SPONSORS file for full details. Squid is
14 * Copyrighted (C) 2001 by the Regents of the University of
15 * California; see the COPYRIGHT file for full details. Squid
16 * incorporates software developed and/or copyrighted by other
17 * sources; see the CREDITS file for full details.
18 *
19 * This program is free software; you can redistribute it and/or modify
20 * it under the terms of the GNU General Public License as published by
21 * the Free Software Foundation; either version 2 of the License, or
22 * (at your option) any later version.
23 *
24 * This program is distributed in the hope that it will be useful,
25 * but WITHOUT ANY WARRANTY; without even the implied warranty of
26 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
27 * GNU General Public License for more details.
28 *
29 * You should have received a copy of the GNU General Public License
30 * along with this program; if not, write to the Free Software
31 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA.
32 *
33 */
34
35 /*
36 * Computes the difference between the contents of two caches
37 * using swap logs
38 * Reports the percentage of common files and other stats
39 */
40
41 #include "squid.h"
42
43 typedef struct {
44 const char *name;
45 hash_table *hash;
46 int count; /* #currently cached entries */
47 int scanned_count; /* #scanned entries */
48 int bad_add_count; /* #duplicate adds */
49 int bad_del_count; /* #dels with no prior add */
50 } CacheIndex;
51
52
53 typedef struct _CacheEntry {
54 const cache_key *key;
55
56 struct _CacheEntry *next;
57 /* StoreSwapLogData s; */
58 unsigned char key_arr[SQUID_MD5_DIGEST_LENGTH];
59 } CacheEntry;
60
61
/*
 * Printable request method names, in table order.
 * Copied from url.c.
 */
const char *RequestMethodStr[] = {
    "NONE", "GET", "POST", "PUT",
    "HEAD", "CONNECT", "TRACE", "PURGE"
};
73
74
75 static int cacheIndexScan(CacheIndex * idx, const char *fname, FILE * file);
76
77
78 static CacheEntry *
79 cacheEntryCreate(const StoreSwapLogData * s)
80 {
81 CacheEntry *e = xcalloc(1, sizeof(CacheEntry));
82 assert(s);
83 /* e->s = *s; */
84 xmemcpy(e->key_arr, s->key, SQUID_MD5_DIGEST_LENGTH);
85 e->key = &e->key_arr[0];
86 return e;
87 }
88
89 static void
90 cacheEntryDestroy(CacheEntry * e)
91 {
92 assert(e);
93 xfree(e);
94 }
95
96 static CacheIndex *
97 cacheIndexCreate(const char *name)
98 {
99 CacheIndex *idx;
100
101 if (!name || !strlen(name))
102 return NULL;
103
104 idx = xcalloc(1, sizeof(CacheIndex));
105
106 idx->name = name;
107
108 idx->hash = hash_create(storeKeyHashCmp, 2e6, storeKeyHashHash);
109
110 return idx;
111 }
112
113 static void
114 cacheIndexDestroy(CacheIndex * idx)
115 {
116 hash_link *hashr = NULL;
117
118 if (idx) {
119 /* destroy hash list contents */
120 hash_first(idx->hash);
121
122 while (hashr = hash_next(idx->hash)) {
123 hash_remove_link(idx->hash, hashr);
124 cacheEntryDestroy((CacheEntry *) hashr);
125 }
126
127 /* destroy the hash table itself */
128 hashFreeMemory(idx->hash);
129
130 xfree(idx);
131 }
132 }
133
134 static int
135 cacheIndexAddLog(CacheIndex * idx, const char *fname)
136 {
137 FILE *file;
138 int scanned_count = 0;
139 assert(idx);
140 assert(fname && strlen(fname));
141
142 file = fopen(fname, "r");
143
144 if (!file) {
145 fprintf(stderr, "cannot open %s: %s\n", fname, strerror(errno));
146 return 0;
147 }
148
149 #ifdef _SQUID_WIN32_
150 setmode(fileno(file), O_BINARY);
151
152 #endif
153
154 scanned_count = cacheIndexScan(idx, fname, file);
155
156 fclose(file);
157
158 return scanned_count;
159 }
160
161 static void
162 cacheIndexInitReport(CacheIndex * idx)
163 {
164 assert(idx);
165 fprintf(stderr, "%s: bad swap_add: %d\n",
166 idx->name, idx->bad_add_count);
167 fprintf(stderr, "%s: bad swap_del: %d\n",
168 idx->name, idx->bad_del_count);
169 fprintf(stderr, "%s: scanned lines: %d\n",
170 idx->name, idx->scanned_count);
171 }
172
173 static int
174 cacheIndexScan(CacheIndex * idx, const char *fname, FILE * file)
175 {
176 int count = 0;
177 StoreSwapLogData s;
178 fprintf(stderr, "%s scanning\n", fname);
179
180 while (fread(&s, sizeof(s), 1, file) == 1) {
181 count++;
182 idx->scanned_count++;
183 /* if (s.op <= SWAP_LOG_NOP || s.op >= SWAP_LOG_MAX)
184 * continue; */
185
186 if (s.op == SWAP_LOG_ADD) {
187 CacheEntry *olde = (CacheEntry *) hash_lookup(idx->hash, s.key);
188
189 if (olde) {
190 idx->bad_add_count++;
191 } else {
192 CacheEntry *e = cacheEntryCreate(&s);
193 hash_join(idx->hash, &e->hash);
194 idx->count++;
195 }
196 } else if (s.op == SWAP_LOG_DEL) {
197 CacheEntry *olde = (CacheEntry *) hash_lookup(idx->hash, s.key);
198
199 if (!olde)
200 idx->bad_del_count++;
201 else {
202 assert(idx->count);
203 hash_remove_link(idx->hash, (hash_link *) olde);
204 cacheEntryDestroy(olde);
205 idx->count--;
206 }
207 } else {
208 fprintf(stderr, "%s:%d: unknown swap log action\n", fname, count);
209 exit(-3);
210 }
211 }
212
213 fprintf(stderr, "%s:%d: scanned (size: %d bytes)\n",
214 fname, count, (int) (count * sizeof(CacheEntry)));
215 return count;
216 }
217
218 static void
219 cacheIndexCmpReport(CacheIndex * idx, int shared_count)
220 {
221 assert(idx && shared_count <= idx->count);
222
223 printf("%s:\t %7d = %7d + %7d (%7.2f%% + %7.2f%%)\n",
224 idx->name,
225 idx->count,
226 idx->count - shared_count,
227 shared_count,
228 xpercent(idx->count - shared_count, idx->count),
229 xpercent(shared_count, idx->count));
230 }
231
232 static void
233 cacheIndexCmp(CacheIndex * idx1, CacheIndex * idx2)
234 {
235 int shared_count = 0;
236 int hashed_count = 0;
237 hash_link *hashr = NULL;
238 CacheIndex *small_idx = idx1;
239 CacheIndex *large_idx = idx2;
240 assert(idx1 && idx2);
241
242 /* check our guess */
243
244 if (idx1->count > idx2->count) {
245 small_idx = idx2;
246 large_idx = idx1;
247 }
248
249 /* find shared_count */
250 hash_first(small_idx->hash);
251
252 for (hashr = hash_next(small_idx->hash)) {
253 hashed_count++;
254
255 if (hash_lookup(large_idx->hash, hashr->key))
256 shared_count++;
257 }
258
259 assert(hashed_count == small_idx->count);
260
261 cacheIndexCmpReport(idx1, shared_count);
262 cacheIndexCmpReport(idx2, shared_count);
263 }
264
265
/*
 * Prints the command line synopsis to stderr.
 * Always returns -1.
 */
static int
usage(const char *prg_name)
{
    (void) fprintf(stderr,
                   "usage: %s <label1>: <swap_state>... <label2>: <swap_state>...\n",
                   prg_name);

    return -1;
}
273
274 int
275 main(int argc, char *argv[])
276 {
277 CacheIndex *CacheIdx[2];
278 CacheIndex *idx = NULL;
279 int idxCount = 0;
280 int i;
281
282 if (argc < 5)
283 return usage(argv[0]);
284
285 for (i = 1; i < argc; ++i) {
286 const int len = strlen(argv[i]);
287
288 if (!len)
289 return usage(argv[0]);
290
291 if (argv[i][len - 1] == ':') {
292 idxCount++;
293
294 if (len < 2 || idxCount > 2)
295 return usage(argv[0]);
296
297 idx = cacheIndexCreate(argv[i]);
298
299 CacheIdx[idxCount - 1] = idx;
300 } else {
301 if (!idx)
302 return usage(argv[0]);
303
304 cacheIndexAddLog(idx, argv[i]);
305 }
306 }
307
308 if (idxCount != 2)
309 return usage(argv[0]);
310
311 cacheIndexInitReport(CacheIdx[0]);
312
313 cacheIndexInitReport(CacheIdx[1]);
314
315 cacheIndexCmp(CacheIdx[0], CacheIdx[1]);
316
317 cacheIndexDestroy(CacheIdx[0]);
318
319 cacheIndexDestroy(CacheIdx[1]);
320
321 return 1;
322 }