/*
 * DEBUG: section 20    Store Rebuild Routines
 * AUTHOR: Duane Wessels
 *
 * SQUID Web Proxy Cache          http://www.squid-cache.org/
 * ----------------------------------------------------------
 *
 *  Squid is the result of efforts by numerous individuals from
 *  the Internet community; see the CONTRIBUTORS file for full
 *  details.   Many organizations have provided support for Squid's
 *  development; see the SPONSORS file for full details.  Squid is
 *  Copyrighted (C) 2001 by the Regents of the University of
 *  California; see the COPYRIGHT file for full details.  Squid
 *  incorporates software developed and/or copyrighted by other
 *  sources; see the CREDITS file for full details.
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA.
 *
 */

#include "squid.h"
#include "event.h"
#include "globals.h"
#include "md5.h"
#include "StatCounters.h"
#include "Store.h"
#include "store_key_md5.h"
#include "SwapDir.h"
#include "store_digest.h"
#include "store_rebuild.h"
#include "StoreSearch.h"
#include "SquidConfig.h"
#include "SquidTime.h"

#if HAVE_ERRNO_H
#include <errno.h>
#endif
static StoreRebuildData counts;

static struct timeval rebuild_start;
static void storeCleanup(void *);

typedef struct {
    /* total number of "swap.state" entries that will be read */
    int total;
    /* number of entries read so far */
    int scanned;
} store_rebuild_progress;

static store_rebuild_progress *RebuildProgress = NULL;

static int
storeCleanupDoubleCheck(StoreEntry * e)
{
    SwapDir *SD = dynamic_cast<SwapDir *>(INDEXSD(e->swap_dirn));
    return (SD->doubleCheck(*e));
}

static void
storeCleanup(void *datanotused)
{
    static int store_errors = 0;
    static StoreSearchPointer currentSearch;
    static int validated = 0;

    if (currentSearch == NULL || currentSearch->isDone())
        currentSearch = Store::Root().search(NULL, NULL);

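    // Validate at most a few hundred entries per event call so the rebuild
    // does not monopolize the main loop; the event re-adds itself below
    // until the search is done.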
    size_t statCount = 500;

    // TODO: Avoid the loop (and ENTRY_VALIDATED) unless opt_store_doublecheck.
    while (statCount-- && !currentSearch->isDone() && currentSearch->next()) {
        StoreEntry *e;

        e = currentSearch->currentItem();

        if (EBIT_TEST(e->flags, ENTRY_VALIDATED))
            continue;

        /*
         * Calling StoreEntry->release() has no effect because we're
         * still in 'store_rebuilding' state
         */
        if (e->swap_filen < 0)
            continue;

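        // With -S (opt_store_doublecheck), verify this entry against its
        // on-disk object and count any inconsistency as a store error.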
        if (opt_store_doublecheck)
            if (storeCleanupDoubleCheck(e))
                ++store_errors;

        EBIT_SET(e->flags, ENTRY_VALIDATED);

        /*
         * Only set the file bit if we know it's a valid entry;
         * otherwise, set it in the validation procedure.
         */

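        // Report progress every 0x40000 (262,144) validated entries.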
        if ((++validated & 0x3FFFF) == 0)
            /* TODO format the int with a stream operator */
            debugs(20, DBG_IMPORTANT, "  " << validated << " Entries Validated so far.");
    }

    if (currentSearch->isDone()) {
        debugs(20, DBG_IMPORTANT, "  Completed Validation Procedure");
        debugs(20, DBG_IMPORTANT, "  Validated " << validated << " Entries");
        debugs(20, DBG_IMPORTANT, "  store_swap_size = " << Store::Root().currentSize() / 1024.0 << " KB");
        --StoreController::store_dirs_rebuilding;
        assert(0 == StoreController::store_dirs_rebuilding);

        if (opt_store_doublecheck && store_errors) {
            fatalf("Quitting after finding %d cache index inconsistencies. " \
                   "Removing cache index will force its slow rebuild. " \
                   "Removing -S will let Squid start with an inconsistent " \
                   "cache index (at your own risk).\n", store_errors);
        }

        if (store_digest)
            storeDigestNoteStoreReady();

        currentSearch = NULL;
    } else
        eventAdd("storeCleanup", storeCleanup, NULL, 0.0, 1);
}

/* meta data recreated from disk image in swap directory */
void
storeRebuildComplete(StoreRebuildData *dc)
{
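    // Accumulate this cache_dir's rebuild counters into the static grand
    // total that is reported once every cache_dir has finished.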
    double dt;
    counts.objcount += dc->objcount;
    counts.expcount += dc->expcount;
    counts.scancount += dc->scancount;
    counts.clashcount += dc->clashcount;
    counts.dupcount += dc->dupcount;
    counts.cancelcount += dc->cancelcount;
    counts.invalid += dc->invalid;
    counts.badflags += dc->badflags;
    counts.bad_log_op += dc->bad_log_op;
    counts.zero_object_sz += dc->zero_object_sz;
    /*
     * When store_dirs_rebuilding == 1, it means we are done reading
     * or scanning all cache_dirs.  Now report the stats and start
     * the validation (storeCleanup()) thread.
     */

    if (StoreController::store_dirs_rebuilding > 1)
        return;

    dt = tvSubDsec(rebuild_start, current_time);

    debugs(20, DBG_IMPORTANT, "Finished rebuilding storage from disk.");
    debugs(20, DBG_IMPORTANT, "  " << std::setw(7) << counts.scancount << " Entries scanned");
    debugs(20, DBG_IMPORTANT, "  " << std::setw(7) << counts.invalid << " Invalid entries.");
    debugs(20, DBG_IMPORTANT, "  " << std::setw(7) << counts.badflags << " With invalid flags.");
    debugs(20, DBG_IMPORTANT, "  " << std::setw(7) << counts.objcount << " Objects loaded.");
    debugs(20, DBG_IMPORTANT, "  " << std::setw(7) << counts.expcount << " Objects expired.");
    debugs(20, DBG_IMPORTANT, "  " << std::setw(7) << counts.cancelcount << " Objects cancelled.");
    debugs(20, DBG_IMPORTANT, "  " << std::setw(7) << counts.dupcount << " Duplicate URLs purged.");
    debugs(20, DBG_IMPORTANT, "  " << std::setw(7) << counts.clashcount << " Swapfile clashes avoided.");
    debugs(20, DBG_IMPORTANT, "  Took "<< std::setw(3)<< std::setprecision(2) << dt << " seconds ("<< std::setw(6) <<
           ((double) counts.objcount / (dt > 0.0 ? dt : 1.0)) << " objects/sec).");
    debugs(20, DBG_IMPORTANT, "Beginning Validation Procedure");

    eventAdd("storeCleanup", storeCleanup, NULL, 0.0, 1);

    xfree(RebuildProgress);

    RebuildProgress = NULL;
}

/*
 * this is ugly.  We don't actually start any rebuild threads here,
 * but only initialize counters, etc.  The rebuild threads are
 * actually started by the filesystem "fooDirInit" function.
 */
void
storeRebuildStart(void)
{
    memset(&counts, '\0', sizeof(counts));
    rebuild_start = current_time;
    /*
     * Note: store_dirs_rebuilding is initialized to 1.
     *
     * When we parse the configuration and construct each swap dir,
     * its construction raises the rebuild count.
     *
     * This prevents us from trying to write clean logs until we have
     * finished rebuilding - including after a reconfiguration that opens
     * an existing swapdir.  The corresponding decrement occurs in
     * storeCleanup(), when it is finished.
     */
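    // One progress slot per configured cache_dir; the array is freed in
    // storeRebuildComplete() once all dirs have reported.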
    RebuildProgress = (store_rebuild_progress *)xcalloc(Config.cacheSwap.n_configured,
                      sizeof(store_rebuild_progress));
}

/*
 * A fs-specific rebuild procedure periodically reports its
 * progress.
 */
void
storeRebuildProgress(int sd_index, int total, int sofar)
{
    static time_t last_report = 0;
    double n = 0.0;
    double d = 0.0;

    if (sd_index < 0)
        return;

    if (sd_index >= Config.cacheSwap.n_configured)
        return;

    if (NULL == RebuildProgress)
        return;

    RebuildProgress[sd_index].total = total;

    RebuildProgress[sd_index].scanned = sofar;

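    // Throttle: emit at most one progress report every 15 seconds.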
    if (squid_curtime - last_report < 15)
        return;

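    // Note: sd_index is reused here to sum scanned/total across all cache_dirs.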
    for (sd_index = 0; sd_index < Config.cacheSwap.n_configured; ++sd_index) {
        n += (double) RebuildProgress[sd_index].scanned;
        d += (double) RebuildProgress[sd_index].total;
    }

    debugs(20, DBG_IMPORTANT, "Store rebuilding is "<< std::setw(4)<< std::setprecision(2) << 100.0 * n / d << "% complete");
    last_report = squid_curtime;
}

#include "fde.h"
#include "StoreMetaUnpacker.h"
#include "StoreMeta.h"
#include "Generic.h"

struct InitStoreEntry : public unary_function<StoreMeta, void> {
    InitStoreEntry(StoreEntry *anEntry, cache_key *aKey):what(anEntry),index(aKey) {}

    void operator()(StoreMeta const &x) {
        switch (x.getType()) {

        case STORE_META_KEY:
            assert(x.length == SQUID_MD5_DIGEST_LENGTH);
            memcpy(index, x.value, SQUID_MD5_DIGEST_LENGTH);
            break;

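        // STORE_META_STD is the legacy (pre-large-file) metadata layout;
        // it is copied field by field because its on-disk struct differs
        // from the current StoreEntry layout.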
        case STORE_META_STD:
            struct old_metahdr {
                time_t timestamp;
                time_t lastref;
                time_t expires;
                time_t lastmod;
                size_t swap_file_sz;
                uint16_t refcount;
                uint16_t flags;
            } *tmp;
            tmp = (struct old_metahdr *)x.value;
            assert(x.length == STORE_HDR_METASIZE_OLD);
            what->timestamp = tmp->timestamp;
            what->lastref = tmp->lastref;
            what->expires = tmp->expires;
            what->lastmod = tmp->lastmod;
            what->swap_file_sz = tmp->swap_file_sz;
            what->refcount = tmp->refcount;
            what->flags = tmp->flags;
            break;

        case STORE_META_STD_LFS:
            assert(x.length == STORE_HDR_METASIZE);
            memcpy(&what->timestamp, x.value, STORE_HDR_METASIZE);
            break;

        default:
            break;
        }
    }

    StoreEntry *what;
    cache_key *index;
};

bool
storeRebuildLoadEntry(int fd, int diskIndex, MemBuf &buf,
                      StoreRebuildData &counts)
{
    if (fd < 0)
        return false;

    assert(buf.hasSpace()); // caller must allocate

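    // A single read of the entry prefix; it is expected to include all of
    // the swap meta data that storeRebuildParseEntry() will unpack.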
    const int len = FD_READ_METHOD(fd, buf.space(), buf.spaceSize());
    ++ statCounter.syscalls.disk.reads;
    if (len < 0) {
        const int xerrno = errno;
        debugs(47, DBG_IMPORTANT, "WARNING: cache_dir[" << diskIndex << "]: " <<
               "Ignoring cached entry after meta data read failure: " << xstrerr(xerrno));
        return false;
    }

    buf.appended(len);
    return true;
}

bool
storeRebuildParseEntry(MemBuf &buf, StoreEntry &tmpe, cache_key *key,
                       StoreRebuildData &counts,
                       uint64_t expectedSize)
{
    int swap_hdr_len = 0;
    StoreMetaUnpacker aBuilder(buf.content(), buf.contentSize(), &swap_hdr_len);
    if (aBuilder.isBufferZero()) {
        debugs(47,5, HERE << "skipping empty record.");
        return false;
    }

    if (!aBuilder.isBufferSane()) {
        debugs(47, DBG_IMPORTANT, "WARNING: Ignoring malformed cache entry.");
        return false;
    }

    StoreMeta *tlv_list = aBuilder.createStoreMeta();
    if (!tlv_list) {
        debugs(47, DBG_IMPORTANT, "WARNING: Ignoring cache entry with invalid " <<
               "meta data");
        return false;
    }

    // TODO: consume parsed metadata?

    debugs(47,7, HERE << "successful swap meta unpacking");
    memset(key, '\0', SQUID_MD5_DIGEST_LENGTH);

    InitStoreEntry visitor(&tmpe, key);
    for_each(*tlv_list, visitor);
    storeSwapTLVFree(tlv_list);
    tlv_list = NULL;

    if (storeKeyNull(key)) {
        debugs(47, DBG_IMPORTANT, "WARNING: Ignoring keyless cache entry");
        return false;
    }

    tmpe.key = key;
    /* check sizes */

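    // Reconcile the size recorded in swap meta data with the caller-supplied
    // expectedSize: accept a zero recorded size, accept a recorded size that
    // omits the swap header, and reject anything else that mismatches.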
    if (expectedSize > 0) {
        if (tmpe.swap_file_sz == 0) {
            tmpe.swap_file_sz = expectedSize;
        } else if (tmpe.swap_file_sz == (uint64_t)(expectedSize - swap_hdr_len)) {
            tmpe.swap_file_sz = expectedSize;
        } else if (tmpe.swap_file_sz != expectedSize) {
            debugs(47, DBG_IMPORTANT, "WARNING: Ignoring cache entry due to a " <<
                   "SIZE MISMATCH " << tmpe.swap_file_sz << "!=" << expectedSize);
            return false;
        }
    } else if (tmpe.swap_file_sz <= 0) {
        debugs(47, DBG_IMPORTANT, "WARNING: Ignoring cache entry with " <<
               "unknown size: " << tmpe);
        return false;
    }

    if (EBIT_TEST(tmpe.flags, KEY_PRIVATE)) {
        ++ counts.badflags;
        return false;
    }

    return true;
}

bool
storeRebuildKeepEntry(const StoreEntry &tmpe, const cache_key *key,
                      StoreRebuildData &counts)
{
    /* this needs to become
     * 1) unpack url
     * 2) make synthetic request with headers ?? or otherwise search
     * for a matching object in the store
     * TODO FIXME change to new async api
     * TODO FIXME I think there is a race condition here with the
     * async api :
     * store A reads in object foo, searches for it, and finds nothing.
     * store B reads in object foo, searches for it, finds nothing.
     * store A gets called back with nothing, so registers the object
     * store B gets called back with nothing, so registers the object,
     * which will conflict when the in-core index gets around to scanning
     * store B.
     *
     * this suggests that rather than searching for duplicates, the
     * index rebuild should just assume it's the most recent accurate
     * store entry and whoever indexes the stores handles duplicates.
     */
    if (StoreEntry *e = Store::Root().get(key)) {

        if (e->lastref >= tmpe.lastref) {
            /* key already exists, old entry is newer */
            /* keep old, ignore new */
            ++counts.dupcount;

            // For some stores, get() creates/unpacks a store entry. Signal
            // such stores that we will no longer use the get() result:
            e->lock();
            e->unlock();

            return false;
        } else {
            /* URL already exists, this swapfile not being used */
            /* junk old, load new */
            e->release();   /* release old entry */
            ++counts.dupcount;
        }
    }

    return true;
}