]> git.ipfire.org Git - thirdparty/squid.git/blob - src/store_rebuild.cc
Merged from trunk 13199.
[thirdparty/squid.git] / src / store_rebuild.cc
1 /*
2 * DEBUG: section 20 Store Rebuild Routines
3 * AUTHOR: Duane Wessels
4 *
5 * SQUID Web Proxy Cache http://www.squid-cache.org/
6 * ----------------------------------------------------------
7 *
8 * Squid is the result of efforts by numerous individuals from
9 * the Internet community; see the CONTRIBUTORS file for full
10 * details. Many organizations have provided support for Squid's
11 * development; see the SPONSORS file for full details. Squid is
12 * Copyrighted (C) 2001 by the Regents of the University of
13 * California; see the COPYRIGHT file for full details. Squid
14 * incorporates software developed and/or copyrighted by other
15 * sources; see the CREDITS file for full details.
16 *
17 * This program is free software; you can redistribute it and/or modify
18 * it under the terms of the GNU General Public License as published by
19 * the Free Software Foundation; either version 2 of the License, or
20 * (at your option) any later version.
21 *
22 * This program is distributed in the hope that it will be useful,
23 * but WITHOUT ANY WARRANTY; without even the implied warranty of
24 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
25 * GNU General Public License for more details.
26 *
27 * You should have received a copy of the GNU General Public License
28 * along with this program; if not, write to the Free Software
29 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA.
30 *
31 */
32
33 #include "squid.h"
34 #include "event.h"
35 #include "globals.h"
36 #include "md5.h"
37 #include "SquidConfig.h"
38 #include "SquidTime.h"
39 #include "StatCounters.h"
40 #include "Store.h"
41 #include "store_digest.h"
42 #include "store_key_md5.h"
43 #include "store_rebuild.h"
44 #include "StoreSearch.h"
45 #include "SwapDir.h"
46
47 #if HAVE_ERRNO_H
48 #include <errno.h>
49 #endif
50 static StoreRebuildData counts;
51
52 static struct timeval rebuild_start;
53 static void storeCleanup(void *);
54
55 typedef struct {
56 /* total number of "swap.state" entries that will be read */
57 int total;
58 /* number of entries read so far */
59 int scanned;
60 } store_rebuild_progress;
61
62 static store_rebuild_progress *RebuildProgress = NULL;
63
64 static int
65 storeCleanupDoubleCheck(StoreEntry * e)
66 {
67 SwapDir *SD = dynamic_cast<SwapDir *>(INDEXSD(e->swap_dirn));
68 return (SD->doubleCheck(*e));
69 }
70
static void
storeCleanup(void *datanotused)
{
    // Incremental post-rebuild validation pass over the whole store index.
    // Runs as a self-rescheduling event; the statics below persist across
    // invocations so each call processes only a bounded slice of entries.
    static int store_errors = 0; // inconsistencies reported by doubleCheck()
    static StoreSearchPointer currentSearch; // iterator over all indexed entries
    static int validated = 0; // entries marked ENTRY_VALIDATED so far
    static int seen = 0; // entries examined so far

    // (re)start a full-store search on the first invocation
    if (currentSearch == NULL || currentSearch->isDone())
        currentSearch = Store::Root().search(NULL, NULL);

    size_t statCount = 500; // max entries handled per event callback

    // TODO: Avoid the loop (and ENTRY_VALIDATED) unless opt_store_doublecheck.
    while (statCount-- && !currentSearch->isDone() && currentSearch->next()) {
        StoreEntry *e;

        e = currentSearch->currentItem();

        ++seen;

        if (EBIT_TEST(e->flags, ENTRY_VALIDATED))
            continue; // already validated in an earlier slice

        /*
         * Calling StoreEntry->release() has no effect because we're
         * still in 'store_rebuilding' state
         */
        if (e->swap_filen < 0)
            continue; // nothing on disk to validate

        // optionally cross-check the index entry against the on-disk object
        if (opt_store_doublecheck)
            if (storeCleanupDoubleCheck(e))
                ++store_errors;

        EBIT_SET(e->flags, ENTRY_VALIDATED);

        /*
         * Only set the file bit if we know its a valid entry
         * otherwise, set it in the validation procedure
         */

        if ((++validated & 0x3FFFF) == 0)
            /* TODO format the int with with a stream operator */
            debugs(20, DBG_IMPORTANT, "  " << validated << " Entries Validated so far.");
    }

    if (currentSearch->isDone()) {
        debugs(20, 2, "Seen: " << seen << " entries");
        debugs(20, DBG_IMPORTANT, "  Completed Validation Procedure");
        debugs(20, DBG_IMPORTANT, "  Validated " << validated << " Entries");
        debugs(20, DBG_IMPORTANT, "  store_swap_size = " << Store::Root().currentSize() / 1024.0 << " KB");
        // validation was the last outstanding rebuild step; the counter must
        // now reach exactly zero (see the note in storeRebuildStart())
        --StoreController::store_dirs_rebuilding;
        assert(0 == StoreController::store_dirs_rebuilding);

        if (opt_store_doublecheck && store_errors) {
            fatalf("Quitting after finding %d cache index inconsistencies. " \
                   "Removing cache index will force its slow rebuild. " \
                   "Removing -S will let Squid start with an inconsistent " \
                   "cache index (at your own risk).\n", store_errors);
        }

        if (store_digest)
            storeDigestNoteStoreReady();

        currentSearch = NULL; // release the search iterator
    } else
        eventAdd("storeCleanup", storeCleanup, NULL, 0.0, 1); // schedule next slice
}
140
/* meta data recreated from disk image in swap directory */
/// Called by each cache_dir when it finishes its own rebuild; folds the
/// per-dir statistics into the global totals. The last caller (when the
/// rebuild counter drops to 1) reports the totals and schedules validation.
void

storeRebuildComplete(StoreRebuildData *dc)
{
    double dt;
    // accumulate this cache_dir's tallies into the file-global totals
    counts.objcount += dc->objcount;
    counts.expcount += dc->expcount;
    counts.scancount += dc->scancount;
    counts.clashcount += dc->clashcount;
    counts.dupcount += dc->dupcount;
    counts.cancelcount += dc->cancelcount;
    counts.invalid += dc->invalid;
    counts.badflags += dc->badflags;
    counts.bad_log_op += dc->bad_log_op;
    counts.zero_object_sz += dc->zero_object_sz;
    /*
     * When store_dirs_rebuilding == 1, it means we are done reading
     * or scanning all cache_dirs. Now report the stats and start
     * the validation (storeCleanup()) thread.
     */

    if (StoreController::store_dirs_rebuilding > 1)
        return; // other cache_dirs are still rebuilding

    dt = tvSubDsec(rebuild_start, current_time); // wall-clock rebuild duration

    debugs(20, DBG_IMPORTANT, "Finished rebuilding storage from disk.");
    debugs(20, DBG_IMPORTANT, "  " << std::setw(7) << counts.scancount  << " Entries scanned");
    debugs(20, DBG_IMPORTANT, "  " << std::setw(7) << counts.invalid  << " Invalid entries.");
    debugs(20, DBG_IMPORTANT, "  " << std::setw(7) << counts.badflags  << " With invalid flags.");
    debugs(20, DBG_IMPORTANT, "  " << std::setw(7) << counts.objcount  << " Objects loaded.");
    debugs(20, DBG_IMPORTANT, "  " << std::setw(7) << counts.expcount  << " Objects expired.");
    debugs(20, DBG_IMPORTANT, "  " << std::setw(7) << counts.cancelcount  << " Objects cancelled.");
    debugs(20, DBG_IMPORTANT, "  " << std::setw(7) << counts.dupcount  << " Duplicate URLs purged.");
    debugs(20, DBG_IMPORTANT, "  " << std::setw(7) << counts.clashcount  << " Swapfile clashes avoided.");
    debugs(20, DBG_IMPORTANT, "  Took "<< std::setw(3)<< std::setprecision(2) << dt << " seconds ("<< std::setw(6) <<
           ((double) counts.objcount / (dt > 0.0 ? dt : 1.0)) << " objects/sec).");
    debugs(20, DBG_IMPORTANT, "Beginning Validation Procedure");

    // kick off the incremental storeCleanup() validation pass
    eventAdd("storeCleanup", storeCleanup, NULL, 0.0, 1);

    // progress tracking is no longer needed once all dirs are done
    xfree(RebuildProgress);

    RebuildProgress = NULL;
}
187
188 /*
189 * this is ugly. We don't actually start any rebuild threads here,
190 * but only initialize counters, etc. The rebuild threads are
191 * actually started by the filesystem "fooDirInit" function.
192 */
193 void
194 storeRebuildStart(void)
195 {
196 memset(&counts, '\0', sizeof(counts));
197 rebuild_start = current_time;
198 /*
199 * Note: store_dirs_rebuilding is initialized to 1.
200 *
201 * When we parse the configuration and construct each swap dir,
202 * the construction of that raises the rebuild count.
203 *
204 * This prevents us from trying to write clean logs until we
205 * finished rebuilding - including after a reconfiguration that opens an
206 * existing swapdir. The corresponding decrement * occurs in
207 * storeCleanup(), when it is finished.
208 */
209 RebuildProgress = (store_rebuild_progress *)xcalloc(Config.cacheSwap.n_configured,
210 sizeof(store_rebuild_progress));
211 }
212
213 /*
214 * A fs-specific rebuild procedure periodically reports its
215 * progress.
216 */
217 void
218 storeRebuildProgress(int sd_index, int total, int sofar)
219 {
220 static time_t last_report = 0;
221 double n = 0.0;
222 double d = 0.0;
223
224 if (sd_index < 0)
225 return;
226
227 if (sd_index >= Config.cacheSwap.n_configured)
228 return;
229
230 if (NULL == RebuildProgress)
231 return;
232
233 RebuildProgress[sd_index].total = total;
234
235 RebuildProgress[sd_index].scanned = sofar;
236
237 if (squid_curtime - last_report < 15)
238 return;
239
240 for (sd_index = 0; sd_index < Config.cacheSwap.n_configured; ++sd_index) {
241 n += (double) RebuildProgress[sd_index].scanned;
242 d += (double) RebuildProgress[sd_index].total;
243 }
244
245 debugs(20, DBG_IMPORTANT, "Store rebuilding is "<< std::setw(4)<< std::setprecision(2) << 100.0 * n / d << "% complete");
246 last_report = squid_curtime;
247 }
248
249 #include "fde.h"
250 #include "Generic.h"
251 #include "StoreMeta.h"
252 #include "StoreMetaUnpacker.h"
253
/// Functor applied to each swap meta TLV of an on-disk entry: copies the
/// entry key and the standard metadata fields into a StoreEntry being
/// reconstructed during rebuild. Unknown TLV types are silently skipped.
struct InitStoreEntry : public unary_function<StoreMeta, void> {
    InitStoreEntry(StoreEntry *anEntry, cache_key *aKey):what(anEntry),index(aKey) {}

    void operator()(StoreMeta const &x) {
        switch (x.getType()) {

        case STORE_META_KEY:
            // the MD5 cache key identifying this entry
            assert(x.length == SQUID_MD5_DIGEST_LENGTH);
            memcpy(index, x.value, SQUID_MD5_DIGEST_LENGTH);
            break;

        case STORE_META_STD:
            // pre-large-file-support metadata layout; overlay the raw TLV
            // bytes with this struct and copy the fields one by one
            struct old_metahdr {
                time_t timestamp;
                time_t lastref;
                time_t expires;
                time_t lastmod;
                size_t swap_file_sz;
                uint16_t refcount;
                uint16_t flags;
            } *tmp;
            tmp = (struct old_metahdr *)x.value;
            assert(x.length == STORE_HDR_METASIZE_OLD);
            what->timestamp = tmp->timestamp;
            what->lastref = tmp->lastref;
            what->expires = tmp->expires;
            what->lastmod = tmp->lastmod;
            what->swap_file_sz = tmp->swap_file_sz;
            what->refcount = tmp->refcount;
            what->flags = tmp->flags;
            break;

        case STORE_META_STD_LFS:
            // current layout: field order matches StoreEntry starting at
            // timestamp, so a single memcpy fills all metadata fields
            assert(x.length == STORE_HDR_METASIZE);
            memcpy(&what->timestamp, x.value, STORE_HDR_METASIZE);
            break;

        default:
            break; // ignore TLV types we do not need for rebuilding
        }
    }

    StoreEntry *what; // entry being filled in
    cache_key *index; // receives the entry's MD5 key
};
299
300 bool
301 storeRebuildLoadEntry(int fd, int diskIndex, MemBuf &buf, StoreRebuildData &)
302 {
303 if (fd < 0)
304 return false;
305
306 assert(buf.hasSpace()); // caller must allocate
307
308 const int len = FD_READ_METHOD(fd, buf.space(), buf.spaceSize());
309 ++ statCounter.syscalls.disk.reads;
310 if (len < 0) {
311 const int xerrno = errno;
312 debugs(47, DBG_IMPORTANT, "WARNING: cache_dir[" << diskIndex << "]: " <<
313 "Ignoring cached entry after meta data read failure: " << xstrerr(xerrno));
314 return false;
315 }
316
317 buf.appended(len);
318 return true;
319 }
320
/// Parses the swap metadata previously loaded into buf, filling tmpe and key.
/// \param buf          raw bytes read from the entry's on-disk image
/// \param tmpe         receives the unpacked metadata fields
/// \param key          receives the entry's MD5 cache key
/// \param stats        rebuild counters; badflags is bumped for private keys
/// \param expectedSize on-disk object size if known to the caller, else 0
/// \returns false when the record is empty, malformed, keyless, private,
///          or its stored size contradicts expectedSize
bool
storeRebuildParseEntry(MemBuf &buf, StoreEntry &tmpe, cache_key *key,
                       StoreRebuildData &stats,
                       uint64_t expectedSize)
{
    int swap_hdr_len = 0; // filled with the metadata header length on success
    StoreMetaUnpacker aBuilder(buf.content(), buf.contentSize(), &swap_hdr_len);
    if (aBuilder.isBufferZero()) {
        debugs(47,5, HERE << "skipping empty record.");
        return false;
    }

    if (!aBuilder.isBufferSane()) {
        debugs(47, DBG_IMPORTANT, "WARNING: Ignoring malformed cache entry.");
        return false;
    }

    StoreMeta *tlv_list = aBuilder.createStoreMeta();
    if (!tlv_list) {
        debugs(47, DBG_IMPORTANT, "WARNING: Ignoring cache entry with invalid " <<
               "meta data");
        return false;
    }

    // TODO: consume parsed metadata?

    debugs(47,7, "successful swap meta unpacking; swap_file_sz=" << tmpe.swap_file_sz);
    memset(key, '\0', SQUID_MD5_DIGEST_LENGTH); // so storeKeyNull() can detect a missing key TLV

    // copy key and standard metadata TLVs into tmpe/key, then free the list
    InitStoreEntry visitor(&tmpe, key);
    for_each(*tlv_list, visitor);
    storeSwapTLVFree(tlv_list);
    tlv_list = NULL;

    if (storeKeyNull(key)) {
        debugs(47, DBG_IMPORTANT, "WARNING: Ignoring keyless cache entry");
        return false;
    }

    tmpe.key = key;
    /* check sizes */

    // Reconcile the size recorded in the metadata with the size the caller
    // observed on disk. The metadata size may legitimately exclude the swap
    // header; anything else is a mismatch and the entry is rejected.
    if (expectedSize > 0) {
        if (tmpe.swap_file_sz == 0) {
            // metadata did not record a size; trust the caller's value
            tmpe.swap_file_sz = expectedSize;
        } else if (tmpe.swap_file_sz == (uint64_t)(expectedSize - swap_hdr_len)) {
            // metadata size excluded the header; normalize to the full size
            tmpe.swap_file_sz = expectedSize;
        } else if (tmpe.swap_file_sz != expectedSize) {
            debugs(47, DBG_IMPORTANT, "WARNING: Ignoring cache entry due to a " <<
                   "SIZE MISMATCH " << tmpe.swap_file_sz << "!=" << expectedSize);
            return false;
        }
    } else if (tmpe.swap_file_sz <= 0) {
        // if caller cannot handle unknown sizes, it must check after the call.
        debugs(47, 7, "unknown size: " << tmpe);
    }

    if (EBIT_TEST(tmpe.flags, KEY_PRIVATE)) {
        // private entries must not be re-indexed across restarts
        ++ stats.badflags;
        return false;
    }

    return true;
}
385
/// Decides whether a rebuilt entry should be added to the index when an
/// entry with the same key may already exist. Returns true to keep the new
/// (tmpe) entry; false when the already-indexed entry is at least as fresh.
bool
storeRebuildKeepEntry(const StoreEntry &tmpe, const cache_key *key, StoreRebuildData &stats)
{
    /* this needs to become
     * 1) unpack url
     * 2) make synthetic request with headers ?? or otherwise search
     * for a matching object in the store
     * TODO FIXME change to new async api
     * TODO FIXME I think there is a race condition here with the
     * async api :
     * store A reads in object foo, searchs for it, and finds nothing.
     * store B reads in object foo, searchs for it, finds nothing.
     * store A gets called back with nothing, so registers the object
     * store B gets called back with nothing, so registers the object,
     * which will conflict when the in core index gets around to scanning
     * store B.
     *
     * this suggests that rather than searching for duplicates, the
     * index rebuild should just assume its the most recent accurate
     * store entry and whoever indexes the stores handles duplicates.
     */
    if (StoreEntry *e = Store::Root().get(key)) {

        // resolve the duplicate by last-reference time: newer wins
        if (e->lastref >= tmpe.lastref) {
            /* key already exists, old entry is newer */
            /* keep old, ignore new */
            ++stats.dupcount;

            // For some stores, get() creates/unpacks a store entry. Signal
            // such stores that we will no longer use the get() result:
            e->lock("storeRebuildKeepEntry");
            e->unlock("storeRebuildKeepEntry");

            return false;
        } else {
            /* URL already exists, this swapfile not being used */
            /* junk old, load new */
            e->release(); /* release old entry */
            ++stats.dupcount;
        }
    }

    // no duplicate (or the duplicate was released): index the new entry
    return true;
}
429 }