]> git.ipfire.org Git - thirdparty/squid.git/blob - src/store_rebuild.cc
Docs: Copyright updates for 2018 (#114)
[thirdparty/squid.git] / src / store_rebuild.cc
1 /*
2 * Copyright (C) 1996-2018 The Squid Software Foundation and contributors
3 *
4 * Squid software is distributed under GPLv2+ license and includes
5 * contributions from numerous individuals and organizations.
6 * Please see the COPYING and CONTRIBUTORS files for details.
7 */
8
9 /* DEBUG: section 20 Store Rebuild Routines */
10
11 #include "squid.h"
12 #include "event.h"
13 #include "globals.h"
14 #include "md5.h"
15 #include "SquidConfig.h"
16 #include "SquidTime.h"
17 #include "StatCounters.h"
18 #include "Store.h"
19 #include "store/Disk.h"
20 #include "store_digest.h"
21 #include "store_key_md5.h"
22 #include "store_rebuild.h"
23 #include "StoreSearch.h"
24 // for tvSubDsec() which should be in SquidTime.h
25 #include "util.h"
26
27 #include <cerrno>
28
29 static StoreRebuildData counts;
30
31 static struct timeval rebuild_start;
32 static void storeCleanup(void *);
33
34 typedef struct {
35 /* total number of "swap.state" entries that will be read */
36 int total;
37 /* number of entries read so far */
38 int scanned;
39 } store_rebuild_progress;
40
41 static store_rebuild_progress *RebuildProgress = NULL;
42
43 static int
44 storeCleanupDoubleCheck(StoreEntry * e)
45 {
46 SwapDir *SD = dynamic_cast<SwapDir *>(INDEXSD(e->swap_dirn));
47 return (SD->doubleCheck(*e));
48 }
49
50 static void
51 storeCleanup(void *)
52 {
53 static int store_errors = 0;
54 static StoreSearchPointer currentSearch;
55 static int validated = 0;
56 static int seen = 0;
57
58 if (currentSearch == NULL || currentSearch->isDone())
59 currentSearch = Store::Root().search();
60
61 size_t statCount = 500;
62
63 // TODO: Avoid the loop (and ENTRY_VALIDATED) unless opt_store_doublecheck.
64 while (statCount-- && !currentSearch->isDone() && currentSearch->next()) {
65 StoreEntry *e;
66
67 e = currentSearch->currentItem();
68
69 ++seen;
70
71 if (EBIT_TEST(e->flags, ENTRY_VALIDATED))
72 continue;
73
74 /*
75 * Calling StoreEntry->release() has no effect because we're
76 * still in 'store_rebuilding' state
77 */
78 if (e->swap_filen < 0)
79 continue;
80
81 if (opt_store_doublecheck)
82 if (storeCleanupDoubleCheck(e))
83 ++store_errors;
84
85 EBIT_SET(e->flags, ENTRY_VALIDATED);
86
87 /*
88 * Only set the file bit if we know its a valid entry
89 * otherwise, set it in the validation procedure
90 */
91
92 if ((++validated & 0x3FFFF) == 0)
93 /* TODO format the int with with a stream operator */
94 debugs(20, DBG_IMPORTANT, " " << validated << " Entries Validated so far.");
95 }
96
97 if (currentSearch->isDone()) {
98 debugs(20, 2, "Seen: " << seen << " entries");
99 debugs(20, DBG_IMPORTANT, " Completed Validation Procedure");
100 debugs(20, DBG_IMPORTANT, " Validated " << validated << " Entries");
101 debugs(20, DBG_IMPORTANT, " store_swap_size = " << Store::Root().currentSize() / 1024.0 << " KB");
102 --StoreController::store_dirs_rebuilding;
103 assert(0 == StoreController::store_dirs_rebuilding);
104
105 if (opt_store_doublecheck && store_errors) {
106 fatalf("Quitting after finding %d cache index inconsistencies. " \
107 "Removing cache index will force its slow rebuild. " \
108 "Removing -S will let Squid start with an inconsistent " \
109 "cache index (at your own risk).\n", store_errors);
110 }
111
112 if (store_digest)
113 storeDigestNoteStoreReady();
114
115 currentSearch = NULL;
116 } else
117 eventAdd("storeCleanup", storeCleanup, NULL, 0.0, 1);
118 }
119
120 /* meta data recreated from disk image in swap directory */
121 void
122
123 storeRebuildComplete(StoreRebuildData *dc)
124 {
125 double dt;
126 counts.objcount += dc->objcount;
127 counts.expcount += dc->expcount;
128 counts.scancount += dc->scancount;
129 counts.clashcount += dc->clashcount;
130 counts.dupcount += dc->dupcount;
131 counts.cancelcount += dc->cancelcount;
132 counts.invalid += dc->invalid;
133 counts.badflags += dc->badflags;
134 counts.bad_log_op += dc->bad_log_op;
135 counts.zero_object_sz += dc->zero_object_sz;
136 /*
137 * When store_dirs_rebuilding == 1, it means we are done reading
138 * or scanning all cache_dirs. Now report the stats and start
139 * the validation (storeCleanup()) thread.
140 */
141
142 if (StoreController::store_dirs_rebuilding > 1)
143 return;
144
145 dt = tvSubDsec(rebuild_start, current_time);
146
147 debugs(20, DBG_IMPORTANT, "Finished rebuilding storage from disk.");
148 debugs(20, DBG_IMPORTANT, " " << std::setw(7) << counts.scancount << " Entries scanned");
149 debugs(20, DBG_IMPORTANT, " " << std::setw(7) << counts.invalid << " Invalid entries.");
150 debugs(20, DBG_IMPORTANT, " " << std::setw(7) << counts.badflags << " With invalid flags.");
151 debugs(20, DBG_IMPORTANT, " " << std::setw(7) << counts.objcount << " Objects loaded.");
152 debugs(20, DBG_IMPORTANT, " " << std::setw(7) << counts.expcount << " Objects expired.");
153 debugs(20, DBG_IMPORTANT, " " << std::setw(7) << counts.cancelcount << " Objects cancelled.");
154 debugs(20, DBG_IMPORTANT, " " << std::setw(7) << counts.dupcount << " Duplicate URLs purged.");
155 debugs(20, DBG_IMPORTANT, " " << std::setw(7) << counts.clashcount << " Swapfile clashes avoided.");
156 debugs(20, DBG_IMPORTANT, " Took "<< std::setw(3)<< std::setprecision(2) << dt << " seconds ("<< std::setw(6) <<
157 ((double) counts.objcount / (dt > 0.0 ? dt : 1.0)) << " objects/sec).");
158 debugs(20, DBG_IMPORTANT, "Beginning Validation Procedure");
159
160 eventAdd("storeCleanup", storeCleanup, NULL, 0.0, 1);
161
162 xfree(RebuildProgress);
163
164 RebuildProgress = NULL;
165 }
166
167 /*
168 * this is ugly. We don't actually start any rebuild threads here,
169 * but only initialize counters, etc. The rebuild threads are
170 * actually started by the filesystem "fooDirInit" function.
171 */
172 void
173 storeRebuildStart(void)
174 {
175 memset(&counts, '\0', sizeof(counts));
176 rebuild_start = current_time;
177 /*
178 * Note: store_dirs_rebuilding is initialized to 1.
179 *
180 * When we parse the configuration and construct each swap dir,
181 * the construction of that raises the rebuild count.
182 *
183 * This prevents us from trying to write clean logs until we
184 * finished rebuilding - including after a reconfiguration that opens an
185 * existing swapdir. The corresponding decrement * occurs in
186 * storeCleanup(), when it is finished.
187 */
188 RebuildProgress = (store_rebuild_progress *)xcalloc(Config.cacheSwap.n_configured,
189 sizeof(store_rebuild_progress));
190 }
191
192 /*
193 * A fs-specific rebuild procedure periodically reports its
194 * progress.
195 */
196 void
197 storeRebuildProgress(int sd_index, int total, int sofar)
198 {
199 static time_t last_report = 0;
200 double n = 0.0;
201 double d = 0.0;
202
203 if (sd_index < 0)
204 return;
205
206 if (sd_index >= Config.cacheSwap.n_configured)
207 return;
208
209 if (NULL == RebuildProgress)
210 return;
211
212 RebuildProgress[sd_index].total = total;
213
214 RebuildProgress[sd_index].scanned = sofar;
215
216 if (squid_curtime - last_report < 15)
217 return;
218
219 for (sd_index = 0; sd_index < Config.cacheSwap.n_configured; ++sd_index) {
220 n += (double) RebuildProgress[sd_index].scanned;
221 d += (double) RebuildProgress[sd_index].total;
222 }
223
224 debugs(20, DBG_IMPORTANT, "Store rebuilding is "<< std::setw(4)<< std::setprecision(2) << 100.0 * n / d << "% complete");
225 last_report = squid_curtime;
226 }
227
228 #include "fde.h"
229 #include "Generic.h"
230 #include "StoreMeta.h"
231 #include "StoreMetaUnpacker.h"
232
233 struct InitStoreEntry : public unary_function<StoreMeta, void> {
234 InitStoreEntry(StoreEntry *anEntry, cache_key *aKey):what(anEntry),index(aKey) {}
235
236 void operator()(StoreMeta const &x) {
237 switch (x.getType()) {
238
239 case STORE_META_KEY:
240 assert(x.length == SQUID_MD5_DIGEST_LENGTH);
241 memcpy(index, x.value, SQUID_MD5_DIGEST_LENGTH);
242 break;
243
244 case STORE_META_STD:
245 struct old_metahdr {
246 time_t timestamp;
247 time_t lastref;
248 time_t expires;
249 time_t lastmod;
250 size_t swap_file_sz;
251 uint16_t refcount;
252 uint16_t flags;
253 } *tmp;
254 tmp = (struct old_metahdr *)x.value;
255 assert(x.length == STORE_HDR_METASIZE_OLD);
256 what->timestamp = tmp->timestamp;
257 what->lastref = tmp->lastref;
258 what->expires = tmp->expires;
259 what->lastModified(tmp->lastmod);
260 what->swap_file_sz = tmp->swap_file_sz;
261 what->refcount = tmp->refcount;
262 what->flags = tmp->flags;
263 break;
264
265 case STORE_META_STD_LFS:
266 assert(x.length == STORE_HDR_METASIZE);
267 memcpy(&what->timestamp, x.value, STORE_HDR_METASIZE);
268 break;
269
270 default:
271 break;
272 }
273 }
274
275 StoreEntry *what;
276 cache_key *index;
277 };
278
279 bool
280 storeRebuildLoadEntry(int fd, int diskIndex, MemBuf &buf, StoreRebuildData &)
281 {
282 if (fd < 0)
283 return false;
284
285 assert(buf.hasSpace()); // caller must allocate
286
287 const int len = FD_READ_METHOD(fd, buf.space(), buf.spaceSize());
288 ++ statCounter.syscalls.disk.reads;
289 if (len < 0) {
290 const int xerrno = errno;
291 debugs(47, DBG_IMPORTANT, "WARNING: cache_dir[" << diskIndex << "]: " <<
292 "Ignoring cached entry after meta data read failure: " << xstrerr(xerrno));
293 return false;
294 }
295
296 buf.appended(len);
297 return true;
298 }
299
300 bool
301 storeRebuildParseEntry(MemBuf &buf, StoreEntry &tmpe, cache_key *key,
302 StoreRebuildData &stats,
303 uint64_t expectedSize)
304 {
305 int swap_hdr_len = 0;
306 StoreMetaUnpacker aBuilder(buf.content(), buf.contentSize(), &swap_hdr_len);
307 if (aBuilder.isBufferZero()) {
308 debugs(47,5, HERE << "skipping empty record.");
309 return false;
310 }
311
312 StoreMeta *tlv_list = nullptr;
313 try {
314 tlv_list = aBuilder.createStoreMeta();
315 } catch (const std::exception &e) {
316 debugs(47, DBG_IMPORTANT, "WARNING: Ignoring store entry because " << e.what());
317 return false;
318 }
319 assert(tlv_list);
320
321 // TODO: consume parsed metadata?
322
323 debugs(47,7, "successful swap meta unpacking; swap_file_sz=" << tmpe.swap_file_sz);
324 memset(key, '\0', SQUID_MD5_DIGEST_LENGTH);
325
326 InitStoreEntry visitor(&tmpe, key);
327 for_each(*tlv_list, visitor);
328 storeSwapTLVFree(tlv_list);
329 tlv_list = NULL;
330
331 if (storeKeyNull(key)) {
332 debugs(47, DBG_IMPORTANT, "WARNING: Ignoring keyless cache entry");
333 return false;
334 }
335
336 tmpe.key = key;
337 /* check sizes */
338
339 if (expectedSize > 0) {
340 if (tmpe.swap_file_sz == 0) {
341 tmpe.swap_file_sz = expectedSize;
342 } else if (tmpe.swap_file_sz == (uint64_t)(expectedSize - swap_hdr_len)) {
343 tmpe.swap_file_sz = expectedSize;
344 } else if (tmpe.swap_file_sz != expectedSize) {
345 debugs(47, DBG_IMPORTANT, "WARNING: Ignoring cache entry due to a " <<
346 "SIZE MISMATCH " << tmpe.swap_file_sz << "!=" << expectedSize);
347 return false;
348 }
349 } else if (tmpe.swap_file_sz <= 0) {
350 // if caller cannot handle unknown sizes, it must check after the call.
351 debugs(47, 7, "unknown size: " << tmpe);
352 }
353
354 if (EBIT_TEST(tmpe.flags, KEY_PRIVATE)) {
355 ++ stats.badflags;
356 return false;
357 }
358
359 return true;
360 }
361
362 bool
363 storeRebuildKeepEntry(const StoreEntry &tmpe, const cache_key *key, StoreRebuildData &stats)
364 {
365 /* this needs to become
366 * 1) unpack url
367 * 2) make synthetic request with headers ?? or otherwise search
368 * for a matching object in the store
369 * TODO FIXME change to new async api
370 * TODO FIXME I think there is a race condition here with the
371 * async api :
372 * store A reads in object foo, searchs for it, and finds nothing.
373 * store B reads in object foo, searchs for it, finds nothing.
374 * store A gets called back with nothing, so registers the object
375 * store B gets called back with nothing, so registers the object,
376 * which will conflict when the in core index gets around to scanning
377 * store B.
378 *
379 * this suggests that rather than searching for duplicates, the
380 * index rebuild should just assume its the most recent accurate
381 * store entry and whoever indexes the stores handles duplicates.
382 */
383 if (StoreEntry *e = Store::Root().get(key)) {
384
385 if (e->lastref >= tmpe.lastref) {
386 /* key already exists, old entry is newer */
387 /* keep old, ignore new */
388 ++stats.dupcount;
389
390 // For some stores, get() creates/unpacks a store entry. Signal
391 // such stores that we will no longer use the get() result:
392 e->lock("storeRebuildKeepEntry");
393 e->unlock("storeRebuildKeepEntry");
394
395 return false;
396 } else {
397 /* URL already exists, this swapfile not being used */
398 /* junk old, load new */
399 e->release(); /* release old entry */
400 ++stats.dupcount;
401 }
402 }
403
404 return true;
405 }
406