/*
 * Copyright (C) 1996-2023 The Squid Software Foundation and contributors
 *
 * Squid software is distributed under GPLv2+ license and includes
 * contributions from numerous individuals and organizations.
 * Please see the COPYING and CONTRIBUTORS files for details.
 */

/* DEBUG: section 79 Disk IO Routines */

#include "squid.h"
#include "base/AsyncJobCalls.h"
#include "debug/Messages.h"
#include "fs/rock/RockDbCell.h"
#include "fs/rock/RockRebuild.h"
#include "fs/rock/RockSwapDir.h"
#include "fs_io.h"
#include "globals.h"
#include "md5.h"
#include "sbuf/Stream.h"
#include "Store.h"
#include "tools.h"

#include <array>
#include <cerrno>
#include <cstring>

CBDATA_NAMESPACED_CLASS_INIT(Rock, Rebuild);

/**
 \defgroup RockFsRebuild Rock Store Rebuild
 \ingroup Filesystems
 *
 \section RockFsRebuildOverview Overview
 * Several layers of information are manipulated during the rebuild:
 \par
 * Store Entry: Response message plus all the metainformation associated with
 * it. Identified by store key. At any given time, from Squid's point
 * of view, there is only one entry with a given key, but several
 * different entries with the same key can be observed in any historical
 * archive (such as an access log or a store database).
 \par
 * Slot chain: A sequence of db slots representing a Store Entry state at
 * some point in time. Identified by key+version combination. Due to
 * transaction aborts, crashes, and idle periods, some chains may contain
 * incomplete or stale information. We assume that no two different chains
 * have the same key and version. If that assumption fails, we may serve a
 * hodgepodge entry during rebuild, until "extra" slots are loaded/noticed.
 \par
 * iNode: The very first db slot in an entry slot chain. This slot contains
 * at least the beginning of Store Entry metadata, but most 32KB inodes contain
 * the entire metadata, HTTP headers, and HTTP body.
 \par
 * Db slot: A db record containing a piece of a single store entry and linked
 * to other slots with the same key and version fields, forming a chain.
 * Slots are identified by their absolute position in the database file,
 * which is naturally unique.
 \par
 * When information from the newly loaded db slot contradicts the entry-level
 * information collected so far (e.g., the versions do not match or the total
 * chain size after the slot contribution exceeds the expected number), the
 * whole entry (and not just the chain or the slot!) is declared corrupted.
 \par
 * Why invalidate the whole entry? Rock Store is written for high-load
 * environments with large caches, where there are usually very few idle slots
 * in the database. The space occupied by a purged entry is usually immediately
 * reclaimed. A Squid crash or a transaction abort is rather unlikely to
 * leave a relatively large number of stale slots in the database. Thus, the
 * number of potentially corrupted entries is relatively small. On the other
 * hand, the damage from serving a single hodgepodge entry may be significant
 * to the user. In such an environment, invalidating the whole entry has
 * negligible performance impact but saves us from high-damage bugs.
 */
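
/* Illustrative only: a minimal sketch (with made-up slot IDs) of how a loaded
 * three-slot chain is linked via DbCellHeader fields. A negative nextSlot
 * value marks the chain end:
 *
 *   slot 7 (inode): key=K version=V firstSlot=7 nextSlot=12
 *   slot 12:        key=K version=V firstSlot=7 nextSlot=3
 *   slot 3:         key=K version=V firstSlot=7 nextSlot=-1
 *
 * The rebuild below recreates such linkage from slot headers alone, without
 * trusting any previously stored index.
 */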

namespace Rock
{

static bool
DoneLoading(const int64_t loadingPos, const int64_t dbSlotLimit)
{
    return loadingPos >= dbSlotLimit;
}

static bool
DoneValidating(const int64_t validationPos, const int64_t dbSlotLimit, const int64_t dbEntryLimit)
{
    // paranoid slot checking is only enabled with squid -S
    const auto extraWork = opt_store_doublecheck ? dbSlotLimit : 0;
    return validationPos >= (dbEntryLimit + extraWork);
}
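
// A worked example (illustrative numbers only): with dbEntryLimit=1000 and
// dbSlotLimit=8000, validation normally finishes when validationPos reaches
// 1000; under squid -S (opt_store_doublecheck), it continues through the
// per-slot checks and finishes at validationPos = 1000 + 8000 = 9000.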

/// low-level anti-padding storage class for LoadingEntry and LoadingSlot flags
class LoadingFlags
{
public:
    LoadingFlags(): state(0), anchored(0), mapped(0), finalized(0), freed(0) {}

    /* for LoadingEntry */
    uint8_t state:3; ///< current entry state (one of the LoadingEntry::State values)
    uint8_t anchored:1; ///< whether we loaded the inode slot for this entry

    /* for LoadingSlot */
    uint8_t mapped:1; ///< whether the slot was added to a mapped entry
    uint8_t finalized:1; ///< whether finalizeOrThrow() has scanned the slot
    uint8_t freed:1; ///< whether the slot was given to the map as free space
};
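
// Layout note (an expectation on common ABIs, not asserted by this code): the
// seven bits of bit-fields above share a single uint8_t, so each entry/slot
// pays one byte for all of its flags; five separate bools would cost at least
// five bytes each, which adds up across millions of slots.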

/// smart StoreEntry-level info pointer (hides anti-padding LoadingParts arrays)
class LoadingEntry
{
public:
    LoadingEntry(const sfileno fileNo, LoadingParts &source);

    uint64_t &size; ///< payload seen so far
    uint32_t &version; ///< DbCellHeader::version to distinguish same-URL chains

    /// possible store entry states during index rebuild
    typedef enum { leEmpty = 0, leLoading, leLoaded, leCorrupted, leIgnored } State;

    /* LoadingFlags::state */
    State state() const { return static_cast<State>(flags.state); }
    void state(State aState) const { flags.state = aState; }

    /* LoadingFlags::anchored */
    bool anchored() const { return flags.anchored; }
    void anchored(const bool beAnchored) { flags.anchored = beAnchored; }

private:
    LoadingFlags &flags; ///< entry flags (see the above accessors) are ours
};

/// smart db slot-level info pointer (hides anti-padding LoadingParts arrays)
class LoadingSlot
{
public:
    LoadingSlot(const SlotId slotId, LoadingParts &source);

    /// another slot in some chain belonging to the same entry (unordered!)
    Ipc::StoreMapSliceId &more;

    /* LoadingFlags::mapped */
    bool mapped() const { return flags.mapped; }
    void mapped(const bool beMapped) { flags.mapped = beMapped; }

    /* LoadingFlags::finalized */
    bool finalized() const { return flags.finalized; }
    void finalized(const bool beFinalized) { flags.finalized = beFinalized; }

    /* LoadingFlags::freed */
    bool freed() const { return flags.freed; }
    void freed(const bool beFreed) { flags.freed = beFreed; }

    bool used() const { return freed() || mapped() || more != -1; }

private:
    LoadingFlags &flags; ///< slot flags (see the above accessors) are ours
};

/// information about store entries being loaded from disk (and their slots)
/// used for identifying partially stored/loaded entries
class LoadingParts
{
public:
    using Sizes = Ipc::StoreMapItems<uint64_t>;
    using Versions = Ipc::StoreMapItems<uint32_t>;
    using Mores = Ipc::StoreMapItems<Ipc::StoreMapSliceId>;
    using Flags = Ipc::StoreMapItems<LoadingFlags>;

    LoadingParts(const SwapDir &dir, const bool resuming);
    ~LoadingParts();

    // lacking copying/moving code and often too huge to copy
    LoadingParts(LoadingParts&&) = delete;

    Sizes &sizes() const { return *sizesOwner->object(); }
    Versions &versions() const { return *versionsOwner->object(); }
    Mores &mores() const { return *moresOwner->object(); }
    Flags &flags() const { return *flagsOwner->object(); }

private:
    /* Anti-padding storage. With millions of entries, padding matters! */

    /* indexed by sfileno */
    Sizes::Owner *sizesOwner; ///< LoadingEntry::size for all entries
    Versions::Owner *versionsOwner; ///< LoadingEntry::version for all entries

    /* indexed by SlotId */
    Mores::Owner *moresOwner; ///< LoadingSlot::more for all slots

    /* entry flags are indexed by sfileno; slot flags -- by SlotId */
    Flags::Owner *flagsOwner; ///< all LoadingEntry and LoadingSlot flags
};

} /* namespace Rock */

/* LoadingEntry */

Rock::LoadingEntry::LoadingEntry(const sfileno fileNo, LoadingParts &source):
    size(source.sizes().at(fileNo)),
    version(source.versions().at(fileNo)),
    flags(source.flags().at(fileNo))
{
}

/* LoadingSlot */

Rock::LoadingSlot::LoadingSlot(const SlotId slotId, LoadingParts &source):
    more(source.mores().at(slotId)),
    flags(source.flags().at(slotId))
{
}

/* LoadingParts */

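/// creates a new shared-memory array for one LoadingParts component or, when
/// resuming an interrupted indexing run, reattaches to the existing array
/// (preserving its contents)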
template <class T>
inline typename T::Owner *
createOwner(const char *dirPath, const char *sfx, const int64_t limit, const bool resuming)
{
    auto id = Ipc::Mem::Segment::Name(SBuf(dirPath), sfx);
    return resuming ? Ipc::Mem::Owner<T>::Old(id.c_str()) : shm_new(T)(id.c_str(), limit);
}

Rock::LoadingParts::LoadingParts(const SwapDir &dir, const bool resuming):
    sizesOwner(createOwner<Sizes>(dir.path, "rebuild_sizes", dir.entryLimitActual(), resuming)),
    versionsOwner(createOwner<Versions>(dir.path, "rebuild_versions", dir.entryLimitActual(), resuming)),
    moresOwner(createOwner<Mores>(dir.path, "rebuild_mores", dir.slotLimitActual(), resuming)),
    flagsOwner(createOwner<Flags>(dir.path, "rebuild_flags", dir.slotLimitActual(), resuming))
{
    assert(sizes().capacity == versions().capacity); // every entry has both fields
    assert(sizes().capacity <= mores().capacity); // every entry needs slot(s)
    assert(mores().capacity == flags().capacity); // every slot needs a set of flags

    if (!resuming) {
        // other parts rely on shared memory segments being zero-initialized
        // TODO: refactor the next slot pointer to use 0 for nil values
        mores().fill(-1);
    }
}

Rock::LoadingParts::~LoadingParts()
{
    delete sizesOwner;
    delete versionsOwner;
    delete moresOwner;
    delete flagsOwner;
}

/* Rock::Rebuild::Stats */

SBuf
Rock::Rebuild::Stats::Path(const char *dirPath)
{
    return Ipc::Mem::Segment::Name(SBuf(dirPath), "rebuild_stats");
}

Ipc::Mem::Owner<Rock::Rebuild::Stats>*
Rock::Rebuild::Stats::Init(const SwapDir &dir)
{
    return shm_new(Stats)(Path(dir.path).c_str());
}

bool
Rock::Rebuild::Stats::completed(const SwapDir &dir) const
{
    return DoneLoading(counts.scancount, dir.slotLimitActual()) &&
           DoneValidating(counts.validations, dir.slotLimitActual(), dir.entryLimitActual());
}

/* Rebuild */

bool
Rock::Rebuild::IsResponsible(const SwapDir &)
{
    // in SMP mode, only the disker is responsible for populating the map
    return !UsingSmp() || IamDiskProcess();
}

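/// Schedules an indexing job for the given cache_dir unless this process is
/// not responsible for it or the shared stats show that indexing has already
/// finished; returns true if a Rebuild job was started.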
bool
Rock::Rebuild::Start(SwapDir &dir)
{
    if (!IsResponsible(dir)) {
        debugs(47, 2, "not responsible for indexing cache_dir #" <<
               dir.index << " from " << dir.filePath);
        return false;
    }

    const auto stats = shm_old(Rebuild::Stats)(Stats::Path(dir.path).c_str());
    if (stats->completed(dir)) {
        debugs(47, 2, "already indexed cache_dir #" <<
               dir.index << " from " << dir.filePath);
        return false;
    }

    AsyncJob::Start(new Rebuild(&dir, stats));
    return true;
}

Rock::Rebuild::Rebuild(SwapDir *dir, const Ipc::Mem::Pointer<Stats> &s): AsyncJob("Rock::Rebuild"),
    sd(dir),
    parts(nullptr),
    stats(s),
    dbSize(0),
    dbSlotSize(0),
    dbSlotLimit(0),
    dbEntryLimit(0),
    fd(-1),
    dbOffset(0),
    loadingPos(stats->counts.scancount),
    validationPos(stats->counts.validations),
    counts(stats->counts),
    resuming(stats->counts.started())
{
    assert(sd);
    dbSize = sd->diskOffsetLimit(); // we do not care about the trailer waste
    dbSlotSize = sd->slotSize;
    dbEntryLimit = sd->entryLimitActual();
    dbSlotLimit = sd->slotLimitActual();
    assert(dbEntryLimit <= dbSlotLimit);
    registerRunner();
}

Rock::Rebuild::~Rebuild()
{
    if (fd >= 0)
        file_close(fd);
    // normally, segments are used until the Squid instance quits,
    // but these indexing-only segments are no longer needed
    delete parts;
}

void
Rock::Rebuild::startShutdown()
{
    mustStop("startShutdown");
}

/// prepares and initiates entry loading sequence
void
Rock::Rebuild::start()
{
    assert(IsResponsible(*sd));

    if (!resuming) {
        debugs(47, Important(18), "Loading cache_dir #" << sd->index <<
               " from " << sd->filePath);
    } else {
        debugs(47, Important(63), "Resuming indexing cache_dir #" << sd->index <<
               " from " << sd->filePath << ':' << progressDescription());
    }

    fd = file_open(sd->filePath, O_RDONLY | O_BINARY);
    if (fd < 0)
        failure("cannot open db", errno);

    char hdrBuf[SwapDir::HeaderSize];
    if (read(fd, hdrBuf, sizeof(hdrBuf)) != SwapDir::HeaderSize)
        failure("cannot read db header", errno);

    // slot prefix of SM_PAGE_SIZE should fit both core entry header and ours
    assert(sizeof(DbCellHeader) < SM_PAGE_SIZE);
    buf.init(SM_PAGE_SIZE, SM_PAGE_SIZE);

    dbOffset = SwapDir::HeaderSize + loadingPos * dbSlotSize;

    assert(!parts);
    parts = new LoadingParts(*sd, resuming);

    counts.updateStartTime(current_time);

    checkpoint();
}

/// continues after a pause if not done
void
Rock::Rebuild::checkpoint()
{
    if (!done())
        eventAdd("Rock::Rebuild", Rock::Rebuild::Steps, this, 0.01, 1, true);
}

bool
Rock::Rebuild::doneLoading() const
{
    return DoneLoading(loadingPos, dbSlotLimit);
}

bool
Rock::Rebuild::doneValidating() const
{
    return DoneValidating(validationPos, dbSlotLimit, dbEntryLimit);
}

bool
Rock::Rebuild::doneAll() const
{
    return doneLoading() && doneValidating() && AsyncJob::doneAll();
}

void
Rock::Rebuild::Steps(void *data)
{
    // use async call to enable job call protection that time events lack
    CallJobHere(47, 5, static_cast<Rebuild*>(data), Rock::Rebuild, steps);
}

void
Rock::Rebuild::steps()
{
    if (!doneLoading())
        loadingSteps();
    else
        validationSteps();

    checkpoint();
}

void
Rock::Rebuild::loadingSteps()
{
    debugs(47,5, sd->index << " slot " << loadingPos << " at " <<
           dbOffset << " <= " << dbSize);

    // Balance our desire to maximize the number of entries processed at once
    // (and, hence, minimize overheads and total rebuild time) with a
    // requirement to also process Coordinator events, disk I/Os, etc.
    const int maxSpentMsec = 50; // keep small: most RAM I/Os are under 1ms
    const timeval loopStart = current_time;

    int64_t loaded = 0;
    while (!doneLoading()) {
        loadOneSlot();
        dbOffset += dbSlotSize;
        ++loadingPos;
        ++loaded;

        if (counts.scancount % 1000 == 0)
            storeRebuildProgress(sd->index, dbSlotLimit, counts.scancount);

        if (opt_foreground_rebuild)
            continue; // skip "few entries at a time" check below

        getCurrentTime();
        const double elapsedMsec = tvSubMsec(loopStart, current_time);
        if (elapsedMsec > maxSpentMsec || elapsedMsec < 0) {
            debugs(47, 5, "pausing after " << loaded << " entries in " <<
                   elapsedMsec << "ms; " << (elapsedMsec/loaded) << "ms per entry");
            break;
        }
    }
}

Rock::LoadingEntry
Rock::Rebuild::loadingEntry(const sfileno fileNo)
{
    Must(0 <= fileNo && fileNo < dbEntryLimit);
    return LoadingEntry(fileNo, *parts);
}

Rock::LoadingSlot
Rock::Rebuild::loadingSlot(const SlotId slotId)
{
    Must(0 <= slotId && slotId < dbSlotLimit);
    Must(slotId <= loadingPos); // cannot look ahead
    return LoadingSlot(slotId, *parts);
}

void
Rock::Rebuild::loadOneSlot()
{
    debugs(47,5, sd->index << " slot " << loadingPos << " at " <<
           dbOffset << " <= " << dbSize);

    // increment before loadingPos to avoid getting stuck at a slot
    // in case of a crash
    ++counts.scancount;

    if (lseek(fd, dbOffset, SEEK_SET) < 0)
        failure("cannot seek to db entry", errno);

    buf.reset();

    if (!storeRebuildLoadEntry(fd, sd->index, buf, counts))
        return;

    const SlotId slotId = loadingPos;

    // get our header
    DbCellHeader header;
    if (buf.contentSize() < static_cast<mb_size_t>(sizeof(header))) {
        debugs(47, DBG_IMPORTANT, "WARNING: cache_dir[" << sd->index << "]: " <<
               "Ignoring truncated " << buf.contentSize() << "-byte " <<
               "cache entry meta data at " << dbOffset);
        freeUnusedSlot(slotId, true);
        return;
    }
    memcpy(&header, buf.content(), sizeof(header));
    if (header.empty()) {
        freeUnusedSlot(slotId, false);
        return;
    }
    if (!header.sane(dbSlotSize, dbSlotLimit)) {
        debugs(47, DBG_IMPORTANT, "WARNING: cache_dir[" << sd->index << "]: " <<
               "Ignoring malformed cache entry meta data at " << dbOffset);
        freeUnusedSlot(slotId, true);
        return;
    }
    buf.consume(sizeof(header)); // optimize to avoid memmove()

    useNewSlot(slotId, header);
}

/// whether the given slot buffer is likely to have nothing but zeros, as is
/// common to slots in pre-initialized (with zeros) db files
static bool
ZeroedSlot(const MemBuf &buf)
{
    // We could memcmp the entire buffer, but it is probably safe enough to test
    // a few bytes because even if we do not detect a corrupted entry, it is not
    // a big deal: Store::UnpackPrefix() rejects all-0s metadata prefix.
    static const std::array<char, 10> zeros = {};

    if (static_cast<size_t>(buf.contentSize()) < zeros.size())
        return false; // cannot be sure enough

    return memcmp(buf.content(), zeros.data(), zeros.size()) == 0;
}

/// parse StoreEntry basics and add them to the map, returning true on success
bool
Rock::Rebuild::importEntry(Ipc::StoreMapAnchor &anchor, const sfileno fileno, const DbCellHeader &header)
{
    cache_key key[SQUID_MD5_DIGEST_LENGTH];
    StoreEntry loadedE;
    const uint64_t knownSize = header.entrySize > 0 ?
                               header.entrySize : anchor.basics.swap_file_sz.load();

    if (ZeroedSlot(buf))
        return false;

    if (!storeRebuildParseEntry(buf, loadedE, key, counts, knownSize))
        return false;

    // the entry size may be unknown, but if it is known, it is authoritative

    debugs(47, 8, "importing basics for entry " << fileno <<
           " inode.entrySize: " << header.entrySize <<
           " swap_file_sz: " << loadedE.swap_file_sz);
    anchor.set(loadedE);

    // we have not validated whether all db cells for this entry were loaded
    EBIT_CLR(anchor.basics.flags, ENTRY_VALIDATED);

    // loadedE->dump(5);

    return true;
}

void
Rock::Rebuild::validationSteps()
{
    debugs(47, 5, sd->index << " validating from " << validationPos);

    // see loadingSteps() for the rationale; TODO: avoid duplication
    const int maxSpentMsec = 50; // keep small: validation does not do I/O
    const timeval loopStart = current_time;

    int64_t validated = 0;
    while (!doneValidating()) {
        // increment before validationPos to avoid getting stuck at a slot
        // in case of a crash
        ++counts.validations;
        if (validationPos < dbEntryLimit)
            validateOneEntry(validationPos);
        else
            validateOneSlot(validationPos - dbEntryLimit);
        ++validationPos;
        ++validated;

        if (validationPos % 1000 == 0)
            debugs(20, 2, "validated: " << validationPos);

        if (opt_foreground_rebuild)
            continue; // skip "few entries at a time" check below

        getCurrentTime();
        const double elapsedMsec = tvSubMsec(loopStart, current_time);
        if (elapsedMsec > maxSpentMsec || elapsedMsec < 0) {
            debugs(47, 5, "pausing after " << validated << " entries in " <<
                   elapsedMsec << "ms; " << (elapsedMsec/validated) << "ms per entry");
            break;
        }
    }
}

/// Either make the entry accessible to all or throw.
/// This method assumes it is called only when no more entry slots are expected.
void
Rock::Rebuild::finalizeOrThrow(const sfileno fileNo, LoadingEntry &le)
{
    // walk all map-linked slots, starting from inode, and mark each
    Ipc::StoreMapAnchor &anchor = sd->map->writeableEntry(fileNo);
    Must(le.size > 0); // paranoid
    uint64_t mappedSize = 0;
    SlotId slotId = anchor.start;
    while (slotId >= 0 && mappedSize < le.size) {
        LoadingSlot slot = loadingSlot(slotId); // throws if we have not loaded that slot
        Must(!slot.finalized()); // no loops or stealing from other entries
        Must(slot.mapped()); // all our slots should be in the sd->map
        Must(!slot.freed()); // all our slots should still be present
        slot.finalized(true);

        Ipc::StoreMapSlice &mapSlice = sd->map->writeableSlice(fileNo, slotId);
        Must(mapSlice.size > 0); // paranoid
        mappedSize += mapSlice.size;
        slotId = mapSlice.next;
    }
    /* no hodgepodge entries: one entry - one full chain and no leftovers */
    Must(slotId < 0);
    Must(mappedSize == le.size);

    if (!anchor.basics.swap_file_sz)
        anchor.basics.swap_file_sz = le.size;
    EBIT_SET(anchor.basics.flags, ENTRY_VALIDATED);
    le.state(LoadingEntry::leLoaded);
    sd->map->closeForWriting(fileNo);
    ++counts.objcount;
}

/// Either make the entry accessible to all or free it.
/// This method must only be called when no more entry slots are expected.
void
Rock::Rebuild::finalizeOrFree(const sfileno fileNo, LoadingEntry &le)
{
    try {
        finalizeOrThrow(fileNo, le);
    } catch (const std::exception &ex) {
        freeBadEntry(fileNo, ex.what());
    }
}

void
Rock::Rebuild::validateOneEntry(const sfileno fileNo)
{
    LoadingEntry entry = loadingEntry(fileNo);
    switch (entry.state()) {

    case LoadingEntry::leLoading:
        finalizeOrFree(fileNo, entry);
        break;

    case LoadingEntry::leEmpty: // no entry hashed to this position
    case LoadingEntry::leLoaded: // we have already unlocked this entry
    case LoadingEntry::leCorrupted: // we have already removed this entry
    case LoadingEntry::leIgnored: // we have already discarded this entry
        break;
    }
}

void
Rock::Rebuild::validateOneSlot(const SlotId slotId)
{
    const LoadingSlot slot = loadingSlot(slotId);
    // there should not be any unprocessed slots left
    Must(slot.freed() || (slot.mapped() && slot.finalized()));
}

/// Marks remaining bad entry slots as free and unlocks the entry. The map
/// cannot do this because Loading entries may have holes in the slots chain.
void
Rock::Rebuild::freeBadEntry(const sfileno fileno, const char *eDescription)
{
    debugs(47, 2, "cache_dir #" << sd->index << ' ' << eDescription <<
           " entry " << fileno << " is ignored during rebuild");

    LoadingEntry le = loadingEntry(fileno);
    le.state(LoadingEntry::leCorrupted);

    Ipc::StoreMapAnchor &anchor = sd->map->writeableEntry(fileno);
    assert(anchor.start < 0 || le.size > 0);
    for (SlotId slotId = anchor.start; slotId >= 0;) {
        const SlotId next = loadingSlot(slotId).more;
        freeSlot(slotId, true);
        slotId = next;
    }

    sd->map->forgetWritingEntry(fileno);
}

void
Rock::Rebuild::swanSong()
{
    debugs(47,3, "cache_dir #" << sd->index << " rebuild level: " <<
           StoreController::store_dirs_rebuilding);
    storeRebuildComplete(&counts);
}

void
Rock::Rebuild::failure(const char *msg, int errNo)
{
    debugs(47,5, sd->index << " slot " << loadingPos << " at " <<
           dbOffset << " <= " << dbSize);

    if (errNo)
        debugs(47, DBG_CRITICAL, "ERROR: Rock cache_dir rebuild failure: " << xstrerr(errNo));
    debugs(47, DBG_CRITICAL, "Do you need to run 'squid -z' to initialize storage?");

    assert(sd);
    fatalf("Rock cache_dir[%d] rebuild of %s failed: %s.",
           sd->index, sd->filePath, msg);
}

/// adds slot to the free slot index
void
Rock::Rebuild::freeSlot(const SlotId slotId, const bool invalid)
{
    debugs(47,5, sd->index << " frees slot " << slotId);
    LoadingSlot slot = loadingSlot(slotId);
    assert(!slot.freed());
    slot.freed(true);

    if (invalid) {
        ++counts.invalid;
        //sd->unlink(fileno); leave garbage on disk, it should not hurt
    }

    Ipc::Mem::PageId pageId;
    pageId.pool = Ipc::Mem::PageStack::IdForSwapDirSpace(sd->index);
    pageId.number = slotId+1;
    sd->freeSlots->push(pageId);
}

/// freeSlot() for never-been-mapped slots
void
Rock::Rebuild::freeUnusedSlot(const SlotId slotId, const bool invalid)
{
    LoadingSlot slot = loadingSlot(slotId);
    // mapped slots must be freed via freeBadEntry() to keep the map in sync
    assert(!slot.mapped());
    freeSlot(slotId, invalid);
}

/// adds slot to the entry chain in the map
void
Rock::Rebuild::mapSlot(const SlotId slotId, const DbCellHeader &header)
{
    LoadingSlot slot = loadingSlot(slotId);
    assert(!slot.mapped());
    assert(!slot.freed());
    slot.mapped(true);

    Ipc::StoreMapSlice slice;
    slice.next = header.nextSlot;
    slice.size = header.payloadSize;
    sd->map->importSlice(slotId, slice);
}

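/// prepends the slot `to` to the chain whose current head is `from`;
/// LoadingSlot::more links are unordered, so prepending is sufficient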
template <class SlotIdType> // accommodates atomic and simple SlotIds.
void
Rock::Rebuild::chainSlots(SlotIdType &from, const SlotId to)
{
    LoadingSlot slot = loadingSlot(to);
    assert(slot.more < 0);
    slot.more = from; // may still be unset
    from = to;
}

/// adds slot to an existing entry chain; caller must check that the slot
/// belongs to the chain it is being added to
void
Rock::Rebuild::addSlotToEntry(const sfileno fileno, const SlotId slotId, const DbCellHeader &header)
{
    LoadingEntry le = loadingEntry(fileno);
    Ipc::StoreMapAnchor &anchor = sd->map->writeableEntry(fileno);

    debugs(47,9, "adding " << slotId << " to entry " << fileno);
    // we do not need to preserve the order
    if (le.anchored()) {
        LoadingSlot inode = loadingSlot(anchor.start);
        chainSlots(inode.more, slotId);
    } else {
        chainSlots(anchor.start, slotId);
    }

    le.size += header.payloadSize; // must precede freeBadEntry() calls

    if (header.firstSlot == slotId) {
        debugs(47,5, "added inode");

        if (le.anchored()) { // we have already added another inode slot
            freeBadEntry(fileno, "inode conflict");
            ++counts.clashcount;
            return;
        }

        le.anchored(true);

        if (!importEntry(anchor, fileno, header)) {
            freeBadEntry(fileno, "corrupted metainfo");
            return;
        }

        // set total entry size and/or check it for consistency
        if (const uint64_t totalSize = header.entrySize) {
            assert(totalSize != static_cast<uint64_t>(-1));
            if (!anchor.basics.swap_file_sz) {
                anchor.basics.swap_file_sz = totalSize;
                assert(anchor.basics.swap_file_sz != static_cast<uint64_t>(-1));
            } else if (totalSize != anchor.basics.swap_file_sz) {
                freeBadEntry(fileno, "size mismatch");
                return;
            }
        }
    }

    const uint64_t totalSize = anchor.basics.swap_file_sz; // may be 0/unknown

    if (totalSize > 0 && le.size > totalSize) { // overflow
        debugs(47, 8, "overflow: " << le.size << " > " << totalSize);
        freeBadEntry(fileno, "overflowing");
        return;
    }

    mapSlot(slotId, header);
    if (totalSize > 0 && le.size == totalSize)
        finalizeOrFree(fileno, le); // entry is probably fully loaded now
}

/// initialize housekeeping information for a newly accepted entry
void
Rock::Rebuild::primeNewEntry(Ipc::StoreMap::Anchor &anchor, const sfileno fileno, const DbCellHeader &header)
{
    anchor.setKey(reinterpret_cast<const cache_key*>(header.key));
    assert(header.firstSlot >= 0);
    anchor.start = -1; // addSlotToEntry() will set it

    assert(anchor.basics.swap_file_sz != static_cast<uint64_t>(-1));

    LoadingEntry le = loadingEntry(fileno);
    le.state(LoadingEntry::leLoading);
    le.version = header.version;
    le.size = 0;
}

/// handle a slot from an entry that we have not seen before
void
Rock::Rebuild::startNewEntry(const sfileno fileno, const SlotId slotId, const DbCellHeader &header)
{
    // A miss may have been stored at our fileno while we were loading other
    // slots from disk. We ought to preserve that entry because it is fresher.
    const bool overwriteExisting = false;
    if (Ipc::StoreMap::Anchor *anchor = sd->map->openForWritingAt(fileno, overwriteExisting)) {
        primeNewEntry(*anchor, fileno, header);
        addSlotToEntry(fileno, slotId, header); // may fail
        assert(anchor->basics.swap_file_sz != static_cast<uint64_t>(-1));
    } else {
        // A new from-network entry is occupying our map slot; let it be, but
        // save us from the trouble of going through the above motions again.
        LoadingEntry le = loadingEntry(fileno);
        le.state(LoadingEntry::leIgnored);
        freeUnusedSlot(slotId, false);
    }
}

/// does the header belong to the fileno entry being loaded?
bool
Rock::Rebuild::sameEntry(const sfileno fileno, const DbCellHeader &header) const
{
    // Header updates always result in multi-start chains and often
    // result in multi-version chains so we can only compare the keys.
    const Ipc::StoreMap::Anchor &anchor = sd->map->writeableEntry(fileno);
    return anchor.sameKey(reinterpret_cast<const cache_key*>(header.key));
}

/// handle freshly loaded (and validated) db slot header
void
Rock::Rebuild::useNewSlot(const SlotId slotId, const DbCellHeader &header)
{
    const cache_key *const key =
        reinterpret_cast<const cache_key*>(header.key);
    const sfileno fileno = sd->map->fileNoByKey(key);
    assert(0 <= fileno && fileno < dbEntryLimit);

    LoadingEntry le = loadingEntry(fileno);
    debugs(47,9, "entry " << fileno << " state: " << le.state() << ", inode: " <<
           header.firstSlot << ", size: " << header.payloadSize);

    switch (le.state()) {

    case LoadingEntry::leEmpty: {
        startNewEntry(fileno, slotId, header);
        break;
    }

    case LoadingEntry::leLoading: {
        if (sameEntry(fileno, header)) {
            addSlotToEntry(fileno, slotId, header); // may fail
        } else {
            // either the loading chain or this slot is stale;
            // be conservative and ignore both (and any future ones)
            freeBadEntry(fileno, "duplicated");
            freeUnusedSlot(slotId, true);
            ++counts.dupcount;
        }
        break;
    }

    case LoadingEntry::leLoaded: {
        // either the previously loaded chain or this slot is stale;
        // be conservative and ignore both (and any future ones)
        le.state(LoadingEntry::leCorrupted);
        sd->map->freeEntry(fileno); // may not be immediately successful
        freeUnusedSlot(slotId, true);
        ++counts.dupcount;
        break;
    }

    case LoadingEntry::leCorrupted: {
        // previously seen slots messed things up so we must ignore this one
        freeUnusedSlot(slotId, true);
        break;
    }

    case LoadingEntry::leIgnored: {
        // already replaced by a fresher or colliding from-network entry
        freeUnusedSlot(slotId, false);
        break;
    }
    }
}

SBuf
Rock::Rebuild::progressDescription() const
{
    SBufStream str;

    str << Debug::Extra << "slots loaded: " << Progress(loadingPos, dbSlotLimit);

    const auto validatingEntries = validationPos < dbEntryLimit;
    const auto entriesValidated = validatingEntries ? validationPos : dbEntryLimit;
    str << Debug::Extra << "entries validated: " << Progress(entriesValidated, dbEntryLimit);
    if (opt_store_doublecheck) {
        const auto slotsValidated = validatingEntries ? 0 : (validationPos - dbEntryLimit);
        str << Debug::Extra << "slots validated: " << Progress(slotsValidated, dbSlotLimit);
    }

    return str.buf();
}