/*
 * Copyright (C) 1996-2021 The Squid Software Foundation and contributors
 *
 * Squid software is distributed under GPLv2+ license and includes
 * contributions from numerous individuals and organizations.
 * Please see the COPYING and CONTRIBUTORS files for details.
 */

/* DEBUG: section 79 Disk IO Routines */

#include "squid.h"
#include "base/AsyncJobCalls.h"
#include "DebugMessages.h"
#include "fs/rock/RockDbCell.h"
#include "fs/rock/RockRebuild.h"
#include "fs/rock/RockSwapDir.h"
#include "fs_io.h"
#include "globals.h"
#include "md5.h"
#include "sbuf/Stream.h"
#include "SquidTime.h"
#include "Store.h"
#include "tools.h"

#include <cerrno>

CBDATA_NAMESPACED_CLASS_INIT(Rock, Rebuild);

/**
 \defgroup RockFsRebuild Rock Store Rebuild
 \ingroup Filesystems
 *
 \section RockFsRebuildOverview Overview
 * Several layers of information are manipulated during the rebuild:
 \par
 * Store Entry: Response message plus all the metainformation associated with
 * it. Identified by store key. At any given time, from Squid's point
 * of view, there is only one entry with a given key, but several
 * different entries with the same key can be observed in any historical
 * archive (such as an access log or a store database).
 \par
 * Slot chain: A sequence of db slots representing a Store Entry state at
 * some point in time. Identified by key+version combination. Due to
 * transaction aborts, crashes, and idle periods, some chains may contain
 * incomplete or stale information. We assume that no two different chains
 * have the same key and version. If that assumption fails, we may serve a
 * hodgepodge entry during rebuild, until "extra" slots are loaded/noticed.
 \par
 * iNode: The very first db slot in an entry slot chain. This slot contains
 * at least the beginning of Store Entry metadata, but most 32KB inodes contain
 * the entire metadata, HTTP headers, and HTTP body.
 \par
 * Db slot: A db record containing a piece of a single store entry and linked
 * to other slots with the same key and version fields, forming a chain.
 * Slots are identified by their absolute position in the database file,
 * which is naturally unique.
 \par
 * When information from the newly loaded db slot contradicts the entry-level
 * information collected so far (e.g., the versions do not match or the total
 * chain size after the slot contribution exceeds the expected number), the
 * whole entry (and not just the chain or the slot!) is declared corrupted.
 \par
 * Why invalidate the whole entry? Rock Store is written for high-load
 * environments with large caches, where there are usually very few idle slots
 * in the database. The space occupied by a purged entry is usually immediately
 * reclaimed. A Squid crash or a transaction abort is rather unlikely to
 * leave a relatively large number of stale slots in the database. Thus, the
 * number of potentially corrupted entries is relatively small. On the other
 * hand, the damage from serving a single hodgepodge entry may be significant
 * to the user. In such an environment, invalidating the whole entry has
 * negligible performance impact but saves us from high-damage bugs.
 */

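/**
 \par
 * An illustrative sketch (not code used below) of how a complete slot chain
 * is walked using the DbCellHeader fields referenced throughout this file;
 * readSlotHeader() is a hypothetical helper standing in for a disk read:
 \code
 *  SlotId slotId = inodeHeader.firstSlot;  // the iNode anchors the chain
 *  uint64_t payloadSeen = 0;
 *  while (slotId >= 0) {                   // a negative SlotId ends the chain
 *      const DbCellHeader h = readSlotHeader(slotId);
 *      payloadSeen += h.payloadSize;       // should add up to the entry size
 *      slotId = h.nextSlot;
 *  }
 \endcode
 */
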
namespace Rock
{

static bool
DoneLoading(const int64_t loadingPos, const int64_t dbSlotLimit)
{
    return loadingPos >= dbSlotLimit;
}

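// Note: DoneValidating() and Rebuild::validationSteps() share one validation
// position space: positions [0, dbEntryLimit) check entries while, only with
// squid -S (opt_store_doublecheck), positions [dbEntryLimit,
// dbEntryLimit + dbSlotLimit) additionally double-check individual slots.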
static bool
DoneValidating(const int64_t validationPos, const int64_t dbSlotLimit, const int64_t dbEntryLimit)
{
    // paranoid slot checking is only enabled with squid -S
    const auto extraWork = opt_store_doublecheck ? dbSlotLimit : 0;
    return validationPos >= (dbEntryLimit + extraWork);
}

/// low-level anti-padding storage class for LoadingEntry and LoadingSlot flags
class LoadingFlags
{
public:
    LoadingFlags(): state(0), anchored(0), mapped(0), finalized(0), freed(0) {}

    /* for LoadingEntry */
    uint8_t state:3; ///< current entry state (one of the LoadingEntry::State values)
    uint8_t anchored:1; ///< whether we loaded the inode slot for this entry

    /* for LoadingSlot */
    uint8_t mapped:1; ///< whether the slot was added to a mapped entry
    uint8_t finalized:1; ///< whether finalizeOrThrow() has scanned the slot
    uint8_t freed:1; ///< whether the slot was given to the map as free space
};

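// A minimal illustrative check of the anti-padding point above: all five
// bit-fields share a single uint8_t, so per-item flag storage stays at one
// byte even with millions of entries and slots.
static_assert(sizeof(LoadingFlags) == sizeof(uint8_t), "LoadingFlags adds no padding");
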
/// smart StoreEntry-level info pointer (hides anti-padding LoadingParts arrays)
class LoadingEntry
{
public:
    LoadingEntry(const sfileno fileNo, LoadingParts &source);

    uint64_t &size; ///< payload seen so far
    uint32_t &version; ///< DbCellHeader::version to distinguish same-URL chains

    /// possible store entry states during index rebuild
    typedef enum { leEmpty = 0, leLoading, leLoaded, leCorrupted, leIgnored } State;

    /* LoadingFlags::state */
    State state() const { return static_cast<State>(flags.state); }
    void state(State aState) const { flags.state = aState; }

    /* LoadingFlags::anchored */
    bool anchored() const { return flags.anchored; }
    void anchored(const bool beAnchored) { flags.anchored = beAnchored; }

private:
    LoadingFlags &flags; ///< entry flags (see the above accessors) are ours
};

/// smart db slot-level info pointer (hides anti-padding LoadingParts arrays)
class LoadingSlot
{
public:
    LoadingSlot(const SlotId slotId, LoadingParts &source);

    /// another slot in some chain belonging to the same entry (unordered!)
    Ipc::StoreMapSliceId &more;

    /* LoadingFlags::mapped */
    bool mapped() const { return flags.mapped; }
    void mapped(const bool beMapped) { flags.mapped = beMapped; }

    /* LoadingFlags::finalized */
    bool finalized() const { return flags.finalized; }
    void finalized(const bool beFinalized) { flags.finalized = beFinalized; }

    /* LoadingFlags::freed */
    bool freed() const { return flags.freed; }
    void freed(const bool beFreed) { flags.freed = beFreed; }

    bool used() const { return freed() || mapped() || more != -1; }

private:
    LoadingFlags &flags; ///< slot flags (see the above accessors) are ours
};

/// information about store entries being loaded from disk (and their slots)
/// used for identifying partially stored/loaded entries
class LoadingParts
{
public:
    using Sizes = Ipc::StoreMapItems<uint64_t>;
    using Versions = Ipc::StoreMapItems<uint32_t>;
    using Mores = Ipc::StoreMapItems<Ipc::StoreMapSliceId>;
    using Flags = Ipc::StoreMapItems<LoadingFlags>;

    LoadingParts(const SwapDir &dir, const bool resuming);
    ~LoadingParts();

    // lacking copying/moving code and often too huge to copy
    LoadingParts(LoadingParts&&) = delete;

    Sizes &sizes() const { return *sizesOwner->object(); }
    Versions &versions() const { return *versionsOwner->object(); }
    Mores &mores() const { return *moresOwner->object(); }
    Flags &flags() const { return *flagsOwner->object(); }

private:
    /* Anti-padding storage. With millions of entries, padding matters! */

    /* indexed by sfileno */
    Sizes::Owner *sizesOwner; ///< LoadingEntry::size for all entries
    Versions::Owner *versionsOwner; ///< LoadingEntry::version for all entries

    /* indexed by SlotId */
    Mores::Owner *moresOwner; ///< LoadingSlot::more for all slots

    /* entry flags are indexed by sfileno; slot flags -- by SlotId */
    Flags::Owner *flagsOwner; ///< all LoadingEntry and LoadingSlot flags
};

} /* namespace Rock */

/* LoadingEntry */

Rock::LoadingEntry::LoadingEntry(const sfileno fileNo, LoadingParts &source):
    size(source.sizes().at(fileNo)),
    version(source.versions().at(fileNo)),
    flags(source.flags().at(fileNo))
{
}

/* LoadingSlot */

Rock::LoadingSlot::LoadingSlot(const SlotId slotId, LoadingParts &source):
    more(source.mores().at(slotId)),
    flags(source.flags().at(slotId))
{
}

/* LoadingParts */

template <class T>
inline typename T::Owner *
createOwner(const char *dirPath, const char *sfx, const int64_t limit, const bool resuming)
{
    auto id = Ipc::Mem::Segment::Name(SBuf(dirPath), sfx);
    return resuming ? Ipc::Mem::Owner<T>::Old(id.c_str()) : shm_new(T)(id.c_str(), limit);
}

Rock::LoadingParts::LoadingParts(const SwapDir &dir, const bool resuming):
    sizesOwner(createOwner<Sizes>(dir.path, "rebuild_sizes", dir.entryLimitActual(), resuming)),
    versionsOwner(createOwner<Versions>(dir.path, "rebuild_versions", dir.entryLimitActual(), resuming)),
    moresOwner(createOwner<Mores>(dir.path, "rebuild_mores", dir.slotLimitActual(), resuming)),
    flagsOwner(createOwner<Flags>(dir.path, "rebuild_flags", dir.slotLimitActual(), resuming))
{
    assert(sizes().capacity == versions().capacity); // every entry has both fields
    assert(sizes().capacity <= mores().capacity); // every entry needs slot(s)
    assert(mores().capacity == flags().capacity); // every slot needs a set of flags

    if (!resuming) {
        // other parts rely on shared memory segments being zero-initialized
        // TODO: refactor the next slot pointer to use 0 for nil values
        mores().fill(-1);
    }
}

Rock::LoadingParts::~LoadingParts()
{
    delete sizesOwner;
    delete versionsOwner;
    delete moresOwner;
    delete flagsOwner;
}

/* Rock::Rebuild::Stats */

SBuf
Rock::Rebuild::Stats::Path(const char *dirPath)
{
    return Ipc::Mem::Segment::Name(SBuf(dirPath), "rebuild_stats");
}

Ipc::Mem::Owner<Rock::Rebuild::Stats>*
Rock::Rebuild::Stats::Init(const SwapDir &dir)
{
    return shm_new(Stats)(Path(dir.path).c_str());
}

bool
Rock::Rebuild::Stats::completed(const SwapDir &dir) const
{
    return DoneLoading(counts.scancount, dir.slotLimitActual()) &&
           DoneValidating(counts.validations, dir.slotLimitActual(), dir.entryLimitActual());
}

/* Rebuild */

bool
Rock::Rebuild::IsResponsible(const SwapDir &)
{
    // in SMP mode, only the disker is responsible for populating the map
    return !UsingSmp() || IamDiskProcess();
}

bool
Rock::Rebuild::Start(SwapDir &dir)
{
    if (!IsResponsible(dir)) {
        debugs(47, 2, "not responsible for indexing cache_dir #" <<
               dir.index << " from " << dir.filePath);
        return false;
    }

    const auto stats = shm_old(Rebuild::Stats)(Stats::Path(dir.path).c_str());
    if (stats->completed(dir)) {
        debugs(47, 2, "already indexed cache_dir #" <<
               dir.index << " from " << dir.filePath);
        return false;
    }

    AsyncJob::Start(new Rebuild(&dir, stats));
    return true;
}

Rock::Rebuild::Rebuild(SwapDir *dir, const Ipc::Mem::Pointer<Stats> &s): AsyncJob("Rock::Rebuild"),
    sd(dir),
    parts(nullptr),
    stats(s),
    dbSize(0),
    dbSlotSize(0),
    dbSlotLimit(0),
    dbEntryLimit(0),
    fd(-1),
    dbOffset(0),
    loadingPos(stats->counts.scancount),
    validationPos(stats->counts.validations),
    counts(stats->counts),
    resuming(stats->counts.started())
{
    assert(sd);
    dbSize = sd->diskOffsetLimit(); // we do not care about the trailer waste
    dbSlotSize = sd->slotSize;
    dbEntryLimit = sd->entryLimitActual();
    dbSlotLimit = sd->slotLimitActual();
    assert(dbEntryLimit <= dbSlotLimit);
    registerRunner();
}

Rock::Rebuild::~Rebuild()
{
    if (fd >= 0)
        file_close(fd);
    // normally, segments are used until the Squid instance quits,
    // but these indexing-only segments are no longer needed
    delete parts;
}

void
Rock::Rebuild::startShutdown()
{
    mustStop("startShutdown");
}

/// prepares and initiates entry loading sequence
void
Rock::Rebuild::start()
{
    assert(IsResponsible(*sd));

    if (!resuming) {
        debugs(47, Important(18), "Loading cache_dir #" << sd->index <<
               " from " << sd->filePath);
    } else {
        debugs(47, Important(63), "Resuming indexing cache_dir #" << sd->index <<
               " from " << sd->filePath << ':' << progressDescription());
    }

    fd = file_open(sd->filePath, O_RDONLY | O_BINARY);
    if (fd < 0)
        failure("cannot open db", errno);

    char hdrBuf[SwapDir::HeaderSize];
    if (read(fd, hdrBuf, sizeof(hdrBuf)) != SwapDir::HeaderSize)
        failure("cannot read db header", errno);

    // slot prefix of SM_PAGE_SIZE should fit both core entry header and ours
    assert(sizeof(DbCellHeader) < SM_PAGE_SIZE);
    buf.init(SM_PAGE_SIZE, SM_PAGE_SIZE);

    dbOffset = SwapDir::HeaderSize + loadingPos * dbSlotSize;

    assert(!parts);
    parts = new LoadingParts(*sd, resuming);

    counts.updateStartTime(current_time);

    checkpoint();
}

/// continues after a pause if not done
void
Rock::Rebuild::checkpoint()
{
    if (!done())
        eventAdd("Rock::Rebuild", Rock::Rebuild::Steps, this, 0.01, 1, true);
}

bool
Rock::Rebuild::doneLoading() const
{
    return DoneLoading(loadingPos, dbSlotLimit);
}

bool
Rock::Rebuild::doneValidating() const
{
    return DoneValidating(validationPos, dbSlotLimit, dbEntryLimit);
}

bool
Rock::Rebuild::doneAll() const
{
    return doneLoading() && doneValidating() && AsyncJob::doneAll();
}

void
Rock::Rebuild::Steps(void *data)
{
    // use async call to enable job call protection that time events lack
    CallJobHere(47, 5, static_cast<Rebuild*>(data), Rock::Rebuild, steps);
}

void
Rock::Rebuild::steps()
{
    if (!doneLoading())
        loadingSteps();
    else
        validationSteps();

    checkpoint();
}

void
Rock::Rebuild::loadingSteps()
{
    debugs(47,5, sd->index << " slot " << loadingPos << " at " <<
           dbOffset << " <= " << dbSize);

    // Balance our desire to maximize the number of entries processed at once
    // (and, hence, minimize overheads and total rebuild time) with a
    // requirement to also process Coordinator events, disk I/Os, etc.
    const int maxSpentMsec = 50; // keep small: most RAM I/Os are under 1ms
    const timeval loopStart = current_time;

    int64_t loaded = 0;
    while (!doneLoading()) {
        loadOneSlot();
        dbOffset += dbSlotSize;
        ++loadingPos;
        ++loaded;

        if (counts.scancount % 1000 == 0)
            storeRebuildProgress(sd->index, dbSlotLimit, counts.scancount);

        if (opt_foreground_rebuild)
            continue; // skip "few entries at a time" check below

        getCurrentTime();
        const double elapsedMsec = tvSubMsec(loopStart, current_time);
        if (elapsedMsec > maxSpentMsec || elapsedMsec < 0) {
            debugs(47, 5, HERE << "pausing after " << loaded << " entries in " <<
                   elapsedMsec << "ms; " << (elapsedMsec/loaded) << "ms per entry");
            break;
        }
    }
}

Rock::LoadingEntry
Rock::Rebuild::loadingEntry(const sfileno fileNo)
{
    Must(0 <= fileNo && fileNo < dbEntryLimit);
    return LoadingEntry(fileNo, *parts);
}

Rock::LoadingSlot
Rock::Rebuild::loadingSlot(const SlotId slotId)
{
    Must(0 <= slotId && slotId < dbSlotLimit);
    Must(slotId <= loadingPos); // cannot look ahead
    return LoadingSlot(slotId, *parts);
}

void
Rock::Rebuild::loadOneSlot()
{
    debugs(47,5, sd->index << " slot " << loadingPos << " at " <<
           dbOffset << " <= " << dbSize);

    // increment before loadingPos to avoid getting stuck at a slot
    // in case of a crash
    ++counts.scancount;

    if (lseek(fd, dbOffset, SEEK_SET) < 0)
        failure("cannot seek to db entry", errno);

    buf.reset();

    if (!storeRebuildLoadEntry(fd, sd->index, buf, counts))
        return;

    const SlotId slotId = loadingPos;

    // get our header
    DbCellHeader header;
    if (buf.contentSize() < static_cast<mb_size_t>(sizeof(header))) {
        debugs(47, DBG_IMPORTANT, "WARNING: cache_dir[" << sd->index << "]: " <<
               "Ignoring truncated " << buf.contentSize() << "-byte " <<
               "cache entry meta data at " << dbOffset);
        freeUnusedSlot(slotId, true);
        return;
    }
    memcpy(&header, buf.content(), sizeof(header));
    if (header.empty()) {
        freeUnusedSlot(slotId, false);
        return;
    }
    if (!header.sane(dbSlotSize, dbSlotLimit)) {
        debugs(47, DBG_IMPORTANT, "WARNING: cache_dir[" << sd->index << "]: " <<
               "Ignoring malformed cache entry meta data at " << dbOffset);
        freeUnusedSlot(slotId, true);
        return;
    }
    buf.consume(sizeof(header)); // optimize to avoid memmove()

    useNewSlot(slotId, header);
}

/// parse StoreEntry basics and add them to the map, returning true on success
bool
Rock::Rebuild::importEntry(Ipc::StoreMapAnchor &anchor, const sfileno fileno, const DbCellHeader &header)
{
    cache_key key[SQUID_MD5_DIGEST_LENGTH];
    StoreEntry loadedE;
    const uint64_t knownSize = header.entrySize > 0 ?
                               header.entrySize : anchor.basics.swap_file_sz.load();
    if (!storeRebuildParseEntry(buf, loadedE, key, counts, knownSize))
        return false;

    // the entry size may be unknown, but if it is known, it is authoritative

    debugs(47, 8, "importing basics for entry " << fileno <<
           " inode.entrySize: " << header.entrySize <<
           " swap_file_sz: " << loadedE.swap_file_sz);
    anchor.set(loadedE);

    // we have not validated whether all db cells for this entry were loaded
    EBIT_CLR(anchor.basics.flags, ENTRY_VALIDATED);

    // loadedE->dump(5);

    return true;
}
e2851fe7 536
93910d5c 537void
50dc81ec 538Rock::Rebuild::validationSteps()
93910d5c 539{
50dc81ec 540 debugs(47, 5, sd->index << " validating from " << validationPos);
93910d5c 541
50dc81ec
AR
542 // see loadingSteps() for the rationale; TODO: avoid duplication
543 const int maxSpentMsec = 50; // keep small: validation does not do I/O
544 const timeval loopStart = current_time;
e2851fe7 545
8ecbe78d 546 int64_t validated = 0;
abf396ec 547 while (!doneValidating()) {
8ecbe78d
EB
548 // increment before validationPos to avoid getting stuck at a slot
549 // in a case of crash
550 ++counts.validations;
abf396ec
AR
551 if (validationPos < dbEntryLimit)
552 validateOneEntry(validationPos);
553 else
554 validateOneSlot(validationPos - dbEntryLimit);
50dc81ec
AR
555 ++validationPos;
556 ++validated;
93910d5c 557
50dc81ec
AR
558 if (validationPos % 1000 == 0)
559 debugs(20, 2, "validated: " << validationPos);
e2851fe7 560
50dc81ec
AR
561 if (opt_foreground_rebuild)
562 continue; // skip "few entries at a time" check below
563
564 getCurrentTime();
565 const double elapsedMsec = tvSubMsec(loopStart, current_time);
566 if (elapsedMsec > maxSpentMsec || elapsedMsec < 0) {
567 debugs(47, 5, "pausing after " << validated << " entries in " <<
568 elapsedMsec << "ms; " << (elapsedMsec/validated) << "ms per entry");
569 break;
570 }
571 }
572}

/// Either make the entry accessible to all or throw.
/// This method assumes it is called only when no more entry slots are expected.
void
Rock::Rebuild::finalizeOrThrow(const sfileno fileNo, LoadingEntry &le)
{
    // walk all map-linked slots, starting from inode, and mark each
    Ipc::StoreMapAnchor &anchor = sd->map->writeableEntry(fileNo);
    Must(le.size > 0); // paranoid
    uint64_t mappedSize = 0;
    SlotId slotId = anchor.start;
    while (slotId >= 0 && mappedSize < le.size) {
        LoadingSlot slot = loadingSlot(slotId); // throws if we have not loaded that slot
        Must(!slot.finalized()); // no loops or stealing from other entries
        Must(slot.mapped()); // all our slots should be in the sd->map
        Must(!slot.freed()); // all our slots should still be present
        slot.finalized(true);

        Ipc::StoreMapSlice &mapSlice = sd->map->writeableSlice(fileNo, slotId);
        Must(mapSlice.size > 0); // paranoid
        mappedSize += mapSlice.size;
        slotId = mapSlice.next;
    }
    /* no hodgepodge entries: one entry - one full chain and no leftovers */
    Must(slotId < 0);
    Must(mappedSize == le.size);

    if (!anchor.basics.swap_file_sz)
        anchor.basics.swap_file_sz = le.size;
    EBIT_SET(anchor.basics.flags, ENTRY_VALIDATED);
    le.state(LoadingEntry::leLoaded);
    sd->map->closeForWriting(fileNo);
    ++counts.objcount;
}

/// Either make the entry accessible to all or free it.
/// This method must only be called when no more entry slots are expected.
void
Rock::Rebuild::finalizeOrFree(const sfileno fileNo, LoadingEntry &le)
{
    try {
        finalizeOrThrow(fileNo, le);
    } catch (const std::exception &ex) {
        freeBadEntry(fileNo, ex.what());
    }
}

void
Rock::Rebuild::validateOneEntry(const sfileno fileNo)
{
    LoadingEntry entry = loadingEntry(fileNo);
    switch (entry.state()) {

    case LoadingEntry::leLoading:
        finalizeOrFree(fileNo, entry);
        break;

    case LoadingEntry::leEmpty: // no entry hashed to this position
    case LoadingEntry::leLoaded: // we have already unlocked this entry
    case LoadingEntry::leCorrupted: // we have already removed this entry
    case LoadingEntry::leIgnored: // we have already discarded this entry
        break;
    }
}

void
Rock::Rebuild::validateOneSlot(const SlotId slotId)
{
    const LoadingSlot slot = loadingSlot(slotId);
    // there should not be any unprocessed slots left
    Must(slot.freed() || (slot.mapped() && slot.finalized()));
}

/// Marks remaining bad entry slots as free and unlocks the entry. The map
/// cannot do this because Loading entries may have holes in the slots chain.
void
Rock::Rebuild::freeBadEntry(const sfileno fileno, const char *eDescription)
{
    debugs(47, 2, "cache_dir #" << sd->index << ' ' << eDescription <<
           " entry " << fileno << " is ignored during rebuild");

    LoadingEntry le = loadingEntry(fileno);
    le.state(LoadingEntry::leCorrupted);

    Ipc::StoreMapAnchor &anchor = sd->map->writeableEntry(fileno);
    assert(anchor.start < 0 || le.size > 0);
    for (SlotId slotId = anchor.start; slotId >= 0;) {
        const SlotId next = loadingSlot(slotId).more;
        freeSlot(slotId, true);
        slotId = next;
    }

    sd->map->forgetWritingEntry(fileno);
}

void
Rock::Rebuild::swanSong()
{
    debugs(47,3, HERE << "cache_dir #" << sd->index << " rebuild level: " <<
           StoreController::store_dirs_rebuilding);
    storeRebuildComplete(&counts);
}

void
Rock::Rebuild::failure(const char *msg, int errNo)
{
    debugs(47,5, sd->index << " slot " << loadingPos << " at " <<
           dbOffset << " <= " << dbSize);

    if (errNo)
        debugs(47, DBG_CRITICAL, "ERROR: Rock cache_dir rebuild failure: " << xstrerr(errNo));
    debugs(47, DBG_CRITICAL, "Do you need to run 'squid -z' to initialize storage?");

    assert(sd);
    fatalf("Rock cache_dir[%d] rebuild of %s failed: %s.",
           sd->index, sd->filePath, msg);
}

/// adds slot to the free slot index
void
Rock::Rebuild::freeSlot(const SlotId slotId, const bool invalid)
{
    debugs(47,5, sd->index << " frees slot " << slotId);
    LoadingSlot slot = loadingSlot(slotId);
    assert(!slot.freed());
    slot.freed(true);

    if (invalid) {
        ++counts.invalid;
        //sd->unlink(fileno); leave garbage on disk, it should not hurt
    }

    Ipc::Mem::PageId pageId;
    pageId.pool = Ipc::Mem::PageStack::IdForSwapDirSpace(sd->index);
    pageId.number = slotId+1;
    sd->freeSlots->push(pageId);
}

/// freeSlot() for never-been-mapped slots
void
Rock::Rebuild::freeUnusedSlot(const SlotId slotId, const bool invalid)
{
    LoadingSlot slot = loadingSlot(slotId);
    // mapped slots must be freed via freeBadEntry() to keep the map in sync
    assert(!slot.mapped());
    freeSlot(slotId, invalid);
}

/// adds slot to the entry chain in the map
void
Rock::Rebuild::mapSlot(const SlotId slotId, const DbCellHeader &header)
{
    LoadingSlot slot = loadingSlot(slotId);
    assert(!slot.mapped());
    assert(!slot.freed());
    slot.mapped(true);

    Ipc::StoreMapSlice slice;
    slice.next = header.nextSlot;
    slice.size = header.payloadSize;
    sd->map->importSlice(slotId, slice);
}

template <class SlotIdType> // accommodates atomic and simple SlotIds.
void
Rock::Rebuild::chainSlots(SlotIdType &from, const SlotId to)
{
    LoadingSlot slot = loadingSlot(to);
    assert(slot.more < 0);
    slot.more = from; // may still be unset
    from = to;
}

/// adds slot to an existing entry chain; caller must check that the slot
/// belongs to the chain it is being added to
void
Rock::Rebuild::addSlotToEntry(const sfileno fileno, const SlotId slotId, const DbCellHeader &header)
{
    LoadingEntry le = loadingEntry(fileno);
    Ipc::StoreMapAnchor &anchor = sd->map->writeableEntry(fileno);

    debugs(47,9, "adding " << slotId << " to entry " << fileno);
    // we do not need to preserve the order
    if (le.anchored()) {
        LoadingSlot inode = loadingSlot(anchor.start);
        chainSlots(inode.more, slotId);
    } else {
        chainSlots(anchor.start, slotId);
    }

    le.size += header.payloadSize; // must precede freeBadEntry() calls

    if (header.firstSlot == slotId) {
        debugs(47,5, "added inode");

        if (le.anchored()) { // we have already added another inode slot
            freeBadEntry(fileno, "inode conflict");
            ++counts.clashcount;
            return;
        }

        le.anchored(true);

        if (!importEntry(anchor, fileno, header)) {
            freeBadEntry(fileno, "corrupted metainfo");
            return;
        }

        // set total entry size and/or check it for consistency
        if (const uint64_t totalSize = header.entrySize) {
            assert(totalSize != static_cast<uint64_t>(-1));
            if (!anchor.basics.swap_file_sz) {
                anchor.basics.swap_file_sz = totalSize;
                assert(anchor.basics.swap_file_sz != static_cast<uint64_t>(-1));
            } else if (totalSize != anchor.basics.swap_file_sz) {
                freeBadEntry(fileno, "size mismatch");
                return;
            }
        }
    }

    const uint64_t totalSize = anchor.basics.swap_file_sz; // may be 0/unknown

    if (totalSize > 0 && le.size > totalSize) { // overflow
        debugs(47, 8, "overflow: " << le.size << " > " << totalSize);
        freeBadEntry(fileno, "overflowing");
        return;
    }

    mapSlot(slotId, header);
    if (totalSize > 0 && le.size == totalSize)
        finalizeOrFree(fileno, le); // entry is probably fully loaded now
}

/// initialize housekeeping information for a newly accepted entry
void
Rock::Rebuild::primeNewEntry(Ipc::StoreMap::Anchor &anchor, const sfileno fileno, const DbCellHeader &header)
{
    anchor.setKey(reinterpret_cast<const cache_key*>(header.key));
    assert(header.firstSlot >= 0);
    anchor.start = -1; // addSlotToEntry() will set it

    assert(anchor.basics.swap_file_sz != static_cast<uint64_t>(-1));

    LoadingEntry le = loadingEntry(fileno);
    le.state(LoadingEntry::leLoading);
    le.version = header.version;
    le.size = 0;
}

/// handle a slot from an entry that we have not seen before
void
Rock::Rebuild::startNewEntry(const sfileno fileno, const SlotId slotId, const DbCellHeader &header)
{
    // A miss may have been stored at our fileno while we were loading other
    // slots from disk. We ought to preserve that entry because it is fresher.
    const bool overwriteExisting = false;
    if (Ipc::StoreMap::Anchor *anchor = sd->map->openForWritingAt(fileno, overwriteExisting)) {
        primeNewEntry(*anchor, fileno, header);
        addSlotToEntry(fileno, slotId, header); // may fail
        assert(anchor->basics.swap_file_sz != static_cast<uint64_t>(-1));
    } else {
        // A new from-network entry is occupying our map slot; let it be, but
        // save us from the trouble of going through the above motions again.
        LoadingEntry le = loadingEntry(fileno);
        le.state(LoadingEntry::leIgnored);
        freeUnusedSlot(slotId, false);
    }
}

/// does the header belong to the fileno entry being loaded?
bool
Rock::Rebuild::sameEntry(const sfileno fileno, const DbCellHeader &header) const
{
    // Header updates always result in multi-start chains and often
    // result in multi-version chains so we can only compare the keys.
    const Ipc::StoreMap::Anchor &anchor = sd->map->writeableEntry(fileno);
    return anchor.sameKey(reinterpret_cast<const cache_key*>(header.key));
}

/// handle freshly loaded (and validated) db slot header
void
Rock::Rebuild::useNewSlot(const SlotId slotId, const DbCellHeader &header)
{
    const cache_key *const key =
        reinterpret_cast<const cache_key*>(header.key);
    const sfileno fileno = sd->map->fileNoByKey(key);
    assert(0 <= fileno && fileno < dbEntryLimit);

    LoadingEntry le = loadingEntry(fileno);
    debugs(47,9, "entry " << fileno << " state: " << le.state() << ", inode: " <<
           header.firstSlot << ", size: " << header.payloadSize);

    switch (le.state()) {

    case LoadingEntry::leEmpty: {
        startNewEntry(fileno, slotId, header);
        break;
    }

    case LoadingEntry::leLoading: {
        if (sameEntry(fileno, header)) {
            addSlotToEntry(fileno, slotId, header); // may fail
        } else {
            // either the loading chain or this slot is stale;
            // be conservative and ignore both (and any future ones)
            freeBadEntry(fileno, "duplicated");
            freeUnusedSlot(slotId, true);
            ++counts.dupcount;
        }
        break;
    }

    case LoadingEntry::leLoaded: {
        // either the previously loaded chain or this slot is stale;
        // be conservative and ignore both (and any future ones)
        le.state(LoadingEntry::leCorrupted);
        sd->map->freeEntry(fileno); // may not be immediately successful
        freeUnusedSlot(slotId, true);
        ++counts.dupcount;
        break;
    }

    case LoadingEntry::leCorrupted: {
        // previously seen slots messed things up so we must ignore this one
        freeUnusedSlot(slotId, true);
        break;
    }

    case LoadingEntry::leIgnored: {
        // already replaced by a fresher or colliding from-network entry
        freeUnusedSlot(slotId, false);
        break;
    }
    }
}

SBuf
Rock::Rebuild::progressDescription() const
{
    SBufStream str;

    str << Debug::Extra << "slots loaded: " << Progress(loadingPos, dbSlotLimit);

    const auto validatingEntries = validationPos < dbEntryLimit;
    const auto entriesValidated = validatingEntries ? validationPos : dbEntryLimit;
    str << Debug::Extra << "entries validated: " << Progress(entriesValidated, dbEntryLimit);
    if (opt_store_doublecheck) {
        const auto slotsValidated = validatingEntries ? 0 : (validationPos - dbEntryLimit);
        str << Debug::Extra << "slots validated: " << Progress(slotsValidated, dbSlotLimit);
    }

    return str.buf();
}
