]> git.ipfire.org Git - thirdparty/squid.git/blob - src/fs/rock/RockRebuild.cc
Source Format Enforcement (#963)
[thirdparty/squid.git] / src / fs / rock / RockRebuild.cc
1 /*
2 * Copyright (C) 1996-2022 The Squid Software Foundation and contributors
3 *
4 * Squid software is distributed under GPLv2+ license and includes
5 * contributions from numerous individuals and organizations.
6 * Please see the COPYING and CONTRIBUTORS files for details.
7 */
8
9 /* DEBUG: section 79 Disk IO Routines */
10
11 #include "squid.h"
12 #include "base/AsyncJobCalls.h"
13 #include "DebugMessages.h"
14 #include "fs/rock/RockDbCell.h"
15 #include "fs/rock/RockRebuild.h"
16 #include "fs/rock/RockSwapDir.h"
17 #include "fs_io.h"
18 #include "globals.h"
19 #include "md5.h"
20 #include "sbuf/Stream.h"
21 #include "SquidTime.h"
22 #include "Store.h"
23 #include "tools.h"
24
25 #include <cerrno>
26
27 CBDATA_NAMESPACED_CLASS_INIT(Rock, Rebuild);
28
29 /**
30 \defgroup RockFsRebuild Rock Store Rebuild
31 \ingroup Filesystems
32 *
33 \section RockFsRebuildOverview Overview
 * Several layers of information are manipulated during the rebuild:
35 \par
36 * Store Entry: Response message plus all the metainformation associated with
37 * it. Identified by store key. At any given time, from Squid point
38 * of view, there is only one entry with a given key, but several
39 * different entries with the same key can be observed in any historical
40 * archive (such as an access log or a store database).
41 \par
42 * Slot chain: A sequence of db slots representing a Store Entry state at
43 * some point in time. Identified by key+version combination. Due to
44 * transaction aborts, crashes, and idle periods, some chains may contain
45 * incomplete or stale information. We assume that no two different chains
46 * have the same key and version. If that assumption fails, we may serve a
47 * hodgepodge entry during rebuild, until "extra" slots are loaded/noticed.
48 \par
49 * iNode: The very first db slot in an entry slot chain. This slot contains
50 * at least the beginning of Store Entry metadata, but most 32KB inodes contain
51 * the entire metadata, HTTP headers, and HTTP body.
52 \par
53 * Db slot: A db record containing a piece of a single store entry and linked
54 * to other slots with the same key and version fields, forming a chain.
55 * Slots are identified by their absolute position in the database file,
56 * which is naturally unique.
57 \par
58 * When information from the newly loaded db slot contradicts the entry-level
59 * information collected so far (e.g., the versions do not match or the total
60 * chain size after the slot contribution exceeds the expected number), the
61 * whole entry (and not just the chain or the slot!) is declared corrupted.
62 \par
63 * Why invalidate the whole entry? Rock Store is written for high-load
 * environments with large caches, where there are usually very few idle slots
65 * in the database. A space occupied by a purged entry is usually immediately
66 * reclaimed. A Squid crash or a transaction abort is rather unlikely to
67 * leave a relatively large number of stale slots in the database. Thus, the
68 * number of potentially corrupted entries is relatively small. On the other
 * hand, the damage from serving a single hodgepodge entry may be significant
70 * to the user. In such an environment, invalidating the whole entry has
71 * negligible performance impact but saves us from high-damage bugs.
72 */
73
74 namespace Rock
75 {
76
/// whether the loading position has reached (or passed) the db slot limit
static bool
DoneLoading(const int64_t loadingPos, const int64_t dbSlotLimit)
{
    const bool slotsRemain = loadingPos < dbSlotLimit;
    return !slotsRemain;
}
82
83 static bool
84 DoneValidating(const int64_t validationPos, const int64_t dbSlotLimit, const int64_t dbEntryLimit)
85 {
86 // paranoid slot checking is only enabled with squid -S
87 const auto extraWork = opt_store_doublecheck ? dbSlotLimit : 0;
88 return validationPos >= (dbEntryLimit + extraWork);
89 }
90
/// Low-level anti-padding storage for LoadingEntry and LoadingSlot flags.
/// All flags are packed as bit-fields and start out cleared.
class LoadingFlags
{
public:
    LoadingFlags():
        state(0),
        anchored(0),
        mapped(0),
        finalized(0),
        freed(0)
    {
    }

    /* LoadingEntry flags */

    /// current entry state (one of the LoadingEntry::State values)
    uint8_t state:3;
    /// whether we loaded the inode slot for this entry
    uint8_t anchored:1;

    /* LoadingSlot flags */

    /// whether the slot was added to a mapped entry
    uint8_t mapped:1;
    /// whether finalizeOrThrow() has scanned the slot
    uint8_t finalized:1;
    /// whether the slot was given to the map as free space
    uint8_t freed:1;
};
106
107 /// smart StoreEntry-level info pointer (hides anti-padding LoadingParts arrays)
108 class LoadingEntry
109 {
110 public:
111 LoadingEntry(const sfileno fileNo, LoadingParts &source);
112
113 uint64_t &size; ///< payload seen so far
114 uint32_t &version; ///< DbCellHeader::version to distinguish same-URL chains
115
116 /// possible store entry states during index rebuild
117 typedef enum { leEmpty = 0, leLoading, leLoaded, leCorrupted, leIgnored } State;
118
119 /* LoadingFlags::state */
120 State state() const { return static_cast<State>(flags.state); }
121 void state(State aState) const { flags.state = aState; }
122
123 /* LoadingFlags::anchored */
124 bool anchored() const { return flags.anchored; }
125 void anchored(const bool beAnchored) { flags.anchored = beAnchored; }
126
127 private:
128 LoadingFlags &flags; ///< entry flags (see the above accessors) are ours
129 };
130
131 /// smart db slot-level info pointer (hides anti-padding LoadingParts arrays)
132 class LoadingSlot
133 {
134 public:
135 LoadingSlot(const SlotId slotId, LoadingParts &source);
136
137 /// another slot in some chain belonging to the same entry (unordered!)
138 Ipc::StoreMapSliceId &more;
139
140 /* LoadingFlags::mapped */
141 bool mapped() const { return flags.mapped; }
142 void mapped(const bool beMapped) { flags.mapped = beMapped; }
143
144 /* LoadingFlags::finalized */
145 bool finalized() const { return flags.finalized; }
146 void finalized(const bool beFinalized) { flags.finalized = beFinalized; }
147
148 /* LoadingFlags::freed */
149 bool freed() const { return flags.freed; }
150 void freed(const bool beFreed) { flags.freed = beFreed; }
151
152 bool used() const { return freed() || mapped() || more != -1; }
153
154 private:
155 LoadingFlags &flags; ///< slot flags (see the above accessors) are ours
156 };
157
158 /// information about store entries being loaded from disk (and their slots)
159 /// used for identifying partially stored/loaded entries
160 class LoadingParts
161 {
162 public:
163 using Sizes = Ipc::StoreMapItems<uint64_t>;
164 using Versions = Ipc::StoreMapItems<uint32_t>;
165 using Mores = Ipc::StoreMapItems<Ipc::StoreMapSliceId>;
166 using Flags = Ipc::StoreMapItems<LoadingFlags>;
167
168 LoadingParts(const SwapDir &dir, const bool resuming);
169 ~LoadingParts();
170
171 // lacking copying/moving code and often too huge to copy
172 LoadingParts(LoadingParts&&) = delete;
173
174 Sizes &sizes() const { return *sizesOwner->object(); }
175 Versions &versions() const { return *versionsOwner->object(); }
176 Mores &mores() const { return *moresOwner->object(); }
177 Flags &flags() const { return *flagsOwner->object(); }
178
179 private:
180 /* Anti-padding storage. With millions of entries, padding matters! */
181
182 /* indexed by sfileno */
183 Sizes::Owner *sizesOwner; ///< LoadingEntry::size for all entries
184 Versions::Owner *versionsOwner; ///< LoadingEntry::version for all entries
185
186 /* indexed by SlotId */
187 Mores::Owner *moresOwner; ///< LoadingSlot::more for all slots
188
189 /* entry flags are indexed by sfileno; slot flags -- by SlotId */
190 Flags::Owner *flagsOwner; ///< all LoadingEntry and LoadingSlot flags
191 };
192
193 } /* namespace Rock */
194
195 /* LoadingEntry */
196
197 Rock::LoadingEntry::LoadingEntry(const sfileno fileNo, LoadingParts &source):
198 size(source.sizes().at(fileNo)),
199 version(source.versions().at(fileNo)),
200 flags(source.flags().at(fileNo))
201 {
202 }
203
204 /* LoadingSlot */
205
206 Rock::LoadingSlot::LoadingSlot(const SlotId slotId, LoadingParts &source):
207 more(source.mores().at(slotId)),
208 flags(source.flags().at(slotId))
209 {
210 }
211
212 /* LoadingParts */
213
214 template <class T>
215 inline typename T::Owner *
216 createOwner(const char *dirPath, const char *sfx, const int64_t limit, const bool resuming)
217 {
218 auto id = Ipc::Mem::Segment::Name(SBuf(dirPath), sfx);
219 return resuming ? Ipc::Mem::Owner<T>::Old(id.c_str()) : shm_new(T)(id.c_str(), limit);
220 }
221
222 Rock::LoadingParts::LoadingParts(const SwapDir &dir, const bool resuming):
223 sizesOwner(createOwner<Sizes>(dir.path, "rebuild_sizes", dir.entryLimitActual(), resuming)),
224 versionsOwner(createOwner<Versions>(dir.path, "rebuild_versions", dir.entryLimitActual(), resuming)),
225 moresOwner(createOwner<Mores>(dir.path, "rebuild_mores", dir.slotLimitActual(), resuming)),
226 flagsOwner(createOwner<Flags>(dir.path, "rebuild_flags", dir.slotLimitActual(), resuming))
227 {
228 assert(sizes().capacity == versions().capacity); // every entry has both fields
229 assert(sizes().capacity <= mores().capacity); // every entry needs slot(s)
230 assert(mores().capacity == flags().capacity); // every slot needs a set of flags
231
232 if (!resuming) {
233 // other parts rely on shared memory segments being zero-initialized
234 // TODO: refactor the next slot pointer to use 0 for nil values
235 mores().fill(-1);
236 }
237 }
238
239 Rock::LoadingParts::~LoadingParts()
240 {
241 delete sizesOwner;
242 delete versionsOwner;
243 delete moresOwner;
244 delete flagsOwner;
245 }
246
247 /* Rock::Rebuild::Stats */
248
249 SBuf
250 Rock::Rebuild::Stats::Path(const char *dirPath)
251 {
252 return Ipc::Mem::Segment::Name(SBuf(dirPath), "rebuild_stats");
253 }
254
255 Ipc::Mem::Owner<Rock::Rebuild::Stats>*
256 Rock::Rebuild::Stats::Init(const SwapDir &dir)
257 {
258 return shm_new(Stats)(Path(dir.path).c_str());
259 }
260
261 bool
262 Rock::Rebuild::Stats::completed(const SwapDir &dir) const
263 {
264 return DoneLoading(counts.scancount, dir.slotLimitActual()) &&
265 DoneValidating(counts.validations, dir.slotLimitActual(), dir.entryLimitActual());
266 }
267
268 /* Rebuild */
269
270 bool
271 Rock::Rebuild::IsResponsible(const SwapDir &)
272 {
273 // in SMP mode, only the disker is responsible for populating the map
274 return !UsingSmp() || IamDiskProcess();
275 }
276
277 bool
278 Rock::Rebuild::Start(SwapDir &dir)
279 {
280 if (!IsResponsible(dir)) {
281 debugs(47, 2, "not responsible for indexing cache_dir #" <<
282 dir.index << " from " << dir.filePath);
283 return false;
284 }
285
286 const auto stats = shm_old(Rebuild::Stats)(Stats::Path(dir.path).c_str());
287 if (stats->completed(dir)) {
288 debugs(47, 2, "already indexed cache_dir #" <<
289 dir.index << " from " << dir.filePath);
290 return false;
291 }
292
293 AsyncJob::Start(new Rebuild(&dir, stats));
294 return true;
295 }
296
297 Rock::Rebuild::Rebuild(SwapDir *dir, const Ipc::Mem::Pointer<Stats> &s): AsyncJob("Rock::Rebuild"),
298 sd(dir),
299 parts(nullptr),
300 stats(s),
301 dbSize(0),
302 dbSlotSize(0),
303 dbSlotLimit(0),
304 dbEntryLimit(0),
305 fd(-1),
306 dbOffset(0),
307 loadingPos(stats->counts.scancount),
308 validationPos(stats->counts.validations),
309 counts(stats->counts),
310 resuming(stats->counts.started())
311 {
312 assert(sd);
313 dbSize = sd->diskOffsetLimit(); // we do not care about the trailer waste
314 dbSlotSize = sd->slotSize;
315 dbEntryLimit = sd->entryLimitActual();
316 dbSlotLimit = sd->slotLimitActual();
317 assert(dbEntryLimit <= dbSlotLimit);
318 registerRunner();
319 }
320
321 Rock::Rebuild::~Rebuild()
322 {
323 if (fd >= 0)
324 file_close(fd);
325 // normally, segments are used until the Squid instance quits,
326 // but these indexing-only segments are no longer needed
327 delete parts;
328 }
329
330 void
331 Rock::Rebuild::startShutdown()
332 {
333 mustStop("startShutdown");
334 }
335
336 /// prepares and initiates entry loading sequence
337 void
338 Rock::Rebuild::start()
339 {
340 assert(IsResponsible(*sd));
341
342 if (!resuming) {
343 debugs(47, Important(18), "Loading cache_dir #" << sd->index <<
344 " from " << sd->filePath);
345 } else {
346 debugs(47, Important(63), "Resuming indexing cache_dir #" << sd->index <<
347 " from " << sd->filePath << ':' << progressDescription());
348 }
349
350 fd = file_open(sd->filePath, O_RDONLY | O_BINARY);
351 if (fd < 0)
352 failure("cannot open db", errno);
353
354 char hdrBuf[SwapDir::HeaderSize];
355 if (read(fd, hdrBuf, sizeof(hdrBuf)) != SwapDir::HeaderSize)
356 failure("cannot read db header", errno);
357
358 // slot prefix of SM_PAGE_SIZE should fit both core entry header and ours
359 assert(sizeof(DbCellHeader) < SM_PAGE_SIZE);
360 buf.init(SM_PAGE_SIZE, SM_PAGE_SIZE);
361
362 dbOffset = SwapDir::HeaderSize + loadingPos * dbSlotSize;
363
364 assert(!parts);
365 parts = new LoadingParts(*sd, resuming);
366
367 counts.updateStartTime(current_time);
368
369 checkpoint();
370 }
371
372 /// continues after a pause if not done
373 void
374 Rock::Rebuild::checkpoint()
375 {
376 if (!done())
377 eventAdd("Rock::Rebuild", Rock::Rebuild::Steps, this, 0.01, 1, true);
378 }
379
380 bool
381 Rock::Rebuild::doneLoading() const
382 {
383 return DoneLoading(loadingPos, dbSlotLimit);
384 }
385
386 bool
387 Rock::Rebuild::doneValidating() const
388 {
389 return DoneValidating(validationPos, dbSlotLimit, dbEntryLimit);
390 }
391
392 bool
393 Rock::Rebuild::doneAll() const
394 {
395 return doneLoading() && doneValidating() && AsyncJob::doneAll();
396 }
397
398 void
399 Rock::Rebuild::Steps(void *data)
400 {
401 // use async call to enable job call protection that time events lack
402 CallJobHere(47, 5, static_cast<Rebuild*>(data), Rock::Rebuild, steps);
403 }
404
405 void
406 Rock::Rebuild::steps()
407 {
408 if (!doneLoading())
409 loadingSteps();
410 else
411 validationSteps();
412
413 checkpoint();
414 }
415
416 void
417 Rock::Rebuild::loadingSteps()
418 {
419 debugs(47,5, sd->index << " slot " << loadingPos << " at " <<
420 dbOffset << " <= " << dbSize);
421
422 // Balance our desire to maximize the number of entries processed at once
423 // (and, hence, minimize overheads and total rebuild time) with a
424 // requirement to also process Coordinator events, disk I/Os, etc.
425 const int maxSpentMsec = 50; // keep small: most RAM I/Os are under 1ms
426 const timeval loopStart = current_time;
427
428 int64_t loaded = 0;
429 while (!doneLoading()) {
430 loadOneSlot();
431 dbOffset += dbSlotSize;
432 ++loadingPos;
433 ++loaded;
434
435 if (counts.scancount % 1000 == 0)
436 storeRebuildProgress(sd->index, dbSlotLimit, counts.scancount);
437
438 if (opt_foreground_rebuild)
439 continue; // skip "few entries at a time" check below
440
441 getCurrentTime();
442 const double elapsedMsec = tvSubMsec(loopStart, current_time);
443 if (elapsedMsec > maxSpentMsec || elapsedMsec < 0) {
444 debugs(47, 5, "pausing after " << loaded << " entries in " <<
445 elapsedMsec << "ms; " << (elapsedMsec/loaded) << "ms per entry");
446 break;
447 }
448 }
449 }
450
451 Rock::LoadingEntry
452 Rock::Rebuild::loadingEntry(const sfileno fileNo)
453 {
454 Must(0 <= fileNo && fileNo < dbEntryLimit);
455 return LoadingEntry(fileNo, *parts);
456 }
457
458 Rock::LoadingSlot
459 Rock::Rebuild::loadingSlot(const SlotId slotId)
460 {
461 Must(0 <= slotId && slotId < dbSlotLimit);
462 Must(slotId <= loadingPos); // cannot look ahead
463 return LoadingSlot(slotId, *parts);
464 }
465
466 void
467 Rock::Rebuild::loadOneSlot()
468 {
469 debugs(47,5, sd->index << " slot " << loadingPos << " at " <<
470 dbOffset << " <= " << dbSize);
471
472 // increment before loadingPos to avoid getting stuck at a slot
473 // in a case of crash
474 ++counts.scancount;
475
476 if (lseek(fd, dbOffset, SEEK_SET) < 0)
477 failure("cannot seek to db entry", errno);
478
479 buf.reset();
480
481 if (!storeRebuildLoadEntry(fd, sd->index, buf, counts))
482 return;
483
484 const SlotId slotId = loadingPos;
485
486 // get our header
487 DbCellHeader header;
488 if (buf.contentSize() < static_cast<mb_size_t>(sizeof(header))) {
489 debugs(47, DBG_IMPORTANT, "WARNING: cache_dir[" << sd->index << "]: " <<
490 "Ignoring truncated " << buf.contentSize() << "-byte " <<
491 "cache entry meta data at " << dbOffset);
492 freeUnusedSlot(slotId, true);
493 return;
494 }
495 memcpy(&header, buf.content(), sizeof(header));
496 if (header.empty()) {
497 freeUnusedSlot(slotId, false);
498 return;
499 }
500 if (!header.sane(dbSlotSize, dbSlotLimit)) {
501 debugs(47, DBG_IMPORTANT, "WARNING: cache_dir[" << sd->index << "]: " <<
502 "Ignoring malformed cache entry meta data at " << dbOffset);
503 freeUnusedSlot(slotId, true);
504 return;
505 }
506 buf.consume(sizeof(header)); // optimize to avoid memmove()
507
508 useNewSlot(slotId, header);
509 }
510
511 /// parse StoreEntry basics and add them to the map, returning true on success
512 bool
513 Rock::Rebuild::importEntry(Ipc::StoreMapAnchor &anchor, const sfileno fileno, const DbCellHeader &header)
514 {
515 cache_key key[SQUID_MD5_DIGEST_LENGTH];
516 StoreEntry loadedE;
517 const uint64_t knownSize = header.entrySize > 0 ?
518 header.entrySize : anchor.basics.swap_file_sz.load();
519 if (!storeRebuildParseEntry(buf, loadedE, key, counts, knownSize))
520 return false;
521
522 // the entry size may be unknown, but if it is known, it is authoritative
523
524 debugs(47, 8, "importing basics for entry " << fileno <<
525 " inode.entrySize: " << header.entrySize <<
526 " swap_file_sz: " << loadedE.swap_file_sz);
527 anchor.set(loadedE);
528
529 // we have not validated whether all db cells for this entry were loaded
530 EBIT_CLR(anchor.basics.flags, ENTRY_VALIDATED);
531
532 // loadedE->dump(5);
533
534 return true;
535 }
536
537 void
538 Rock::Rebuild::validationSteps()
539 {
540 debugs(47, 5, sd->index << " validating from " << validationPos);
541
542 // see loadingSteps() for the rationale; TODO: avoid duplication
543 const int maxSpentMsec = 50; // keep small: validation does not do I/O
544 const timeval loopStart = current_time;
545
546 int64_t validated = 0;
547 while (!doneValidating()) {
548 // increment before validationPos to avoid getting stuck at a slot
549 // in a case of crash
550 ++counts.validations;
551 if (validationPos < dbEntryLimit)
552 validateOneEntry(validationPos);
553 else
554 validateOneSlot(validationPos - dbEntryLimit);
555 ++validationPos;
556 ++validated;
557
558 if (validationPos % 1000 == 0)
559 debugs(20, 2, "validated: " << validationPos);
560
561 if (opt_foreground_rebuild)
562 continue; // skip "few entries at a time" check below
563
564 getCurrentTime();
565 const double elapsedMsec = tvSubMsec(loopStart, current_time);
566 if (elapsedMsec > maxSpentMsec || elapsedMsec < 0) {
567 debugs(47, 5, "pausing after " << validated << " entries in " <<
568 elapsedMsec << "ms; " << (elapsedMsec/validated) << "ms per entry");
569 break;
570 }
571 }
572 }
573
/// Either make the entry accessible to all or throw.
/// This method assumes it is called only when no more entry slots are expected.
///
/// Walks the map-linked slot chain of the entry at fileNo, checking that each
/// visited slot was loaded, mapped, not freed, and not visited before, and
/// that the chain ends exactly when the accumulated slice sizes reach le.size.
/// On success, marks the entry as validated and loaded, closes it for
/// writing, and counts it as a loaded object.
void
Rock::Rebuild::finalizeOrThrow(const sfileno fileNo, LoadingEntry &le)
{
    // walk all map-linked slots, starting from inode, and mark each
    Ipc::StoreMapAnchor &anchor = sd->map->writeableEntry(fileNo);
    Must(le.size > 0); // paranoid
    uint64_t mappedSize = 0;
    SlotId slotId = anchor.start;
    // stop at the end of the chain or as soon as enough payload was mapped
    while (slotId >= 0 && mappedSize < le.size) {
        LoadingSlot slot = loadingSlot(slotId); // throws if we have not loaded that slot
        Must(!slot.finalized()); // no loops or stealing from other entries
        Must(slot.mapped()); // all our slots should be in the sd->map
        Must(!slot.freed()); // all our slots should still be present
        slot.finalized(true);

        Ipc::StoreMapSlice &mapSlice = sd->map->writeableSlice(fileNo, slotId);
        Must(mapSlice.size > 0); // paranoid
        mappedSize += mapSlice.size;
        slotId = mapSlice.next;
    }
    /* no hodgepodge entries: one entry - one full chain and no leftovers */
    Must(slotId < 0);
    Must(mappedSize == le.size);

    // an unknown (zero) total entry size is replaced with the loaded size
    if (!anchor.basics.swap_file_sz)
        anchor.basics.swap_file_sz = le.size;
    EBIT_SET(anchor.basics.flags, ENTRY_VALIDATED);
    le.state(LoadingEntry::leLoaded);
    sd->map->closeForWriting(fileNo);
    ++counts.objcount;
}
607
608 /// Either make the entry accessible to all or free it.
609 /// This method must only be called when no more entry slots are expected.
610 void
611 Rock::Rebuild::finalizeOrFree(const sfileno fileNo, LoadingEntry &le)
612 {
613 try {
614 finalizeOrThrow(fileNo, le);
615 } catch (const std::exception &ex) {
616 freeBadEntry(fileNo, ex.what());
617 }
618 }
619
620 void
621 Rock::Rebuild::validateOneEntry(const sfileno fileNo)
622 {
623 LoadingEntry entry = loadingEntry(fileNo);
624 switch (entry.state()) {
625
626 case LoadingEntry::leLoading:
627 finalizeOrFree(fileNo, entry);
628 break;
629
630 case LoadingEntry::leEmpty: // no entry hashed to this position
631 case LoadingEntry::leLoaded: // we have already unlocked this entry
632 case LoadingEntry::leCorrupted: // we have already removed this entry
633 case LoadingEntry::leIgnored: // we have already discarded this entry
634 break;
635 }
636 }
637
638 void
639 Rock::Rebuild::validateOneSlot(const SlotId slotId)
640 {
641 const LoadingSlot slot = loadingSlot(slotId);
642 // there should not be any unprocessed slots left
643 Must(slot.freed() || (slot.mapped() && slot.finalized()));
644 }
645
646 /// Marks remaining bad entry slots as free and unlocks the entry. The map
647 /// cannot do this because Loading entries may have holes in the slots chain.
648 void
649 Rock::Rebuild::freeBadEntry(const sfileno fileno, const char *eDescription)
650 {
651 debugs(47, 2, "cache_dir #" << sd->index << ' ' << eDescription <<
652 " entry " << fileno << " is ignored during rebuild");
653
654 LoadingEntry le = loadingEntry(fileno);
655 le.state(LoadingEntry::leCorrupted);
656
657 Ipc::StoreMapAnchor &anchor = sd->map->writeableEntry(fileno);
658 assert(anchor.start < 0 || le.size > 0);
659 for (SlotId slotId = anchor.start; slotId >= 0;) {
660 const SlotId next = loadingSlot(slotId).more;
661 freeSlot(slotId, true);
662 slotId = next;
663 }
664
665 sd->map->forgetWritingEntry(fileno);
666 }
667
668 void
669 Rock::Rebuild::swanSong()
670 {
671 debugs(47,3, "cache_dir #" << sd->index << " rebuild level: " <<
672 StoreController::store_dirs_rebuilding);
673 storeRebuildComplete(&counts);
674 }
675
676 void
677 Rock::Rebuild::failure(const char *msg, int errNo)
678 {
679 debugs(47,5, sd->index << " slot " << loadingPos << " at " <<
680 dbOffset << " <= " << dbSize);
681
682 if (errNo)
683 debugs(47, DBG_CRITICAL, "ERROR: Rock cache_dir rebuild failure: " << xstrerr(errNo));
684 debugs(47, DBG_CRITICAL, "Do you need to run 'squid -z' to initialize storage?");
685
686 assert(sd);
687 fatalf("Rock cache_dir[%d] rebuild of %s failed: %s.",
688 sd->index, sd->filePath, msg);
689 }
690
691 /// adds slot to the free slot index
692 void
693 Rock::Rebuild::freeSlot(const SlotId slotId, const bool invalid)
694 {
695 debugs(47,5, sd->index << " frees slot " << slotId);
696 LoadingSlot slot = loadingSlot(slotId);
697 assert(!slot.freed());
698 slot.freed(true);
699
700 if (invalid) {
701 ++counts.invalid;
702 //sd->unlink(fileno); leave garbage on disk, it should not hurt
703 }
704
705 Ipc::Mem::PageId pageId;
706 pageId.pool = Ipc::Mem::PageStack::IdForSwapDirSpace(sd->index);
707 pageId.number = slotId+1;
708 sd->freeSlots->push(pageId);
709 }
710
711 /// freeSlot() for never-been-mapped slots
712 void
713 Rock::Rebuild::freeUnusedSlot(const SlotId slotId, const bool invalid)
714 {
715 LoadingSlot slot = loadingSlot(slotId);
716 // mapped slots must be freed via freeBadEntry() to keep the map in sync
717 assert(!slot.mapped());
718 freeSlot(slotId, invalid);
719 }
720
721 /// adds slot to the entry chain in the map
722 void
723 Rock::Rebuild::mapSlot(const SlotId slotId, const DbCellHeader &header)
724 {
725 LoadingSlot slot = loadingSlot(slotId);
726 assert(!slot.mapped());
727 assert(!slot.freed());
728 slot.mapped(true);
729
730 Ipc::StoreMapSlice slice;
731 slice.next = header.nextSlot;
732 slice.size = header.payloadSize;
733 sd->map->importSlice(slotId, slice);
734 }
735
736 template <class SlotIdType> // accommodates atomic and simple SlotIds.
737 void
738 Rock::Rebuild::chainSlots(SlotIdType &from, const SlotId to)
739 {
740 LoadingSlot slot = loadingSlot(to);
741 assert(slot.more < 0);
742 slot.more = from; // may still be unset
743 from = to;
744 }
745
/// adds slot to an existing entry chain; caller must check that the slot
/// belongs to the chain it is being added to
///
/// The slot is linked into the loading chain and its payload size is added
/// to the entry total. An inode slot (header.firstSlot == slotId) also
/// anchors the entry and imports entry basics. The entry is freed as bad on
/// an inode conflict, corrupted metainfo, a total size mismatch, or a
/// payload overflow. Otherwise, the slot is mapped and, if the known total
/// size has been reached, the entry is finalized (or freed).
void
Rock::Rebuild::addSlotToEntry(const sfileno fileno, const SlotId slotId, const DbCellHeader &header)
{
    LoadingEntry le = loadingEntry(fileno);
    Ipc::StoreMapAnchor &anchor = sd->map->writeableEntry(fileno);

    debugs(47,9, "adding " << slotId << " to entry " << fileno);
    // we do not need to preserve the order
    if (le.anchored()) {
        // link the new slot right after the already loaded inode
        LoadingSlot inode = loadingSlot(anchor.start);
        chainSlots(inode.more, slotId);
    } else {
        // no inode yet: the new slot becomes the chain start
        chainSlots(anchor.start, slotId);
    }

    le.size += header.payloadSize; // must precede freeBadEntry() calls

    if (header.firstSlot == slotId) {
        debugs(47,5, "added inode");

        if (le.anchored()) { // we have already added another inode slot
            freeBadEntry(fileno, "inode conflict");
            ++counts.clashcount;
            return;
        }

        le.anchored(true);

        if (!importEntry(anchor, fileno, header)) {
            freeBadEntry(fileno, "corrupted metainfo");
            return;
        }

        // set total entry size and/or check it for consistency
        if (const uint64_t totalSize = header.entrySize) {
            assert(totalSize != static_cast<uint64_t>(-1));
            if (!anchor.basics.swap_file_sz) {
                anchor.basics.swap_file_sz = totalSize;
                assert(anchor.basics.swap_file_sz != static_cast<uint64_t>(-1));
            } else if (totalSize != anchor.basics.swap_file_sz) {
                freeBadEntry(fileno, "size mismatch");
                return;
            }
        }
    }

    const uint64_t totalSize = anchor.basics.swap_file_sz; // may be 0/unknown

    if (totalSize > 0 && le.size > totalSize) { // overflow
        debugs(47, 8, "overflow: " << le.size << " > " << totalSize);
        freeBadEntry(fileno, "overflowing");
        return;
    }

    // the slot passed all checks; make it a part of the map chain
    mapSlot(slotId, header);
    if (totalSize > 0 && le.size == totalSize)
        finalizeOrFree(fileno, le); // entry is probably fully loaded now
}
806
807 /// initialize housekeeping information for a newly accepted entry
808 void
809 Rock::Rebuild::primeNewEntry(Ipc::StoreMap::Anchor &anchor, const sfileno fileno, const DbCellHeader &header)
810 {
811 anchor.setKey(reinterpret_cast<const cache_key*>(header.key));
812 assert(header.firstSlot >= 0);
813 anchor.start = -1; // addSlotToEntry() will set it
814
815 assert(anchor.basics.swap_file_sz != static_cast<uint64_t>(-1));
816
817 LoadingEntry le = loadingEntry(fileno);
818 le.state(LoadingEntry::leLoading);
819 le.version = header.version;
820 le.size = 0;
821 }
822
823 /// handle a slot from an entry that we have not seen before
824 void
825 Rock::Rebuild::startNewEntry(const sfileno fileno, const SlotId slotId, const DbCellHeader &header)
826 {
827 // A miss may have been stored at our fileno while we were loading other
828 // slots from disk. We ought to preserve that entry because it is fresher.
829 const bool overwriteExisting = false;
830 if (Ipc::StoreMap::Anchor *anchor = sd->map->openForWritingAt(fileno, overwriteExisting)) {
831 primeNewEntry(*anchor, fileno, header);
832 addSlotToEntry(fileno, slotId, header); // may fail
833 assert(anchor->basics.swap_file_sz != static_cast<uint64_t>(-1));
834 } else {
835 // A new from-network entry is occupying our map slot; let it be, but
836 // save us from the trouble of going through the above motions again.
837 LoadingEntry le = loadingEntry(fileno);
838 le.state(LoadingEntry::leIgnored);
839 freeUnusedSlot(slotId, false);
840 }
841 }
842
843 /// does the header belong to the fileno entry being loaded?
844 bool
845 Rock::Rebuild::sameEntry(const sfileno fileno, const DbCellHeader &header) const
846 {
847 // Header updates always result in multi-start chains and often
848 // result in multi-version chains so we can only compare the keys.
849 const Ipc::StoreMap::Anchor &anchor = sd->map->writeableEntry(fileno);
850 return anchor.sameKey(reinterpret_cast<const cache_key*>(header.key));
851 }
852
853 /// handle freshly loaded (and validated) db slot header
854 void
855 Rock::Rebuild::useNewSlot(const SlotId slotId, const DbCellHeader &header)
856 {
857 const cache_key *const key =
858 reinterpret_cast<const cache_key*>(header.key);
859 const sfileno fileno = sd->map->fileNoByKey(key);
860 assert(0 <= fileno && fileno < dbEntryLimit);
861
862 LoadingEntry le = loadingEntry(fileno);
863 debugs(47,9, "entry " << fileno << " state: " << le.state() << ", inode: " <<
864 header.firstSlot << ", size: " << header.payloadSize);
865
866 switch (le.state()) {
867
868 case LoadingEntry::leEmpty: {
869 startNewEntry(fileno, slotId, header);
870 break;
871 }
872
873 case LoadingEntry::leLoading: {
874 if (sameEntry(fileno, header)) {
875 addSlotToEntry(fileno, slotId, header); // may fail
876 } else {
877 // either the loading chain or this slot is stale;
878 // be conservative and ignore both (and any future ones)
879 freeBadEntry(fileno, "duplicated");
880 freeUnusedSlot(slotId, true);
881 ++counts.dupcount;
882 }
883 break;
884 }
885
886 case LoadingEntry::leLoaded: {
887 // either the previously loaded chain or this slot is stale;
888 // be conservative and ignore both (and any future ones)
889 le.state(LoadingEntry::leCorrupted);
890 sd->map->freeEntry(fileno); // may not be immediately successful
891 freeUnusedSlot(slotId, true);
892 ++counts.dupcount;
893 break;
894 }
895
896 case LoadingEntry::leCorrupted: {
897 // previously seen slots messed things up so we must ignore this one
898 freeUnusedSlot(slotId, true);
899 break;
900 }
901
902 case LoadingEntry::leIgnored: {
903 // already replaced by a fresher or colliding from-network entry
904 freeUnusedSlot(slotId, false);
905 break;
906 }
907 }
908 }
909
910 SBuf
911 Rock::Rebuild::progressDescription() const
912 {
913 SBufStream str;
914
915 str << Debug::Extra << "slots loaded: " << Progress(loadingPos, dbSlotLimit);
916
917 const auto validatingEntries = validationPos < dbEntryLimit;
918 const auto entriesValidated = validatingEntries ? validationPos : dbEntryLimit;
919 str << Debug::Extra << "entries validated: " << Progress(entriesValidated, dbEntryLimit);
920 if (opt_store_doublecheck) {
921 const auto slotsValidated = validatingEntries ? 0 : (validationPos - dbEntryLimit);
922 str << Debug::Extra << "slots validated: " << Progress(slotsValidated, dbSlotLimit);
923 }
924
925 return str.buf();
926 }
927