]>
git.ipfire.org Git - thirdparty/squid.git/blob - src/fs/rock/RockRebuild.cc
2 * Copyright (C) 1996-2022 The Squid Software Foundation and contributors
4 * Squid software is distributed under GPLv2+ license and includes
5 * contributions from numerous individuals and organizations.
6 * Please see the COPYING and CONTRIBUTORS files for details.
9 /* DEBUG: section 79 Disk IO Routines */
12 #include "base/AsyncJobCalls.h"
13 #include "debug/Messages.h"
14 #include "fs/rock/RockDbCell.h"
15 #include "fs/rock/RockRebuild.h"
16 #include "fs/rock/RockSwapDir.h"
20 #include "sbuf/Stream.h"
26 CBDATA_NAMESPACED_CLASS_INIT(Rock
, Rebuild
);
29 \defgroup RockFsRebuild Rock Store Rebuild
32 \section RockFsRebuildOverview Overview
33 * Several layers of information are manipulated during the rebuild:
35 * Store Entry: Response message plus all the metainformation associated with
36 * it. Identified by store key. At any given time, from Squid point
37 * of view, there is only one entry with a given key, but several
38 * different entries with the same key can be observed in any historical
39 * archive (such as an access log or a store database).
41 * Slot chain: A sequence of db slots representing a Store Entry state at
42 * some point in time. Identified by key+version combination. Due to
43 * transaction aborts, crashes, and idle periods, some chains may contain
44 * incomplete or stale information. We assume that no two different chains
45 * have the same key and version. If that assumption fails, we may serve a
46 * hodgepodge entry during rebuild, until "extra" slots are loaded/noticed.
48 * iNode: The very first db slot in an entry slot chain. This slot contains
49 * at least the beginning of Store Entry metadata, but most 32KB inodes contain
50 * the entire metadata, HTTP headers, and HTTP body.
52 * Db slot: A db record containing a piece of a single store entry and linked
53 * to other slots with the same key and version fields, forming a chain.
54 * Slots are identified by their absolute position in the database file,
55 * which is naturally unique.
57 * When information from the newly loaded db slot contradicts the entry-level
58 * information collected so far (e.g., the versions do not match or the total
59 * chain size after the slot contribution exceeds the expected number), the
60 * whole entry (and not just the chain or the slot!) is declared corrupted.
62 * Why invalidate the whole entry? Rock Store is written for high-load
63 * environments with large caches, where there are usually very few idle slots
64 * in the database. A space occupied by a purged entry is usually immediately
65 * reclaimed. A Squid crash or a transaction abort is rather unlikely to
66 * leave a relatively large number of stale slots in the database. Thus, the
67 * number of potentially corrupted entries is relatively small. On the other
68 * hand, the damage from serving a single hodgepodge entry may be significant
69 * to the user. In such an environment, invalidating the whole entry has
70 * negligible performance impact but saves us from high-damage bugs.
/// whether the loading phase has consumed all db slots
static bool
DoneLoading(const int64_t loadingPos, const int64_t dbSlotLimit)
{
    return loadingPos >= dbSlotLimit;
}
83 DoneValidating(const int64_t validationPos
, const int64_t dbSlotLimit
, const int64_t dbEntryLimit
)
85 // paranoid slot checking is only enabled with squid -S
86 const auto extraWork
= opt_store_doublecheck
? dbSlotLimit
: 0;
87 return validationPos
>= (dbEntryLimit
+ extraWork
);
/// low-level anti-padding storage class for LoadingEntry and LoadingSlot flags
class LoadingFlags
{
public:
    LoadingFlags(): state(0), anchored(0), mapped(0), finalized(0), freed(0) {}

    /* for LoadingEntry */
    uint8_t state:3; ///< current entry state (one of the LoadingEntry::State values)
    uint8_t anchored:1; ///< whether we loaded the inode slot for this entry

    /* for LoadingSlot */
    uint8_t mapped:1; ///< whether the slot was added to a mapped entry
    uint8_t finalized:1; ///< whether finalizeOrThrow() has scanned the slot
    uint8_t freed:1; ///< whether the slot was given to the map as free space
};
106 /// smart StoreEntry-level info pointer (hides anti-padding LoadingParts arrays)
110 LoadingEntry(const sfileno fileNo
, LoadingParts
&source
);
112 uint64_t &size
; ///< payload seen so far
113 uint32_t &version
; ///< DbCellHeader::version to distinguish same-URL chains
115 /// possible store entry states during index rebuild
116 typedef enum { leEmpty
= 0, leLoading
, leLoaded
, leCorrupted
, leIgnored
} State
;
118 /* LoadingFlags::state */
119 State
state() const { return static_cast<State
>(flags
.state
); }
120 void state(State aState
) const { flags
.state
= aState
; }
122 /* LoadingFlags::anchored */
123 bool anchored() const { return flags
.anchored
; }
124 void anchored(const bool beAnchored
) { flags
.anchored
= beAnchored
; }
127 LoadingFlags
&flags
; ///< entry flags (see the above accessors) are ours
130 /// smart db slot-level info pointer (hides anti-padding LoadingParts arrays)
134 LoadingSlot(const SlotId slotId
, LoadingParts
&source
);
136 /// another slot in some chain belonging to the same entry (unordered!)
137 Ipc::StoreMapSliceId
&more
;
139 /* LoadingFlags::mapped */
140 bool mapped() const { return flags
.mapped
; }
141 void mapped(const bool beMapped
) { flags
.mapped
= beMapped
; }
143 /* LoadingFlags::finalized */
144 bool finalized() const { return flags
.finalized
; }
145 void finalized(const bool beFinalized
) { flags
.finalized
= beFinalized
; }
147 /* LoadingFlags::freed */
148 bool freed() const { return flags
.freed
; }
149 void freed(const bool beFreed
) { flags
.freed
= beFreed
; }
151 bool used() const { return freed() || mapped() || more
!= -1; }
154 LoadingFlags
&flags
; ///< slot flags (see the above accessors) are ours
157 /// information about store entries being loaded from disk (and their slots)
158 /// used for identifying partially stored/loaded entries
162 using Sizes
= Ipc::StoreMapItems
<uint64_t>;
163 using Versions
= Ipc::StoreMapItems
<uint32_t>;
164 using Mores
= Ipc::StoreMapItems
<Ipc::StoreMapSliceId
>;
165 using Flags
= Ipc::StoreMapItems
<LoadingFlags
>;
167 LoadingParts(const SwapDir
&dir
, const bool resuming
);
170 // lacking copying/moving code and often too huge to copy
171 LoadingParts(LoadingParts
&&) = delete;
173 Sizes
&sizes() const { return *sizesOwner
->object(); }
174 Versions
&versions() const { return *versionsOwner
->object(); }
175 Mores
&mores() const { return *moresOwner
->object(); }
176 Flags
&flags() const { return *flagsOwner
->object(); }
179 /* Anti-padding storage. With millions of entries, padding matters! */
181 /* indexed by sfileno */
182 Sizes::Owner
*sizesOwner
; ///< LoadingEntry::size for all entries
183 Versions::Owner
*versionsOwner
; ///< LoadingEntry::version for all entries
185 /* indexed by SlotId */
186 Mores::Owner
*moresOwner
; ///< LoadingSlot::more for all slots
188 /* entry flags are indexed by sfileno; slot flags -- by SlotId */
189 Flags::Owner
*flagsOwner
; ///< all LoadingEntry and LoadingSlot flags
192 } /* namespace Rock */
196 Rock::LoadingEntry::LoadingEntry(const sfileno fileNo
, LoadingParts
&source
):
197 size(source
.sizes().at(fileNo
)),
198 version(source
.versions().at(fileNo
)),
199 flags(source
.flags().at(fileNo
))
205 Rock::LoadingSlot::LoadingSlot(const SlotId slotId
, LoadingParts
&source
):
206 more(source
.mores().at(slotId
)),
207 flags(source
.flags().at(slotId
))
214 inline typename
T::Owner
*
215 createOwner(const char *dirPath
, const char *sfx
, const int64_t limit
, const bool resuming
)
217 auto id
= Ipc::Mem::Segment::Name(SBuf(dirPath
), sfx
);
218 return resuming
? Ipc::Mem::Owner
<T
>::Old(id
.c_str()) : shm_new(T
)(id
.c_str(), limit
);
221 Rock::LoadingParts::LoadingParts(const SwapDir
&dir
, const bool resuming
):
222 sizesOwner(createOwner
<Sizes
>(dir
.path
, "rebuild_sizes", dir
.entryLimitActual(), resuming
)),
223 versionsOwner(createOwner
<Versions
>(dir
.path
, "rebuild_versions", dir
.entryLimitActual(), resuming
)),
224 moresOwner(createOwner
<Mores
>(dir
.path
, "rebuild_mores", dir
.slotLimitActual(), resuming
)),
225 flagsOwner(createOwner
<Flags
>(dir
.path
, "rebuild_flags", dir
.slotLimitActual(), resuming
))
227 assert(sizes().capacity
== versions().capacity
); // every entry has both fields
228 assert(sizes().capacity
<= mores().capacity
); // every entry needs slot(s)
229 assert(mores().capacity
== flags().capacity
); // every slot needs a set of flags
232 // other parts rely on shared memory segments being zero-initialized
233 // TODO: refactor the next slot pointer to use 0 for nil values
238 Rock::LoadingParts::~LoadingParts()
241 delete versionsOwner
;
246 /* Rock::Rebuild::Stats */
249 Rock::Rebuild::Stats::Path(const char *dirPath
)
251 return Ipc::Mem::Segment::Name(SBuf(dirPath
), "rebuild_stats");
254 Ipc::Mem::Owner
<Rock::Rebuild::Stats
>*
255 Rock::Rebuild::Stats::Init(const SwapDir
&dir
)
257 return shm_new(Stats
)(Path(dir
.path
).c_str());
261 Rock::Rebuild::Stats::completed(const SwapDir
&dir
) const
263 return DoneLoading(counts
.scancount
, dir
.slotLimitActual()) &&
264 DoneValidating(counts
.validations
, dir
.slotLimitActual(), dir
.entryLimitActual());
270 Rock::Rebuild::IsResponsible(const SwapDir
&)
272 // in SMP mode, only the disker is responsible for populating the map
273 return !UsingSmp() || IamDiskProcess();
277 Rock::Rebuild::Start(SwapDir
&dir
)
279 if (!IsResponsible(dir
)) {
280 debugs(47, 2, "not responsible for indexing cache_dir #" <<
281 dir
.index
<< " from " << dir
.filePath
);
285 const auto stats
= shm_old(Rebuild::Stats
)(Stats::Path(dir
.path
).c_str());
286 if (stats
->completed(dir
)) {
287 debugs(47, 2, "already indexed cache_dir #" <<
288 dir
.index
<< " from " << dir
.filePath
);
292 AsyncJob::Start(new Rebuild(&dir
, stats
));
296 Rock::Rebuild::Rebuild(SwapDir
*dir
, const Ipc::Mem::Pointer
<Stats
> &s
): AsyncJob("Rock::Rebuild"),
306 loadingPos(stats
->counts
.scancount
),
307 validationPos(stats
->counts
.validations
),
308 counts(stats
->counts
),
309 resuming(stats
->counts
.started())
312 dbSize
= sd
->diskOffsetLimit(); // we do not care about the trailer waste
313 dbSlotSize
= sd
->slotSize
;
314 dbEntryLimit
= sd
->entryLimitActual();
315 dbSlotLimit
= sd
->slotLimitActual();
316 assert(dbEntryLimit
<= dbSlotLimit
);
320 Rock::Rebuild::~Rebuild()
324 // normally, segments are used until the Squid instance quits,
325 // but these indexing-only segments are no longer needed
330 Rock::Rebuild::startShutdown()
332 mustStop("startShutdown");
335 /// prepares and initiates entry loading sequence
337 Rock::Rebuild::start()
339 assert(IsResponsible(*sd
));
342 debugs(47, Important(18), "Loading cache_dir #" << sd
->index
<<
343 " from " << sd
->filePath
);
345 debugs(47, Important(63), "Resuming indexing cache_dir #" << sd
->index
<<
346 " from " << sd
->filePath
<< ':' << progressDescription());
349 fd
= file_open(sd
->filePath
, O_RDONLY
| O_BINARY
);
351 failure("cannot open db", errno
);
353 char hdrBuf
[SwapDir::HeaderSize
];
354 if (read(fd
, hdrBuf
, sizeof(hdrBuf
)) != SwapDir::HeaderSize
)
355 failure("cannot read db header", errno
);
357 // slot prefix of SM_PAGE_SIZE should fit both core entry header and ours
358 assert(sizeof(DbCellHeader
) < SM_PAGE_SIZE
);
359 buf
.init(SM_PAGE_SIZE
, SM_PAGE_SIZE
);
361 dbOffset
= SwapDir::HeaderSize
+ loadingPos
* dbSlotSize
;
364 parts
= new LoadingParts(*sd
, resuming
);
366 counts
.updateStartTime(current_time
);
371 /// continues after a pause if not done
373 Rock::Rebuild::checkpoint()
376 eventAdd("Rock::Rebuild", Rock::Rebuild::Steps
, this, 0.01, 1, true);
380 Rock::Rebuild::doneLoading() const
382 return DoneLoading(loadingPos
, dbSlotLimit
);
386 Rock::Rebuild::doneValidating() const
388 return DoneValidating(validationPos
, dbSlotLimit
, dbEntryLimit
);
392 Rock::Rebuild::doneAll() const
394 return doneLoading() && doneValidating() && AsyncJob::doneAll();
398 Rock::Rebuild::Steps(void *data
)
400 // use async call to enable job call protection that time events lack
401 CallJobHere(47, 5, static_cast<Rebuild
*>(data
), Rock::Rebuild
, steps
);
405 Rock::Rebuild::steps()
416 Rock::Rebuild::loadingSteps()
418 debugs(47,5, sd
->index
<< " slot " << loadingPos
<< " at " <<
419 dbOffset
<< " <= " << dbSize
);
421 // Balance our desire to maximize the number of entries processed at once
422 // (and, hence, minimize overheads and total rebuild time) with a
423 // requirement to also process Coordinator events, disk I/Os, etc.
424 const int maxSpentMsec
= 50; // keep small: most RAM I/Os are under 1ms
425 const timeval loopStart
= current_time
;
428 while (!doneLoading()) {
430 dbOffset
+= dbSlotSize
;
434 if (counts
.scancount
% 1000 == 0)
435 storeRebuildProgress(sd
->index
, dbSlotLimit
, counts
.scancount
);
437 if (opt_foreground_rebuild
)
438 continue; // skip "few entries at a time" check below
441 const double elapsedMsec
= tvSubMsec(loopStart
, current_time
);
442 if (elapsedMsec
> maxSpentMsec
|| elapsedMsec
< 0) {
443 debugs(47, 5, "pausing after " << loaded
<< " entries in " <<
444 elapsedMsec
<< "ms; " << (elapsedMsec
/loaded
) << "ms per entry");
451 Rock::Rebuild::loadingEntry(const sfileno fileNo
)
453 Must(0 <= fileNo
&& fileNo
< dbEntryLimit
);
454 return LoadingEntry(fileNo
, *parts
);
458 Rock::Rebuild::loadingSlot(const SlotId slotId
)
460 Must(0 <= slotId
&& slotId
< dbSlotLimit
);
461 Must(slotId
<= loadingPos
); // cannot look ahead
462 return LoadingSlot(slotId
, *parts
);
466 Rock::Rebuild::loadOneSlot()
468 debugs(47,5, sd
->index
<< " slot " << loadingPos
<< " at " <<
469 dbOffset
<< " <= " << dbSize
);
471 // increment before loadingPos to avoid getting stuck at a slot
472 // in a case of crash
475 if (lseek(fd
, dbOffset
, SEEK_SET
) < 0)
476 failure("cannot seek to db entry", errno
);
480 if (!storeRebuildLoadEntry(fd
, sd
->index
, buf
, counts
))
483 const SlotId slotId
= loadingPos
;
487 if (buf
.contentSize() < static_cast<mb_size_t
>(sizeof(header
))) {
488 debugs(47, DBG_IMPORTANT
, "WARNING: cache_dir[" << sd
->index
<< "]: " <<
489 "Ignoring truncated " << buf
.contentSize() << "-byte " <<
490 "cache entry meta data at " << dbOffset
);
491 freeUnusedSlot(slotId
, true);
494 memcpy(&header
, buf
.content(), sizeof(header
));
495 if (header
.empty()) {
496 freeUnusedSlot(slotId
, false);
499 if (!header
.sane(dbSlotSize
, dbSlotLimit
)) {
500 debugs(47, DBG_IMPORTANT
, "WARNING: cache_dir[" << sd
->index
<< "]: " <<
501 "Ignoring malformed cache entry meta data at " << dbOffset
);
502 freeUnusedSlot(slotId
, true);
505 buf
.consume(sizeof(header
)); // optimize to avoid memmove()
507 useNewSlot(slotId
, header
);
510 /// parse StoreEntry basics and add them to the map, returning true on success
512 Rock::Rebuild::importEntry(Ipc::StoreMapAnchor
&anchor
, const sfileno fileno
, const DbCellHeader
&header
)
514 cache_key key
[SQUID_MD5_DIGEST_LENGTH
];
516 const uint64_t knownSize
= header
.entrySize
> 0 ?
517 header
.entrySize
: anchor
.basics
.swap_file_sz
.load();
518 if (!storeRebuildParseEntry(buf
, loadedE
, key
, counts
, knownSize
))
521 // the entry size may be unknown, but if it is known, it is authoritative
523 debugs(47, 8, "importing basics for entry " << fileno
<<
524 " inode.entrySize: " << header
.entrySize
<<
525 " swap_file_sz: " << loadedE
.swap_file_sz
);
528 // we have not validated whether all db cells for this entry were loaded
529 EBIT_CLR(anchor
.basics
.flags
, ENTRY_VALIDATED
);
537 Rock::Rebuild::validationSteps()
539 debugs(47, 5, sd
->index
<< " validating from " << validationPos
);
541 // see loadingSteps() for the rationale; TODO: avoid duplication
542 const int maxSpentMsec
= 50; // keep small: validation does not do I/O
543 const timeval loopStart
= current_time
;
545 int64_t validated
= 0;
546 while (!doneValidating()) {
547 // increment before validationPos to avoid getting stuck at a slot
548 // in a case of crash
549 ++counts
.validations
;
550 if (validationPos
< dbEntryLimit
)
551 validateOneEntry(validationPos
);
553 validateOneSlot(validationPos
- dbEntryLimit
);
557 if (validationPos
% 1000 == 0)
558 debugs(20, 2, "validated: " << validationPos
);
560 if (opt_foreground_rebuild
)
561 continue; // skip "few entries at a time" check below
564 const double elapsedMsec
= tvSubMsec(loopStart
, current_time
);
565 if (elapsedMsec
> maxSpentMsec
|| elapsedMsec
< 0) {
566 debugs(47, 5, "pausing after " << validated
<< " entries in " <<
567 elapsedMsec
<< "ms; " << (elapsedMsec
/validated
) << "ms per entry");
573 /// Either make the entry accessible to all or throw.
574 /// This method assumes it is called only when no more entry slots are expected.
576 Rock::Rebuild::finalizeOrThrow(const sfileno fileNo
, LoadingEntry
&le
)
578 // walk all map-linked slots, starting from inode, and mark each
579 Ipc::StoreMapAnchor
&anchor
= sd
->map
->writeableEntry(fileNo
);
580 Must(le
.size
> 0); // paranoid
581 uint64_t mappedSize
= 0;
582 SlotId slotId
= anchor
.start
;
583 while (slotId
>= 0 && mappedSize
< le
.size
) {
584 LoadingSlot slot
= loadingSlot(slotId
); // throws if we have not loaded that slot
585 Must(!slot
.finalized()); // no loops or stealing from other entries
586 Must(slot
.mapped()); // all our slots should be in the sd->map
587 Must(!slot
.freed()); // all our slots should still be present
588 slot
.finalized(true);
590 Ipc::StoreMapSlice
&mapSlice
= sd
->map
->writeableSlice(fileNo
, slotId
);
591 Must(mapSlice
.size
> 0); // paranoid
592 mappedSize
+= mapSlice
.size
;
593 slotId
= mapSlice
.next
;
595 /* no hodgepodge entries: one entry - one full chain and no leftovers */
597 Must(mappedSize
== le
.size
);
599 if (!anchor
.basics
.swap_file_sz
)
600 anchor
.basics
.swap_file_sz
= le
.size
;
601 EBIT_SET(anchor
.basics
.flags
, ENTRY_VALIDATED
);
602 le
.state(LoadingEntry::leLoaded
);
603 sd
->map
->closeForWriting(fileNo
);
607 /// Either make the entry accessible to all or free it.
608 /// This method must only be called when no more entry slots are expected.
610 Rock::Rebuild::finalizeOrFree(const sfileno fileNo
, LoadingEntry
&le
)
613 finalizeOrThrow(fileNo
, le
);
614 } catch (const std::exception
&ex
) {
615 freeBadEntry(fileNo
, ex
.what());
620 Rock::Rebuild::validateOneEntry(const sfileno fileNo
)
622 LoadingEntry entry
= loadingEntry(fileNo
);
623 switch (entry
.state()) {
625 case LoadingEntry::leLoading
:
626 finalizeOrFree(fileNo
, entry
);
629 case LoadingEntry::leEmpty
: // no entry hashed to this position
630 case LoadingEntry::leLoaded
: // we have already unlocked this entry
631 case LoadingEntry::leCorrupted
: // we have already removed this entry
632 case LoadingEntry::leIgnored
: // we have already discarded this entry
638 Rock::Rebuild::validateOneSlot(const SlotId slotId
)
640 const LoadingSlot slot
= loadingSlot(slotId
);
641 // there should not be any unprocessed slots left
642 Must(slot
.freed() || (slot
.mapped() && slot
.finalized()));
645 /// Marks remaining bad entry slots as free and unlocks the entry. The map
646 /// cannot do this because Loading entries may have holes in the slots chain.
648 Rock::Rebuild::freeBadEntry(const sfileno fileno
, const char *eDescription
)
650 debugs(47, 2, "cache_dir #" << sd
->index
<< ' ' << eDescription
<<
651 " entry " << fileno
<< " is ignored during rebuild");
653 LoadingEntry le
= loadingEntry(fileno
);
654 le
.state(LoadingEntry::leCorrupted
);
656 Ipc::StoreMapAnchor
&anchor
= sd
->map
->writeableEntry(fileno
);
657 assert(anchor
.start
< 0 || le
.size
> 0);
658 for (SlotId slotId
= anchor
.start
; slotId
>= 0;) {
659 const SlotId next
= loadingSlot(slotId
).more
;
660 freeSlot(slotId
, true);
664 sd
->map
->forgetWritingEntry(fileno
);
668 Rock::Rebuild::swanSong()
670 debugs(47,3, "cache_dir #" << sd
->index
<< " rebuild level: " <<
671 StoreController::store_dirs_rebuilding
);
672 storeRebuildComplete(&counts
);
676 Rock::Rebuild::failure(const char *msg
, int errNo
)
678 debugs(47,5, sd
->index
<< " slot " << loadingPos
<< " at " <<
679 dbOffset
<< " <= " << dbSize
);
682 debugs(47, DBG_CRITICAL
, "ERROR: Rock cache_dir rebuild failure: " << xstrerr(errNo
));
683 debugs(47, DBG_CRITICAL
, "Do you need to run 'squid -z' to initialize storage?");
686 fatalf("Rock cache_dir[%d] rebuild of %s failed: %s.",
687 sd
->index
, sd
->filePath
, msg
);
690 /// adds slot to the free slot index
692 Rock::Rebuild::freeSlot(const SlotId slotId
, const bool invalid
)
694 debugs(47,5, sd
->index
<< " frees slot " << slotId
);
695 LoadingSlot slot
= loadingSlot(slotId
);
696 assert(!slot
.freed());
701 //sd->unlink(fileno); leave garbage on disk, it should not hurt
704 Ipc::Mem::PageId pageId
;
705 pageId
.pool
= Ipc::Mem::PageStack::IdForSwapDirSpace(sd
->index
);
706 pageId
.number
= slotId
+1;
707 sd
->freeSlots
->push(pageId
);
710 /// freeSlot() for never-been-mapped slots
712 Rock::Rebuild::freeUnusedSlot(const SlotId slotId
, const bool invalid
)
714 LoadingSlot slot
= loadingSlot(slotId
);
715 // mapped slots must be freed via freeBadEntry() to keep the map in sync
716 assert(!slot
.mapped());
717 freeSlot(slotId
, invalid
);
720 /// adds slot to the entry chain in the map
722 Rock::Rebuild::mapSlot(const SlotId slotId
, const DbCellHeader
&header
)
724 LoadingSlot slot
= loadingSlot(slotId
);
725 assert(!slot
.mapped());
726 assert(!slot
.freed());
729 Ipc::StoreMapSlice slice
;
730 slice
.next
= header
.nextSlot
;
731 slice
.size
= header
.payloadSize
;
732 sd
->map
->importSlice(slotId
, slice
);
735 template <class SlotIdType
> // accommodates atomic and simple SlotIds.
737 Rock::Rebuild::chainSlots(SlotIdType
&from
, const SlotId to
)
739 LoadingSlot slot
= loadingSlot(to
);
740 assert(slot
.more
< 0);
741 slot
.more
= from
; // may still be unset
745 /// adds slot to an existing entry chain; caller must check that the slot
746 /// belongs to the chain it is being added to
748 Rock::Rebuild::addSlotToEntry(const sfileno fileno
, const SlotId slotId
, const DbCellHeader
&header
)
750 LoadingEntry le
= loadingEntry(fileno
);
751 Ipc::StoreMapAnchor
&anchor
= sd
->map
->writeableEntry(fileno
);
753 debugs(47,9, "adding " << slotId
<< " to entry " << fileno
);
754 // we do not need to preserve the order
756 LoadingSlot inode
= loadingSlot(anchor
.start
);
757 chainSlots(inode
.more
, slotId
);
759 chainSlots(anchor
.start
, slotId
);
762 le
.size
+= header
.payloadSize
; // must precede freeBadEntry() calls
764 if (header
.firstSlot
== slotId
) {
765 debugs(47,5, "added inode");
767 if (le
.anchored()) { // we have already added another inode slot
768 freeBadEntry(fileno
, "inode conflict");
775 if (!importEntry(anchor
, fileno
, header
)) {
776 freeBadEntry(fileno
, "corrupted metainfo");
780 // set total entry size and/or check it for consistency
781 if (const uint64_t totalSize
= header
.entrySize
) {
782 assert(totalSize
!= static_cast<uint64_t>(-1));
783 if (!anchor
.basics
.swap_file_sz
) {
784 anchor
.basics
.swap_file_sz
= totalSize
;
785 assert(anchor
.basics
.swap_file_sz
!= static_cast<uint64_t>(-1));
786 } else if (totalSize
!= anchor
.basics
.swap_file_sz
) {
787 freeBadEntry(fileno
, "size mismatch");
793 const uint64_t totalSize
= anchor
.basics
.swap_file_sz
; // may be 0/unknown
795 if (totalSize
> 0 && le
.size
> totalSize
) { // overflow
796 debugs(47, 8, "overflow: " << le
.size
<< " > " << totalSize
);
797 freeBadEntry(fileno
, "overflowing");
801 mapSlot(slotId
, header
);
802 if (totalSize
> 0 && le
.size
== totalSize
)
803 finalizeOrFree(fileno
, le
); // entry is probably fully loaded now
806 /// initialize housekeeping information for a newly accepted entry
808 Rock::Rebuild::primeNewEntry(Ipc::StoreMap::Anchor
&anchor
, const sfileno fileno
, const DbCellHeader
&header
)
810 anchor
.setKey(reinterpret_cast<const cache_key
*>(header
.key
));
811 assert(header
.firstSlot
>= 0);
812 anchor
.start
= -1; // addSlotToEntry() will set it
814 assert(anchor
.basics
.swap_file_sz
!= static_cast<uint64_t>(-1));
816 LoadingEntry le
= loadingEntry(fileno
);
817 le
.state(LoadingEntry::leLoading
);
818 le
.version
= header
.version
;
822 /// handle a slot from an entry that we have not seen before
824 Rock::Rebuild::startNewEntry(const sfileno fileno
, const SlotId slotId
, const DbCellHeader
&header
)
826 // A miss may have been stored at our fileno while we were loading other
827 // slots from disk. We ought to preserve that entry because it is fresher.
828 const bool overwriteExisting
= false;
829 if (Ipc::StoreMap::Anchor
*anchor
= sd
->map
->openForWritingAt(fileno
, overwriteExisting
)) {
830 primeNewEntry(*anchor
, fileno
, header
);
831 addSlotToEntry(fileno
, slotId
, header
); // may fail
832 assert(anchor
->basics
.swap_file_sz
!= static_cast<uint64_t>(-1));
834 // A new from-network entry is occupying our map slot; let it be, but
835 // save us from the trouble of going through the above motions again.
836 LoadingEntry le
= loadingEntry(fileno
);
837 le
.state(LoadingEntry::leIgnored
);
838 freeUnusedSlot(slotId
, false);
842 /// does the header belong to the fileno entry being loaded?
844 Rock::Rebuild::sameEntry(const sfileno fileno
, const DbCellHeader
&header
) const
846 // Header updates always result in multi-start chains and often
847 // result in multi-version chains so we can only compare the keys.
848 const Ipc::StoreMap::Anchor
&anchor
= sd
->map
->writeableEntry(fileno
);
849 return anchor
.sameKey(reinterpret_cast<const cache_key
*>(header
.key
));
852 /// handle freshly loaded (and validated) db slot header
854 Rock::Rebuild::useNewSlot(const SlotId slotId
, const DbCellHeader
&header
)
856 const cache_key
*const key
=
857 reinterpret_cast<const cache_key
*>(header
.key
);
858 const sfileno fileno
= sd
->map
->fileNoByKey(key
);
859 assert(0 <= fileno
&& fileno
< dbEntryLimit
);
861 LoadingEntry le
= loadingEntry(fileno
);
862 debugs(47,9, "entry " << fileno
<< " state: " << le
.state() << ", inode: " <<
863 header
.firstSlot
<< ", size: " << header
.payloadSize
);
865 switch (le
.state()) {
867 case LoadingEntry::leEmpty
: {
868 startNewEntry(fileno
, slotId
, header
);
872 case LoadingEntry::leLoading
: {
873 if (sameEntry(fileno
, header
)) {
874 addSlotToEntry(fileno
, slotId
, header
); // may fail
876 // either the loading chain or this slot is stale;
877 // be conservative and ignore both (and any future ones)
878 freeBadEntry(fileno
, "duplicated");
879 freeUnusedSlot(slotId
, true);
885 case LoadingEntry::leLoaded
: {
886 // either the previously loaded chain or this slot is stale;
887 // be conservative and ignore both (and any future ones)
888 le
.state(LoadingEntry::leCorrupted
);
889 sd
->map
->freeEntry(fileno
); // may not be immediately successful
890 freeUnusedSlot(slotId
, true);
895 case LoadingEntry::leCorrupted
: {
896 // previously seen slots messed things up so we must ignore this one
897 freeUnusedSlot(slotId
, true);
901 case LoadingEntry::leIgnored
: {
902 // already replaced by a fresher or colliding from-network entry
903 freeUnusedSlot(slotId
, false);
910 Rock::Rebuild::progressDescription() const
914 str
<< Debug::Extra
<< "slots loaded: " << Progress(loadingPos
, dbSlotLimit
);
916 const auto validatingEntries
= validationPos
< dbEntryLimit
;
917 const auto entriesValidated
= validatingEntries
? validationPos
: dbEntryLimit
;
918 str
<< Debug::Extra
<< "entries validated: " << Progress(entriesValidated
, dbEntryLimit
);
919 if (opt_store_doublecheck
) {
920 const auto slotsValidated
= validatingEntries
? 0 : (validationPos
- dbEntryLimit
);
921 str
<< Debug::Extra
<< "slots validated: " << Progress(slotsValidated
, dbSlotLimit
);