]>
git.ipfire.org Git - thirdparty/squid.git/blob - src/fs/rock/RockRebuild.cc
2 * Copyright (C) 1996-2021 The Squid Software Foundation and contributors
4 * Squid software is distributed under GPLv2+ license and includes
5 * contributions from numerous individuals and organizations.
6 * Please see the COPYING and CONTRIBUTORS files for details.
9 /* DEBUG: section 79 Disk IO Routines */
12 #include "base/AsyncJobCalls.h"
13 #include "DebugMessages.h"
14 #include "fs/rock/RockDbCell.h"
15 #include "fs/rock/RockRebuild.h"
16 #include "fs/rock/RockSwapDir.h"
20 #include "sbuf/Stream.h"
21 #include "SquidTime.h"
27 CBDATA_NAMESPACED_CLASS_INIT(Rock
, Rebuild
);
30 \defgroup RockFsRebuild Rock Store Rebuild
33 \section RockFsRebuildOverview Overview
 * Several layers of information are manipulated during the rebuild:
36 * Store Entry: Response message plus all the metainformation associated with
37 * it. Identified by store key. At any given time, from Squid point
38 * of view, there is only one entry with a given key, but several
39 * different entries with the same key can be observed in any historical
40 * archive (such as an access log or a store database).
42 * Slot chain: A sequence of db slots representing a Store Entry state at
43 * some point in time. Identified by key+version combination. Due to
44 * transaction aborts, crashes, and idle periods, some chains may contain
45 * incomplete or stale information. We assume that no two different chains
46 * have the same key and version. If that assumption fails, we may serve a
47 * hodgepodge entry during rebuild, until "extra" slots are loaded/noticed.
49 * iNode: The very first db slot in an entry slot chain. This slot contains
50 * at least the beginning of Store Entry metadata, but most 32KB inodes contain
51 * the entire metadata, HTTP headers, and HTTP body.
53 * Db slot: A db record containing a piece of a single store entry and linked
54 * to other slots with the same key and version fields, forming a chain.
55 * Slots are identified by their absolute position in the database file,
56 * which is naturally unique.
58 * When information from the newly loaded db slot contradicts the entry-level
59 * information collected so far (e.g., the versions do not match or the total
60 * chain size after the slot contribution exceeds the expected number), the
61 * whole entry (and not just the chain or the slot!) is declared corrupted.
63 * Why invalidate the whole entry? Rock Store is written for high-load
 * environments with large caches, where there are usually very few idle slots
65 * in the database. A space occupied by a purged entry is usually immediately
66 * reclaimed. A Squid crash or a transaction abort is rather unlikely to
67 * leave a relatively large number of stale slots in the database. Thus, the
68 * number of potentially corrupted entries is relatively small. On the other
 * hand, the damage from serving a single hodgepodge entry may be significant
70 * to the user. In such an environment, invalidating the whole entry has
71 * negligible performance impact but saves us from high-damage bugs.
/// whether the low-level scan has passed the last db slot to load
/// \param loadingPos index of the next slot to load
/// \param dbSlotLimit total number of slots in the db
static bool
DoneLoading(const int64_t loadingPos, const int64_t dbSlotLimit)
{
    return loadingPos >= dbSlotLimit;
}
84 DoneValidating(const int64_t validationPos
, const int64_t dbSlotLimit
, const int64_t dbEntryLimit
)
86 // paranoid slot checking is only enabled with squid -S
87 const auto extraWork
= opt_store_doublecheck
? dbSlotLimit
: 0;
88 return validationPos
>= (dbEntryLimit
+ extraWork
);
/// low-level anti-padding storage class for LoadingEntry and LoadingSlot flags
class LoadingFlags
{
public:
    LoadingFlags(): state(0), anchored(0), mapped(0), finalized(0), freed(0) {}

    /* for LoadingEntry */
    uint8_t state:3; ///< current entry state (one of the LoadingEntry::State values)
    uint8_t anchored:1; ///< whether we loaded the inode slot for this entry

    /* for LoadingSlot */
    uint8_t mapped:1; ///< whether the slot was added to a mapped entry
    uint8_t finalized:1; ///< whether finalizeOrThrow() has scanned the slot
    uint8_t freed:1; ///< whether the slot was given to the map as free space
};
107 /// smart StoreEntry-level info pointer (hides anti-padding LoadingParts arrays)
111 LoadingEntry(const sfileno fileNo
, LoadingParts
&source
);
113 uint64_t &size
; ///< payload seen so far
114 uint32_t &version
; ///< DbCellHeader::version to distinguish same-URL chains
116 /// possible store entry states during index rebuild
117 typedef enum { leEmpty
= 0, leLoading
, leLoaded
, leCorrupted
, leIgnored
} State
;
119 /* LoadingFlags::state */
120 State
state() const { return static_cast<State
>(flags
.state
); }
121 void state(State aState
) const { flags
.state
= aState
; }
123 /* LoadingFlags::anchored */
124 bool anchored() const { return flags
.anchored
; }
125 void anchored(const bool beAnchored
) { flags
.anchored
= beAnchored
; }
128 LoadingFlags
&flags
; ///< entry flags (see the above accessors) are ours
131 /// smart db slot-level info pointer (hides anti-padding LoadingParts arrays)
135 LoadingSlot(const SlotId slotId
, LoadingParts
&source
);
137 /// another slot in some chain belonging to the same entry (unordered!)
138 Ipc::StoreMapSliceId
&more
;
140 /* LoadingFlags::mapped */
141 bool mapped() const { return flags
.mapped
; }
142 void mapped(const bool beMapped
) { flags
.mapped
= beMapped
; }
144 /* LoadingFlags::finalized */
145 bool finalized() const { return flags
.finalized
; }
146 void finalized(const bool beFinalized
) { flags
.finalized
= beFinalized
; }
148 /* LoadingFlags::freed */
149 bool freed() const { return flags
.freed
; }
150 void freed(const bool beFreed
) { flags
.freed
= beFreed
; }
152 bool used() const { return freed() || mapped() || more
!= -1; }
155 LoadingFlags
&flags
; ///< slot flags (see the above accessors) are ours
158 /// information about store entries being loaded from disk (and their slots)
159 /// used for identifying partially stored/loaded entries
163 using Sizes
= Ipc::StoreMapItems
<uint64_t>;
164 using Versions
= Ipc::StoreMapItems
<uint32_t>;
165 using Mores
= Ipc::StoreMapItems
<Ipc::StoreMapSliceId
>;
166 using Flags
= Ipc::StoreMapItems
<LoadingFlags
>;
168 LoadingParts(const SwapDir
&dir
, const bool resuming
);
171 // lacking copying/moving code and often too huge to copy
172 LoadingParts(LoadingParts
&&) = delete;
174 Sizes
&sizes() const { return *sizesOwner
->object(); }
175 Versions
&versions() const { return *versionsOwner
->object(); }
176 Mores
&mores() const { return *moresOwner
->object(); }
177 Flags
&flags() const { return *flagsOwner
->object(); }
180 /* Anti-padding storage. With millions of entries, padding matters! */
182 /* indexed by sfileno */
183 Sizes::Owner
*sizesOwner
; ///< LoadingEntry::size for all entries
184 Versions::Owner
*versionsOwner
; ///< LoadingEntry::version for all entries
186 /* indexed by SlotId */
187 Mores::Owner
*moresOwner
; ///< LoadingSlot::more for all slots
189 /* entry flags are indexed by sfileno; slot flags -- by SlotId */
190 Flags::Owner
*flagsOwner
; ///< all LoadingEntry and LoadingSlot flags
193 } /* namespace Rock */
197 Rock::LoadingEntry::LoadingEntry(const sfileno fileNo
, LoadingParts
&source
):
198 size(source
.sizes().at(fileNo
)),
199 version(source
.versions().at(fileNo
)),
200 flags(source
.flags().at(fileNo
))
206 Rock::LoadingSlot::LoadingSlot(const SlotId slotId
, LoadingParts
&source
):
207 more(source
.mores().at(slotId
)),
208 flags(source
.flags().at(slotId
))
215 inline typename
T::Owner
*
216 createOwner(const char *dirPath
, const char *sfx
, const int64_t limit
, const bool resuming
)
218 auto id
= Ipc::Mem::Segment::Name(SBuf(dirPath
), sfx
);
219 return resuming
? Ipc::Mem::Owner
<T
>::Old(id
.c_str()) : shm_new(T
)(id
.c_str(), limit
);
222 Rock::LoadingParts::LoadingParts(const SwapDir
&dir
, const bool resuming
):
223 sizesOwner(createOwner
<Sizes
>(dir
.path
, "rebuild_sizes", dir
.entryLimitActual(), resuming
)),
224 versionsOwner(createOwner
<Versions
>(dir
.path
, "rebuild_versions", dir
.entryLimitActual(), resuming
)),
225 moresOwner(createOwner
<Mores
>(dir
.path
, "rebuild_mores", dir
.slotLimitActual(), resuming
)),
226 flagsOwner(createOwner
<Flags
>(dir
.path
, "rebuild_flags", dir
.slotLimitActual(), resuming
))
228 assert(sizes().capacity
== versions().capacity
); // every entry has both fields
229 assert(sizes().capacity
<= mores().capacity
); // every entry needs slot(s)
230 assert(mores().capacity
== flags().capacity
); // every slot needs a set of flags
233 // other parts rely on shared memory segments being zero-initialized
234 // TODO: refactor the next slot pointer to use 0 for nil values
239 Rock::LoadingParts::~LoadingParts()
242 delete versionsOwner
;
247 /* Rock::Rebuild::Stats */
250 Rock::Rebuild::Stats::Path(const char *dirPath
)
252 return Ipc::Mem::Segment::Name(SBuf(dirPath
), "rebuild_stats");
255 Ipc::Mem::Owner
<Rock::Rebuild::Stats
>*
256 Rock::Rebuild::Stats::Init(const SwapDir
&dir
)
258 return shm_new(Stats
)(Path(dir
.path
).c_str());
262 Rock::Rebuild::Stats::completed(const SwapDir
&dir
) const
264 return DoneLoading(counts
.scancount
, dir
.slotLimitActual()) &&
265 DoneValidating(counts
.validations
, dir
.slotLimitActual(), dir
.entryLimitActual());
271 Rock::Rebuild::IsResponsible(const SwapDir
&dir
)
273 // in SMP mode, only the disker is responsible for populating the map
274 return !UsingSmp() || IamDiskProcess();
278 Rock::Rebuild::Start(SwapDir
&dir
)
280 if (!IsResponsible(dir
)) {
281 debugs(47, 2, "not responsible for indexing cache_dir #" <<
282 dir
.index
<< " from " << dir
.filePath
);
286 const auto stats
= shm_old(Rebuild::Stats
)(Stats::Path(dir
.path
).c_str());
287 if (stats
->completed(dir
)) {
288 debugs(47, 2, "already indexed cache_dir #" <<
289 dir
.index
<< " from " << dir
.filePath
);
293 Must(AsyncJob::Start(new Rebuild(&dir
, stats
)));
297 Rock::Rebuild::Rebuild(SwapDir
*dir
, const Ipc::Mem::Pointer
<Stats
> &s
): AsyncJob("Rock::Rebuild"),
307 loadingPos(stats
->counts
.scancount
),
308 validationPos(stats
->counts
.validations
),
309 counts(stats
->counts
),
310 resuming(stats
->counts
.started())
313 dbSize
= sd
->diskOffsetLimit(); // we do not care about the trailer waste
314 dbSlotSize
= sd
->slotSize
;
315 dbEntryLimit
= sd
->entryLimitActual();
316 dbSlotLimit
= sd
->slotLimitActual();
317 assert(dbEntryLimit
<= dbSlotLimit
);
321 Rock::Rebuild::~Rebuild()
325 // normally, segments are used until the Squid instance quits,
326 // but these indexing-only segments are no longer needed
331 Rock::Rebuild::startShutdown()
333 mustStop("startShutdown");
336 /// prepares and initiates entry loading sequence
338 Rock::Rebuild::start()
340 assert(IsResponsible(*sd
));
343 debugs(47, Important(18), "Loading cache_dir #" << sd
->index
<<
344 " from " << sd
->filePath
);
346 debugs(47, Important(63), "Resuming indexing cache_dir #" << sd
->index
<<
347 " from " << sd
->filePath
<< ':' << progressDescription());
350 fd
= file_open(sd
->filePath
, O_RDONLY
| O_BINARY
);
352 failure("cannot open db", errno
);
354 char hdrBuf
[SwapDir::HeaderSize
];
355 if (read(fd
, hdrBuf
, sizeof(hdrBuf
)) != SwapDir::HeaderSize
)
356 failure("cannot read db header", errno
);
358 // slot prefix of SM_PAGE_SIZE should fit both core entry header and ours
359 assert(sizeof(DbCellHeader
) < SM_PAGE_SIZE
);
360 buf
.init(SM_PAGE_SIZE
, SM_PAGE_SIZE
);
362 dbOffset
= SwapDir::HeaderSize
+ loadingPos
* dbSlotSize
;
365 parts
= new LoadingParts(*sd
, resuming
);
367 counts
.updateStartTime(current_time
);
372 /// continues after a pause if not done
374 Rock::Rebuild::checkpoint()
377 eventAdd("Rock::Rebuild", Rock::Rebuild::Steps
, this, 0.01, 1, true);
381 Rock::Rebuild::doneLoading() const
383 return DoneLoading(loadingPos
, dbSlotLimit
);
387 Rock::Rebuild::doneValidating() const
389 return DoneValidating(validationPos
, dbSlotLimit
, dbEntryLimit
);
393 Rock::Rebuild::doneAll() const
395 return doneLoading() && doneValidating() && AsyncJob::doneAll();
399 Rock::Rebuild::Steps(void *data
)
401 // use async call to enable job call protection that time events lack
402 CallJobHere(47, 5, static_cast<Rebuild
*>(data
), Rock::Rebuild
, steps
);
406 Rock::Rebuild::steps()
417 Rock::Rebuild::loadingSteps()
419 debugs(47,5, sd
->index
<< " slot " << loadingPos
<< " at " <<
420 dbOffset
<< " <= " << dbSize
);
422 // Balance our desire to maximize the number of entries processed at once
423 // (and, hence, minimize overheads and total rebuild time) with a
424 // requirement to also process Coordinator events, disk I/Os, etc.
425 const int maxSpentMsec
= 50; // keep small: most RAM I/Os are under 1ms
426 const timeval loopStart
= current_time
;
429 while (!doneLoading()) {
431 dbOffset
+= dbSlotSize
;
435 if (counts
.scancount
% 1000 == 0)
436 storeRebuildProgress(sd
->index
, dbSlotLimit
, counts
.scancount
);
438 if (opt_foreground_rebuild
)
439 continue; // skip "few entries at a time" check below
442 const double elapsedMsec
= tvSubMsec(loopStart
, current_time
);
443 if (elapsedMsec
> maxSpentMsec
|| elapsedMsec
< 0) {
444 debugs(47, 5, HERE
<< "pausing after " << loaded
<< " entries in " <<
445 elapsedMsec
<< "ms; " << (elapsedMsec
/loaded
) << "ms per entry");
452 Rock::Rebuild::loadingEntry(const sfileno fileNo
)
454 Must(0 <= fileNo
&& fileNo
< dbEntryLimit
);
455 return LoadingEntry(fileNo
, *parts
);
459 Rock::Rebuild::loadingSlot(const SlotId slotId
)
461 Must(0 <= slotId
&& slotId
< dbSlotLimit
);
462 Must(slotId
<= loadingPos
); // cannot look ahead
463 return LoadingSlot(slotId
, *parts
);
467 Rock::Rebuild::loadOneSlot()
469 debugs(47,5, sd
->index
<< " slot " << loadingPos
<< " at " <<
470 dbOffset
<< " <= " << dbSize
);
472 // increment before loadingPos to avoid getting stuck at a slot
473 // in a case of crash
476 if (lseek(fd
, dbOffset
, SEEK_SET
) < 0)
477 failure("cannot seek to db entry", errno
);
481 if (!storeRebuildLoadEntry(fd
, sd
->index
, buf
, counts
))
484 const SlotId slotId
= loadingPos
;
488 if (buf
.contentSize() < static_cast<mb_size_t
>(sizeof(header
))) {
489 debugs(47, DBG_IMPORTANT
, "WARNING: cache_dir[" << sd
->index
<< "]: " <<
490 "Ignoring truncated " << buf
.contentSize() << "-byte " <<
491 "cache entry meta data at " << dbOffset
);
492 freeUnusedSlot(slotId
, true);
495 memcpy(&header
, buf
.content(), sizeof(header
));
496 if (header
.empty()) {
497 freeUnusedSlot(slotId
, false);
500 if (!header
.sane(dbSlotSize
, dbSlotLimit
)) {
501 debugs(47, DBG_IMPORTANT
, "WARNING: cache_dir[" << sd
->index
<< "]: " <<
502 "Ignoring malformed cache entry meta data at " << dbOffset
);
503 freeUnusedSlot(slotId
, true);
506 buf
.consume(sizeof(header
)); // optimize to avoid memmove()
508 useNewSlot(slotId
, header
);
511 /// parse StoreEntry basics and add them to the map, returning true on success
513 Rock::Rebuild::importEntry(Ipc::StoreMapAnchor
&anchor
, const sfileno fileno
, const DbCellHeader
&header
)
515 cache_key key
[SQUID_MD5_DIGEST_LENGTH
];
517 const uint64_t knownSize
= header
.entrySize
> 0 ?
518 header
.entrySize
: anchor
.basics
.swap_file_sz
.load();
519 if (!storeRebuildParseEntry(buf
, loadedE
, key
, counts
, knownSize
))
522 // the entry size may be unknown, but if it is known, it is authoritative
524 debugs(47, 8, "importing basics for entry " << fileno
<<
525 " inode.entrySize: " << header
.entrySize
<<
526 " swap_file_sz: " << loadedE
.swap_file_sz
);
529 // we have not validated whether all db cells for this entry were loaded
530 EBIT_CLR(anchor
.basics
.flags
, ENTRY_VALIDATED
);
538 Rock::Rebuild::validationSteps()
540 debugs(47, 5, sd
->index
<< " validating from " << validationPos
);
542 // see loadingSteps() for the rationale; TODO: avoid duplication
543 const int maxSpentMsec
= 50; // keep small: validation does not do I/O
544 const timeval loopStart
= current_time
;
546 int64_t validated
= 0;
547 while (!doneValidating()) {
548 // increment before validationPos to avoid getting stuck at a slot
549 // in a case of crash
550 ++counts
.validations
;
551 if (validationPos
< dbEntryLimit
)
552 validateOneEntry(validationPos
);
554 validateOneSlot(validationPos
- dbEntryLimit
);
558 if (validationPos
% 1000 == 0)
559 debugs(20, 2, "validated: " << validationPos
);
561 if (opt_foreground_rebuild
)
562 continue; // skip "few entries at a time" check below
565 const double elapsedMsec
= tvSubMsec(loopStart
, current_time
);
566 if (elapsedMsec
> maxSpentMsec
|| elapsedMsec
< 0) {
567 debugs(47, 5, "pausing after " << validated
<< " entries in " <<
568 elapsedMsec
<< "ms; " << (elapsedMsec
/validated
) << "ms per entry");
574 /// Either make the entry accessible to all or throw.
575 /// This method assumes it is called only when no more entry slots are expected.
577 Rock::Rebuild::finalizeOrThrow(const sfileno fileNo
, LoadingEntry
&le
)
579 // walk all map-linked slots, starting from inode, and mark each
580 Ipc::StoreMapAnchor
&anchor
= sd
->map
->writeableEntry(fileNo
);
581 Must(le
.size
> 0); // paranoid
582 uint64_t mappedSize
= 0;
583 SlotId slotId
= anchor
.start
;
584 while (slotId
>= 0 && mappedSize
< le
.size
) {
585 LoadingSlot slot
= loadingSlot(slotId
); // throws if we have not loaded that slot
586 Must(!slot
.finalized()); // no loops or stealing from other entries
587 Must(slot
.mapped()); // all our slots should be in the sd->map
588 Must(!slot
.freed()); // all our slots should still be present
589 slot
.finalized(true);
591 Ipc::StoreMapSlice
&mapSlice
= sd
->map
->writeableSlice(fileNo
, slotId
);
592 Must(mapSlice
.size
> 0); // paranoid
593 mappedSize
+= mapSlice
.size
;
594 slotId
= mapSlice
.next
;
596 /* no hodgepodge entries: one entry - one full chain and no leftovers */
598 Must(mappedSize
== le
.size
);
600 if (!anchor
.basics
.swap_file_sz
)
601 anchor
.basics
.swap_file_sz
= le
.size
;
602 EBIT_SET(anchor
.basics
.flags
, ENTRY_VALIDATED
);
603 le
.state(LoadingEntry::leLoaded
);
604 sd
->map
->closeForWriting(fileNo
);
608 /// Either make the entry accessible to all or free it.
609 /// This method must only be called when no more entry slots are expected.
611 Rock::Rebuild::finalizeOrFree(const sfileno fileNo
, LoadingEntry
&le
)
614 finalizeOrThrow(fileNo
, le
);
615 } catch (const std::exception
&ex
) {
616 freeBadEntry(fileNo
, ex
.what());
621 Rock::Rebuild::validateOneEntry(const sfileno fileNo
)
623 LoadingEntry entry
= loadingEntry(fileNo
);
624 switch (entry
.state()) {
626 case LoadingEntry::leLoading
:
627 finalizeOrFree(fileNo
, entry
);
630 case LoadingEntry::leEmpty
: // no entry hashed to this position
631 case LoadingEntry::leLoaded
: // we have already unlocked this entry
632 case LoadingEntry::leCorrupted
: // we have already removed this entry
633 case LoadingEntry::leIgnored
: // we have already discarded this entry
639 Rock::Rebuild::validateOneSlot(const SlotId slotId
)
641 const LoadingSlot slot
= loadingSlot(slotId
);
642 // there should not be any unprocessed slots left
643 Must(slot
.freed() || (slot
.mapped() && slot
.finalized()));
646 /// Marks remaining bad entry slots as free and unlocks the entry. The map
647 /// cannot do this because Loading entries may have holes in the slots chain.
649 Rock::Rebuild::freeBadEntry(const sfileno fileno
, const char *eDescription
)
651 debugs(47, 2, "cache_dir #" << sd
->index
<< ' ' << eDescription
<<
652 " entry " << fileno
<< " is ignored during rebuild");
654 LoadingEntry le
= loadingEntry(fileno
);
655 le
.state(LoadingEntry::leCorrupted
);
657 Ipc::StoreMapAnchor
&anchor
= sd
->map
->writeableEntry(fileno
);
658 assert(anchor
.start
< 0 || le
.size
> 0);
659 for (SlotId slotId
= anchor
.start
; slotId
>= 0;) {
660 const SlotId next
= loadingSlot(slotId
).more
;
661 freeSlot(slotId
, true);
665 sd
->map
->forgetWritingEntry(fileno
);
669 Rock::Rebuild::swanSong()
671 debugs(47,3, HERE
<< "cache_dir #" << sd
->index
<< " rebuild level: " <<
672 StoreController::store_dirs_rebuilding
);
673 storeRebuildComplete(&counts
);
677 Rock::Rebuild::failure(const char *msg
, int errNo
)
679 debugs(47,5, sd
->index
<< " slot " << loadingPos
<< " at " <<
680 dbOffset
<< " <= " << dbSize
);
683 debugs(47, DBG_CRITICAL
, "ERROR: Rock cache_dir rebuild failure: " << xstrerr(errNo
));
684 debugs(47, DBG_CRITICAL
, "Do you need to run 'squid -z' to initialize storage?");
687 fatalf("Rock cache_dir[%d] rebuild of %s failed: %s.",
688 sd
->index
, sd
->filePath
, msg
);
691 /// adds slot to the free slot index
693 Rock::Rebuild::freeSlot(const SlotId slotId
, const bool invalid
)
695 debugs(47,5, sd
->index
<< " frees slot " << slotId
);
696 LoadingSlot slot
= loadingSlot(slotId
);
697 assert(!slot
.freed());
702 //sd->unlink(fileno); leave garbage on disk, it should not hurt
705 Ipc::Mem::PageId pageId
;
706 pageId
.pool
= Ipc::Mem::PageStack::IdForSwapDirSpace(sd
->index
);
707 pageId
.number
= slotId
+1;
708 sd
->freeSlots
->push(pageId
);
711 /// freeSlot() for never-been-mapped slots
713 Rock::Rebuild::freeUnusedSlot(const SlotId slotId
, const bool invalid
)
715 LoadingSlot slot
= loadingSlot(slotId
);
716 // mapped slots must be freed via freeBadEntry() to keep the map in sync
717 assert(!slot
.mapped());
718 freeSlot(slotId
, invalid
);
721 /// adds slot to the entry chain in the map
723 Rock::Rebuild::mapSlot(const SlotId slotId
, const DbCellHeader
&header
)
725 LoadingSlot slot
= loadingSlot(slotId
);
726 assert(!slot
.mapped());
727 assert(!slot
.freed());
730 Ipc::StoreMapSlice slice
;
731 slice
.next
= header
.nextSlot
;
732 slice
.size
= header
.payloadSize
;
733 sd
->map
->importSlice(slotId
, slice
);
736 template <class SlotIdType
> // accommodates atomic and simple SlotIds.
738 Rock::Rebuild::chainSlots(SlotIdType
&from
, const SlotId to
)
740 LoadingSlot slot
= loadingSlot(to
);
741 assert(slot
.more
< 0);
742 slot
.more
= from
; // may still be unset
746 /// adds slot to an existing entry chain; caller must check that the slot
747 /// belongs to the chain it is being added to
749 Rock::Rebuild::addSlotToEntry(const sfileno fileno
, const SlotId slotId
, const DbCellHeader
&header
)
751 LoadingEntry le
= loadingEntry(fileno
);
752 Ipc::StoreMapAnchor
&anchor
= sd
->map
->writeableEntry(fileno
);
754 debugs(47,9, "adding " << slotId
<< " to entry " << fileno
);
755 // we do not need to preserve the order
757 LoadingSlot inode
= loadingSlot(anchor
.start
);
758 chainSlots(inode
.more
, slotId
);
760 chainSlots(anchor
.start
, slotId
);
763 le
.size
+= header
.payloadSize
; // must precede freeBadEntry() calls
765 if (header
.firstSlot
== slotId
) {
766 debugs(47,5, "added inode");
768 if (le
.anchored()) { // we have already added another inode slot
769 freeBadEntry(fileno
, "inode conflict");
776 if (!importEntry(anchor
, fileno
, header
)) {
777 freeBadEntry(fileno
, "corrupted metainfo");
781 // set total entry size and/or check it for consistency
782 if (const uint64_t totalSize
= header
.entrySize
) {
783 assert(totalSize
!= static_cast<uint64_t>(-1));
784 if (!anchor
.basics
.swap_file_sz
) {
785 anchor
.basics
.swap_file_sz
= totalSize
;
786 assert(anchor
.basics
.swap_file_sz
!= static_cast<uint64_t>(-1));
787 } else if (totalSize
!= anchor
.basics
.swap_file_sz
) {
788 freeBadEntry(fileno
, "size mismatch");
794 const uint64_t totalSize
= anchor
.basics
.swap_file_sz
; // may be 0/unknown
796 if (totalSize
> 0 && le
.size
> totalSize
) { // overflow
797 debugs(47, 8, "overflow: " << le
.size
<< " > " << totalSize
);
798 freeBadEntry(fileno
, "overflowing");
802 mapSlot(slotId
, header
);
803 if (totalSize
> 0 && le
.size
== totalSize
)
804 finalizeOrFree(fileno
, le
); // entry is probably fully loaded now
807 /// initialize housekeeping information for a newly accepted entry
809 Rock::Rebuild::primeNewEntry(Ipc::StoreMap::Anchor
&anchor
, const sfileno fileno
, const DbCellHeader
&header
)
811 anchor
.setKey(reinterpret_cast<const cache_key
*>(header
.key
));
812 assert(header
.firstSlot
>= 0);
813 anchor
.start
= -1; // addSlotToEntry() will set it
815 assert(anchor
.basics
.swap_file_sz
!= static_cast<uint64_t>(-1));
817 LoadingEntry le
= loadingEntry(fileno
);
818 le
.state(LoadingEntry::leLoading
);
819 le
.version
= header
.version
;
823 /// handle a slot from an entry that we have not seen before
825 Rock::Rebuild::startNewEntry(const sfileno fileno
, const SlotId slotId
, const DbCellHeader
&header
)
827 // A miss may have been stored at our fileno while we were loading other
828 // slots from disk. We ought to preserve that entry because it is fresher.
829 const bool overwriteExisting
= false;
830 if (Ipc::StoreMap::Anchor
*anchor
= sd
->map
->openForWritingAt(fileno
, overwriteExisting
)) {
831 primeNewEntry(*anchor
, fileno
, header
);
832 addSlotToEntry(fileno
, slotId
, header
); // may fail
833 assert(anchor
->basics
.swap_file_sz
!= static_cast<uint64_t>(-1));
835 // A new from-network entry is occupying our map slot; let it be, but
836 // save us from the trouble of going through the above motions again.
837 LoadingEntry le
= loadingEntry(fileno
);
838 le
.state(LoadingEntry::leIgnored
);
839 freeUnusedSlot(slotId
, false);
843 /// does the header belong to the fileno entry being loaded?
845 Rock::Rebuild::sameEntry(const sfileno fileno
, const DbCellHeader
&header
) const
847 // Header updates always result in multi-start chains and often
848 // result in multi-version chains so we can only compare the keys.
849 const Ipc::StoreMap::Anchor
&anchor
= sd
->map
->writeableEntry(fileno
);
850 return anchor
.sameKey(reinterpret_cast<const cache_key
*>(header
.key
));
853 /// handle freshly loaded (and validated) db slot header
855 Rock::Rebuild::useNewSlot(const SlotId slotId
, const DbCellHeader
&header
)
857 const cache_key
*const key
=
858 reinterpret_cast<const cache_key
*>(header
.key
);
859 const sfileno fileno
= sd
->map
->fileNoByKey(key
);
860 assert(0 <= fileno
&& fileno
< dbEntryLimit
);
862 LoadingEntry le
= loadingEntry(fileno
);
863 debugs(47,9, "entry " << fileno
<< " state: " << le
.state() << ", inode: " <<
864 header
.firstSlot
<< ", size: " << header
.payloadSize
);
866 switch (le
.state()) {
868 case LoadingEntry::leEmpty
: {
869 startNewEntry(fileno
, slotId
, header
);
873 case LoadingEntry::leLoading
: {
874 if (sameEntry(fileno
, header
)) {
875 addSlotToEntry(fileno
, slotId
, header
); // may fail
877 // either the loading chain or this slot is stale;
878 // be conservative and ignore both (and any future ones)
879 freeBadEntry(fileno
, "duplicated");
880 freeUnusedSlot(slotId
, true);
886 case LoadingEntry::leLoaded
: {
887 // either the previously loaded chain or this slot is stale;
888 // be conservative and ignore both (and any future ones)
889 le
.state(LoadingEntry::leCorrupted
);
890 sd
->map
->freeEntry(fileno
); // may not be immediately successful
891 freeUnusedSlot(slotId
, true);
896 case LoadingEntry::leCorrupted
: {
897 // previously seen slots messed things up so we must ignore this one
898 freeUnusedSlot(slotId
, true);
902 case LoadingEntry::leIgnored
: {
903 // already replaced by a fresher or colliding from-network entry
904 freeUnusedSlot(slotId
, false);
911 Rock::Rebuild::progressDescription() const
915 str
<< Debug::Extra
<< "slots loaded: " << Progress(loadingPos
, dbSlotLimit
);
917 const auto validatingEntries
= validationPos
< dbEntryLimit
;
918 const auto entriesValidated
= validatingEntries
? validationPos
: dbEntryLimit
;
919 str
<< Debug::Extra
<< "entries validated: " << Progress(entriesValidated
, dbEntryLimit
);
920 if (opt_store_doublecheck
) {
921 const auto slotsValidated
= validatingEntries
? 0 : (validationPos
- dbEntryLimit
);
922 str
<< Debug::Extra
<< "slots validated: " << Progress(slotsValidated
, dbSlotLimit
);