/*
 * Copyright (C) 1996-2022 The Squid Software Foundation and contributors
 *
 * Squid software is distributed under GPLv2+ license and includes
 * contributions from numerous individuals and organizations.
 * Please see the COPYING and CONTRIBUTORS files for details.
 */

/* DEBUG: section 79    Disk IO Routines */

#include "squid.h"
#include "base/AsyncJobCalls.h"
#include "debug/Messages.h"
#include "fs/rock/RockDbCell.h"
#include "fs/rock/RockRebuild.h"
#include "fs/rock/RockSwapDir.h"
#include "fs_io.h"
#include "globals.h"
#include "md5.h"
#include "sbuf/Stream.h"
#include "Store.h"
#include "tools.h"

#include <cerrno>

CBDATA_NAMESPACED_CLASS_INIT(Rock, Rebuild);

/**
 \defgroup RockFsRebuild Rock Store Rebuild
 \ingroup Filesystems
 *
 \section RockFsRebuildOverview Overview
 * Several layers of information are manipulated during the rebuild:
 \par
 * Store Entry: Response message plus all the metainformation associated with
 * it. Identified by store key. At any given time, from Squid point
 * of view, there is only one entry with a given key, but several
 * different entries with the same key can be observed in any historical
 * archive (such as an access log or a store database).
 \par
 * Slot chain: A sequence of db slots representing a Store Entry state at
 * some point in time. Identified by key+version combination. Due to
 * transaction aborts, crashes, and idle periods, some chains may contain
 * incomplete or stale information. We assume that no two different chains
 * have the same key and version. If that assumption fails, we may serve a
 * hodgepodge entry during rebuild, until "extra" slots are loaded/noticed.
 \par
 * iNode: The very first db slot in an entry slot chain. This slot contains
 * at least the beginning of Store Entry metadata, but most 32KB inodes contain
 * the entire metadata, HTTP headers, and HTTP body.
 \par
 * Db slot: A db record containing a piece of a single store entry and linked
 * to other slots with the same key and version fields, forming a chain.
 * Slots are identified by their absolute position in the database file,
 * which is naturally unique.
 \par
 * When information from the newly loaded db slot contradicts the entry-level
 * information collected so far (e.g., the versions do not match or the total
 * chain size after the slot contribution exceeds the expected number), the
 * whole entry (and not just the chain or the slot!) is declared corrupted.
 \par
 * Why invalidate the whole entry? Rock Store is written for high-load
 * environments with large caches, where there are usually very few idle slots
 * in the database. A space occupied by a purged entry is usually immediately
 * reclaimed. A Squid crash or a transaction abort is rather unlikely to
 * leave a relatively large number of stale slots in the database. Thus, the
 * number of potentially corrupted entries is relatively small. On the other
 * hand, the damage from serving a single hodgepodge entry may be significant
 * to the user. In such an environment, invalidating the whole entry has
 * negligible performance impact but saves us from high-damage bugs.
 */

namespace Rock
{

static bool
DoneLoading(const int64_t loadingPos, const int64_t dbSlotLimit)
{
    return loadingPos >= dbSlotLimit;
}

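/// whether the validation phase has examined all entries (and, with squid -S, all slots as well)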
static bool
DoneValidating(const int64_t validationPos, const int64_t dbSlotLimit, const int64_t dbEntryLimit)
{
    // paranoid slot checking is only enabled with squid -S
    const auto extraWork = opt_store_doublecheck ? dbSlotLimit : 0;
    return validationPos >= (dbEntryLimit + extraWork);
}

/// low-level anti-padding storage class for LoadingEntry and LoadingSlot flags
class LoadingFlags
{
public:
    LoadingFlags(): state(0), anchored(0), mapped(0), finalized(0), freed(0) {}

    /* for LoadingEntry */
    uint8_t state:3; ///< current entry state (one of the LoadingEntry::State values)
    uint8_t anchored:1; ///< whether we loaded the inode slot for this entry

    /* for LoadingSlot */
    uint8_t mapped:1; ///< whether the slot was added to a mapped entry
    uint8_t finalized:1; ///< whether finalizeOrThrow() has scanned the slot
    uint8_t freed:1; ///< whether the slot was given to the map as free space
};

/// smart StoreEntry-level info pointer (hides anti-padding LoadingParts arrays)
class LoadingEntry
{
public:
    LoadingEntry(const sfileno fileNo, LoadingParts &source);

    uint64_t &size; ///< payload seen so far
    uint32_t &version; ///< DbCellHeader::version to distinguish same-URL chains

    /// possible store entry states during index rebuild
    typedef enum { leEmpty = 0, leLoading, leLoaded, leCorrupted, leIgnored } State;

    /* LoadingFlags::state */
    State state() const { return static_cast<State>(flags.state); }
    void state(State aState) const { flags.state = aState; }

    /* LoadingFlags::anchored */
    bool anchored() const { return flags.anchored; }
    void anchored(const bool beAnchored) { flags.anchored = beAnchored; }

private:
    LoadingFlags &flags; ///< entry flags (see the above accessors) are ours
};

/// smart db slot-level info pointer (hides anti-padding LoadingParts arrays)
class LoadingSlot
{
public:
    LoadingSlot(const SlotId slotId, LoadingParts &source);

    /// another slot in some chain belonging to the same entry (unordered!)
    Ipc::StoreMapSliceId &more;

    /* LoadingFlags::mapped */
    bool mapped() const { return flags.mapped; }
    void mapped(const bool beMapped) { flags.mapped = beMapped; }

    /* LoadingFlags::finalized */
    bool finalized() const { return flags.finalized; }
    void finalized(const bool beFinalized) { flags.finalized = beFinalized; }

    /* LoadingFlags::freed */
    bool freed() const { return flags.freed; }
    void freed(const bool beFreed) { flags.freed = beFreed; }

    bool used() const { return freed() || mapped() || more != -1; }

private:
    LoadingFlags &flags; ///< slot flags (see the above accessors) are ours
};

/// information about store entries being loaded from disk (and their slots)
/// used for identifying partially stored/loaded entries
class LoadingParts
{
public:
    using Sizes = Ipc::StoreMapItems<uint64_t>;
    using Versions = Ipc::StoreMapItems<uint32_t>;
    using Mores = Ipc::StoreMapItems<Ipc::StoreMapSliceId>;
    using Flags = Ipc::StoreMapItems<LoadingFlags>;

    LoadingParts(const SwapDir &dir, const bool resuming);
    ~LoadingParts();

    // lacking copying/moving code and often too huge to copy
    LoadingParts(LoadingParts&&) = delete;

    Sizes &sizes() const { return *sizesOwner->object(); }
    Versions &versions() const { return *versionsOwner->object(); }
    Mores &mores() const { return *moresOwner->object(); }
    Flags &flags() const { return *flagsOwner->object(); }

private:
    /* Anti-padding storage. With millions of entries, padding matters! */

    /* indexed by sfileno */
    Sizes::Owner *sizesOwner; ///< LoadingEntry::size for all entries
    Versions::Owner *versionsOwner; ///< LoadingEntry::version for all entries

    /* indexed by SlotId */
    Mores::Owner *moresOwner; ///< LoadingSlot::more for all slots

    /* entry flags are indexed by sfileno; slot flags -- by SlotId */
    Flags::Owner *flagsOwner; ///< all LoadingEntry and LoadingSlot flags
};

} /* namespace Rock */

/* LoadingEntry */

Rock::LoadingEntry::LoadingEntry(const sfileno fileNo, LoadingParts &source):
    size(source.sizes().at(fileNo)),
    version(source.versions().at(fileNo)),
    flags(source.flags().at(fileNo))
{
}

/* LoadingSlot */

Rock::LoadingSlot::LoadingSlot(const SlotId slotId, LoadingParts &source):
    more(source.mores().at(slotId)),
    flags(source.flags().at(slotId))
{
}

/* LoadingParts */

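/// creates a shared memory segment for one LoadingParts array or, when
/// resuming an interrupted rebuild, attaches to the existing segment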
template <class T>
inline typename T::Owner *
createOwner(const char *dirPath, const char *sfx, const int64_t limit, const bool resuming)
{
    auto id = Ipc::Mem::Segment::Name(SBuf(dirPath), sfx);
    return resuming ? Ipc::Mem::Owner<T>::Old(id.c_str()) : shm_new(T)(id.c_str(), limit);
}

Rock::LoadingParts::LoadingParts(const SwapDir &dir, const bool resuming):
    sizesOwner(createOwner<Sizes>(dir.path, "rebuild_sizes", dir.entryLimitActual(), resuming)),
    versionsOwner(createOwner<Versions>(dir.path, "rebuild_versions", dir.entryLimitActual(), resuming)),
    moresOwner(createOwner<Mores>(dir.path, "rebuild_mores", dir.slotLimitActual(), resuming)),
    flagsOwner(createOwner<Flags>(dir.path, "rebuild_flags", dir.slotLimitActual(), resuming))
{
    assert(sizes().capacity == versions().capacity); // every entry has both fields
    assert(sizes().capacity <= mores().capacity); // every entry needs slot(s)
    assert(mores().capacity == flags().capacity); // every slot needs a set of flags

    if (!resuming) {
        // other parts rely on shared memory segments being zero-initialized
        // TODO: refactor the next slot pointer to use 0 for nil values
        mores().fill(-1);
    }
}

Rock::LoadingParts::~LoadingParts()
{
    delete sizesOwner;
    delete versionsOwner;
    delete moresOwner;
    delete flagsOwner;
}

/* Rock::Rebuild::Stats */

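/// the name of the shared memory segment that stores rebuild statistics for the given cache_dir path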
SBuf
Rock::Rebuild::Stats::Path(const char *dirPath)
{
    return Ipc::Mem::Segment::Name(SBuf(dirPath), "rebuild_stats");
}

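/// creates the shared memory segment that will hold rebuild statistics for the given cache_dir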
Ipc::Mem::Owner<Rock::Rebuild::Stats>*
Rock::Rebuild::Stats::Init(const SwapDir &dir)
{
    return shm_new(Stats)(Path(dir.path).c_str());
}

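/// whether both the loading and validation phases have finished for the given cache_dir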
bool
Rock::Rebuild::Stats::completed(const SwapDir &dir) const
{
    return DoneLoading(counts.scancount, dir.slotLimitActual()) &&
           DoneValidating(counts.validations, dir.slotLimitActual(), dir.entryLimitActual());
}

/* Rebuild */

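/// whether the current process is responsible for rebuilding the given cache_dir index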
bool
Rock::Rebuild::IsResponsible(const SwapDir &)
{
    // in SMP mode, only the disker is responsible for populating the map
    return !UsingSmp() || IamDiskProcess();
}

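/// starts a Rebuild job for the given cache_dir unless this process is not
/// responsible or the indexing has already completed; returns whether a job was started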
bool
Rock::Rebuild::Start(SwapDir &dir)
{
    if (!IsResponsible(dir)) {
        debugs(47, 2, "not responsible for indexing cache_dir #" <<
               dir.index << " from " << dir.filePath);
        return false;
    }

    const auto stats = shm_old(Rebuild::Stats)(Stats::Path(dir.path).c_str());
    if (stats->completed(dir)) {
        debugs(47, 2, "already indexed cache_dir #" <<
               dir.index << " from " << dir.filePath);
        return false;
    }

    AsyncJob::Start(new Rebuild(&dir, stats));
    return true;
}

Rock::Rebuild::Rebuild(SwapDir *dir, const Ipc::Mem::Pointer<Stats> &s): AsyncJob("Rock::Rebuild"),
    sd(dir),
    parts(nullptr),
    stats(s),
    dbSize(0),
    dbSlotSize(0),
    dbSlotLimit(0),
    dbEntryLimit(0),
    fd(-1),
    dbOffset(0),
    loadingPos(stats->counts.scancount),
    validationPos(stats->counts.validations),
    counts(stats->counts),
    resuming(stats->counts.started())
{
    assert(sd);
    dbSize = sd->diskOffsetLimit(); // we do not care about the trailer waste
    dbSlotSize = sd->slotSize;
    dbEntryLimit = sd->entryLimitActual();
    dbSlotLimit = sd->slotLimitActual();
    assert(dbEntryLimit <= dbSlotLimit);
    registerRunner();
}

Rock::Rebuild::~Rebuild()
{
    if (fd >= 0)
        file_close(fd);
    // normally, segments are used until the Squid instance quits,
    // but these indexing-only segments are no longer needed
    delete parts;
}

void
Rock::Rebuild::startShutdown()
{
    mustStop("startShutdown");
}

/// prepares and initiates entry loading sequence
void
Rock::Rebuild::start()
{
    assert(IsResponsible(*sd));

    if (!resuming) {
        debugs(47, Important(18), "Loading cache_dir #" << sd->index <<
               " from " << sd->filePath);
    } else {
        debugs(47, Important(63), "Resuming indexing cache_dir #" << sd->index <<
               " from " << sd->filePath << ':' << progressDescription());
    }

    fd = file_open(sd->filePath, O_RDONLY | O_BINARY);
    if (fd < 0)
        failure("cannot open db", errno);

    char hdrBuf[SwapDir::HeaderSize];
    if (read(fd, hdrBuf, sizeof(hdrBuf)) != SwapDir::HeaderSize)
        failure("cannot read db header", errno);

    // slot prefix of SM_PAGE_SIZE should fit both core entry header and ours
    assert(sizeof(DbCellHeader) < SM_PAGE_SIZE);
    buf.init(SM_PAGE_SIZE, SM_PAGE_SIZE);

    dbOffset = SwapDir::HeaderSize + loadingPos * dbSlotSize;

    assert(!parts);
    parts = new LoadingParts(*sd, resuming);

    counts.updateStartTime(current_time);

    checkpoint();
}

/// continues after a pause if not done
void
Rock::Rebuild::checkpoint()
{
    if (!done())
        eventAdd("Rock::Rebuild", Rock::Rebuild::Steps, this, 0.01, 1, true);
}

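/// whether all db slots have been scanned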
bool
Rock::Rebuild::doneLoading() const
{
    return DoneLoading(loadingPos, dbSlotLimit);
}

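/// whether all entries (and, with squid -S, all slots) have been validated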
bool
Rock::Rebuild::doneValidating() const
{
    return DoneValidating(validationPos, dbSlotLimit, dbEntryLimit);
}

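/// whether both rebuild phases have finished (AsyncJob API)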
bool
Rock::Rebuild::doneAll() const
{
    return doneLoading() && doneValidating() && AsyncJob::doneAll();
}

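/// the event callback that wakes the Rebuild job up to do the next batch of steps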
void
Rock::Rebuild::Steps(void *data)
{
    // use async call to enable job call protection that time events lack
    CallJobHere(47, 5, static_cast<Rebuild*>(data), Rock::Rebuild, steps);
}

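/// performs the next batch of loading or validation work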
void
Rock::Rebuild::steps()
{
    if (!doneLoading())
        loadingSteps();
    else
        validationSteps();

    checkpoint();
}

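/// loads db slots from disk, a batch at a time, pausing to let other activities run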
void
Rock::Rebuild::loadingSteps()
{
    debugs(47,5, sd->index << " slot " << loadingPos << " at " <<
           dbOffset << " <= " << dbSize);

    // Balance our desire to maximize the number of entries processed at once
    // (and, hence, minimize overheads and total rebuild time) with a
    // requirement to also process Coordinator events, disk I/Os, etc.
    const int maxSpentMsec = 50; // keep small: most RAM I/Os are under 1ms
    const timeval loopStart = current_time;

    int64_t loaded = 0;
    while (!doneLoading()) {
        loadOneSlot();
        dbOffset += dbSlotSize;
        ++loadingPos;
        ++loaded;

        if (counts.scancount % 1000 == 0)
            storeRebuildProgress(sd->index, dbSlotLimit, counts.scancount);

        if (opt_foreground_rebuild)
            continue; // skip "few entries at a time" check below

        getCurrentTime();
        const double elapsedMsec = tvSubMsec(loopStart, current_time);
        if (elapsedMsec > maxSpentMsec || elapsedMsec < 0) {
            debugs(47, 5, "pausing after " << loaded << " entries in " <<
                   elapsedMsec << "ms; " << (elapsedMsec/loaded) << "ms per entry");
            break;
        }
    }
}

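/// provides access to the loading state of the given store entry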
Rock::LoadingEntry
Rock::Rebuild::loadingEntry(const sfileno fileNo)
{
    Must(0 <= fileNo && fileNo < dbEntryLimit);
    return LoadingEntry(fileNo, *parts);
}

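/// provides access to the loading state of the given (already scanned) db slot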
Rock::LoadingSlot
Rock::Rebuild::loadingSlot(const SlotId slotId)
{
    Must(0 <= slotId && slotId < dbSlotLimit);
    Must(slotId <= loadingPos); // cannot look ahead
    return LoadingSlot(slotId, *parts);
}

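/// reads and parses a single db slot, dispatching it based on its header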
void
Rock::Rebuild::loadOneSlot()
{
    debugs(47,5, sd->index << " slot " << loadingPos << " at " <<
           dbOffset << " <= " << dbSize);

    // increment before loadingPos to avoid getting stuck at a slot
    // in case of a crash
    ++counts.scancount;

    if (lseek(fd, dbOffset, SEEK_SET) < 0)
        failure("cannot seek to db entry", errno);

    buf.reset();

    if (!storeRebuildLoadEntry(fd, sd->index, buf, counts))
        return;

    const SlotId slotId = loadingPos;

    // get our header
    DbCellHeader header;
    if (buf.contentSize() < static_cast<mb_size_t>(sizeof(header))) {
        debugs(47, DBG_IMPORTANT, "WARNING: cache_dir[" << sd->index << "]: " <<
               "Ignoring truncated " << buf.contentSize() << "-byte " <<
               "cache entry meta data at " << dbOffset);
        freeUnusedSlot(slotId, true);
        return;
    }
    memcpy(&header, buf.content(), sizeof(header));
    if (header.empty()) {
        freeUnusedSlot(slotId, false);
        return;
    }
    if (!header.sane(dbSlotSize, dbSlotLimit)) {
        debugs(47, DBG_IMPORTANT, "WARNING: cache_dir[" << sd->index << "]: " <<
               "Ignoring malformed cache entry meta data at " << dbOffset);
        freeUnusedSlot(slotId, true);
        return;
    }
    buf.consume(sizeof(header)); // optimize to avoid memmove()

    useNewSlot(slotId, header);
}

/// parse StoreEntry basics and add them to the map, returning true on success
bool
Rock::Rebuild::importEntry(Ipc::StoreMapAnchor &anchor, const sfileno fileno, const DbCellHeader &header)
{
    cache_key key[SQUID_MD5_DIGEST_LENGTH];
    StoreEntry loadedE;
    const uint64_t knownSize = header.entrySize > 0 ?
                               header.entrySize : anchor.basics.swap_file_sz.load();
    if (!storeRebuildParseEntry(buf, loadedE, key, counts, knownSize))
        return false;

    // the entry size may be unknown, but if it is known, it is authoritative

    debugs(47, 8, "importing basics for entry " << fileno <<
           " inode.entrySize: " << header.entrySize <<
           " swap_file_sz: " << loadedE.swap_file_sz);
    anchor.set(loadedE);

    // we have not validated whether all db cells for this entry were loaded
    EBIT_CLR(anchor.basics.flags, ENTRY_VALIDATED);

    // loadedE->dump(5);

    return true;
}

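/// validates loaded entries (and, with squid -S, slots), a batch at a time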
void
Rock::Rebuild::validationSteps()
{
    debugs(47, 5, sd->index << " validating from " << validationPos);

    // see loadingSteps() for the rationale; TODO: avoid duplication
    const int maxSpentMsec = 50; // keep small: validation does not do I/O
    const timeval loopStart = current_time;

    int64_t validated = 0;
    while (!doneValidating()) {
        // increment before validationPos to avoid getting stuck at a slot
        // in case of a crash
        ++counts.validations;
        if (validationPos < dbEntryLimit)
            validateOneEntry(validationPos);
        else
            validateOneSlot(validationPos - dbEntryLimit);
        ++validationPos;
        ++validated;

        if (validationPos % 1000 == 0)
            debugs(20, 2, "validated: " << validationPos);

        if (opt_foreground_rebuild)
            continue; // skip "few entries at a time" check below

        getCurrentTime();
        const double elapsedMsec = tvSubMsec(loopStart, current_time);
        if (elapsedMsec > maxSpentMsec || elapsedMsec < 0) {
            debugs(47, 5, "pausing after " << validated << " entries in " <<
                   elapsedMsec << "ms; " << (elapsedMsec/validated) << "ms per entry");
            break;
        }
    }
}

/// Either make the entry accessible to all or throw.
/// This method assumes it is called only when no more entry slots are expected.
void
Rock::Rebuild::finalizeOrThrow(const sfileno fileNo, LoadingEntry &le)
{
    // walk all map-linked slots, starting from inode, and mark each
    Ipc::StoreMapAnchor &anchor = sd->map->writeableEntry(fileNo);
    Must(le.size > 0); // paranoid
    uint64_t mappedSize = 0;
    SlotId slotId = anchor.start;
    while (slotId >= 0 && mappedSize < le.size) {
        LoadingSlot slot = loadingSlot(slotId); // throws if we have not loaded that slot
        Must(!slot.finalized()); // no loops or stealing from other entries
        Must(slot.mapped()); // all our slots should be in the sd->map
        Must(!slot.freed()); // all our slots should still be present
        slot.finalized(true);

        Ipc::StoreMapSlice &mapSlice = sd->map->writeableSlice(fileNo, slotId);
        Must(mapSlice.size > 0); // paranoid
        mappedSize += mapSlice.size;
        slotId = mapSlice.next;
    }
    /* no hodgepodge entries: one entry - one full chain and no leftovers */
    Must(slotId < 0);
    Must(mappedSize == le.size);

    if (!anchor.basics.swap_file_sz)
        anchor.basics.swap_file_sz = le.size;
    EBIT_SET(anchor.basics.flags, ENTRY_VALIDATED);
    le.state(LoadingEntry::leLoaded);
    sd->map->closeForWriting(fileNo);
    ++counts.objcount;
}

/// Either make the entry accessible to all or free it.
/// This method must only be called when no more entry slots are expected.
void
Rock::Rebuild::finalizeOrFree(const sfileno fileNo, LoadingEntry &le)
{
    try {
        finalizeOrThrow(fileNo, le);
    } catch (const std::exception &ex) {
        freeBadEntry(fileNo, ex.what());
    }
}

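/// finalizes or frees the given entry, depending on its loading state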
void
Rock::Rebuild::validateOneEntry(const sfileno fileNo)
{
    LoadingEntry entry = loadingEntry(fileNo);
    switch (entry.state()) {

    case LoadingEntry::leLoading:
        finalizeOrFree(fileNo, entry);
        break;

    case LoadingEntry::leEmpty: // no entry hashed to this position
    case LoadingEntry::leLoaded: // we have already unlocked this entry
    case LoadingEntry::leCorrupted: // we have already removed this entry
    case LoadingEntry::leIgnored: // we have already discarded this entry
        break;
    }
}

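/// double-checks that the given slot was either freed or fully mapped and finalized (squid -S)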
void
Rock::Rebuild::validateOneSlot(const SlotId slotId)
{
    const LoadingSlot slot = loadingSlot(slotId);
    // there should not be any unprocessed slots left
    Must(slot.freed() || (slot.mapped() && slot.finalized()));
}

/// Marks remaining bad entry slots as free and unlocks the entry. The map
/// cannot do this because Loading entries may have holes in the slots chain.
void
Rock::Rebuild::freeBadEntry(const sfileno fileno, const char *eDescription)
{
    debugs(47, 2, "cache_dir #" << sd->index << ' ' << eDescription <<
           " entry " << fileno << " is ignored during rebuild");

    LoadingEntry le = loadingEntry(fileno);
    le.state(LoadingEntry::leCorrupted);

    Ipc::StoreMapAnchor &anchor = sd->map->writeableEntry(fileno);
    assert(anchor.start < 0 || le.size > 0);
    for (SlotId slotId = anchor.start; slotId >= 0;) {
        const SlotId next = loadingSlot(slotId).more;
        freeSlot(slotId, true);
        slotId = next;
    }

    sd->map->forgetWritingEntry(fileno);
}

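/// ends the rebuild job, reporting accumulated statistics (AsyncJob API)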
void
Rock::Rebuild::swanSong()
{
    debugs(47,3, "cache_dir #" << sd->index << " rebuild level: " <<
           StoreController::store_dirs_rebuilding);
    storeRebuildComplete(&counts);
}

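/// reports a fatal rebuild error (with the given system errno, if any) and terminates Squid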
void
Rock::Rebuild::failure(const char *msg, int errNo)
{
    debugs(47,5, sd->index << " slot " << loadingPos << " at " <<
           dbOffset << " <= " << dbSize);

    if (errNo)
        debugs(47, DBG_CRITICAL, "ERROR: Rock cache_dir rebuild failure: " << xstrerr(errNo));
    debugs(47, DBG_CRITICAL, "Do you need to run 'squid -z' to initialize storage?");

    assert(sd);
    fatalf("Rock cache_dir[%d] rebuild of %s failed: %s.",
           sd->index, sd->filePath, msg);
}

/// adds slot to the free slot index
void
Rock::Rebuild::freeSlot(const SlotId slotId, const bool invalid)
{
    debugs(47,5, sd->index << " frees slot " << slotId);
    LoadingSlot slot = loadingSlot(slotId);
    assert(!slot.freed());
    slot.freed(true);

    if (invalid) {
        ++counts.invalid;
        //sd->unlink(fileno); leave garbage on disk, it should not hurt
    }

    Ipc::Mem::PageId pageId;
    pageId.pool = Ipc::Mem::PageStack::IdForSwapDirSpace(sd->index);
    pageId.number = slotId+1;
    sd->freeSlots->push(pageId);
}

/// freeSlot() for never-been-mapped slots
void
Rock::Rebuild::freeUnusedSlot(const SlotId slotId, const bool invalid)
{
    LoadingSlot slot = loadingSlot(slotId);
    // mapped slots must be freed via freeBadEntry() to keep the map in sync
    assert(!slot.mapped());
    freeSlot(slotId, invalid);
}

/// adds slot to the entry chain in the map
void
Rock::Rebuild::mapSlot(const SlotId slotId, const DbCellHeader &header)
{
    LoadingSlot slot = loadingSlot(slotId);
    assert(!slot.mapped());
    assert(!slot.freed());
    slot.mapped(true);

    Ipc::StoreMapSlice slice;
    slice.next = header.nextSlot;
    slice.size = header.payloadSize;
    sd->map->importSlice(slotId, slice);
}

template <class SlotIdType> // accommodates atomic and simple SlotIds.
void
Rock::Rebuild::chainSlots(SlotIdType &from, const SlotId to)
{
    LoadingSlot slot = loadingSlot(to);
    assert(slot.more < 0);
    slot.more = from; // may still be unset
    from = to;
}

/// adds slot to an existing entry chain; caller must check that the slot
/// belongs to the chain it is being added to
void
Rock::Rebuild::addSlotToEntry(const sfileno fileno, const SlotId slotId, const DbCellHeader &header)
{
    LoadingEntry le = loadingEntry(fileno);
    Ipc::StoreMapAnchor &anchor = sd->map->writeableEntry(fileno);

    debugs(47,9, "adding " << slotId << " to entry " << fileno);
    // we do not need to preserve the order
    if (le.anchored()) {
        LoadingSlot inode = loadingSlot(anchor.start);
        chainSlots(inode.more, slotId);
    } else {
        chainSlots(anchor.start, slotId);
    }

    le.size += header.payloadSize; // must precede freeBadEntry() calls

    if (header.firstSlot == slotId) {
        debugs(47,5, "added inode");

        if (le.anchored()) { // we have already added another inode slot
            freeBadEntry(fileno, "inode conflict");
            ++counts.clashcount;
            return;
        }

        le.anchored(true);

        if (!importEntry(anchor, fileno, header)) {
            freeBadEntry(fileno, "corrupted metainfo");
            return;
        }

        // set total entry size and/or check it for consistency
        if (const uint64_t totalSize = header.entrySize) {
            assert(totalSize != static_cast<uint64_t>(-1));
            if (!anchor.basics.swap_file_sz) {
                anchor.basics.swap_file_sz = totalSize;
                assert(anchor.basics.swap_file_sz != static_cast<uint64_t>(-1));
            } else if (totalSize != anchor.basics.swap_file_sz) {
                freeBadEntry(fileno, "size mismatch");
                return;
            }
        }
    }

    const uint64_t totalSize = anchor.basics.swap_file_sz; // may be 0/unknown

    if (totalSize > 0 && le.size > totalSize) { // overflow
        debugs(47, 8, "overflow: " << le.size << " > " << totalSize);
        freeBadEntry(fileno, "overflowing");
        return;
    }

    mapSlot(slotId, header);
    if (totalSize > 0 && le.size == totalSize)
        finalizeOrFree(fileno, le); // entry is probably fully loaded now
}

/// initialize housekeeping information for a newly accepted entry
void
Rock::Rebuild::primeNewEntry(Ipc::StoreMap::Anchor &anchor, const sfileno fileno, const DbCellHeader &header)
{
    anchor.setKey(reinterpret_cast<const cache_key*>(header.key));
    assert(header.firstSlot >= 0);
    anchor.start = -1; // addSlotToEntry() will set it

    assert(anchor.basics.swap_file_sz != static_cast<uint64_t>(-1));

    LoadingEntry le = loadingEntry(fileno);
    le.state(LoadingEntry::leLoading);
    le.version = header.version;
    le.size = 0;
}

/// handle a slot from an entry that we have not seen before
void
Rock::Rebuild::startNewEntry(const sfileno fileno, const SlotId slotId, const DbCellHeader &header)
{
    // A miss may have been stored at our fileno while we were loading other
    // slots from disk. We ought to preserve that entry because it is fresher.
    const bool overwriteExisting = false;
    if (Ipc::StoreMap::Anchor *anchor = sd->map->openForWritingAt(fileno, overwriteExisting)) {
        primeNewEntry(*anchor, fileno, header);
        addSlotToEntry(fileno, slotId, header); // may fail
        assert(anchor->basics.swap_file_sz != static_cast<uint64_t>(-1));
    } else {
        // A new from-network entry is occupying our map slot; let it be, but
        // save us from the trouble of going through the above motions again.
        LoadingEntry le = loadingEntry(fileno);
        le.state(LoadingEntry::leIgnored);
        freeUnusedSlot(slotId, false);
    }
}

/// does the header belong to the fileno entry being loaded?
bool
Rock::Rebuild::sameEntry(const sfileno fileno, const DbCellHeader &header) const
{
    // Header updates always result in multi-start chains and often
    // result in multi-version chains so we can only compare the keys.
    const Ipc::StoreMap::Anchor &anchor = sd->map->writeableEntry(fileno);
    return anchor.sameKey(reinterpret_cast<const cache_key*>(header.key));
}

/// handle freshly loaded (and validated) db slot header
void
Rock::Rebuild::useNewSlot(const SlotId slotId, const DbCellHeader &header)
{
    const cache_key *const key =
        reinterpret_cast<const cache_key*>(header.key);
    const sfileno fileno = sd->map->fileNoByKey(key);
    assert(0 <= fileno && fileno < dbEntryLimit);

    LoadingEntry le = loadingEntry(fileno);
    debugs(47,9, "entry " << fileno << " state: " << le.state() << ", inode: " <<
           header.firstSlot << ", size: " << header.payloadSize);

    switch (le.state()) {

    case LoadingEntry::leEmpty: {
        startNewEntry(fileno, slotId, header);
        break;
    }

    case LoadingEntry::leLoading: {
        if (sameEntry(fileno, header)) {
            addSlotToEntry(fileno, slotId, header); // may fail
        } else {
            // either the loading chain or this slot is stale;
            // be conservative and ignore both (and any future ones)
            freeBadEntry(fileno, "duplicated");
            freeUnusedSlot(slotId, true);
            ++counts.dupcount;
        }
        break;
    }

    case LoadingEntry::leLoaded: {
        // either the previously loaded chain or this slot is stale;
        // be conservative and ignore both (and any future ones)
        le.state(LoadingEntry::leCorrupted);
        sd->map->freeEntry(fileno); // may not be immediately successful
        freeUnusedSlot(slotId, true);
        ++counts.dupcount;
        break;
    }

    case LoadingEntry::leCorrupted: {
        // previously seen slots messed things up so we must ignore this one
        freeUnusedSlot(slotId, true);
        break;
    }

    case LoadingEntry::leIgnored: {
        // already replaced by a fresher or colliding from-network entry
        freeUnusedSlot(slotId, false);
        break;
    }
    }
}

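/// describes how far this rebuild has progressed (used when reporting a resumed rebuild)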
SBuf
Rock::Rebuild::progressDescription() const
{
    SBufStream str;

    str << Debug::Extra << "slots loaded: " << Progress(loadingPos, dbSlotLimit);

    const auto validatingEntries = validationPos < dbEntryLimit;
    const auto entriesValidated = validatingEntries ? validationPos : dbEntryLimit;
    str << Debug::Extra << "entries validated: " << Progress(entriesValidated, dbEntryLimit);
    if (opt_store_doublecheck) {
        const auto slotsValidated = validatingEntries ? 0 : (validationPos - dbEntryLimit);
        str << Debug::Extra << "slots validated: " << Progress(slotsValidated, dbSlotLimit);
    }

    return str.buf();
}
