/*
 * Copyright (C) 1996-2018 The Squid Software Foundation and contributors
 *
 * Squid software is distributed under GPLv2+ license and includes
 * contributions from numerous individuals and organizations.
 * Please see the COPYING and CONTRIBUTORS files for details.
 */

/* DEBUG: section 79 Disk IO Routines */

#include "squid.h"
#include "base/AsyncJobCalls.h"
#include "fs/rock/RockDbCell.h"
#include "fs/rock/RockRebuild.h"
#include "fs/rock/RockSwapDir.h"
#include "fs_io.h"
#include "globals.h"
#include "ipc/StoreMap.h"
#include "md5.h"
#include "SquidTime.h"
#include "Store.h"
#include "store_rebuild.h"
#include "tools.h"

#include <cerrno>

CBDATA_NAMESPACED_CLASS_INIT(Rock, Rebuild);

/**
 \defgroup RockFsRebuild Rock Store Rebuild
 \ingroup Filesystems
 *
 \section Overview Overview
 * Several layers of information are manipulated during the rebuild:
 \par
 * Store Entry: Response message plus all the metainformation associated with
 * it. Identified by store key. At any given time, from Squid's point
 * of view, there is only one entry with a given key, but several
 * different entries with the same key can be observed in any historical
 * archive (such as an access log or a store database).
 \par
 * Slot chain: A sequence of db slots representing a Store Entry state at
 * some point in time. Identified by key+version combination. Due to
 * transaction aborts, crashes, and idle periods, some chains may contain
 * incomplete or stale information. We assume that no two different chains
 * have the same key and version. If that assumption fails, we may serve a
 * hodgepodge entry during rebuild, until "extra" slots are loaded/noticed.
 \par
 * iNode: The very first db slot in an entry slot chain. This slot contains
 * at least the beginning of Store Entry metadata, but most 32KB inodes contain
 * the entire metadata, HTTP headers, and HTTP body.
 \par
 * Db slot: A db record containing a piece of a single store entry and linked
 * to other slots with the same key and version fields, forming a chain.
 * Slots are identified by their absolute position in the database file,
 * which is naturally unique.
 \par
 * When information from the newly loaded db slot contradicts the entry-level
 * information collected so far (e.g., the versions do not match or the total
 * chain size after the slot contribution exceeds the expected number), the
 * whole entry (and not just the chain or the slot!) is declared corrupted.
 \par
 * Why invalidate the whole entry? Rock Store is written for high-load
 * environments with large caches, where there are usually very few idle slots
 * in the database. The space occupied by a purged entry is usually immediately
 * reclaimed. A Squid crash or a transaction abort is rather unlikely to
 * leave a relatively large number of stale slots in the database. Thus, the
 * number of potentially corrupted entries is relatively small. On the other
 * hand, the damage from serving a single hodgepodge entry may be significant
 * to the user. In such an environment, invalidating the whole entry has
 * negligible performance impact but saves us from high-damage bugs.
 */
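
/* A sketch of the on-disk chain layout described above (for orientation
 * only, not part of the build; headerAt() and inodeSlotId are hypothetical
 * names): a complete chain starts at the inode slot and follows
 * DbCellHeader::nextSlot links until a negative id, with every slot
 * carrying the same key and version.
 *
 * \code
 * SlotId slotId = inodeSlotId; // first slot of the entry
 * while (slotId >= 0) {
 *     const DbCellHeader &h = headerAt(slotId); // hypothetical accessor
 *     // h.key and h.version must match the chain being walked
 *     slotId = h.nextSlot; // negative values terminate the chain
 * }
 * \endcode
 */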

namespace Rock
{

/// low-level anti-padding storage class for LoadingEntry and LoadingSlot flags
class LoadingFlags
{
public:
    LoadingFlags(): state(0), anchored(0), mapped(0), finalized(0), freed(0) {}

    /* for LoadingEntry */
    uint8_t state:3; ///< current entry state (one of the LoadingEntry::State values)
    uint8_t anchored:1; ///< whether we loaded the inode slot for this entry

    /* for LoadingSlot */
    uint8_t mapped:1; ///< whether the slot was added to a mapped entry
    uint8_t finalized:1; ///< whether finalizeOrThrow() has scanned the slot
    uint8_t freed:1; ///< whether the slot was given to the map as free space
};
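
// All seven flag bits above pack into a single byte. A compile-time check
// one could add here (an illustrative assumption, not in the original):
//     static_assert(sizeof(LoadingFlags) == 1, "LoadingFlags stays unpadded");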

/// smart StoreEntry-level info pointer (hides anti-padding LoadingParts arrays)
class LoadingEntry
{
public:
    LoadingEntry(const sfileno fileNo, LoadingParts &source);

    uint64_t &size; ///< payload seen so far
    uint32_t &version; ///< DbCellHeader::version to distinguish same-URL chains

    /// possible store entry states during index rebuild
    typedef enum { leEmpty = 0, leLoading, leLoaded, leCorrupted, leIgnored } State;

    /* LoadingFlags::state */
    State state() const { return static_cast<State>(flags.state); }
    void state(State aState) const { flags.state = aState; }

    /* LoadingFlags::anchored */
    bool anchored() const { return flags.anchored; }
    void anchored(const bool beAnchored) { flags.anchored = beAnchored; }

private:
    LoadingFlags &flags; ///< entry flags (see the above accessors) are ours
};

/// smart db slot-level info pointer (hides anti-padding LoadingParts arrays)
class LoadingSlot
{
public:
    LoadingSlot(const SlotId slotId, LoadingParts &source);

    /// another slot in some chain belonging to the same entry (unordered!)
    Ipc::StoreMapSliceId &more;

    /* LoadingFlags::mapped */
    bool mapped() const { return flags.mapped; }
    void mapped(const bool beMapped) { flags.mapped = beMapped; }

    /* LoadingFlags::finalized */
    bool finalized() const { return flags.finalized; }
    void finalized(const bool beFinalized) { flags.finalized = beFinalized; }

    /* LoadingFlags::freed */
    bool freed() const { return flags.freed; }
    void freed(const bool beFreed) { flags.freed = beFreed; }

    bool used() const { return freed() || mapped() || more != -1; }

private:
    LoadingFlags &flags; ///< slot flags (see the above accessors) are ours
};

/// information about store entries being loaded from disk (and their slots)
/// used for identifying partially stored/loaded entries
class LoadingParts
{
public:
    LoadingParts(int dbEntryLimit, int dbSlotLimit);
    LoadingParts(LoadingParts&&) = delete; // paranoid (often too huge to copy)

private:
    friend class LoadingEntry;
    friend class LoadingSlot;

    /* Anti-padding storage. With millions of entries, padding matters! */

    /* indexed by sfileno */
    std::vector<uint64_t> sizes; ///< LoadingEntry::size for all entries
    std::vector<uint32_t> versions; ///< LoadingEntry::version for all entries

    /* indexed by SlotId */
    std::vector<Ipc::StoreMapSliceId> mores; ///< LoadingSlot::more for all slots

    /* entry flags are indexed by sfileno; slot flags -- by SlotId */
    std::vector<LoadingFlags> flags; ///< all LoadingEntry and LoadingSlot flags
};
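
// Illustrative footprint arithmetic (assumed example sizes, not measurements):
// with 8M entries, 16M slots, and a 4-byte StoreMapSliceId, these vectors cost
// about 8M*8 (sizes) + 8M*4 (versions) + 16M*4 (mores) + 16M*1 (flags) = 176 MB,
// whereas one padded 24-byte per-slot struct holding the same fields would
// cost 16M*24 = 384 MB.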

} /* namespace Rock */

/* LoadingEntry */

Rock::LoadingEntry::LoadingEntry(const sfileno fileNo, LoadingParts &source):
    size(source.sizes.at(fileNo)),
    version(source.versions.at(fileNo)),
    flags(source.flags.at(fileNo))
{
}

/* LoadingSlot */

Rock::LoadingSlot::LoadingSlot(const SlotId slotId, LoadingParts &source):
    more(source.mores.at(slotId)),
    flags(source.flags.at(slotId))
{
}

/* LoadingParts */

Rock::LoadingParts::LoadingParts(const int dbEntryLimit, const int dbSlotLimit):
    sizes(dbEntryLimit, 0),
    versions(dbEntryLimit, 0),
    mores(dbSlotLimit, -1),
    flags(dbSlotLimit)
{
    assert(sizes.size() == versions.size()); // every entry has both fields
    assert(sizes.size() <= mores.size()); // every entry needs slot(s)
    assert(mores.size() == flags.size()); // every slot needs a set of flags
}

/* Rebuild */

Rock::Rebuild::Rebuild(SwapDir *dir): AsyncJob("Rock::Rebuild"),
    sd(dir),
    parts(nullptr),
    dbSize(0),
    dbSlotSize(0),
    dbSlotLimit(0),
    dbEntryLimit(0),
    fd(-1),
    dbOffset(0),
    loadingPos(0),
    validationPos(0)
{
    assert(sd);
    memset(&counts, 0, sizeof(counts));
    dbSize = sd->diskOffsetLimit(); // we do not care about the trailer waste
    dbSlotSize = sd->slotSize;
    dbEntryLimit = sd->entryLimitActual();
    dbSlotLimit = sd->slotLimitActual();
    assert(dbEntryLimit <= dbSlotLimit);
}

Rock::Rebuild::~Rebuild()
{
    if (fd >= 0)
        file_close(fd);
    delete parts;
}

/// prepares and initiates entry loading sequence
void
Rock::Rebuild::start()
{
    // in SMP mode, only the disker is responsible for populating the map
    if (UsingSmp() && !IamDiskProcess()) {
        debugs(47, 2, "Non-disker skips rebuilding of cache_dir #" <<
               sd->index << " from " << sd->filePath);
        mustStop("non-disker");
        return;
    }

    debugs(47, DBG_IMPORTANT, "Loading cache_dir #" << sd->index <<
           " from " << sd->filePath);

    fd = file_open(sd->filePath, O_RDONLY | O_BINARY);
    if (fd < 0)
        failure("cannot open db", errno);

    char hdrBuf[SwapDir::HeaderSize];
    if (read(fd, hdrBuf, sizeof(hdrBuf)) != SwapDir::HeaderSize)
        failure("cannot read db header", errno);

    // slot prefix of SM_PAGE_SIZE should fit both core entry header and ours
    assert(sizeof(DbCellHeader) < SM_PAGE_SIZE);
    buf.init(SM_PAGE_SIZE, SM_PAGE_SIZE);

    dbOffset = SwapDir::HeaderSize;

    parts = new LoadingParts(dbEntryLimit, dbSlotLimit);

    checkpoint();
}

/// continues after a pause if not done
void
Rock::Rebuild::checkpoint()
{
    if (!done())
        eventAdd("Rock::Rebuild", Rock::Rebuild::Steps, this, 0.01, 1, true);
}

bool
Rock::Rebuild::doneLoading() const
{
    return loadingPos >= dbSlotLimit;
}

bool
Rock::Rebuild::doneValidating() const
{
    // paranoid slot checking is only enabled with squid -S
    return validationPos >= dbEntryLimit +
           (opt_store_doublecheck ? dbSlotLimit : 0);
}
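
// Validation positions by example (illustrative numbers): with a dbEntryLimit
// of 4 and a dbSlotLimit of 16, validationSteps() maps positions 0..3 to
// validateOneEntry(0..3); with squid -S, positions 4..19 then map to
// validateOneSlot(0..15), double-checking every slot.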

bool
Rock::Rebuild::doneAll() const
{
    return doneLoading() && doneValidating() && AsyncJob::doneAll();
}

void
Rock::Rebuild::Steps(void *data)
{
    // use async call to enable job call protection that time events lack
    CallJobHere(47, 5, static_cast<Rebuild*>(data), Rock::Rebuild, steps);
}

void
Rock::Rebuild::steps()
{
    if (!doneLoading())
        loadingSteps();
    else
        validationSteps();

    checkpoint();
}

void
Rock::Rebuild::loadingSteps()
{
    debugs(47,5, sd->index << " slot " << loadingPos << " at " <<
           dbOffset << " <= " << dbSize);

    // Balance our desire to maximize the number of entries processed at once
    // (and, hence, minimize overheads and total rebuild time) with a
    // requirement to also process Coordinator events, disk I/Os, etc.
    const int maxSpentMsec = 50; // keep small: most RAM I/Os are under 1ms
    const timeval loopStart = current_time;

    int loaded = 0;
    while (!doneLoading()) {
        loadOneSlot();
        dbOffset += dbSlotSize;
        ++loadingPos;
        ++loaded;

        if (counts.scancount % 1000 == 0)
            storeRebuildProgress(sd->index, dbSlotLimit, counts.scancount);

        if (opt_foreground_rebuild)
            continue; // skip "few entries at a time" check below

        getCurrentTime();
        const double elapsedMsec = tvSubMsec(loopStart, current_time);
        if (elapsedMsec > maxSpentMsec || elapsedMsec < 0) {
            debugs(47, 5, HERE << "pausing after " << loaded << " entries in " <<
                   elapsedMsec << "ms; " << (elapsedMsec/loaded) << "ms per entry");
            break;
        }
    }
}

Rock::LoadingEntry
Rock::Rebuild::loadingEntry(const sfileno fileNo)
{
    Must(0 <= fileNo && fileNo < dbEntryLimit);
    return LoadingEntry(fileNo, *parts);
}

Rock::LoadingSlot
Rock::Rebuild::loadingSlot(const SlotId slotId)
{
    Must(0 <= slotId && slotId < dbSlotLimit);
    Must(slotId <= loadingPos); // cannot look ahead
    return LoadingSlot(slotId, *parts);
}

void
Rock::Rebuild::loadOneSlot()
{
    debugs(47,5, sd->index << " slot " << loadingPos << " at " <<
           dbOffset << " <= " << dbSize);

    ++counts.scancount;

    if (lseek(fd, dbOffset, SEEK_SET) < 0)
        failure("cannot seek to db entry", errno);

    buf.reset();

    if (!storeRebuildLoadEntry(fd, sd->index, buf, counts))
        return;

    const SlotId slotId = loadingPos;

    // get our header
    DbCellHeader header;
    if (buf.contentSize() < static_cast<mb_size_t>(sizeof(header))) {
        debugs(47, DBG_IMPORTANT, "WARNING: cache_dir[" << sd->index << "]: " <<
               "Ignoring truncated " << buf.contentSize() << "-byte " <<
               "cache entry meta data at " << dbOffset);
        freeUnusedSlot(slotId, true);
        return;
    }
    memcpy(&header, buf.content(), sizeof(header));
    if (header.empty()) {
        freeUnusedSlot(slotId, false);
        return;
    }
    if (!header.sane(dbSlotSize, dbSlotLimit)) {
        debugs(47, DBG_IMPORTANT, "WARNING: cache_dir[" << sd->index << "]: " <<
               "Ignoring malformed cache entry meta data at " << dbOffset);
        freeUnusedSlot(slotId, true);
        return;
    }
    buf.consume(sizeof(header)); // optimize to avoid memmove()

    useNewSlot(slotId, header);
}

/// parse StoreEntry basics and add them to the map, returning true on success
bool
Rock::Rebuild::importEntry(Ipc::StoreMapAnchor &anchor, const sfileno fileno, const DbCellHeader &header)
{
    cache_key key[SQUID_MD5_DIGEST_LENGTH];
    StoreEntry loadedE;
    const uint64_t knownSize = header.entrySize > 0 ?
                               header.entrySize : anchor.basics.swap_file_sz.load();
    if (!storeRebuildParseEntry(buf, loadedE, key, counts, knownSize))
        return false;

    // the entry size may be unknown, but if it is known, it is authoritative

    debugs(47, 8, "importing basics for entry " << fileno <<
           " inode.entrySize: " << header.entrySize <<
           " swap_file_sz: " << loadedE.swap_file_sz);
    anchor.set(loadedE);

    // we have not validated whether all db cells for this entry were loaded
    EBIT_CLR(anchor.basics.flags, ENTRY_VALIDATED);

    // loadedE->dump(5);

    return true;
}
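
// knownSize selection above, by example (values illustrative): an inode with
// header.entrySize == 0 but an anchor already primed with swap_file_sz == 4096
// yields knownSize 4096; a fresh anchor (swap_file_sz == 0) with
// header.entrySize == 4096 also yields 4096; both zero leaves the size unknown.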

void
Rock::Rebuild::validationSteps()
{
    debugs(47, 5, sd->index << " validating from " << validationPos);

    // see loadingSteps() for the rationale; TODO: avoid duplication
    const int maxSpentMsec = 50; // keep small: validation does not do I/O
    const timeval loopStart = current_time;

    int validated = 0;
    while (!doneValidating()) {
        if (validationPos < dbEntryLimit)
            validateOneEntry(validationPos);
        else
            validateOneSlot(validationPos - dbEntryLimit);
        ++validationPos;
        ++validated;

        if (validationPos % 1000 == 0)
            debugs(20, 2, "validated: " << validationPos);

        if (opt_foreground_rebuild)
            continue; // skip "few entries at a time" check below

        getCurrentTime();
        const double elapsedMsec = tvSubMsec(loopStart, current_time);
        if (elapsedMsec > maxSpentMsec || elapsedMsec < 0) {
            debugs(47, 5, "pausing after " << validated << " entries in " <<
                   elapsedMsec << "ms; " << (elapsedMsec/validated) << "ms per entry");
            break;
        }
    }
}

/// Either make the entry accessible to all or throw.
/// This method assumes it is called only when no more entry slots are expected.
void
Rock::Rebuild::finalizeOrThrow(const sfileno fileNo, LoadingEntry &le)
{
    // walk all map-linked slots, starting from inode, and mark each
    Ipc::StoreMapAnchor &anchor = sd->map->writeableEntry(fileNo);
    Must(le.size > 0); // paranoid
    uint64_t mappedSize = 0;
    SlotId slotId = anchor.start;
    while (slotId >= 0 && mappedSize < le.size) {
        LoadingSlot slot = loadingSlot(slotId); // throws if we have not loaded that slot
        Must(!slot.finalized()); // no loops or stealing from other entries
        Must(slot.mapped()); // all our slots should be in the sd->map
        Must(!slot.freed()); // all our slots should still be present
        slot.finalized(true);

        Ipc::StoreMapSlice &mapSlice = sd->map->writeableSlice(fileNo, slotId);
        Must(mapSlice.size > 0); // paranoid
        mappedSize += mapSlice.size;
        slotId = mapSlice.next;
    }
    /* no hodgepodge entries: one entry - one full chain and no leftovers */
    Must(slotId < 0);
    Must(mappedSize == le.size);

    if (!anchor.basics.swap_file_sz)
        anchor.basics.swap_file_sz = le.size;
    EBIT_SET(anchor.basics.flags, ENTRY_VALIDATED);
    le.state(LoadingEntry::leLoaded);
    sd->map->closeForWriting(fileNo, false);
    ++counts.objcount;
}

/// Either make the entry accessible to all or free it.
/// This method must only be called when no more entry slots are expected.
void
Rock::Rebuild::finalizeOrFree(const sfileno fileNo, LoadingEntry &le)
{
    try {
        finalizeOrThrow(fileNo, le);
    } catch (const std::exception &ex) {
        freeBadEntry(fileNo, ex.what());
    }
}

void
Rock::Rebuild::validateOneEntry(const sfileno fileNo)
{
    LoadingEntry entry = loadingEntry(fileNo);
    switch (entry.state()) {

    case LoadingEntry::leLoading:
        finalizeOrFree(fileNo, entry);
        break;

    case LoadingEntry::leEmpty: // no entry hashed to this position
    case LoadingEntry::leLoaded: // we have already unlocked this entry
    case LoadingEntry::leCorrupted: // we have already removed this entry
    case LoadingEntry::leIgnored: // we have already discarded this entry
        break;
    }
}

void
Rock::Rebuild::validateOneSlot(const SlotId slotId)
{
    const LoadingSlot slot = loadingSlot(slotId);
    // there should not be any unprocessed slots left
    Must(slot.freed() || (slot.mapped() && slot.finalized()));
}

/// Marks remaining bad entry slots as free and unlocks the entry. The map
/// cannot do this because Loading entries may have holes in the slots chain.
void
Rock::Rebuild::freeBadEntry(const sfileno fileno, const char *eDescription)
{
    debugs(47, 2, "cache_dir #" << sd->index << ' ' << eDescription <<
           " entry " << fileno << " is ignored during rebuild");

    LoadingEntry le = loadingEntry(fileno);
    le.state(LoadingEntry::leCorrupted);

    Ipc::StoreMapAnchor &anchor = sd->map->writeableEntry(fileno);
    assert(anchor.start < 0 || le.size > 0);
    for (SlotId slotId = anchor.start; slotId >= 0;) {
        const SlotId next = loadingSlot(slotId).more;
        freeSlot(slotId, true);
        slotId = next;
    }

    sd->map->forgetWritingEntry(fileno);
}

void
Rock::Rebuild::swanSong()
{
    debugs(47,3, HERE << "cache_dir #" << sd->index << " rebuild level: " <<
           StoreController::store_dirs_rebuilding);
    --StoreController::store_dirs_rebuilding;
    storeRebuildComplete(&counts);
}

void
Rock::Rebuild::failure(const char *msg, int errNo)
{
    debugs(47,5, sd->index << " slot " << loadingPos << " at " <<
           dbOffset << " <= " << dbSize);

    if (errNo)
        debugs(47, DBG_CRITICAL, "ERROR: Rock cache_dir rebuild failure: " << xstrerr(errNo));
    debugs(47, DBG_CRITICAL, "Do you need to run 'squid -z' to initialize storage?");

    assert(sd);
    fatalf("Rock cache_dir[%d] rebuild of %s failed: %s.",
           sd->index, sd->filePath, msg);
}

/// adds slot to the free slot index
void
Rock::Rebuild::freeSlot(const SlotId slotId, const bool invalid)
{
    debugs(47,5, sd->index << " frees slot " << slotId);
    LoadingSlot slot = loadingSlot(slotId);
    assert(!slot.freed());
    slot.freed(true);

    if (invalid) {
        ++counts.invalid;
        //sd->unlink(fileno); leave garbage on disk, it should not hurt
    }

    Ipc::Mem::PageId pageId;
    pageId.pool = sd->index+1;
    pageId.number = slotId+1;
    sd->freeSlots->push(pageId);
}
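
// Note on the +1 offsets above: an Ipc::Mem::PageId with a zero pool or page
// number reads as "unset" elsewhere in Squid, so disk pool and slot IDs are
// stored 1-based in PageId (our reading of PageId semantics, for orientation).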

/// freeSlot() for never-been-mapped slots
void
Rock::Rebuild::freeUnusedSlot(const SlotId slotId, const bool invalid)
{
    LoadingSlot slot = loadingSlot(slotId);
    // mapped slots must be freed via freeBadEntry() to keep the map in sync
    assert(!slot.mapped());
    freeSlot(slotId, invalid);
}

/// adds slot to the entry chain in the map
void
Rock::Rebuild::mapSlot(const SlotId slotId, const DbCellHeader &header)
{
    LoadingSlot slot = loadingSlot(slotId);
    assert(!slot.mapped());
    assert(!slot.freed());
    slot.mapped(true);

    Ipc::StoreMapSlice slice;
    slice.next = header.nextSlot;
    slice.size = header.payloadSize;
    sd->map->importSlice(slotId, slice);
}

template <class SlotIdType> // accommodates atomic and simple SlotIds.
void
Rock::Rebuild::chainSlots(SlotIdType &from, const SlotId to)
{
    LoadingSlot slot = loadingSlot(to);
    assert(slot.more < 0);
    slot.more = from; // may still be unset
    from = to;
}
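
// chainSlots() prepends: if `from` currently heads a list at slot 7, then
// chainSlots(from, 3) makes slot 3 the new head with loadingSlot(3).more == 7
// (numbers illustrative). Order does not matter here because the `more` links
// are explicitly unordered (see LoadingSlot::more).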

/// adds slot to an existing entry chain; caller must check that the slot
/// belongs to the chain it is being added to
void
Rock::Rebuild::addSlotToEntry(const sfileno fileno, const SlotId slotId, const DbCellHeader &header)
{
    LoadingEntry le = loadingEntry(fileno);
    Ipc::StoreMapAnchor &anchor = sd->map->writeableEntry(fileno);

    debugs(47,9, "adding " << slotId << " to entry " << fileno);
    // we do not need to preserve the order
    if (le.anchored()) {
        LoadingSlot inode = loadingSlot(anchor.start);
        chainSlots(inode.more, slotId);
    } else {
        chainSlots(anchor.start, slotId);
    }

    le.size += header.payloadSize; // must precede freeBadEntry() calls

    if (header.firstSlot == slotId) {
        debugs(47,5, "added inode");

        if (le.anchored()) { // we have already added another inode slot
            freeBadEntry(fileno, "inode conflict");
            ++counts.clashcount;
            return;
        }

        le.anchored(true);

        if (!importEntry(anchor, fileno, header)) {
            freeBadEntry(fileno, "corrupted metainfo");
            return;
        }

        // set total entry size and/or check it for consistency
        if (const uint64_t totalSize = header.entrySize) {
            assert(totalSize != static_cast<uint64_t>(-1));
            if (!anchor.basics.swap_file_sz) {
                anchor.basics.swap_file_sz = totalSize;
                assert(anchor.basics.swap_file_sz != static_cast<uint64_t>(-1));
            } else if (totalSize != anchor.basics.swap_file_sz) {
                freeBadEntry(fileno, "size mismatch");
                return;
            }
        }
    }

    const uint64_t totalSize = anchor.basics.swap_file_sz; // may be 0/unknown

    if (totalSize > 0 && le.size > totalSize) { // overflow
        debugs(47, 8, "overflow: " << le.size << " > " << totalSize);
        freeBadEntry(fileno, "overflowing");
        return;
    }

    mapSlot(slotId, header);
    if (totalSize > 0 && le.size == totalSize)
        finalizeOrFree(fileno, le); // entry is probably fully loaded now
}

/// initialize housekeeping information for a newly accepted entry
void
Rock::Rebuild::primeNewEntry(Ipc::StoreMap::Anchor &anchor, const sfileno fileno, const DbCellHeader &header)
{
    anchor.setKey(reinterpret_cast<const cache_key*>(header.key));
    assert(header.firstSlot >= 0);
    anchor.start = -1; // addSlotToEntry() will set it

    assert(anchor.basics.swap_file_sz != static_cast<uint64_t>(-1));

    LoadingEntry le = loadingEntry(fileno);
    le.state(LoadingEntry::leLoading);
    le.version = header.version;
    le.size = 0;
}

/// handle a slot from an entry that we have not seen before
void
Rock::Rebuild::startNewEntry(const sfileno fileno, const SlotId slotId, const DbCellHeader &header)
{
    // A miss may have been stored at our fileno while we were loading other
    // slots from disk. We ought to preserve that entry because it is fresher.
    const bool overwriteExisting = false;
    if (Ipc::StoreMap::Anchor *anchor = sd->map->openForWritingAt(fileno, overwriteExisting)) {
        primeNewEntry(*anchor, fileno, header);
        addSlotToEntry(fileno, slotId, header); // may fail
        assert(anchor->basics.swap_file_sz != static_cast<uint64_t>(-1));
    } else {
        // A new from-network entry is occupying our map slot; let it be, but
        // save us from the trouble of going through the above motions again.
        LoadingEntry le = loadingEntry(fileno);
        le.state(LoadingEntry::leIgnored);
        freeUnusedSlot(slotId, false);
    }
}

/// does the header belong to the fileno entry being loaded?
bool
Rock::Rebuild::sameEntry(const sfileno fileno, const DbCellHeader &header) const
{
    // Header updates always result in multi-start chains and often
    // result in multi-version chains so we can only compare the keys.
    const Ipc::StoreMap::Anchor &anchor = sd->map->writeableEntry(fileno);
    return anchor.sameKey(reinterpret_cast<const cache_key*>(header.key));
}

/// handle freshly loaded (and validated) db slot header
void
Rock::Rebuild::useNewSlot(const SlotId slotId, const DbCellHeader &header)
{
    const cache_key *const key =
        reinterpret_cast<const cache_key*>(header.key);
    const sfileno fileno = sd->map->fileNoByKey(key);
    assert(0 <= fileno && fileno < dbEntryLimit);

    LoadingEntry le = loadingEntry(fileno);
    debugs(47,9, "entry " << fileno << " state: " << le.state() << ", inode: " <<
           header.firstSlot << ", size: " << header.payloadSize);

    switch (le.state()) {

    case LoadingEntry::leEmpty: {
        startNewEntry(fileno, slotId, header);
        break;
    }

    case LoadingEntry::leLoading: {
        if (sameEntry(fileno, header)) {
            addSlotToEntry(fileno, slotId, header); // may fail
        } else {
            // either the loading chain or this slot is stale;
            // be conservative and ignore both (and any future ones)
            freeBadEntry(fileno, "duplicated");
            freeUnusedSlot(slotId, true);
            ++counts.dupcount;
        }
        break;
    }

    case LoadingEntry::leLoaded: {
        // either the previously loaded chain or this slot is stale;
        // be conservative and ignore both (and any future ones)
        le.state(LoadingEntry::leCorrupted);
        sd->map->freeEntry(fileno); // may not be immediately successful
        freeUnusedSlot(slotId, true);
        ++counts.dupcount;
        break;
    }

    case LoadingEntry::leCorrupted: {
        // previously seen slots messed things up so we must ignore this one
        freeUnusedSlot(slotId, true);
        break;
    }

    case LoadingEntry::leIgnored: {
        // already replaced by a fresher or colliding from-network entry
        freeUnusedSlot(slotId, false);
        break;
    }
    }
}
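
// A summary of the per-entry state machine driven by useNewSlot() (derived
// from the cases above, for orientation only):
//   leEmpty   --first slot--> leLoading (or leIgnored if the map slot is taken)
//   leLoading --all slots accounted for--> leLoaded (via finalizeOrFree)
//   leLoading --mismatch/overflow/duplicate--> leCorrupted (via freeBadEntry)
//   leLoaded  --late duplicate slot--> leCorrupted
//   leCorrupted and leIgnored absorb all further slots for that entry.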