From 8ecbe78dd0b13a6a23bfaa066a120ec17342813d Mon Sep 17 00:00:00 2001 From: Eduard Bagdasaryan Date: Fri, 9 Oct 2020 16:34:24 +0000 Subject: [PATCH] Do not duplicate free disk slots on diskers restart (#731) When a disker process starts, it scans the on-disk storage to populate shared-memory indexes of cached entries and unused/free slots. This process may take more than ten minutes for large caches. Squid workers use these indexes as they are being populated by diskers - workers do not wait for the slow index rebuild process to finish. Cached entries can be retrieved and misses can be cached almost immediately. The disker does not "lock" the free slots to itself because the disker does not want to preclude workers from caching new entries while the disker is scanning the rock storage to build a complete index of old cached entries (and free slots). The disker knows that it shares the disk slot index with workers and is careful to populate the indexes without confusing workers. However, if the disker process is restarted for any reason (e.g., a crash or kid registration timeout), the disker starts scanning its on-disk storage from the beginning, adding to the indexes that already contain some entries (added by the first disker incarnation and adjusted by workers). An attempt to index the same cached object twice may remove that object. Such a removal would be wasteful but not dangerous. Indexing a free/unused slot twice can be disastrous: * If Squid is lucky, the disker quickly hits an assertion (or a fatal exception) when trying to add the already free slot to the free slot collection, as long as no worker starts using the free slot between additions (detailed in the next bullet). * Unfortunately, there is also a good chance that a worker starts using the free slot before the (restarted) disker adds it a second time. In this case, the "double free" event cannot be detected. 
Both free slot copies (pointing to the same disk location) will eventually be used by a worker to cache new objects. In the worst case, it may lead to completely wrong cached response content being served to an unsuspecting user. The risk is partially mitigated by the fact that disker crashes/restarts are rare. Now, if a disker did not finish indexing before being restarted, it resumes from the next db slot, thus avoiding indexing the same slot twice. In other words, the disker forgets/ignores all the slots scanned prior to the restart. Squid logs "Resuming indexing cache_dir..." instead of the usual "Loading cache_dir..." to mark these (hopefully rare) occurrences. Also simplified code that delays post-indexing revalidation of cache entries (i.e. store_dirs_rebuilding hacks). We touched that code because the updated rock code will now refuse to reindex the already indexed cache_dir. That decision relies on shared memory info and should not be made where the old code was fiddling with store_dirs_rebuilding level. After several attempts resulted in subtle bugs, we decided to simplify that hack to reduce the risks of mismanaging store_dirs_rebuilding. Adjusted old level-1 "Store rebuilding is ... complete" messages to report more details (especially useful when rebuilding kid crashes). The code now also reports some of the "unknown rebuild goal" UFS cases better, but more work is needed in that area. Also updated several rebuild-related counters to use int64_t instead of int. Those changes stemmed from the need to add a new counter (StoreRebuildData::validations), and we did not want to add an int counter that will sooner or later overflow (especially when counting db slots (across all cache_dirs) rather than just cache entries (from one cache_dir)). That new counter interacted with several others, so we had to update them as well. Long-term, all old StoreRebuildData counters and the cache_dir code feeding them should be updated/revised. 
--- src/fs/rock/RockRebuild.cc | 217 +++++++++++++++++++++++++------- src/fs/rock/RockRebuild.h | 55 ++++++-- src/fs/rock/RockSwapDir.cc | 19 +-- src/fs/rock/RockSwapDir.h | 5 +- src/fs/ufs/RebuildState.cc | 3 +- src/fs/ufs/UFSSwapDir.cc | 1 - src/ipc/StoreMap.h | 19 +++ src/ipc/mem/Pointer.h | 23 +++- src/ipc/mem/Segment.cc | 3 +- src/ipc/mem/Segment.h | 4 +- src/store/Disks.cc | 8 ++ src/store_rebuild.cc | 60 ++++++--- src/store_rebuild.h | 35 ++++++ src/tests/stub_store_rebuild.cc | 10 ++ 14 files changed, 372 insertions(+), 90 deletions(-) diff --git a/src/fs/rock/RockRebuild.cc b/src/fs/rock/RockRebuild.cc index 87723dac0e..34691db595 100644 --- a/src/fs/rock/RockRebuild.cc +++ b/src/fs/rock/RockRebuild.cc @@ -15,11 +15,10 @@ #include "fs/rock/RockSwapDir.h" #include "fs_io.h" #include "globals.h" -#include "ipc/StoreMap.h" #include "md5.h" +#include "sbuf/Stream.h" #include "SquidTime.h" #include "Store.h" -#include "store_rebuild.h" #include "tools.h" #include @@ -74,6 +73,20 @@ CBDATA_NAMESPACED_CLASS_INIT(Rock, Rebuild); namespace Rock { +static bool +DoneLoading(const int64_t loadingPos, const int64_t dbSlotLimit) +{ + return loadingPos >= dbSlotLimit; +} + +static bool +DoneValidating(const int64_t validationPos, const int64_t dbSlotLimit, const int64_t dbEntryLimit) +{ + // paranoid slot checking is only enabled with squid -S + const auto extraWork = opt_store_doublecheck ? 
dbSlotLimit : 0; + return validationPos >= (dbEntryLimit + extraWork); +} + /// low-level anti-padding storage class for LoadingEntry and LoadingSlot flags class LoadingFlags { @@ -146,24 +159,34 @@ private: class LoadingParts { public: - LoadingParts(int dbSlotLimit, int dbEntryLimit); - LoadingParts(LoadingParts&&) = delete; // paranoid (often too huge to copy) + using Sizes = Ipc::StoreMapItems; + using Versions = Ipc::StoreMapItems; + using Mores = Ipc::StoreMapItems; + using Flags = Ipc::StoreMapItems; -private: - friend class LoadingEntry; - friend class LoadingSlot; + LoadingParts(const SwapDir &dir, const bool resuming); + ~LoadingParts(); + // lacking copying/moving code and often too huge to copy + LoadingParts(LoadingParts&&) = delete; + + Sizes &sizes() const { return *sizesOwner->object(); } + Versions &versions() const { return *versionsOwner->object(); } + Mores &mores() const { return *moresOwner->object(); } + Flags &flags() const { return *flagsOwner->object(); } + +private: /* Anti-padding storage. With millions of entries, padding matters! 
*/ /* indexed by sfileno */ - std::vector sizes; ///< LoadingEntry::size for all entries - std::vector versions; ///< LoadingEntry::version for all entries + Sizes::Owner *sizesOwner; ///< LoadingEntry::size for all entries + Versions::Owner *versionsOwner; ///< LoadingEntry::version for all entries /* indexed by SlotId */ - std::vector mores; ///< LoadingSlot::more for all slots + Mores::Owner *moresOwner; ///< LoadingSlot::more for all slots /* entry flags are indexed by sfileno; slot flags -- by SlotId */ - std::vector flags; ///< all LoadingEntry and LoadingSlot flags + Flags::Owner *flagsOwner; ///< all LoadingEntry and LoadingSlot flags }; } /* namespace Rock */ @@ -171,46 +194,119 @@ private: /* LoadingEntry */ Rock::LoadingEntry::LoadingEntry(const sfileno fileNo, LoadingParts &source): - size(source.sizes.at(fileNo)), - version(source.versions.at(fileNo)), - flags(source.flags.at(fileNo)) + size(source.sizes().at(fileNo)), + version(source.versions().at(fileNo)), + flags(source.flags().at(fileNo)) { } /* LoadingSlot */ Rock::LoadingSlot::LoadingSlot(const SlotId slotId, LoadingParts &source): - more(source.mores.at(slotId)), - flags(source.flags.at(slotId)) + more(source.mores().at(slotId)), + flags(source.flags().at(slotId)) { } /* LoadingParts */ -Rock::LoadingParts::LoadingParts(const int dbEntryLimit, const int dbSlotLimit): - sizes(dbEntryLimit, 0), - versions(dbEntryLimit, 0), - mores(dbSlotLimit, -1), - flags(dbSlotLimit) +template +inline typename T::Owner * +createOwner(const char *dirPath, const char *sfx, const int64_t limit, const bool resuming) +{ + auto id = Ipc::Mem::Segment::Name(SBuf(dirPath), sfx); + return resuming ? 
Ipc::Mem::Owner::Old(id.c_str()) : shm_new(T)(id.c_str(), limit); +} + +Rock::LoadingParts::LoadingParts(const SwapDir &dir, const bool resuming): + sizesOwner(createOwner(dir.path, "rebuild_sizes", dir.entryLimitActual(), resuming)), + versionsOwner(createOwner(dir.path, "rebuild_versions", dir.entryLimitActual(), resuming)), + moresOwner(createOwner(dir.path, "rebuild_mores", dir.slotLimitActual(), resuming)), + flagsOwner(createOwner(dir.path, "rebuild_flags", dir.slotLimitActual(), resuming)) { - assert(sizes.size() == versions.size()); // every entry has both fields - assert(sizes.size() <= mores.size()); // every entry needs slot(s) - assert(mores.size() == flags.size()); // every slot needs a set of flags + assert(sizes().capacity == versions().capacity); // every entry has both fields + assert(sizes().capacity <= mores().capacity); // every entry needs slot(s) + assert(mores().capacity == flags().capacity); // every slot needs a set of flags + + if (!resuming) { + // other parts rely on shared memory segments being zero-initialized + // TODO: refactor the next slot pointer to use 0 for nil values + mores().fill(-1); + } +} + +Rock::LoadingParts::~LoadingParts() +{ + delete sizesOwner; + delete versionsOwner; + delete moresOwner; + delete flagsOwner; +} + +/* Rock::Rebuild::Stats */ + +SBuf +Rock::Rebuild::Stats::Path(const char *dirPath) +{ + return Ipc::Mem::Segment::Name(SBuf(dirPath), "rebuild_stats"); +} + +Ipc::Mem::Owner* +Rock::Rebuild::Stats::Init(const SwapDir &dir) +{ + return shm_new(Stats)(Path(dir.path).c_str()); +} + +bool +Rock::Rebuild::Stats::completed(const SwapDir &sd) const +{ + return DoneLoading(counts.scancount, sd.slotLimitActual()) && + DoneValidating(counts.validations, sd.slotLimitActual(), sd.entryLimitActual()); } /* Rebuild */ -Rock::Rebuild::Rebuild(SwapDir *dir): AsyncJob("Rock::Rebuild"), +bool +Rock::Rebuild::IsResponsible(const SwapDir &sd) +{ + // in SMP mode, only the disker is responsible for populating the map + return 
!UsingSmp() || IamDiskProcess(); +} + +bool +Rock::Rebuild::Start(SwapDir &dir) +{ + if (!IsResponsible(dir)) { + debugs(47, 2, "not responsible for indexing cache_dir #" << + dir.index << " from " << dir.filePath); + return false; + } + + const auto stats = shm_old(Rebuild::Stats)(Stats::Path(dir.path).c_str()); + if (stats->completed(dir)) { + debugs(47, 2, "already indexed cache_dir #" << + dir.index << " from " << dir.filePath); + return false; + } + + Must(AsyncJob::Start(new Rebuild(&dir, stats))); + return true; +} + +Rock::Rebuild::Rebuild(SwapDir *dir, const Ipc::Mem::Pointer &s): AsyncJob("Rock::Rebuild"), sd(dir), parts(nullptr), + stats(s), dbSize(0), dbSlotSize(0), dbSlotLimit(0), dbEntryLimit(0), fd(-1), dbOffset(0), - loadingPos(0), - validationPos(0) + loadingPos(stats->counts.scancount), + validationPos(stats->counts.validations), + counts(stats->counts), + resuming(stats->counts.started()) { assert(sd); dbSize = sd->diskOffsetLimit(); // we do not care about the trailer waste @@ -218,29 +314,37 @@ Rock::Rebuild::Rebuild(SwapDir *dir): AsyncJob("Rock::Rebuild"), dbEntryLimit = sd->entryLimitActual(); dbSlotLimit = sd->slotLimitActual(); assert(dbEntryLimit <= dbSlotLimit); + registerRunner(); } Rock::Rebuild::~Rebuild() { if (fd >= 0) file_close(fd); + // normally, segments are used until the Squid instance quits, + // but these indexing-only segments are no longer needed delete parts; } +void +Rock::Rebuild::startShutdown() +{ + mustStop("startShutdown"); +} + /// prepares and initiates entry loading sequence void Rock::Rebuild::start() { - // in SMP mode, only the disker is responsible for populating the map - if (UsingSmp() && !IamDiskProcess()) { - debugs(47, 2, "Non-disker skips rebuilding of cache_dir #" << - sd->index << " from " << sd->filePath); - mustStop("non-disker"); - return; - } + assert(IsResponsible(*sd)); - debugs(47, DBG_IMPORTANT, "Loading cache_dir #" << sd->index << - " from " << sd->filePath); + if (!resuming) { + debugs(47, 
DBG_IMPORTANT, "Loading cache_dir #" << sd->index << + " from " << sd->filePath); + } else { + debugs(47, DBG_IMPORTANT, "Resuming indexing cache_dir #" << sd->index << + " from " << sd->filePath << ':' << progressDescription()); + } fd = file_open(sd->filePath, O_RDONLY | O_BINARY); if (fd < 0) @@ -254,9 +358,12 @@ Rock::Rebuild::start() assert(sizeof(DbCellHeader) < SM_PAGE_SIZE); buf.init(SM_PAGE_SIZE, SM_PAGE_SIZE); - dbOffset = SwapDir::HeaderSize; + dbOffset = SwapDir::HeaderSize + loadingPos * dbSlotSize; - parts = new LoadingParts(dbEntryLimit, dbSlotLimit); + assert(!parts); + parts = new LoadingParts(*sd, resuming); + + counts.updateStartTime(current_time); checkpoint(); } @@ -272,15 +379,13 @@ Rock::Rebuild::checkpoint() bool Rock::Rebuild::doneLoading() const { - return loadingPos >= dbSlotLimit; + return DoneLoading(loadingPos, dbSlotLimit); } bool Rock::Rebuild::doneValidating() const { - // paranoid slot checking is only enabled with squid -S - return validationPos >= dbEntryLimit + - (opt_store_doublecheck ? 
dbSlotLimit : 0); + return DoneValidating(validationPos, dbSlotLimit, dbEntryLimit); } bool @@ -319,7 +424,7 @@ Rock::Rebuild::loadingSteps() const int maxSpentMsec = 50; // keep small: most RAM I/Os are under 1ms const timeval loopStart = current_time; - int loaded = 0; + int64_t loaded = 0; while (!doneLoading()) { loadOneSlot(); dbOffset += dbSlotSize; @@ -363,6 +468,8 @@ Rock::Rebuild::loadOneSlot() debugs(47,5, sd->index << " slot " << loadingPos << " at " << dbOffset << " <= " << dbSize); + // increment before loadingPos to avoid getting stuck at a slot + // in a case of crash ++counts.scancount; if (lseek(fd, dbOffset, SEEK_SET) < 0) @@ -435,8 +542,11 @@ Rock::Rebuild::validationSteps() const int maxSpentMsec = 50; // keep small: validation does not do I/O const timeval loopStart = current_time; - int validated = 0; + int64_t validated = 0; while (!doneValidating()) { + // increment before validationPos to avoid getting stuck at a slot + // in a case of crash + ++counts.validations; if (validationPos < dbEntryLimit) validateOneEntry(validationPos); else @@ -559,7 +669,6 @@ Rock::Rebuild::swanSong() { debugs(47,3, HERE << "cache_dir #" << sd->index << " rebuild level: " << StoreController::store_dirs_rebuilding); - --StoreController::store_dirs_rebuilding; storeRebuildComplete(&counts); } @@ -797,3 +906,21 @@ Rock::Rebuild::useNewSlot(const SlotId slotId, const DbCellHeader &header) } } +SBuf +Rock::Rebuild::progressDescription() const +{ + SBufStream str; + + str << Debug::Extra << "slots loaded: " << Progress(loadingPos, dbSlotLimit); + + const auto validatingEntries = validationPos < dbEntryLimit; + const auto entriesValidated = validatingEntries ? validationPos : dbEntryLimit; + str << Debug::Extra << "entries validated: " << Progress(entriesValidated, dbEntryLimit); + if (opt_store_doublecheck) { + const auto slotsValidated = validatingEntries ? 
0 : (validationPos - dbEntryLimit); + str << Debug::Extra << "slots validated: " << Progress(slotsValidated, dbSlotLimit); + } + + return str.buf(); +} + diff --git a/src/fs/rock/RockRebuild.h b/src/fs/rock/RockRebuild.h index d1e2c62070..5309a59a67 100644 --- a/src/fs/rock/RockRebuild.h +++ b/src/fs/rock/RockRebuild.h @@ -10,8 +10,11 @@ #define SQUID_FS_ROCK_REBUILD_H #include "base/AsyncJob.h" +#include "base/RunnersRegistry.h" #include "cbdata.h" #include "fs/rock/forward.h" +#include "ipc/mem/Pointer.h" +#include "ipc/StoreMap.h" #include "MemBuf.h" #include "store_rebuild.h" @@ -24,15 +27,41 @@ class LoadingParts; /// \ingroup Rock /// manages store rebuild process: loading meta information from db on disk -class Rebuild: public AsyncJob +class Rebuild: public AsyncJob, private IndependentRunner { CBDATA_CHILD(Rebuild); public: - Rebuild(SwapDir *dir); - virtual ~Rebuild() override; + /// cache_dir indexing statistics shared across same-kid process restarts + class Stats + { + public: + static SBuf Path(const char *dirPath); + static Ipc::Mem::Owner *Init(const SwapDir &); + + static size_t SharedMemorySize() { return sizeof(Stats); } + size_t sharedMemorySize() const { return SharedMemorySize(); } + + /// whether the rebuild is finished already + bool completed(const SwapDir &) const; + + StoreRebuildData counts; + }; + + /// starts indexing the given cache_dir if that indexing is necessary + /// \returns whether the indexing was necessary (and, hence, started) + static bool Start(SwapDir &dir); protected: + /// whether the current kid is responsible for rebuilding this db file + static bool IsResponsible(const SwapDir &); + + Rebuild(SwapDir *dir, const Ipc::Mem::Pointer &); + virtual ~Rebuild() override; + + /* Registered Runner API */ + virtual void startShutdown() override; + /* AsyncJob API */ virtual void start() override; virtual bool doneAll() const override; @@ -72,21 +101,29 @@ private: bool sameEntry(const sfileno fileno, const DbCellHeader 
&header) const; + SBuf progressDescription() const; + SwapDir *sd; LoadingParts *parts; ///< parts of store entries being loaded from disk + Ipc::Mem::Pointer stats; ///< indexing statistics in shared memory + int64_t dbSize; int dbSlotSize; ///< the size of a db cell, including the cell header - int dbSlotLimit; ///< total number of db cells - int dbEntryLimit; ///< maximum number of entries that can be stored in db + int64_t dbSlotLimit; ///< total number of db cells + int64_t dbEntryLimit; ///< maximum number of entries that can be stored in db int fd; // store db file descriptor - int64_t dbOffset; - sfileno loadingPos; ///< index of the db slot being loaded from disk now - sfileno validationPos; ///< index of the loaded db slot being validated now + int64_t dbOffset; // TODO: calculate in a method, using loadingPos + int64_t loadingPos; ///< index of the db slot being loaded from disk now + int64_t validationPos; ///< index of the loaded db slot being validated now MemBuf buf; ///< space to load current db slot (and entry metadata) into - StoreRebuildData counts; + StoreRebuildData &counts; ///< a reference to the shared memory counters + + /// whether we have started indexing this cache_dir before, + /// presumably in the previous process performing the same-kid role + const bool resuming; static void Steps(void *data); }; diff --git a/src/fs/rock/RockSwapDir.cc b/src/fs/rock/RockSwapDir.cc index ab1cf4062b..14e7744211 100644 --- a/src/fs/rock/RockSwapDir.cc +++ b/src/fs/rock/RockSwapDir.cc @@ -19,7 +19,6 @@ #include "fs/rock/RockHeaderUpdater.h" #include "fs/rock/RockIoRequests.h" #include "fs/rock/RockIoState.h" -#include "fs/rock/RockRebuild.h" #include "fs/rock/RockSwapDir.h" #include "globals.h" #include "ipc/mem/Pages.h" @@ -307,11 +306,6 @@ Rock::SwapDir::init() theFile = io->newFile(filePath); theFile->configure(fileConfig); theFile->open(O_RDWR, 0644, this); - - // Increment early. 
Otherwise, if one SwapDir finishes rebuild before - // others start, storeRebuildComplete() will think the rebuild is over! - // TODO: move store_dirs_rebuilding hack to store modules that need it. - ++StoreController::store_dirs_rebuilding; } bool @@ -586,13 +580,6 @@ Rock::SwapDir::validateOptions() } } -void -Rock::SwapDir::rebuild() -{ - //++StoreController::store_dirs_rebuilding; // see Rock::SwapDir::init() - AsyncJob::Start(new Rebuild(this)); -} - bool Rock::SwapDir::canStore(const StoreEntry &e, int64_t diskSpaceNeeded, int &load) const { @@ -830,7 +817,8 @@ Rock::SwapDir::ioCompletedNotification() std::setw(7) << map->entryLimit() << " entries, and " << std::setw(7) << map->sliceLimit() << " slots"); - rebuild(); + if (!Rebuild::Start(*this)) + storeRebuildComplete(nullptr); } void @@ -1129,6 +1117,8 @@ void Rock::SwapDirRr::create() Must(mapOwners.empty() && freeSlotsOwners.empty()); for (int i = 0; i < Config.cacheSwap.n_configured; ++i) { if (const Rock::SwapDir *const sd = dynamic_cast(INDEXSD(i))) { + rebuildStatsOwners.push_back(Rebuild::Stats::Init(*sd)); + const int64_t capacity = sd->slotLimitActual(); SwapDir::DirMap::Owner *const mapOwner = @@ -1151,6 +1141,7 @@ void Rock::SwapDirRr::create() Rock::SwapDirRr::~SwapDirRr() { for (size_t i = 0; i < mapOwners.size(); ++i) { + delete rebuildStatsOwners[i]; delete mapOwners[i]; delete freeSlotsOwners[i]; } diff --git a/src/fs/rock/RockSwapDir.h b/src/fs/rock/RockSwapDir.h index 82b1df04f2..ef2fc1809e 100644 --- a/src/fs/rock/RockSwapDir.h +++ b/src/fs/rock/RockSwapDir.h @@ -13,10 +13,12 @@ #include "DiskIO/IORequestor.h" #include "fs/rock/forward.h" #include "fs/rock/RockDbCell.h" +#include "fs/rock/RockRebuild.h" #include "ipc/mem/Page.h" #include "ipc/mem/PageStack.h" #include "ipc/StoreMap.h" #include "store/Disk.h" +#include "store_rebuild.h" #include class DiskIOStrategy; @@ -116,8 +118,6 @@ protected: bool parseSizeOption(char const *option, const char *value, int reconfiguring); void 
dumpSizeOption(StoreEntry * e) const; - void rebuild(); ///< starts loading and validating stored entry metadata - bool full() const; ///< no more entries can be stored without purging void trackReferences(StoreEntry &e); ///< add to replacement policy scope void ignoreReferences(StoreEntry &e); ///< delete from repl policy scope @@ -164,6 +164,7 @@ protected: virtual void create(); private: + std::vector *> rebuildStatsOwners; std::vector mapOwners; std::vector< Ipc::Mem::Owner *> freeSlotsOwners; }; diff --git a/src/fs/ufs/RebuildState.cc b/src/fs/ufs/RebuildState.cc index 1f02d59c48..c846dcb4b8 100644 --- a/src/fs/ufs/RebuildState.cc +++ b/src/fs/ufs/RebuildState.cc @@ -74,6 +74,8 @@ Fs::Ufs::RebuildState::RebuildState(RefCount aSwapDir) : if (!clean) flags.need_to_validate = true; + counts.updateStartTime(current_time); + debugs(47, DBG_IMPORTANT, "Rebuilding storage in " << sd->path << " (" << (clean ? "clean log" : (LogParser ? "dirty log" : "no log")) << ")"); } @@ -97,7 +99,6 @@ Fs::Ufs::RebuildState::RebuildStep(void *data) if (!rb->isDone() || reconfiguring) eventAdd("storeRebuild", RebuildStep, rb, 0.01, 1); else { - -- StoreController::store_dirs_rebuilding; storeRebuildComplete(&rb->counts); delete rb; } diff --git a/src/fs/ufs/UFSSwapDir.cc b/src/fs/ufs/UFSSwapDir.cc index bda76b8ee7..7681f5a06d 100644 --- a/src/fs/ufs/UFSSwapDir.cc +++ b/src/fs/ufs/UFSSwapDir.cc @@ -826,7 +826,6 @@ Fs::Ufs::UFSSwapDir::addDiskRestore(const cache_key * key, void Fs::Ufs::UFSSwapDir::rebuild() { - ++StoreController::store_dirs_rebuilding; eventAdd("storeRebuild", Fs::Ufs::RebuildState::RebuildStep, new Fs::Ufs::RebuildState(this), 0.0, 1); } diff --git a/src/ipc/StoreMap.h b/src/ipc/StoreMap.h index 0edc82ec17..1d70d8a229 100644 --- a/src/ipc/StoreMap.h +++ b/src/ipc/StoreMap.h @@ -129,6 +129,25 @@ public: size_t sharedMemorySize() const { return SharedMemorySize(capacity); } static size_t SharedMemorySize(const int aCapacity) { return sizeof(StoreMapItems) + 
aCapacity*sizeof(Item); } + Item &at(const int index) + { + assert(index >= 0); + assert(index < capacity); + return items[index]; + } + + const Item &at(const int index) const + { + return const_cast&>(*this).at(index); + } + + /// reset all items to the same value + void fill(const Item &value) + { + for (int index = 0; index < capacity; ++index) + items[index] = value; + } + const int capacity; ///< total number of items Ipc::Mem::FlexibleArray items; ///< storage }; diff --git a/src/ipc/mem/Pointer.h b/src/ipc/mem/Pointer.h index 6e4294c3da..15d013911d 100644 --- a/src/ipc/mem/Pointer.h +++ b/src/ipc/mem/Pointer.h @@ -34,6 +34,8 @@ public: static Owner *New(const char *const id, const P1 &p1, const P2 &p2, const P3 &p3); template static Owner *New(const char *const id, const P1 &p1, const P2 &p2, const P3 &p3, const P4 &p4); + /// attaches to the existing shared memory segment, becoming its owner + static Owner *Old(const char *const id); ~Owner(); @@ -41,6 +43,7 @@ public: Class *object() { return theObject; } private: + explicit Owner(const char *const id); Owner(const char *const id, const off_t sharedSize); // not implemented @@ -100,6 +103,14 @@ Owner::Owner(const char *const id, const off_t sharedSize): Must(theSegment.mem()); } +template +Owner::Owner(const char *const id): + theSegment(id), theObject(nullptr) +{ + theSegment.open(true); + Must(theSegment.mem()); +} + template Owner::~Owner() { @@ -107,6 +118,16 @@ Owner::~Owner() theObject->~Class(); } +template +Owner * +Owner::Old(const char *const id) +{ + auto owner = new Owner(id); + owner->theObject = reinterpret_cast(owner->theSegment.mem()); + Must(static_cast(owner->theObject->sharedMemorySize()) <= owner->theSegment.size()); + return owner; +} + template Owner * Owner::New(const char *const id) @@ -162,7 +183,7 @@ Owner::New(const char *const id, const P1 &p1, const P2 &p2, const P3 &p3 template Object::Object(const char *const id): theSegment(id) { - theSegment.open(); + 
theSegment.open(false); Must(theSegment.mem()); theObject = reinterpret_cast(theSegment.mem()); Must(static_cast(theObject->sharedMemorySize()) <= theSegment.size()); diff --git a/src/ipc/mem/Segment.cc b/src/ipc/mem/Segment.cc index 3668d65523..0b61eeba4b 100644 --- a/src/ipc/mem/Segment.cc +++ b/src/ipc/mem/Segment.cc @@ -130,7 +130,7 @@ Ipc::Mem::Segment::create(const off_t aSize) } void -Ipc::Mem::Segment::open() +Ipc::Mem::Segment::open(const bool unlinkWhenDone) { assert(theFD < 0); @@ -143,6 +143,7 @@ Ipc::Mem::Segment::open() } theSize = statSize("Ipc::Mem::Segment::open"); + doUnlink = unlinkWhenDone; debugs(54, 3, HERE << "opened " << theName << " segment: " << theSize); diff --git a/src/ipc/mem/Segment.h b/src/ipc/mem/Segment.h index e595e28cda..e7520dbcca 100644 --- a/src/ipc/mem/Segment.h +++ b/src/ipc/mem/Segment.h @@ -32,7 +32,9 @@ public: /// Create a new shared memory segment. Unlinks the segment on destruction. void create(const off_t aSize); - void open(); ///< Open an existing shared memory segment. + /// opens an existing shared memory segment + /// \param unlinkWhenDone whether to delete the segment on destruction + void open(const bool unlinkWhenDone); const String &name() { return theName; } ///< shared memory segment name off_t size() { return theSize; } ///< shared memory segment size diff --git a/src/store/Disks.cc b/src/store/Disks.cc index e74eb89c31..e2ab46678c 100644 --- a/src/store/Disks.cc +++ b/src/store/Disks.cc @@ -16,6 +16,7 @@ #include "Store.h" #include "store/Disk.h" #include "store/Disks.h" +#include "store_rebuild.h" #include "swap_log_op.h" #include "util.h" // for tvSubDsec() which should be in SquidTime.h @@ -268,6 +269,11 @@ Store::Disks::init() store_table = hash_create(storeKeyHashCmp, store_hash_buckets, storeKeyHashHash); + // Increment _before_ any possible storeRebuildComplete() calls so that + // storeRebuildComplete() can reliably detect when all disks are done. 
The + // level is decremented in each corresponding storeRebuildComplete() call. + StoreController::store_dirs_rebuilding += Config.cacheSwap.n_configured; + for (int i = 0; i < Config.cacheSwap.n_configured; ++i) { /* this starts a search of the store dirs, loading their * index. under the new Store api this should be @@ -285,6 +291,8 @@ Store::Disks::init() */ if (Dir(i).active()) store(i)->init(); + else + storeRebuildComplete(nullptr); } if (strcasecmp(Config.store_dir_select_algorithm, "round-robin") == 0) { diff --git a/src/store_rebuild.cc b/src/store_rebuild.cc index 0595834caf..eb39f3fc44 100644 --- a/src/store_rebuild.cc +++ b/src/store_rebuild.cc @@ -28,9 +28,10 @@ static StoreRebuildData counts; -static struct timeval rebuild_start; static void storeCleanup(void *); +// TODO: Either convert to Progress or replace with StoreRebuildData. +// TODO: Handle unknown totals (UFS cache_dir that lost swap.state) correctly. typedef struct { /* total number of "swap.state" entries that will be read */ int total; @@ -40,6 +41,12 @@ typedef struct { static store_rebuild_progress *RebuildProgress = NULL; +void +StoreRebuildData::updateStartTime(const timeval &dirStartTime) +{ + startTime = started() ? 
std::min(startTime, dirStartTime) : dirStartTime; +} + static int storeCleanupDoubleCheck(StoreEntry * e) { @@ -122,17 +129,25 @@ void storeRebuildComplete(StoreRebuildData *dc) { - double dt; - counts.objcount += dc->objcount; - counts.expcount += dc->expcount; - counts.scancount += dc->scancount; - counts.clashcount += dc->clashcount; - counts.dupcount += dc->dupcount; - counts.cancelcount += dc->cancelcount; - counts.invalid += dc->invalid; - counts.badflags += dc->badflags; - counts.bad_log_op += dc->bad_log_op; - counts.zero_object_sz += dc->zero_object_sz; + if (dc) { + counts.objcount += dc->objcount; + counts.expcount += dc->expcount; + counts.scancount += dc->scancount; + counts.clashcount += dc->clashcount; + counts.dupcount += dc->dupcount; + counts.cancelcount += dc->cancelcount; + counts.invalid += dc->invalid; + counts.badflags += dc->badflags; + counts.bad_log_op += dc->bad_log_op; + counts.zero_object_sz += dc->zero_object_sz; + counts.validations += dc->validations; + counts.updateStartTime(dc->startTime); + } + // else the caller was not responsible for indexing its cache_dir + + assert(StoreController::store_dirs_rebuilding > 1); + --StoreController::store_dirs_rebuilding; + /* * When store_dirs_rebuilding == 1, it means we are done reading * or scanning all cache_dirs. Now report the stats and start @@ -142,7 +157,7 @@ storeRebuildComplete(StoreRebuildData *dc) if (StoreController::store_dirs_rebuilding > 1) return; - dt = tvSubDsec(rebuild_start, current_time); + const auto dt = tvSubDsec(counts.startTime, current_time); debugs(20, DBG_IMPORTANT, "Finished rebuilding storage from disk."); debugs(20, DBG_IMPORTANT, " " << std::setw(7) << counts.scancount << " Entries scanned"); @@ -173,7 +188,6 @@ void storeRebuildStart(void) { counts = StoreRebuildData(); // reset counters - rebuild_start = current_time; /* * Note: store_dirs_rebuilding is initialized to 1. 
* @@ -197,6 +211,7 @@ void storeRebuildProgress(int sd_index, int total, int sofar) { static time_t last_report = 0; + // TODO: Switch to int64_t and fix handling of unknown totals. double n = 0.0; double d = 0.0; @@ -221,10 +236,25 @@ storeRebuildProgress(int sd_index, int total, int sofar) d += (double) RebuildProgress[sd_index].total; } - debugs(20, DBG_IMPORTANT, "Store rebuilding is "<< std::setw(4)<< std::setprecision(2) << 100.0 * n / d << "% complete"); + debugs(20, DBG_IMPORTANT, "Indexing cache entries: " << Progress(n, d)); last_report = squid_curtime; } +void +Progress::print(std::ostream &os) const +{ + if (goal > 0) { + const auto percent = 100.0 * completed / goal; + os << std::setprecision(2) << percent << "% (" << + completed << " out of " << goal << ")"; + } else if (!completed && !goal) { + os << "nothing to do"; + } else { + // unknown (i.e. negative) or buggy (i.e. zero when completed != 0) goal + os << completed; + } +} + #include "fde.h" #include "Generic.h" #include "StoreMeta.h" diff --git a/src/store_rebuild.h b/src/store_rebuild.h index ebda8b2fb1..e316397f07 100644 --- a/src/store_rebuild.h +++ b/src/store_rebuild.h @@ -13,9 +13,21 @@ #include "store_key_md5.h" +class MemBuf; + +/// cache_dir(s) indexing statistics class StoreRebuildData { public: + /// maintain earliest initiation time across multiple indexing cache_dirs + void updateStartTime(const timeval &dirStartTime); + + /// whether we have worked on indexing this(these) cache_dir(s) before + bool started() const { return startTime.tv_sec > 0; } + + // when adding members, keep the class compatible with placement new onto a + // zeroed shared memory segment (see Rock::Rebuild::Stats usage) + int objcount = 0; /* # objects successfully reloaded */ int expcount = 0; /* # objects expired */ int scancount = 0; /* # entries scanned or read from state file */ @@ -26,8 +38,31 @@ public: int badflags = 0; /* # bad e->flags */ int bad_log_op = 0; int zero_object_sz = 0; + int64_t 
validations = 0; ///< the number of validated cache entries, slots + timeval startTime = {}; ///< absolute time when the rebuild was initiated +}; + +/// advancement of work that consists of (usually known number) of similar steps +class Progress +{ +public: + Progress(const int64_t stepsCompleted, const int64_t stepsTotal): + completed(stepsCompleted), goal(stepsTotal) {} + + /// brief progress report suitable for level-0/1 debugging + void print(std::ostream &os) const; + + int64_t completed; ///< the number of finished work steps + int64_t goal; ///< the known total number of work steps (or negative) }; +inline std::ostream & +operator <<(std::ostream &os, const Progress &p) +{ + p.print(os); + return os; +} + void storeRebuildStart(void); void storeRebuildComplete(StoreRebuildData *); void storeRebuildProgress(int sd_index, int total, int sofar); diff --git a/src/tests/stub_store_rebuild.cc b/src/tests/stub_store_rebuild.cc index fea44a5dff..47e05371d0 100644 --- a/src/tests/stub_store_rebuild.cc +++ b/src/tests/stub_store_rebuild.cc @@ -10,6 +10,7 @@ #include "squid.h" #include "MemBuf.h" +#include "SquidTime.h" #include "store/Controller.h" #include "store_rebuild.h" @@ -21,9 +22,16 @@ void storeRebuildProgress(int sd_index, int total, int sofar) STUB bool storeRebuildParseEntry(MemBuf &, StoreEntry &, cache_key *, StoreRebuildData &, uint64_t) STUB_RETVAL(false) +void StoreRebuildData::updateStartTime(const timeval &dirStartTime) +{ + startTime = started() ? std::min(startTime, dirStartTime) : dirStartTime; +} + void storeRebuildComplete(StoreRebuildData *) { --StoreController::store_dirs_rebuilding; + if (StoreController::store_dirs_rebuilding == 1) + --StoreController::store_dirs_rebuilding; // normally in storeCleanup() } bool @@ -39,3 +47,5 @@ storeRebuildLoadEntry(int fd, int diskIndex, MemBuf &buf, StoreRebuildData &) return true; } +void Progress::print(std::ostream &) const STUB + -- 2.39.5