From: Eduard Bagdasaryan Date: Thu, 9 Nov 2023 05:54:55 +0000 (+0000) Subject: Report/abort on any catastrophic rock cache_dir indexing failure (#1575) X-Git-Tag: SQUID_7_0_1~291 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=63d73bab6cdf882861a5a326a5a21e715f2c791d;p=thirdparty%2Fsquid.git Report/abort on any catastrophic rock cache_dir indexing failure (#1575) kid11| 0,3| TextException.cc(110) Throw: mem/PageStack.cc:111: false kid11| 93,2| AsyncJob.cc(129) callException: false kid11| 93,5| AsyncJob.cc(84) mustStop: Rock::Rebuild will stop kid11| Finished rebuilding storage from disk. Some of the serious cache_dir indexing errors (such as job-stopping exceptions) were not logged at level 1. After such an error, Squid would end up using a small (possibly empty) subset of cache_dir slots, usually without admin knowledge. Other serious errors were reported and treated as fatal. We now report all serious errors and treat them as fatal. Also added more rebuild information to reported errors. Also removed the "Do you need to run 'squid -z' ..." hint. "squid -z" is only useful for ENOENT, but SwapDir::init() essentially prevents ENOENT during Rebuild by not starting a Rebuild job when cache_dir is missing. --- diff --git a/src/fs/rock/RockRebuild.cc b/src/fs/rock/RockRebuild.cc index 4d5790c599..b594e9a0fe 100644 --- a/src/fs/rock/RockRebuild.cc +++ b/src/fs/rock/RockRebuild.cc @@ -18,6 +18,7 @@ #include "globals.h" #include "md5.h" #include "sbuf/Stream.h" +#include "SquidMath.h" #include "Store.h" #include "tools.h" @@ -695,18 +696,29 @@ Rock::Rebuild::swanSong() } void -Rock::Rebuild::failure(const char *msg, int errNo) +Rock::Rebuild::callException(const std::exception &) { + // For now, treat all Rebuild exceptions as fatal errors rather than letting + // the default callException() implementation silently stop this job.
+ throw; +} + +/// a helper to handle rebuild-killing I/O errors +void +Rock::Rebuild::failure(const char * const msg, const int errNo) +{ + assert(sd); debugs(47,5, sd->index << " slot " << loadingPos << " at " << dbOffset << " <= " << dbSize); + SBufStream error; + error << "Cannot rebuild rock cache_dir index for " << sd->filePath << + Debug::Extra << "problem: " << msg; if (errNo) - debugs(47, DBG_CRITICAL, "ERROR: Rock cache_dir rebuild failure: " << xstrerr(errNo)); - debugs(47, DBG_CRITICAL, "Do you need to run 'squid -z' to initialize storage?"); + error << Debug::Extra << "I/O error: " << xstrerr(errNo); + error << Debug::Extra << "scan progress: " << Math::int64Percent(loadingPos, dbSlotLimit) << '%'; - assert(sd); - fatalf("Rock cache_dir[%d] rebuild of %s failed: %s.", - sd->index, sd->filePath, msg); + throw TextException(error.buf(), Here()); } /// adds slot to the free slot index diff --git a/src/fs/rock/RockRebuild.h b/src/fs/rock/RockRebuild.h index f855cea841..62ed4d2fc0 100644 --- a/src/fs/rock/RockRebuild.h +++ b/src/fs/rock/RockRebuild.h @@ -52,6 +52,9 @@ public: /// \returns whether the indexing was necessary (and, hence, started) static bool Start(SwapDir &dir); + /* AsyncJob API */ + virtual void callException(const std::exception &) override; + protected: /// whether the current kid is responsible for rebuilding the given cache_dir static bool IsResponsible(const SwapDir &);