From: Frank Ch. Eigler Date: Tue, 14 Sep 2021 12:15:23 +0000 (-0400) Subject: PR28339: debuginfod: fix groom/scan race condition on just-emptied queue X-Git-Tag: elfutils-0.186~21 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=2ff803956d6aaefeed3bc3f186da6fe666c7b1b6;p=thirdparty%2Felfutils.git PR28339: debuginfod: fix groom/scan race condition on just-emptied queue debuginfod's scan and groom operations (thread_main_scanner, thread_main_fts_source_paths) are intended to be mutually exclusive, as a substitute for more complicated sql transaction batching. (This is because scanning / grooming involves inserting or deleting data from multiple related tables.) The workq class that governs this in debuginfod.cxx has a problem: if the workq just becomes empty, its sole entry pulled by a scanner thread in response to a wait_front(), an 'idler' groomer thread is ALSO permitted to run, because there is no indication as to when the scanner thread operation finishes, only when it starts. Extending the workq with a counter ("fronters") to track any active scanning activity (even if the workq is empty) lets us block idlers groomers a little longer. Signed-off-by: Frank Ch. Eigler --- diff --git a/debuginfod/ChangeLog b/debuginfod/ChangeLog index 1173f9cd7..4ff59efdc 100644 --- a/debuginfod/ChangeLog +++ b/debuginfod/ChangeLog @@ -1,3 +1,11 @@ +2021-09-14 Frank Ch. Eigler + + PRPR28339 + * debuginfod.cxx (waitq::fronters): New field. + (waitq::wait_idle): Respect it. + (waitq::done_front): New function. + (thread_main_scanner): Call it to match wait_front(). + 2021-09-12 Mark Wielaard * debuginfod.cxx (libarchive_fdcache::lookup): Add endl after diff --git a/debuginfod/debuginfod.cxx b/debuginfod/debuginfod.cxx index 6cc9f777c..1267efbe0 100644 --- a/debuginfod/debuginfod.cxx +++ b/debuginfod/debuginfod.cxx @@ -663,10 +663,11 @@ class workq mutex mtx; condition_variable cv; bool dead; - unsigned idlers; + unsigned idlers; // number of threads busy with wait_idle / done_idle + unsigned fronters; // number of threads busy with wait_front / done_front public: - workq() { dead = false; idlers = 0; } + workq() { dead = false; idlers = 0; fronters = 0; } ~workq() {} void push_back(const Payload& p) @@ -690,10 +691,11 @@ public: unique_lock lock(mtx); q.clear(); set_metric("thread_work_pending","role","scan", q.size()); + // NB: there may still be some live fronters cv.notify_all(); // maybe wake up waiting idlers } - // block this scanner thread until there is work to do and no active + // block this scanner thread until there is work to do and no active idler bool wait_front (Payload& p) { unique_lock lock(mtx); @@ -705,19 +707,29 @@ public: { p = * q.begin(); q.erase (q.begin()); + fronters ++; // prevent idlers from starting awhile, even if empty q set_metric("thread_work_pending","role","scan", q.size()); - if (q.size() == 0) - cv.notify_all(); // maybe wake up waiting idlers + // NB: don't wake up idlers yet! The consumer is busy + // processing this element until it calls done_front(). return true; } } + // notify waitq that scanner thread is done with that last item + void done_front () + { + unique_lock lock(mtx); + fronters --; + if (q.size() == 0 && fronters == 0) + cv.notify_all(); // maybe wake up waiting idlers + } + // block this idler thread until there is no work to do void wait_idle () { unique_lock lock(mtx); cv.notify_all(); // maybe wake up waiting scanners - while (!dead && (q.size() != 0)) + while (!dead && ((q.size() != 0) || fronters > 0)) cv.wait(lock); idlers ++; } @@ -3145,6 +3157,8 @@ thread_main_scanner (void* arg) e.report(cerr); } + scanq.done_front(); // let idlers run + if (fts_cached || fts_executable || fts_debuginfo || fts_sourcefiles || fts_sref || fts_sdef) {} // NB: not just if a successful scan - we might have encountered -ENOSPC & failed (void) statfs_free_enough_p(db_path, "database"); // report sqlite filesystem size