PR27863: debuginfod optimization for concurrent requests

author Frank Ch. Eigler <fche@redhat.com>

Fri, 4 Jun 2021 12:40:11 +0000 (08:40 -0400)

committer Frank Ch. Eigler <fche@redhat.com>

Wed, 9 Jun 2021 14:44:05 +0000 (10:44 -0400)
author Frank Ch. Eigler <fche@redhat.com>
Fri, 4 Jun 2021 12:40:11 +0000 (08:40 -0400)
committer Frank Ch. Eigler <fche@redhat.com>
Wed, 9 Jun 2021 14:44:05 +0000 (10:44 -0400)
diff --git a/debuginfod/ChangeLog b/debuginfod/ChangeLog

index 21407dc2e524dcbc49b83155e30451e0e4f1c7fb..286c910a02f5305964bfa5c714aadd46b7df32a3 100644 (file)
--- a/debuginfod/ChangeLog
+++ b/debuginfod/ChangeLog
@@ -1,3 +1,9 @@
+2021-06-03  Frank Ch. Eigler <fche@redhat.com>
+
+       PR27863
+       * debuginfod.cxx (unique_set, unique_set_reserver): New classes.
+       (handler_cb): Use them to implement "after-you" queueing.
+
  2021-05-14  Frank Ch. Eigler <fche@redhat.com>
  
         PR27859
diff --git a/debuginfod/debuginfod.cxx b/debuginfod/debuginfod.cxx

index e0948eaba4b1167a2ee48660f204b0758ba6dadc..543044c6f0ea0c3c7a72a0e08db377214c3edd94 100644 (file)
--- a/debuginfod/debuginfod.cxx
+++ b/debuginfod/debuginfod.cxx
@@ -705,6 +705,54 @@ static workq<scan_payload> scanq; // just a single one
  // idler: thread_main_groom()
  
  
+////////////////////////////////////////////////////////////////////////
+
+// Unique set is a thread-safe structure that lends 'ownership' of a value
+// to a thread.  Other threads requesting the same thing are made to wait.
+// It's like a semaphore-on-demand.
+template <typename T>
+class unique_set
+{
+private:
+  set<T> values;
+  mutex mtx;
+  condition_variable cv;
+public:
+  unique_set() {}
+  ~unique_set() {}
+
+  void acquire(const T& value)
+  {
+    unique_lock<mutex> lock(mtx);
+    while (values.find(value) != values.end())
+      cv.wait(lock);
+    values.insert(value);
+  }
+
+  void release(const T& value)
+  {
+    unique_lock<mutex> lock(mtx);
+    // assert (values.find(value) != values.end());
+    values.erase(value);
+    cv.notify_all();
+  }
+};
+
+
+// This is the object that's instantiate to uniquely hold a value in a
+// RAII-pattern way.
+template <typename T>
+class unique_set_reserver
+{
+private:
+  unique_set<T>& please_hold;
+  T mine;
+public:
+  unique_set_reserver(unique_set<T>& t, const T& value):
+    please_hold(t), mine(value)  { please_hold.acquire(mine); }
+  ~unique_set_reserver() { please_hold.release(mine); }
+};
+
  
  ////////////////////////////////////////////////////////////////////////
  
@@ -1961,6 +2009,7 @@ handler_cb (void * /*cls*/,
    off_t http_size = -1;
    struct timespec ts_start, ts_end;
    clock_gettime (CLOCK_MONOTONIC, &ts_start);
+  double afteryou = 0.0;
  
    try
      {
@@ -1973,7 +2022,25 @@ handler_cb (void * /*cls*/,
  
        if (slash1 != string::npos && url1 == "/buildid")
          {
+          // PR27863: block this thread awhile if another thread is already busy
+          // fetching the exact same thing.  This is better for Everyone.
+          // The latecomer says "... after you!" and waits.
+          add_metric ("thread_busy", "role", "http-buildid-after-you", 1);
+#ifdef HAVE_PTHREAD_SETNAME_NP
+          (void) pthread_setname_np (pthread_self(), "mhd-buildid-after-you");
+#endif
+          struct timespec tsay_start, tsay_end;
+          clock_gettime (CLOCK_MONOTONIC, &tsay_start);
+          static unique_set<string> busy_urls;
+          unique_set_reserver<string> after_you(busy_urls, url1);
+          clock_gettime (CLOCK_MONOTONIC, &tsay_end);
+          afteryou = (tsay_end.tv_sec - tsay_start.tv_sec) + (tsay_end.tv_nsec - tsay_start.tv_nsec)/1.e9;
+          add_metric ("thread_busy", "role", "http-buildid-after-you", -1);
+          
            tmp_inc_metric m ("thread_busy", "role", "http-buildid");
+#ifdef HAVE_PTHREAD_SETNAME_NP
+          (void) pthread_setname_np (pthread_self(), "mhd-buildid");
+#endif
            size_t slash2 = url_copy.find('/', slash1+1);
            if (slash2 == string::npos)
              throw reportable_exception("/buildid/ webapi error, need buildid");
@@ -2036,10 +2103,12 @@ handler_cb (void * /*cls*/,
  
    clock_gettime (CLOCK_MONOTONIC, &ts_end);
    double deltas = (ts_end.tv_sec - ts_start.tv_sec) + (ts_end.tv_nsec - ts_start.tv_nsec)/1.e9;
+  // afteryou: delay waiting for other client's identical query to complete
+  // deltas: total latency, including afteryou waiting
    obatched(clog) << conninfo(connection)
                   << ' ' << method << ' ' << url
                   << ' ' << http_code << ' ' << http_size
-                 << ' ' << (int)(deltas*1000) << "ms"
+                 << ' ' << (int)(afteryou*1000) << '+' << (int)((deltas-afteryou)*1000) << "ms"
                   << endl;
  
    // related prometheus metrics
@@ -2053,6 +2122,10 @@ handler_cb (void * /*cls*/,
               deltas*1000); // prometheus prefers _seconds and floating point
    inc_metric("http_responses_duration_milliseconds_count","code",http_code_str);
  
+  add_metric("http_responses_after_you_milliseconds_sum","code",http_code_str,
+             afteryou*1000);
+  inc_metric("http_responses_after_you_milliseconds_count","code",http_code_str);
+  
    return rc;
  }
  
diff --git a/tests/ChangeLog b/tests/ChangeLog

index 38e92659f1172121aa7fb5782a9731d314b3edb3..b9e5ed4a5d7f21c68e5708a9bb6f7dab3c08c327 100644 (file)
--- a/tests/ChangeLog
+++ b/tests/ChangeLog
@@ -1,3 +1,9 @@
+2021-06-03  Frank Ch. Eigler <fche@redhat.com>
+
+       PR27863
+       * run-debuginfod-find.sh: Test "after-you" queueing via flooding
+       with concurent curls.
+
  2021-05-14  Frank Ch. Eigler <fche@redhat.com>
  
         PR27859
diff --git a/tests/run-debuginfod-find.sh b/tests/run-debuginfod-find.sh

index 9183cccb72016deabbc3a5f49c53a8d2b7489c2d..0445bce1e7657dd29db98ea717f115a7ef3d723a 100755 (executable)
--- a/tests/run-debuginfod-find.sh
+++ b/tests/run-debuginfod-find.sh
@@ -580,6 +580,20 @@ testrun ${abs_top_builddir}/debuginfod/debuginfod-find debuginfo $BUILDID
  # Confirm that some debuginfod client pools are being used
  curl -s http://127.0.0.1:$PORT2/metrics | grep 'dc_pool_op.*reuse'
  
+# Trigger a flood of requests against the same archive content file.
+# Use a file that hasn't been previously extracted in to make it
+# likely that even this test debuginfod will experience concurrency
+# and impose some "after-you" delays.
+(for i in `seq 100`; do
+    curl -s http://127.0.0.1:$PORT1/buildid/87c08d12c78174f1082b7c888b3238219b0eb265/executable >/dev/null &
+ done;
+ wait)
+curl -s http://127.0.0.1:$PORT1/metrics | grep 'http_responses_after_you.*'
+# If we could guarantee some minimum number of seconds of CPU time, we
+# could assert that the after_you metrics show some nonzero amount of
+# waiting.  A few hundred ms is typical on this developer's workstation.
+
+
  ########################################################################
  # Corrupt the sqlite database and get debuginfod to trip across its errors
  curl -s http://127.0.0.1:$PORT1/metrics | grep 'sqlite3.*reset'
author	Frank Ch. Eigler <fche@redhat.com>
	Fri, 4 Jun 2021 12:40:11 +0000 (08:40 -0400)
committer	Frank Ch. Eigler <fche@redhat.com>
	Wed, 9 Jun 2021 14:44:05 +0000 (10:44 -0400)
debuginfod/ChangeLog		patch \| blob \| blame \| history
debuginfod/debuginfod.cxx		patch \| blob \| blame \| history
tests/ChangeLog		patch \| blob \| blame \| history
tests/run-debuginfod-find.sh		patch \| blob \| blame \| history