]> git.ipfire.org Git - thirdparty/elfutils.git/commitdiff
PR26775: restore thread_work_groom metric to cycle count
authorFrank Ch. Eigler <fche@redhat.com>
Fri, 30 Oct 2020 22:26:04 +0000 (18:26 -0400)
committerFrank Ch. Eigler <fche@redhat.com>
Fri, 30 Oct 2020 22:26:04 +0000 (18:26 -0400)
... and add new metrics about progress of traversal and groom
processes.  Correct one control flow abnormality that could
prematurely end a scanner thread and might have accounted for
the inconsistent test results from the previous patch.

Signed-off-by: Frank Ch. Eigler <fche@redhat.com>
debuginfod/ChangeLog
debuginfod/debuginfod.cxx
tests/ChangeLog
tests/run-debuginfod-find.sh

index 5af4dc6ca08f57ba3606201f93827ac5ff443bd1..ec791c68d4606175b35b23bb40c7d2a895d47ccc 100644 (file)
@@ -1,3 +1,14 @@
+2020-10-30  Frank Ch. Eigler  <fche@redhat.com>
+
+       PR26775 cont'd.
+       (thread_main_scanner): Ensure control doesn't leave
+       infinite loop until program exit, even if SIGUSR2.
+       (scan_source_paths): Have traverser clean scanq on
+       SIGUSR2.  Emit additional traversed_total metrics.
+       (groom): Emit additional groomed_total metrics.
+       (thread_main_groom): Restore previous thread_work_total
+       metric.
+
 2020-10-29  Frank Ch. Eigler  <fche@redhat.com>
 
        PR26775
index 8448a501f35642d3629b9c082b83a46f295b2b45..1020a6405978edea02945410640ed526cd8cb07b 100644 (file)
@@ -2711,7 +2711,8 @@ thread_main_scanner (void* arg)
       add_metric("thread_busy", "role", "scan", -1);
       bool gotone = scanq.wait_front(p);
       add_metric("thread_busy", "role", "scan", 1);
-      if (! gotone) continue; // or break
+
+      if (! gotone) continue; // go back to waiting
 
       try
         {
@@ -2750,13 +2751,9 @@ thread_main_scanner (void* arg)
           e.report(cerr);
         }
 
-      inc_metric("thread_work_total", "role","scan");
-      
-      if (sigusr2 != forced_groom_count) // stop early if groom triggered 
-        {
-          scanq.clear();
-          break;
-        }
+      // finished a scanning step -- not a "loop", because we just
+      // consume the traversal loop's work, whenever
+      inc_metric("thread_work_total","role","scan");
     }
 
   add_metric("thread_busy", "role", "scan", -1);
@@ -2801,7 +2798,10 @@ scan_source_paths()
     if (interrupted) break;
 
     if (sigusr2 != forced_groom_count) // stop early if groom triggered 
-      break;
+      {
+        scanq.clear(); // clear previously issued work for scanner threads
+        break;
+      }
     
     fts_scanned ++;
 
@@ -2825,6 +2825,10 @@ scan_source_paths()
         if (verbose > 3)
           obatched(clog) << "fts skipped by regex " << (!ri ? "I" : "") << (rx ? "X" : "") << endl;
         fts_regex ++;
+        if (!ri)
+          inc_metric("traversed_total","type","regex-skipped-I");
+        if (rx)
+          inc_metric("traversed_total","type","regex-skipped-X");
         continue;
       }
 
@@ -2832,6 +2836,7 @@ scan_source_paths()
       {
       case FTS_F:
         scanq.push_back (make_pair(rps, *f->fts_statp));
+        inc_metric("traversed_total","type","file");
         break;
 
       case FTS_ERR:
@@ -2841,11 +2846,16 @@ scan_source_paths()
           auto x = libc_exception(f->fts_errno, string("fts traversal ") + string(f->fts_path));
           x.report(cerr);
         }
+        inc_metric("traversed_total","type","error");
         break;
 
-      default:
-        ;
-        /* ignore */
+      case FTS_D: // ignore
+        inc_metric("traversed_total","type","directory");
+        break;
+        
+      default: // ignore
+        inc_metric("traversed_total","type","other");
+        break;
       }
   }
   gettimeofday (&tv_end, NULL);
@@ -2890,6 +2900,7 @@ thread_main_fts_source_paths (void* arg)
             set_metric("thread_busy", "role","traverse", 1);
             scan_source_paths();
             last_rescan = time(NULL); // NB: now was before scanning
+            // finished a traversal loop
             inc_metric("thread_work_total", "role","traverse");
             set_metric("thread_busy", "role","traverse", 0);
           }
@@ -2970,9 +2981,11 @@ void groom()
           files_del_f_de.reset().bind(1,fileid).bind(2,mtime).step_ok_done();
           files_del_r_de.reset().bind(1,fileid).bind(2,mtime).step_ok_done();
           files_del_scan.reset().bind(1,fileid).bind(2,mtime).step_ok_done();
+          inc_metric("groomed_total", "decision", "stale");
         }
-      
-      inc_metric("thread_work_total", "role", "groom");
+      else
+        inc_metric("groomed_total", "decision", "fresh");
+
       if (sigusr1 != forced_rescan_count) // stop early if scan triggered
         break;
     }
@@ -3039,6 +3052,8 @@ thread_main_groom (void* /*arg*/)
             set_metric("thread_busy", "role", "groom", 1);
             groom ();
             last_groom = time(NULL); // NB: now was before grooming
+            // finished a grooming loop
+            inc_metric("thread_work_total", "role", "groom");
             set_metric("thread_busy", "role", "groom", 0);
           }
         catch (const sqlite_exception& e)
index 012e305cf4181ce1d58cf5a921931f2b9ddea262..13abf89ac5dd51f628947c28b500badc5b41b1ae 100644 (file)
@@ -1,3 +1,9 @@
+2020-10-30  Frank Ch. Eigler  <fche@redhat.com>
+
+       PR26775
+       * run-debuginfod-find.sh: Modify test for restored
+       thread_work_total semantics for grooming.
+
 2020-10-29  Frank Ch. Eigler  <fche@redhat.com>
 
        PR26775
index 3af04c00bcd6b0e34cf1902f8b580d81e595fdd1..0d1fcf1fb829fd7752fb0a38dd8013ba97a5b9da 100755 (executable)
@@ -346,8 +346,7 @@ rm -r R/debuginfod-rpms/rhel6/*
 kill -USR2 $PID1  # groom cycle
 # Expect 3 rpms to be deleted by the groom
 # 1 groom cycle already took place at/soon-after startup, so -USR2 makes 2
-# ... times the # of files checked in each cycle
-wait_ready $PORT1 'thread_work_total{role="groom"}' 51
+wait_ready $PORT1 'thread_work_total{role="groom"}' 2
 wait_ready $PORT1 'groom{statistic="file d/e"}' 3
 
 rm -rf $DEBUGINFOD_CACHE_PATH # clean it from previous tests
@@ -364,7 +363,7 @@ testrun ${abs_top_builddir}/debuginfod/debuginfod-find executable $BUILDID2
 
 # run a groom cycle to force server to drop its fdcache
 kill -USR2 $PID1  # groom cycle
-wait_ready $PORT1 'thread_work_total{role="groom"}' 98 # 3 complete cycles
+wait_ready $PORT1 'thread_work_total{role="groom"}' 3
 # move it around a couple of times to make it likely to hit a nonexistent entry during iteration
 mv R/debuginfod-rpms/rhel7 R/debuginfod-rpms/rhel7renamed
 kill -USR1 $PID1  # scan cycle