PR25375: fdcache prefetching to reduce repeated archive decompression

author Frank Ch. Eigler <fche@redhat.com>

Sat, 25 Jan 2020 23:43:07 +0000 (18:43 -0500)

committer Frank Ch. Eigler <fche@redhat.com>

Tue, 25 Feb 2020 17:05:28 +0000 (12:05 -0500)
author Frank Ch. Eigler <fche@redhat.com>
Sat, 25 Jan 2020 23:43:07 +0000 (18:43 -0500)
committer Frank Ch. Eigler <fche@redhat.com>
Tue, 25 Feb 2020 17:05:28 +0000 (12:05 -0500)
diff --git a/debuginfod/ChangeLog b/debuginfod/ChangeLog

index b297a3749702d957ce7f1e6fbd4dde9f5c5c63ec..4bbe0d35b5a49fc69eb0a29f39ed81aea4bd88cc 100644 (file)
--- a/debuginfod/ChangeLog
+++ b/debuginfod/ChangeLog
@@ -1,3 +1,16 @@
+2020-02-25  Frank Ch. Eigler  <fche@redhat.com>
+
+       * debuginfod.cxx (fdcache_prefetch): New parameter.
+       (parse_opt): Parse it.
+       (main): Default it.
+       (fdcache::fd_size_mb): Change to double for accuracy.
+       (fdcache::probe): New function.
+       (fdcache::intern): New option to intern at end of LRU.
+       (fdcache::lookup): Age the fdcache (via limit) after a cache hit.
+       (handle_buildid_r_match): Implement multi-stage archive
+       parsing, with optional prefetching of extracted contents
+       into the fdcache.
+
  2020-02-19  Aaron Merey  <amerey@redhat.com>
  
         * debuginfod-client.c (debuginfod_clean_cache): Restrict
diff --git a/debuginfod/debuginfod.cxx b/debuginfod/debuginfod.cxx

index 0acd70e4a91627e3e7e7bff43b3cfaace66b3d9e..be3868bb1e42d1c776331be7aad5b197b80ce707 100644 (file)
--- a/debuginfod/debuginfod.cxx
+++ b/debuginfod/debuginfod.cxx
@@ -355,6 +355,8 @@ static const struct argp_option options[] =
     { "fdcache-fds", ARGP_KEY_FDCACHE_FDS, "NUM", 0, "Maximum number of archive files to keep in fdcache.", 0 },
  #define ARGP_KEY_FDCACHE_MBS 0x1002
     { "fdcache-mbs", ARGP_KEY_FDCACHE_MBS, "MB", 0, "Maximum total size of archive file fdcache.", 0 },
+#define ARGP_KEY_FDCACHE_PREFETCH 0x1003
+   { "fdcache-prefetch", ARGP_KEY_FDCACHE_PREFETCH, "NUM", 0, "Number of archive files to prefetch into fdcache.", 0 },
     { NULL, 0, NULL, 0, NULL, 0 }
    };
  
@@ -394,6 +396,7 @@ static regex_t file_exclude_regex;
  static bool traverse_logical;
  static long fdcache_fds;
  static long fdcache_mbs;
+static long fdcache_prefetch;
  static string tmpdir;
  
  static void set_metric(const string& key, int64_t value);
@@ -476,6 +479,9 @@ parse_opt (int key, char *arg,
      case ARGP_KEY_FDCACHE_MBS:
        fdcache_mbs = atol (arg);
        break;
+    case ARGP_KEY_FDCACHE_PREFETCH:
+      fdcache_prefetch = atol (arg);
+      break;
      case ARGP_KEY_ARG:
        source_paths.insert(string(arg));
        break;
@@ -975,14 +981,14 @@ private:
      string archive;
      string entry;
      string fd;
-    long fd_size_mb; // rounded up megabytes
+    double fd_size_mb; // slightly rounded up megabytes
    };
    deque<fdcache_entry> lru; // @head: most recently used
    long max_fds;
    long max_mbs;
  
  public:
-  void intern(const string& a, const string& b, string fd, off_t sz)
+  void intern(const string& a, const string& b, string fd, off_t sz, bool front_p)
    {
      {
        unique_lock<mutex> lock(fdcache_lock);
@@ -995,31 +1001,56 @@ public:
                break; // must not continue iterating
              }
          }
-      long mb = ((sz+1023)/1024+1023)/1024;
+      double mb = (sz+65535)/1048576.0; // round up to 64K block
        fdcache_entry n = { a, b, fd, mb };
-      lru.push_front(n);
+      if (front_p)
+        lru.push_front(n);
+      else
+        lru.push_back(n);
      if (verbose > 3)
-      obatched(clog) << "fdcache interned a=" << a << " b=" << b << " fd=" << fd << " mb=" << mb << endl;
+      obatched(clog) << "fdcache interned a=" << a << " b=" << b
+                     << " fd=" << fd << " mb=" << mb << " front=" << front_p << endl;
      }
  
-    this->limit(max_fds, max_mbs); // age cache if required
+    // NB: we age the cache at lookup time too
+    if (front_p)
+      this->limit(max_fds, max_mbs); // age cache if required
    }
  
    int lookup(const string& a, const string& b)
+  {
+    int fd = -1;
+    {
+      unique_lock<mutex> lock(fdcache_lock);
+      for (auto i = lru.begin(); i < lru.end(); i++)
+        {
+          if (i->archive == a && i->entry == b)
+            { // found it; move it to head of lru
+              fdcache_entry n = *i;
+              lru.erase(i); // invalidates i, so no more iteration!
+              lru.push_front(n);
+
+              fd = open(n.fd.c_str(), O_RDONLY); // NB: no problem if open() fails; looks like cache miss
+              break;
+            }
+        }
+    }
+
+    if (fd >= 0)
+      this->limit(max_fds, max_mbs); // age cache if required
+
+    return fd;
+  }
+
+  int probe(const string& a, const string& b) // just a cache residency check - don't modify LRU state, don't open
    {
      unique_lock<mutex> lock(fdcache_lock);
      for (auto i = lru.begin(); i < lru.end(); i++)
        {
          if (i->archive == a && i->entry == b)
-          { // found it; move it to head of lru
-            fdcache_entry n = *i;
-            lru.erase(i); // invalidates i, so no more iteration!
-            lru.push_front(n);
-
-            return open(n.fd.c_str(), O_RDONLY); // NB: no problem if dup() fails; looks like cache miss
-          }
+          return true;
        }
-    return -1;
+    return false;
    }
  
    void clear(const string& a, const string& b)
@@ -1047,7 +1078,7 @@ public:
      this->max_mbs = maxmbs;
  
      long total_fd = 0;
-    long total_mb = 0;
+    double total_mb = 0.0;
      for (auto i = lru.begin(); i < lru.end(); i++)
        {
          // accumulate totals from most recently used one going backward
@@ -1117,6 +1148,7 @@ handle_buildid_r_match (int64_t b_mtime,
        return 0;
      }
  
+  // check for a match in the fdcache first
    int fd = fdcache.lookup(b_source0, b_source1);
    while (fd >= 0) // got one!; NB: this is really an if() with a possible branch out to the end
      {
@@ -1152,6 +1184,7 @@ handle_buildid_r_match (int64_t b_mtime,
        // NB: see, we never go around the 'loop' more than once
      }
  
+  // no match ... grumble, must process the archive
    string archive_decoder = "/dev/null";
    string archive_extension = "";
    for (auto&& arch : scan_archives)
@@ -1196,8 +1229,19 @@ handle_buildid_r_match (int64_t b_mtime,
    if (rc != ARCHIVE_OK)
      throw archive_exception(a, "cannot open archive from pipe");
  
-  while(1) // parse cpio archive entries
+  // archive traversal is in three stages, no, four stages:
+  // 1) skip entries whose names do not match the requested one
+  // 2) extract the matching entry name (set r = result)
+  // 3) extract some number of prefetched entries (just into fdcache)
+  // 4) abort any further processing
+  struct MHD_Response* r = 0;                 // will set in stage 2
+  unsigned prefetch_count = fdcache_prefetch; // will decrement in stage 3
+
+  while(r == 0 || prefetch_count > 0) // stage 1, 2, or 3
      {
+      if (interrupted)
+        break;
+
        struct archive_entry *e;
        rc = archive_read_next_header (a, &e);
        if (rc != ARCHIVE_OK)
@@ -1207,7 +1251,10 @@ handle_buildid_r_match (int64_t b_mtime,
          continue;
  
        string fn = canonicalized_archive_entry_pathname (e);
-      if (fn != b_source1)
+      if ((r == 0) && (fn != b_source1)) // stage 1
+        continue;
+
+      if (fdcache.probe (b_source0, fn)) // skip if already interned
          continue;
  
        // extract this file to a temporary file
@@ -1229,18 +1276,32 @@ handle_buildid_r_match (int64_t b_mtime,
            throw archive_exception(a, "cannot extract file");
          }
  
+      if (r != 0) // stage 3
+        {
+          // NB: now we know we have a complete reusable file; make fdcache
+          // responsible for unlinking it later.
+          fdcache.intern(b_source0, fn,
+                         tmppath, archive_entry_size(e),
+                         false); // prefetched ones go to back of lru
+          prefetch_count --;
+          close (fd); // we're not saving this fd to make a mhd-response from!
+          continue;
+        }
+
        // NB: now we know we have a complete reusable file; make fdcache
        // responsible for unlinking it later.
-      fdcache.intern(b_source0, b_source1, tmppath, archive_entry_size(e));
+      fdcache.intern(b_source0, b_source1,
+                     tmppath, archive_entry_size(e),
+                     true); // requested ones go to the front of lru
  
        inc_metric ("http_responses_total","result",archive_extension + " archive");
-      struct MHD_Response* r = MHD_create_response_from_fd (archive_entry_size(e), fd);
+      r = MHD_create_response_from_fd (archive_entry_size(e), fd);
        if (r == 0)
          {
            if (verbose)
              obatched(clog) << "cannot create fd-response for " << b_source0 << endl;
            close(fd);
-          break; // assume no chance of better luck around another iteration
+          break; // assume no chance of better luck around another iteration; no other copies of same file
          }
        else
          {
@@ -1251,12 +1312,12 @@ handle_buildid_r_match (int64_t b_mtime,
            /* libmicrohttpd will close it. */
            if (result_fd)
              *result_fd = fd;
-          return r;
+          continue;
          }
      }
  
    // XXX: rpm/file not found: delete this R entry?
-  return 0;
+  return r;
  }
  
  
@@ -2809,7 +2870,8 @@ main (int argc, char *argv[])
      fdcache_mbs = 1024; // 1 gigabyte
    else
      fdcache_mbs = sfs.f_bavail * sfs.f_bsize / 1024 / 1024 / 4; // 25% of free space
-  fdcache_fds = concurrency * 2;
+  fdcache_prefetch = 64; // guesstimate storage is this much less costly than re-decompression
+  fdcache_fds = (concurrency + fdcache_prefetch) * 2;
  
    /* Parse and process arguments.  */
    int remaining;
@@ -2943,6 +3005,7 @@ main (int argc, char *argv[])
    obatched(clog) << "rescan time " << rescan_s << endl;
    obatched(clog) << "fdcache fds " << fdcache_fds << endl;
    obatched(clog) << "fdcache mbs " << fdcache_mbs << endl;
+  obatched(clog) << "fdcache prefetch " << fdcache_prefetch << endl;
    obatched(clog) << "fdcache tmpdir " << tmpdir << endl;
    obatched(clog) << "groom time " << groom_s << endl;
    if (scan_archives.size()>0)
diff --git a/doc/ChangeLog b/doc/ChangeLog

index 36094d002f75a592eaf1e908b88be36f65720bae..3e57491c8ca645cb2f0fad584ec5101b55c5adf0 100644 (file)
--- a/doc/ChangeLog
+++ b/doc/ChangeLog
@@ -1,3 +1,7 @@
+2020-02-25  Frank Ch. Eigler  <fche@redhat.com>
+
+       * debuginfod.8: Document new --fdcache-prefetch option.
+
  2020-02-05  Frank Ch. Eigler  <fche@redhat.com>
  
         * debuginfod.8: Document new -Z flag and tweak other bits.
diff --git a/doc/debuginfod.8 b/doc/debuginfod.8

index ca844aedcfdcd78528be672387e194f70968aa62..ed9724d6f1cf7a63474b019d1bc2280d2d040910 100644 (file)
--- a/doc/debuginfod.8
+++ b/doc/debuginfod.8
@@ -193,14 +193,17 @@ loops in the symbolic directory tree might lead to \fIinfinite
  traversal\fP.
  
  .TP
-.B "\-\-fdcache-fds=NUM"  "\-\-fdcache-mbs=MB"
+.B "\-\-fdcache\-fds=NUM"  "\-\-fdcache\-mbs=MB"  "\-\-fdcache\-prefetch=NUM2"
  Configure limits on a cache that keeps recently extracted files from
-archives.  Up to NUM files and up to a total of MB megabytes will be
-kept extracted, in order to avoid having to decompress their archives
-again.  The default NUM and MB values depend on the concurrency of the
-system, and on the available disk space on the $TMPDIR or \fB/tmp\fP
-filesystem.  This is because that is where the most recently used
-extracted files are kept.  Grooming cleans this cache.
+archives.  Up to NUM requested files and up to a total of MB megabytes
+will be kept extracted, in order to avoid having to decompress their
+archives over and over again.  In addition, up to NUM2 other files
+from an archive may be prefetched into the cache before they are even
+requested.  The default NUM and MB values depend on the concurrency
+of the system and on the available disk space on the $TMPDIR or
+\fB/tmp\fP filesystem, because that is where the most recently used
+extracted files are kept.  The default NUM2 is 64.  Grooming cleans
+this cache.
  
  .TP
  .B "\-v"
author	Frank Ch. Eigler <fche@redhat.com>
	Sat, 25 Jan 2020 23:43:07 +0000 (18:43 -0500)
committer	Frank Ch. Eigler <fche@redhat.com>
	Tue, 25 Feb 2020 17:05:28 +0000 (12:05 -0500)
debuginfod/ChangeLog		patch \| blob \| blame \| history
debuginfod/debuginfod.cxx		patch \| blob \| blame \| history
doc/ChangeLog		patch \| blob \| blame \| history
doc/debuginfod.8		patch \| blob \| blame \| history