From: Frank Ch. Eigler
Date: Sat, 25 Jan 2020 23:43:07 +0000 (-0500)
Subject: PR25375: fdcache prefetching to reduce repeated archive decompression
X-Git-Tag: elfutils-0.179~31
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=577170fc84e1a347076bc6150a7b152da0a882ac;p=thirdparty%2Felfutils.git

PR25375: fdcache prefetching to reduce repeated archive decompression

Introduce a new option, --fdcache-prefetch, to accelerate repeated
queries from the same debuginfo archive.

Signed-off-by: Frank Ch. Eigler
---

diff --git a/debuginfod/ChangeLog b/debuginfod/ChangeLog
index b297a3749..4bbe0d35b 100644
--- a/debuginfod/ChangeLog
+++ b/debuginfod/ChangeLog
@@ -1,3 +1,16 @@
+2020-02-25  Frank Ch. Eigler
+
+	* debuginfod.cxx (fdcache_prefetch): New parameter.
+	(parse_opt): Parse it.
+	(main): Default it.
+	(fdcache::fd_size_mb): Change to double for accuracy.
+	(fdcache::probe): New function.
+	(fdcache::intern): New option to intern at end of LRU.
+	(fdcache::lookup): Clean fdcache.
+	(handle_buildid_r_match): Implement multi-stage archive
+	parsing, with optional prefetching of extracted contents
+	into the fdcache.
+
 2020-02-19  Aaron Merey
 
 	* debuginfod-client.c (debuginfod_clean_cache): Restrict
diff --git a/debuginfod/debuginfod.cxx b/debuginfod/debuginfod.cxx
index 0acd70e4a..be3868bb1 100644
--- a/debuginfod/debuginfod.cxx
+++ b/debuginfod/debuginfod.cxx
@@ -355,6 +355,8 @@ static const struct argp_option options[] =
   { "fdcache-fds", ARGP_KEY_FDCACHE_FDS, "NUM", 0, "Maximum number of archive files to keep in fdcache.", 0 },
 #define ARGP_KEY_FDCACHE_MBS 0x1002
   { "fdcache-mbs", ARGP_KEY_FDCACHE_MBS, "MB", 0, "Maximum total size of archive file fdcache.", 0 },
+#define ARGP_KEY_FDCACHE_PREFETCH 0x1003
+  { "fdcache-prefetch", ARGP_KEY_FDCACHE_PREFETCH, "NUM", 0, "Number of archive files to prefetch into fdcache.", 0 },
   { NULL, 0, NULL, 0, NULL, 0 }
 };
@@ -394,6 +396,7 @@ static regex_t file_exclude_regex;
 static bool traverse_logical;
 static long fdcache_fds;
 static long fdcache_mbs;
+static long fdcache_prefetch;
 static string tmpdir;
 
 static void set_metric(const string& key, int64_t value);
@@ -476,6 +479,9 @@ parse_opt (int key, char *arg,
     case ARGP_KEY_FDCACHE_MBS:
       fdcache_mbs = atol (arg);
       break;
+    case ARGP_KEY_FDCACHE_PREFETCH:
+      fdcache_prefetch = atol (arg);
+      break;
     case ARGP_KEY_ARG:
       source_paths.insert(string(arg));
       break;
@@ -975,14 +981,14 @@ private:
     string archive;
     string entry;
     string fd;
-    long fd_size_mb; // rounded up megabytes
+    double fd_size_mb; // slightly rounded up megabytes
   };
   deque<fdcache_entry> lru; // @head: most recently used
   long max_fds;
   long max_mbs;
 
 public:
-  void intern(const string& a, const string& b, string fd, off_t sz)
+  void intern(const string& a, const string& b, string fd, off_t sz, bool front_p)
   {
     {
       unique_lock<mutex> lock(fdcache_lock);
@@ -995,31 +1001,56 @@ public:
             break; // must not continue iterating
           }
       }
-      long mb = ((sz+1023)/1024+1023)/1024;
+      double mb = (sz+65535)/1048576.0; // round up to 64K block
       fdcache_entry n = { a, b, fd, mb };
-      lru.push_front(n);
+      if (front_p)
+        lru.push_front(n);
+      else
+        lru.push_back(n);
       if (verbose > 3)
-        obatched(clog) << "fdcache interned a=" << a << " b=" << b << " fd=" << fd << " mb=" << mb << endl;
+        obatched(clog) << "fdcache interned a=" << a << " b=" << b
+                       << " fd=" << fd << " mb=" << mb << " front=" << front_p << endl;
     }
 
-    this->limit(max_fds, max_mbs); // age cache if required
+    // NB: we age the cache at lookup time too
+    if (front_p)
+      this->limit(max_fds, max_mbs); // age cache if required
   }
 
   int
   lookup(const string& a, const string& b)
+  {
+    int fd = -1;
+    {
+      unique_lock<mutex> lock(fdcache_lock);
+      for (auto i = lru.begin(); i < lru.end(); i++)
+        {
+          if (i->archive == a && i->entry == b)
+            { // found it; move it to head of lru
+              fdcache_entry n = *i;
+              lru.erase(i); // invalidates i, so no more iteration!
+              lru.push_front(n);
+
+              fd = open(n.fd.c_str(), O_RDONLY); // NB: no problem if dup() fails; looks like cache miss
+              break;
+            }
+        }
+    }
+
+    if (fd >= 0)
+      this->limit(max_fds, max_mbs); // age cache if required
+
+    return fd;
+  }
+
+  int probe(const string& a, const string& b) // just a cache residency check - don't modify LRU state, don't open
   {
     unique_lock<mutex> lock(fdcache_lock);
     for (auto i = lru.begin(); i < lru.end(); i++)
       {
         if (i->archive == a && i->entry == b)
-          { // found it; move it to head of lru
-            fdcache_entry n = *i;
-            lru.erase(i); // invalidates i, so no more iteration!
-            lru.push_front(n);
-
-            return open(n.fd.c_str(), O_RDONLY); // NB: no problem if dup() fails; looks like cache miss
-          }
+          return true;
       }
-    return -1;
+    return false;
   }
 
   void clear(const string& a, const string& b)
@@ -1047,7 +1078,7 @@
     this->max_mbs = maxmbs;
 
     long total_fd = 0;
-    long total_mb = 0;
+    double total_mb = 0.0;
     for (auto i = lru.begin(); i < lru.end(); i++)
       {
         // accumulate totals from most recently used one going backward
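Aside: the fdcache changes above come down to one ordering policy.
Requested files are interned at the front of the LRU, prefetched files
at the back, and lookup() promotes a hit to the front, so prefetched
entries that are never requested are the first to be evicted.  A minimal
standalone sketch of just that policy (a hypothetical simplification:
the real class also tracks file descriptors, sizes, and a mutex):

    // Model of the fdcache LRU ordering policy (hypothetical
    // simplification: no mutex, no file descriptors, no fd/mb limits).
    #include <deque>
    #include <string>
    #include <iostream>

    struct entry { std::string archive, name; };
    static std::deque<entry> lru; // front = most recently used

    // Requested files go to the front; prefetched ones go to the back,
    // so they are evicted first if never actually requested.
    void intern(const std::string& a, const std::string& n, bool front_p)
    {
      for (auto i = lru.begin(); i != lru.end(); i++)
        if (i->archive == a && i->name == n)
          { lru.erase(i); break; } // drop any existing copy first
      if (front_p)
        lru.push_front(entry{a, n});
      else
        lru.push_back(entry{a, n});
    }

    // A hit is promoted to the front, as in fdcache::lookup().
    bool lookup(const std::string& a, const std::string& n)
    {
      for (auto i = lru.begin(); i != lru.end(); i++)
        if (i->archive == a && i->name == n)
          {
            entry e = *i;
            lru.erase(i); // invalidates i, so stop iterating
            lru.push_front(e);
            return true;
          }
      return false;
    }

    int main()
    {
      intern("foo.rpm", "wanted.debug", true);  // the requested file
      intern("foo.rpm", "extra1.debug", false); // prefetched
      intern("foo.rpm", "extra2.debug", false); // prefetched
      (void) lookup("foo.rpm", "extra2.debug"); // a later request promotes it
      for (const entry& e : lru)
        std::cout << e.name << std::endl; // extra2, wanted, extra1
      return 0;
    }

Because the limit() sweep accumulates from the most recently used end
and evicts from the tail, speculative entries are shed before anything
a client actually asked for.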
@@ -1117,6 +1148,7 @@ handle_buildid_r_match (int64_t b_mtime,
       return 0;
     }
 
+  // check for a match in the fdcache first
   int fd = fdcache.lookup(b_source0, b_source1);
   while (fd >= 0) // got one!; NB: this is really an if() with a possible branch out to the end
     {
@@ -1152,6 +1184,7 @@ handle_buildid_r_match (int64_t b_mtime,
       // NB: see, we never go around the 'loop' more than once
     }
 
+  // no match ... grumble, must process the archive
   string archive_decoder = "/dev/null";
   string archive_extension = "";
   for (auto&& arch : scan_archives)
@@ -1196,8 +1229,19 @@ handle_buildid_r_match (int64_t b_mtime,
   if (rc != ARCHIVE_OK)
     throw archive_exception(a, "cannot open archive from pipe");
 
-  while(1) // parse cpio archive entries
+  // archive traversal is in three stages, no, four stages:
+  // 1) skip entries whose names do not match the requested one
+  // 2) extract the matching entry name (set r = result)
+  // 3) extract some number of prefetched entries (just into fdcache)
+  // 4) abort any further processing
+  struct MHD_Response* r = 0;                 // will set in stage 2
+  unsigned prefetch_count = fdcache_prefetch; // will decrement in stage 3
+
+  while(r == 0 || prefetch_count > 0) // stage 1, 2, or 3
     {
+      if (interrupted)
+        break;
+
       struct archive_entry *e;
       rc = archive_read_next_header (a, &e);
       if (rc != ARCHIVE_OK)
@@ -1207,7 +1251,10 @@ handle_buildid_r_match (int64_t b_mtime,
         continue;
 
       string fn = canonicalized_archive_entry_pathname (e);
-      if (fn != b_source1)
+      if ((r == 0) && (fn != b_source1)) // stage 1
+        continue;
+
+      if (fdcache.probe (b_source0, fn)) // skip if already interned
         continue;
 
       // extract this file to a temporary file
@@ -1229,18 +1276,32 @@ handle_buildid_r_match (int64_t b_mtime,
           throw archive_exception(a, "cannot extract file");
         }
 
+      if (r != 0) // stage 3
+        {
+          // NB: now we know we have a complete reusable file; make fdcache
+          // responsible for unlinking it later.
+          fdcache.intern(b_source0, fn,
+                         tmppath, archive_entry_size(e),
+                         false); // prefetched ones go to back of lru
+          prefetch_count --;
+          close (fd); // we're not saving this fd to make a mhd-response from!
+          continue;
+        }
+
       // NB: now we know we have a complete reusable file; make fdcache
       // responsible for unlinking it later.
-      fdcache.intern(b_source0, b_source1, tmppath, archive_entry_size(e));
+      fdcache.intern(b_source0, b_source1,
+                     tmppath, archive_entry_size(e),
+                     true); // requested ones go to the front of lru
 
       inc_metric ("http_responses_total","result",archive_extension + " archive");
-      struct MHD_Response* r = MHD_create_response_from_fd (archive_entry_size(e), fd);
+      r = MHD_create_response_from_fd (archive_entry_size(e), fd);
       if (r == 0)
         {
           if (verbose)
             obatched(clog) << "cannot create fd-response for " << b_source0 << endl;
           close(fd);
-          break; // assume no chance of better luck around another iteration
+          break; // assume no chance of better luck around another iteration; no other copies of same file
         }
       else
         {
@@ -1251,12 +1312,12 @@ handle_buildid_r_match (int64_t b_mtime,
           /* libmicrohttpd will close it. */
           if (result_fd)
             *result_fd = fd;
-          return r;
+          continue;
         }
     }
 
   // XXX: rpm/file not found: delete this R entry?
-  return 0;
+  return r;
 }
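Aside: the reworked loop above is the four-stage state machine its
comment describes.  A self-contained sketch of just the control flow
(hypothetical names: a vector stands in for libarchive's entry stream,
a bool for the MHD_Response pointer):

    #include <string>
    #include <vector>
    #include <iostream>

    int main()
    {
      std::vector<std::string> entries = { "a.debug", "b.debug", "wanted.debug",
                                           "c.debug", "d.debug", "e.debug" };
      const std::string requested = "wanted.debug";
      unsigned prefetch_count = 2; // stand-in for fdcache_prefetch
      bool r = false;              // stand-in for the MHD_Response* result

      for (size_t i = 0; i < entries.size() && (!r || prefetch_count > 0); i++)
        {
          const std::string& fn = entries[i];
          if (!r && fn != requested) // stage 1: skip until the match
            continue;
          if (r)                     // stage 3: prefetch entries past the match
            {
              std::cout << "prefetch " << fn << std::endl;
              prefetch_count--;
              continue;
            }
          r = true;                  // stage 2: serve the requested entry
          std::cout << "serve    " << fn << std::endl;
        } // stage 4: the loop condition stops decompression early
      return 0;
    }

This prints "serve wanted.debug" followed by two prefetches, then stops
without decompressing the rest of the archive.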
@@ -2809,7 +2870,8 @@ main (int argc, char *argv[])
     fdcache_mbs = 1024; // 1 gigabyte
   else
     fdcache_mbs = sfs.f_bavail * sfs.f_bsize / 1024 / 1024 / 4; // 25% of free space
-  fdcache_fds = concurrency * 2;
+  fdcache_prefetch = 64; // guesstimate storage is this much less costly than re-decompression
+  fdcache_fds = (concurrency + fdcache_prefetch) * 2;
 
   /* Parse and process arguments.  */
   int remaining;
@@ -2943,6 +3005,7 @@ main (int argc, char *argv[])
   obatched(clog) << "rescan time " << rescan_s << endl;
   obatched(clog) << "fdcache fds " << fdcache_fds << endl;
   obatched(clog) << "fdcache mbs " << fdcache_mbs << endl;
+  obatched(clog) << "fdcache prefetch " << fdcache_prefetch << endl;
   obatched(clog) << "fdcache tmpdir " << tmpdir << endl;
   obatched(clog) << "groom time " << groom_s << endl;
   if (scan_archives.size()>0)
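Aside: two pieces of arithmetic above are easy to sanity-check.  The new
fd_size_mb accounting pads each entry by just under 64KiB (65535 bytes)
before converting to megabytes, hence "slightly rounded up", and the
default descriptor budget now reserves room for prefetched entries
alongside concurrent requests.  A quick check program (the concurrency
value is illustrative, not the actual default):

    #include <cstdio>

    int main()
    {
      // per-entry size accounting, as in fdcache::intern()
      const long sizes[] = { 1, 65536, 10000000 };
      for (long sz : sizes)
        printf ("sz=%8ld bytes -> %.4f mb\n", sz, (sz + 65535) / 1048576.0);

      // default budget, as in main()
      long concurrency = 8;       // illustrative value
      long fdcache_prefetch = 64;
      long fdcache_fds = (concurrency + fdcache_prefetch) * 2;
      printf ("default fdcache_fds = %ld\n", fdcache_fds); // prints 144
      return 0;
    }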
diff --git a/doc/ChangeLog b/doc/ChangeLog
index 36094d002..3e57491c8 100644
--- a/doc/ChangeLog
+++ b/doc/ChangeLog
@@ -1,3 +1,7 @@
+2020-02-25  Frank Ch. Eigler
+
+	* debuginfod.8: Document new --fdcache-prefetch option.
+
 2020-02-05  Frank Ch. Eigler
 
 	* debuginfod.8: Document new -Z flag and tweak other bits.
diff --git a/doc/debuginfod.8 b/doc/debuginfod.8
index ca844aedc..ed9724d6f 100644
--- a/doc/debuginfod.8
+++ b/doc/debuginfod.8
@@ -193,14 +193,17 @@ loops in the symbolic directory tree might lead to \fIinfinite traversal\fP.
 
 .TP
-.B "\-\-fdcache-fds=NUM" "\-\-fdcache-mbs=MB"
+.B "\-\-fdcache\-fds=NUM" "\-\-fdcache\-mbs=MB" "\-\-fdcache\-prefetch=NUM2"
 Configure limits on a cache that keeps recently extracted files from
-archives. Up to NUM files and up to a total of MB megabytes will be
-kept extracted, in order to avoid having to decompress their archives
-again. The default NUM and MB values depend on the concurrency of the
-system, and on the available disk space on the $TMPDIR or \fB/tmp\fP
-filesystem. This is because that is where the most recently used
-extracted files are kept. Grooming cleans this cache.
+archives. Up to NUM requested files and up to a total of MB megabytes
+will be kept extracted, in order to avoid having to decompress their
+archives over and over again. In addition, up to NUM2 other files
+from an archive may be prefetched into the cache before they are even
+requested. The default NUM, NUM2, and MB values depend on the
+concurrency of the system, and on the available disk space on the
+$TMPDIR or \fB/tmp\fP filesystem. This is because that is where the
+most recently used extracted files are kept. Grooming cleans this
+cache.
 
 .TP
 .B "\-v"
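Usage note: judging from the loop condition in handle_buildid_r_match,
--fdcache-prefetch=0 disables prefetching entirely, since traversal now
stops as soon as the response has been created.  The workload the option
targets looks like this hypothetical client (build-id and paths are made
up; the calls are the documented debuginfod client API):

    #include <elfutils/debuginfod.h>
    #include <cstdio>
    #include <cstdlib>
    #include <unistd.h>

    int main()
    {
      debuginfod_client *client = debuginfod_begin ();
      if (client == NULL)
        return 1;

      // hex build-id form, so build_id_len below is 0
      const unsigned char *build_id = (const unsigned char *)
        "0123456789abcdef0123456789abcdef01234567"; // made up
      const char *files[] = { "/usr/src/debug/foo-1.0/foo.c",   // made up
                              "/usr/src/debug/foo-1.0/bar.c" };

      for (const char *f : files)
        {
          char *path = NULL;
          int fd = debuginfod_find_source (client, build_id, 0, f, &path);
          if (fd >= 0)
            {
              printf ("%s cached at %s\n", f, path);
              free (path);
              close (fd);
            }
        }

      debuginfod_end (client);
      return 0;
    }

Each debuginfod_find_source call is a separate webapi request, but on
the server they typically resolve to the same source archive; with
prefetching, only the first request pays the decompression cost.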