From: Frank Ch. Eigler Date: Wed, 5 Feb 2020 20:04:18 +0000 (-0500) Subject: debuginfod: generalized archive support X-Git-Tag: elfutils-0.179~41 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=8ef876aa170abec983d4359e51a33209ceb01caa;p=thirdparty%2Felfutils.git debuginfod: generalized archive support Add a '-Z EXT[=CMD]' option to debuginfod, which lets it scan any given extension and run CMD on it to unwrap distro archives. For example, for arch-linux pacman files, -Z '.tar.zst=zstdcat' lets debuginfod grok debug and source content in split-debuginfo files. Signed-off-by: Frank Ch. Eigler --- diff --git a/debuginfod/ChangeLog b/debuginfod/ChangeLog index 8c97fdcf7..d812e6d71 100644 --- a/debuginfod/ChangeLog +++ b/debuginfod/ChangeLog @@ -1,3 +1,9 @@ +2020-02-05 Frank Ch. Eigler + + * debuginfod.cxx (argp options): Add -Z option. + (canonicalized_archive_entry_pathname): New function for + distro-agnostic file name matching/storage. + 2020-01-22 Frank Ch. Eigler * debuginfod.cxx (dwarf_extract_source_paths): Don't print diff --git a/debuginfod/debuginfod.cxx b/debuginfod/debuginfod.cxx index 623dbc593..6d7290237 100644 --- a/debuginfod/debuginfod.cxx +++ b/debuginfod/debuginfod.cxx @@ -333,9 +333,10 @@ ARGP_PROGRAM_BUG_ADDRESS_DEF = PACKAGE_BUGREPORT; static const struct argp_option options[] = { { NULL, 0, NULL, 0, "Scanners:", 1 }, - { "scan-file-dir", 'F', NULL, 0, "Enable ELF/DWARF file scanning threads.", 0 }, - { "scan-rpm-dir", 'R', NULL, 0, "Enable RPM scanning threads.", 0 }, - { "scan-deb-dir", 'U', NULL, 0, "Enable DEB scanning threads.", 0 }, + { "scan-file-dir", 'F', NULL, 0, "Enable ELF/DWARF file scanning.", 0 }, + { "scan-rpm-dir", 'R', NULL, 0, "Enable RPM scanning.", 0 }, + { "scan-deb-dir", 'U', NULL, 0, "Enable DEB scanning.", 0 }, + { "scan-archive", 'Z', "EXT=CMD", 0, "Enable arbitrary archive scanning.", 0 }, // "source-oci-imageregistry" ... 
{ NULL, 0, NULL, 0, "Options:", 2 }, @@ -428,6 +429,17 @@ parse_opt (int key, char *arg, scan_archives[".deb"]="dpkg-deb --fsys-tarfile"; scan_archives[".ddeb"]="dpkg-deb --fsys-tarfile"; break; + case 'Z': + { + char* extension = strchr(arg, '='); + if (arg[0] == '\0') + argp_failure(state, 1, EINVAL, "missing EXT"); + else if (extension) + scan_archives[string(arg, (extension-arg))]=string(extension+1); + else + scan_archives[string(arg)]=string("cat"); + } + break; case 'L': traverse_logical = true; break; @@ -1068,6 +1080,25 @@ public: static libarchive_fdcache fdcache; +// For security/portability reasons, many distro-package archives have +// a "./" in front of path names; others have nothing, others have +// "/". Canonicalize them all to a single leading "/", with the +// assumption that this matches the dwarf-derived file names too. +string canonicalized_archive_entry_pathname(struct archive_entry *e) +{ + string fn = archive_entry_pathname(e); + if (fn.size() == 0) + return fn; + if (fn[0] == '/') + return fn; + if (fn[0] == '.') + return fn.substr(1); + else + return string("/")+fn; +} + + + static struct MHD_Response* handle_buildid_r_match (int64_t b_mtime, const string& b_source0, @@ -1162,8 +1193,8 @@ handle_buildid_r_match (int64_t b_mtime, if (! S_ISREG(archive_entry_mode (e))) // skip non-files completely continue; - string fn = archive_entry_pathname (e); - if (fn != string(".")+b_source1) + string fn = canonicalized_archive_entry_pathname (e); + if (fn != b_source1) continue; // extract this file to a temporary file @@ -2055,9 +2086,7 @@ archive_classify (const string& rps, string& archive_extension, if (! S_ISREG(archive_entry_mode (e))) // skip non-files completely continue; - string fn = archive_entry_pathname (e); - if (fn.size() > 1 && fn[0] == '.') - fn = fn.substr(1); // trim off the leading '.' 
+ string fn = canonicalized_archive_entry_pathname (e); if (verbose > 3) obatched(clog) << "libarchive checking " << fn << endl; @@ -2764,7 +2793,7 @@ main (int argc, char *argv[]) "unexpected argument: %s", argv[remaining]); if (scan_archives.size()==0 && !scan_files && source_paths.size()>0) - obatched(clog) << "warning: without -F -R -U, ignoring PATHs" << endl; + obatched(clog) << "warning: without -F -R -U -Z, ignoring PATHs" << endl; fdcache.limit(fdcache_fds, fdcache_mbs); @@ -2894,7 +2923,7 @@ main (int argc, char *argv[]) obatched ob(clog); auto& o = ob << "scanning archive types "; for (auto&& arch : scan_archives) - o << arch.first << " "; + o << arch.first << "(" << arch.second << ") "; o << endl; } const char* du = getenv(DEBUGINFOD_URLS_ENV_VAR); diff --git a/doc/ChangeLog b/doc/ChangeLog index 651ea33d4..36094d002 100644 --- a/doc/ChangeLog +++ b/doc/ChangeLog @@ -1,3 +1,7 @@ +2020-02-05 Frank Ch. Eigler + + * debuginfod.8: Document new -Z flag and tweak other bits. + 2020-01-10 Mark Wielaard * debuginfod_find_debuginfo.3 (DEBUGINFOD_PROGRESS): Mention progress diff --git a/doc/debuginfod.8 b/doc/debuginfod.8 index 166c7c459..ca844aedc 100644 --- a/doc/debuginfod.8 +++ b/doc/debuginfod.8 @@ -61,20 +61,22 @@ or even use debuginfod itself: ^C .ESAMPLE -If the \fB\-R\fP and/or \fB-U\fP option is given, each file is scanned -as an archive file that may contain ELF/DWARF/source files. If \-R is -given, the will scan RPMs; and/or if \-U is given, they will scan DEB -/ DDEB files. (The terms RPM and DEB and DDEB are used synonymously -as "archives" in diagnostic messages.) Because of complications such -as DWZ-compressed debuginfo, may require \fItwo\fP traversal passes to -identify all source code. Source files for RPMs are only served from -other RPMs, so the caution for \-F does not apply. Note that due to -Debian/Ubuntu packaging policies & mechanisms, debuginfod cannot -resolve source files for DEB/DDEB at all. 
- -If no PATH is listed, or neither \fB\-F\fP nor \fB\-R\fP nor \fB\-U\fP -option is given, then \fBdebuginfod\fP will simply serve content that -it accumulated into its index in all previous runs. +If any of the \fB\-R\fP, \fB-U\fP, or \fB-Z\fP options is given, each +file is scanned as an archive file that may contain ELF/DWARF/source +files. Archive files are recognized by extension. If \-R is given, +".rpm" files are scanned; if \-U is given, ".deb" and ".ddeb" files +are scanned; if \-Z is given, the listed extensions are scanned. +Because of complications such as DWZ-compressed debuginfo, debuginfod may require +\fItwo\fP traversal passes to identify all source code. Source files +for RPMs are only served from other RPMs, so the caution for \-F does +not apply. Note that due to Debian/Ubuntu packaging policies & +mechanisms, debuginfod cannot resolve source files for DEB/DDEB at +all. + +If no PATH is listed, or none of the scanning options is given, then +\fBdebuginfod\fP will simply serve content that it accumulated into +its index in all previous runs, and federate to any upstream +debuginfod servers. .SH OPTIONS @@ -83,13 +85,27 @@ it accumulated into its index in all previous runs. .B "\-F" Activate ELF/DWARF file scanning. The default is off. +.TP +.B "\-Z EXT" "\-Z EXT=CMD" +Activate an additional pattern in archive scanning. Files with name +extension EXT (including the dot) will be processed. If CMD is given, +it is invoked with the file name added to its argument list, and +should produce a common archive on its standard output. Otherwise, +the file is read as if CMD were "cat". Since debuginfod internally +uses \fBlibarchive\fP to read archive files, it can accept a wide +range of archive formats and compression modes. The default is no +additional patterns. This option may be repeated. + .TP .B "\-R" Activate RPM patterns in archive scanning. The default is off. +Equivalent to \fB\%\-Z\~.rpm=rpm2cpio\fP. 
.TP .B "\-U" Activate DEB/DDEB patterns in archive scanning. The default is off. +Equivalent to \fB\%\-Z\ .deb='dpkg-deb\ \-\-fsys\-tarfile'\fP +\fB\%\-Z\ .ddeb='dpkg-deb\ \-\-fsys\-tarfile'\fP. .TP .B "\-d FILE" "\-\-database=FILE" @@ -123,7 +139,8 @@ against the full path of each file, based on its \fBrealpath(3)\fP canonicalization. By default, all files are included and none are excluded. A file that matches both include and exclude REGEX is excluded. (The \fIcontents\fP of archive files are not subject to -inclusion or exclusion filtering: they are all processed.) +inclusion or exclusion filtering: they are all processed.) Only the +last of each type of regular expression given is used. .TP .B "\-t SECONDS" "\-\-rescan\-time=SECONDS"