]> git.ipfire.org Git - thirdparty/elfutils.git/commitdiff
PR25548: support canonicalized source-path names in debuginfod webapi
authorFrank Ch. Eigler <fche@redhat.com>
Mon, 23 Mar 2020 19:33:56 +0000 (15:33 -0400)
committerFrank Ch. Eigler <fche@redhat.com>
Thu, 26 Mar 2020 14:35:38 +0000 (10:35 -0400)
Programs are sometimes compiled with source path names containing
noise like /./ or // or /foo/../, and these survive into DWARF.  This
code allows either raw or canonicalized pathnames in the webapi, by
letting the client pass things verbatim, and letting the server
store/accept both raw and canonicalized path names for source files.
Tests included & docs updated.

Signed-off-by: Frank Ch. Eigler <fche@redhat.com>
Reported-by: Eli Schwartz <eschwartz@archlinux.org>
Tested-by: Eli Schwartz <eschwartz@archlinux.org>
16 files changed:
debuginfod/ChangeLog
debuginfod/debuginfod-client.c
debuginfod/debuginfod.cxx
doc/ChangeLog
doc/debuginfod-find.1
doc/debuginfod.8
doc/debuginfod_find_debuginfo.3
tests/ChangeLog
tests/debuginfod-rpms/fedora31/hello3-1.0-2.src.rpm [new file with mode: 0644]
tests/debuginfod-rpms/fedora31/hello3-1.0-2.x86_64.rpm [new file with mode: 0644]
tests/debuginfod-rpms/fedora31/hello3-debuginfo-1.0-2.x86_64.rpm [new file with mode: 0644]
tests/debuginfod-rpms/fedora31/hello3-debugsource-1.0-2.x86_64.rpm [new file with mode: 0644]
tests/debuginfod-rpms/fedora31/hello3-two-1.0-2.x86_64.rpm [new file with mode: 0644]
tests/debuginfod-rpms/fedora31/hello3-two-debuginfo-1.0-2.x86_64.rpm [new file with mode: 0644]
tests/debuginfod-rpms/hello3.spec. [new file with mode: 0644]
tests/run-debuginfod-find.sh

index 9eb06719ec0de6ce05f536f0b9a5a40e47eece9b..7518e886031c59a3ce15df4e65dde0641251206b 100644 (file)
@@ -1,3 +1,14 @@
+2020-03-24  Frank Ch. Eigler  <fche@redhat.com>
+
+       * debuginfod-client.c (debuginfod_query_server): Set
+       CURLOPT_PATH_AS_IS, to propagate file names verbatim.
+       * debuginfod.cxx (canon_pathname): Implement RFC3986
+       style pathname canonicalization.
+       (handle_buildid): Canonicalize incoming webapi source
+       paths, accept either one.
+       (scan_source_file, archive_classify): Store both
+       original and canonicalized dwarf-source file names.
+
 2020-03-24  Frank Ch. Eigler  <fche@redhat.com>
 
        * debuginfod.cxx (handle_buildid): In case of federated fallback
index ea2d16249a31758a8ca11f6a0b8f1c7f6b5e51f3..251047caf53f1415af01910e07071c06652361ed 100644 (file)
@@ -716,6 +716,7 @@ debuginfod_query_server (debuginfod_client *c,
       curl_easy_setopt(data[i].handle, CURLOPT_FOLLOWLOCATION, (long) 1);
       curl_easy_setopt(data[i].handle, CURLOPT_FAILONERROR, (long) 1);
       curl_easy_setopt(data[i].handle, CURLOPT_NOSIGNAL, (long) 1);
+      curl_easy_setopt(data[i].handle, CURLOPT_PATH_AS_IS, (long) 1);
       curl_easy_setopt(data[i].handle, CURLOPT_AUTOREFERER, (long) 1);
       curl_easy_setopt(data[i].handle, CURLOPT_ACCEPT_ENCODING, "");
       add_extra_headers(data[i].handle);
index 490169a40dedca197a5aff38a963b70bbbd4ae7a..2f28a877faa8bb2a07671349ed6032de9cd4f5e4 100644 (file)
@@ -945,6 +945,82 @@ shell_escape(const string& str)
 }
 
 
+// PR25548: Perform POSIX / RFC3986 style path canonicalization on the input string.
+//
+// Namely:
+//    //         ->   /
+//    /foo/../   ->   /
+//    /./        ->   /
+//
+// This mapping is done on dwarf-side source path names, which may
+// include these constructs, so we can deal with debuginfod clients
+// that accidentally canonicalize the paths.
+//
+// realpath(3) is close but not quite right, because it also resolves
+// symbolic links.  Symlinks at the debuginfod server have nothing to
+// do with the build-time symlinks, thus they must not be considered.
+//
+// see also curl Curl_dedotdotify() aka RFC3986, which we mostly follow here
+// see also libc __realpath()
+// see also llvm llvm::sys::path::remove_dots()
+static string
+canon_pathname (const string& input)
+{
+  string i = input; // 5.2.4 (1)
+  string o;
+
+  while (i.size() != 0)
+    {
+      // 5.2.4 (2) A
+      if (i.substr(0,3) == "../")
+        i = i.substr(3);
+      else if(i.substr(0,2) == "./")
+        i = i.substr(2);
+
+      // 5.2.4 (2) B
+      else if (i.substr(0,3) == "/./")
+        i = i.substr(2);
+      else if (i == "/.")
+        i = ""; // no need to handle "/." complete-path-segment case; we're dealing with file names
+
+      // 5.2.4 (2) C
+      else if (i.substr(0,4) == "/../") {
+        i = i.substr(3);
+        string::size_type sl = o.rfind("/");
+        if (sl != string::npos)
+          o = o.substr(0, sl);
+        else
+          o = "";
+      } else if (i == "/..")
+        i = ""; // no need to handle "/.." complete-path-segment case; we're dealing with file names
+
+      // 5.2.4 (2) D
+      // no need to handle these cases; we're dealing with file names
+      else if (i == ".")
+        i = "";
+      else if (i == "..")
+        i = "";
+
+      // POSIX special: map // to /
+      else if (i.substr(0,2) == "//")
+        i = i.substr(1);
+
+      // 5.2.4 (2) E
+      else {
+        string::size_type next_slash = i.find("/", (i[0]=='/' ? 1 : 0)); // skip first slash
+        o += i.substr(0, next_slash);
+        if (next_slash == string::npos)
+          i = "";
+        else
+          i = i.substr(next_slash);
+      }
+    }
+
+  return o;
+}
+
+
+
 // A map-like class that owns a cache of file descriptors (indexed by
 // file / content names).
 //
@@ -1394,12 +1470,17 @@ static struct MHD_Response* handle_buildid (const string& buildid /* unsafe */,
     }
   else if (atype_code == "S")
     {
+      // PR25548
+      // Incoming source queries may come in with either dwarf-level OR canonicalized paths.
+      // We let the query pass with either one.
+
       pp = new sqlite_ps (db, "mhd-query-s",
-                          "select mtime, sourcetype, source0, source1 from " BUILDIDS "_query_s where buildid = ? and artifactsrc = ? "
+                          "select mtime, sourcetype, source0, source1 from " BUILDIDS "_query_s where buildid = ? and artifactsrc in (?,?) "
                           "order by sharedprefix(source0,source0ref) desc, mtime desc");
       pp->reset();
       pp->bind(1, buildid);
       pp->bind(2, suffix);
+      pp->bind(3, canon_pathname(suffix));
     }
   unique_ptr<sqlite_ps> ps_closer(pp); // release pp if exception or return
 
@@ -2117,6 +2198,27 @@ scan_source_file (const string& rps, const stat_t& st,
             .bind(4, sfs.st_mtime)
             .step_ok_done();
 
+          // PR25548: also store canonicalized source path
+          string dwarfsrc_canon = canon_pathname (dwarfsrc);
+          if (dwarfsrc_canon != dwarfsrc)
+            {
+              if (verbose > 3)
+                obatched(clog) << "canonicalized src=" << dwarfsrc << " alias=" << dwarfsrc_canon << endl;
+
+              ps_upsert_files
+                .reset()
+                .bind(1, dwarfsrc_canon)
+                .step_ok_done();
+
+              ps_upsert_s
+                .reset()
+                .bind(1, buildid)
+                .bind(2, dwarfsrc_canon)
+                .bind(3, srps)
+                .bind(4, sfs.st_mtime)
+                .step_ok_done();
+            }
+
           inc_metric("found_sourcerefs_total","source","files");
         }
     }
@@ -2278,6 +2380,26 @@ archive_classify (const string& rps, string& archive_extension,
                     .bind(2, s)
                     .step_ok_done();
 
+                  // PR25548: also store canonicalized source path
+                  const string& dwarfsrc = s;
+                  string dwarfsrc_canon = canon_pathname (dwarfsrc);
+                  if (dwarfsrc_canon != dwarfsrc)
+                    {
+                      if (verbose > 3)
+                        obatched(clog) << "canonicalized src=" << dwarfsrc << " alias=" << dwarfsrc_canon << endl;
+
+                      ps_upsert_files
+                        .reset()
+                        .bind(1, dwarfsrc_canon)
+                        .step_ok_done();
+
+                      ps_upsert_sref
+                        .reset()
+                        .bind(1, buildid)
+                        .bind(2, dwarfsrc_canon)
+                        .step_ok_done();
+                    }
+
                   fts_sref ++;
                 }
             }
index 59809ea8a1e2c0b7d838a59706a9ee722f6cccac..564644f41907cc6dc4e1a6d8a4e2a9b72f004841 100644 (file)
@@ -1,3 +1,8 @@
+2020-03-24  Frank Ch. Eigler  <fche@redhat.com>
+
+       * debuginfod-find.1, debuginfod_find_debuginfo.3: Document
+       source path canonicalization.
+
 2020-03-22  Frank Ch. Eigler  <fche@redhat.com>
 
        * debuginfod_get_url.3: New function, documented ...
index e71ca29be96ecf943556a7f138160a0c25d0b3dd..a861a48ed1d16ec5a8c3dc78418487c85f49af88 100644 (file)
@@ -78,10 +78,11 @@ is made up of multiple CUs.  Therefore, to disambiguate, debuginfod
 expects source queries to prefix relative path names with the CU
 compilation-directory, followed by a mandatory "/".
 
-Note: the user should not elide \fB../\fP or \fB/./\fP or extraneous
-\fB///\fP sorts of path components in the directory names, because if
-this is how those names appear in the DWARF files, that is what
-debuginfod needs to see too.
+Note: the caller may or may not elide \fB../\fP or \fB/./\fP or extraneous
+\fB///\fP sorts of path components in the directory names.  debuginfod
+accepts both forms.  Specifically, debuginfod canonicalizes path names
+according to RFC3986 section 5.2.4 (Remove Dot Segments), plus reducing
+any \fB//\fP to \fB/\fP in the path.
 
 For example:
 .TS
index 39d1904e45f759ba3294bc70e9d5ff5928b16c7a..64795c245f2764ccb52f6db2492bae52ac1586dc 100644 (file)
@@ -265,10 +265,11 @@ is made up of multiple CUs.  Therefore, to disambiguate, debuginfod
 expects source queries to prefix relative path names with the CU
 compilation-directory, followed by a mandatory "/".
 
-Note: contrary to RFC 3986, the client should not elide \fB../\fP or
-\fB/./\fP or extraneous \fB///\fP sorts of path components in the
-directory names, because if this is how those names appear in the
-DWARF files, that is what debuginfod needs to see too.
+Note: the caller may or may not elide \fB../\fP or \fB/./\fP or extraneous
+\fB///\fP sorts of path components in the directory names.  debuginfod
+accepts both forms.  Specifically, debuginfod canonicalizes path names
+according to RFC3986 section 5.2.4 (Remove Dot Segments), plus reducing
+any \fB//\fP to \fB/\fP in the path.
 
 For example:
 .TS
index f7ec6cc134ba8cec73026d630f19d03c19e8a398..42f2265b255c987dac401dd4bf43e43f2fcce13a 100644 (file)
@@ -87,10 +87,11 @@ paths, and yet an executable is made up of multiple CUs. Therefore, to
 disambiguate, debuginfod expects source queries to prefix relative path
 names with the CU compilation-directory, followed by a mandatory "/".
 
-Note: the caller should not elide \fB../\fP or \fB/./\fP or extraneous
-\fB///\fP sorts of path components in the directory names, because if
-this is how those names appear in the DWARF files, that is what
-debuginfod needs to see too.
+Note: the caller may or may not elide \fB../\fP or \fB/./\fP or extraneous
+\fB///\fP sorts of path components in the directory names.  debuginfod
+accepts both forms.  Specifically, debuginfod canonicalizes path names
+according to RFC3986 section 5.2.4 (Remove Dot Segments), plus reducing
+any \fB//\fP to \fB/\fP in the path.
 
 If \fIpath\fP is not NULL and the query is successful, \fIpath\fP is set
 to the path of the file in the cache. The caller must \fBfree\fP() this value.
index 7c1c307a532ff1697f1e6bdcee4c00ab3d5c49c2..933e6726b0c8253afe710669abfddd20927d4e74 100644 (file)
@@ -1,3 +1,9 @@
+2020-03-24  Frank Ch. Eigler  <fche@redhat.com>
+
+       * debuginfod-rpms/hello3.spec., /fedora31/*: New files with
+       uncanonicalized source paths.
+       * run-debuginfod-find.sh: Test them.
+
 2020-03-24  Frank Ch. Eigler  <fche@redhat.com>
 
        * run-debuginfod-find.sh: Test the more detailed debuginfod
diff --git a/tests/debuginfod-rpms/fedora31/hello3-1.0-2.src.rpm b/tests/debuginfod-rpms/fedora31/hello3-1.0-2.src.rpm
new file mode 100644 (file)
index 0000000..d0b3454
Binary files /dev/null and b/tests/debuginfod-rpms/fedora31/hello3-1.0-2.src.rpm differ
diff --git a/tests/debuginfod-rpms/fedora31/hello3-1.0-2.x86_64.rpm b/tests/debuginfod-rpms/fedora31/hello3-1.0-2.x86_64.rpm
new file mode 100644 (file)
index 0000000..8b2fe9b
Binary files /dev/null and b/tests/debuginfod-rpms/fedora31/hello3-1.0-2.x86_64.rpm differ
diff --git a/tests/debuginfod-rpms/fedora31/hello3-debuginfo-1.0-2.x86_64.rpm b/tests/debuginfod-rpms/fedora31/hello3-debuginfo-1.0-2.x86_64.rpm
new file mode 100644 (file)
index 0000000..ee479ec
Binary files /dev/null and b/tests/debuginfod-rpms/fedora31/hello3-debuginfo-1.0-2.x86_64.rpm differ
diff --git a/tests/debuginfod-rpms/fedora31/hello3-debugsource-1.0-2.x86_64.rpm b/tests/debuginfod-rpms/fedora31/hello3-debugsource-1.0-2.x86_64.rpm
new file mode 100644 (file)
index 0000000..890478e
Binary files /dev/null and b/tests/debuginfod-rpms/fedora31/hello3-debugsource-1.0-2.x86_64.rpm differ
diff --git a/tests/debuginfod-rpms/fedora31/hello3-two-1.0-2.x86_64.rpm b/tests/debuginfod-rpms/fedora31/hello3-two-1.0-2.x86_64.rpm
new file mode 100644 (file)
index 0000000..73fd939
Binary files /dev/null and b/tests/debuginfod-rpms/fedora31/hello3-two-1.0-2.x86_64.rpm differ
diff --git a/tests/debuginfod-rpms/fedora31/hello3-two-debuginfo-1.0-2.x86_64.rpm b/tests/debuginfod-rpms/fedora31/hello3-two-debuginfo-1.0-2.x86_64.rpm
new file mode 100644 (file)
index 0000000..0cc2407
Binary files /dev/null and b/tests/debuginfod-rpms/fedora31/hello3-two-debuginfo-1.0-2.x86_64.rpm differ
diff --git a/tests/debuginfod-rpms/hello3.spec. b/tests/debuginfod-rpms/hello3.spec.
new file mode 100644 (file)
index 0000000..ffb9513
--- /dev/null
@@ -0,0 +1,60 @@
+Summary: hello3 -- double hello, world rpm
+Name: hello3
+Version: 1.0
+Release: 2
+Group: Utilities
+License: GPL
+Distribution: RPM ^W Elfutils test suite.
+Vendor: Red Hat Software
+Packager: Red Hat Software <bugs@redhat.com>
+URL: http://www.redhat.com
+BuildRequires: gcc make
+Source0: hello-1.0.tar.gz
+
+%description
+Simple rpm demonstration with an eye to consumption by debuginfod.
+
+%package two
+Summary: hello3two
+License: GPL
+
+%description two
+Dittoish.
+
+%prep
+%setup -q -n hello-1.0
+
+%build
+mkdir foobar
+gcc -g -O1 foobar///./../hello.c -o hello
+gcc -g -O2 -D_FORTIFY_SOURCE=2 foobar///./../hello.c -o hello3
+
+%install
+rm -rf $RPM_BUILD_ROOT
+mkdir -p $RPM_BUILD_ROOT/usr/local/bin
+cp hello $RPM_BUILD_ROOT/usr/local/bin/
+cp hello3 $RPM_BUILD_ROOT/usr/local/bin/
+
+%clean
+rm -rf $RPM_BUILD_ROOT
+
+%files 
+%defattr(-,root,root)
+%attr(0751,root,root)   /usr/local/bin/hello
+
+%files two
+%defattr(-,root,root)
+%attr(0751,root,root)   /usr/local/bin/hello3
+
+%changelog
+* Tue Mar 24 2020 Frank Ch. Eigler <fche@redhat.com>
+- New variant of hello2, with crazy source file paths
+
+* Thu Nov 14 2019 Frank Ch. Eigler <fche@redhat.com>
+- Dropped misc files not relevant to debuginfod testing.
+
+* Wed May 18 2016 Mark Wielaard <mjw@redhat.com>
+- Add hello2 for dwz testing support.
+
+* Tue Oct 20 1998 Jeff Johnson <jbj@redhat.com>
+- create.
index cbc2895865364a8893a8ad21282b9b9348cdfe41..a6958fad758c6124ab89604b2d9add169d9b5e23 100755 (executable)
@@ -23,8 +23,8 @@ type rpm2cpio 2>/dev/null || (echo "need rpm2cpio"; exit 77)
 type bzcat 2>/dev/null || (echo "need bzcat"; exit 77)
 
 # for test case debugging, uncomment:
-# set -x
-# VERBOSE=-vvvv
+#set -x
+#VERBOSE=-vvvv
 
 DB=${PWD}/.debuginfod_tmp.sqlite
 tempfiles $DB
@@ -40,7 +40,7 @@ cleanup()
   if [ $PID2 -ne 0 ]; then kill $PID2; wait $PID2; fi
   if [ $PID3 -ne 0 ]; then kill $PID3; wait $PID3; fi
 
-  rm -rf F R D L Z ${PWD}/mocktree ${PWD}/.client_cache* ${PWD}/tmp*
+  rm -rf F R D L Z ${PWD}/foobar ${PWD}/mocktree ${PWD}/.client_cache* ${PWD}/tmp*
   exit_cleanup
 }
 
@@ -112,7 +112,9 @@ export DEBUGINFOD_TIMEOUT=10
 # cannot find it without debuginfod.
 echo "int main() { return 0; }" > ${PWD}/prog.c
 tempfiles prog.c
-gcc -Wl,--build-id -g -o prog ${PWD}/prog.c
+# Create a subdirectory to confound source path names
+mkdir foobar
+gcc -Wl,--build-id -g -o prog ${PWD}/foobar///./../prog.c
 testrun ${abs_top_builddir}/src/strip -g -f prog.debug ${PWD}/prog
 BUILDID=`env LD_LIBRARY_PATH=$ldpath ${abs_builddir}/../src/readelf \
           -a prog | grep 'Build ID' | cut -d ' ' -f 7`
@@ -142,9 +144,15 @@ cmp $filename F/prog.debug
 filename=`testrun ${abs_top_builddir}/debuginfod/debuginfod-find executable $BUILDID`
 cmp $filename F/prog
 
+# raw source filename
+filename=`testrun ${abs_top_builddir}/debuginfod/debuginfod-find source $BUILDID ${PWD}/foobar///./../prog.c`
+cmp $filename  ${PWD}/prog.c
+
+# and also the canonicalized one
 filename=`testrun ${abs_top_builddir}/debuginfod/debuginfod-find source $BUILDID ${PWD}/prog.c`
 cmp $filename  ${PWD}/prog.c
 
+
 ########################################################################
 
 # Test whether the cache default locations are correct
@@ -291,6 +299,9 @@ archive_test() {
 
 # common source file sha1
 SHA=f4a1a8062be998ae93b8f1cd744a398c6de6dbb1
+# fedora31
+archive_test 420e9e3308971f4b817cc5bf83928b41a6909d88 /usr/src/debug/hello3-1.0-2.x86_64/foobar////./../hello.c $SHA
+archive_test 87c08d12c78174f1082b7c888b3238219b0eb265 /usr/src/debug/hello3-1.0-2.x86_64///foobar/./..//hello.c $SHA
 # fedora30
 archive_test c36708a78618d597dee15d0dc989f093ca5f9120 /usr/src/debug/hello2-1.0-2.x86_64/hello.c $SHA
 archive_test 41a236eb667c362a1c4196018cc4581e09722b1b /usr/src/debug/hello2-1.0-2.x86_64/hello.c $SHA