+2020-03-24 Frank Ch. Eigler <fche@redhat.com>
+
+ * debuginfod-client.c (debuginfod_query_server): Set
+ CURLOPT_PATH_AS_IS, to propagate file names verbatim.
+ * debuginfod.cxx (canon_pathname): Implement RFC3986
+ style pathname canonicalization.
+ (handle_buildid): Canonicalize incoming webapi source
+ paths, accept either one.
+ (scan_source_file, archive_classify): Store both
+ original and canonicalized dwarf-source file names.
+
2020-03-24 Frank Ch. Eigler <fche@redhat.com>
* debuginfod.cxx (handle_buildid): In case of federated fallback
curl_easy_setopt(data[i].handle, CURLOPT_FOLLOWLOCATION, (long) 1);
curl_easy_setopt(data[i].handle, CURLOPT_FAILONERROR, (long) 1);
curl_easy_setopt(data[i].handle, CURLOPT_NOSIGNAL, (long) 1);
+ curl_easy_setopt(data[i].handle, CURLOPT_PATH_AS_IS, (long) 1);
curl_easy_setopt(data[i].handle, CURLOPT_AUTOREFERER, (long) 1);
curl_easy_setopt(data[i].handle, CURLOPT_ACCEPT_ENCODING, "");
add_extra_headers(data[i].handle);
}
+// PR25548: Perform POSIX / RFC3986 style path canonicalization on the input string.
+//
+// Namely:
+// // -> /
+// /foo/../ -> /
+// /./ -> /
+//
+// This mapping is done on dwarf-side source path names, which may
+// include these constructs, so we can deal with debuginfod clients
+// that accidentally canonicalize the paths.
+//
+// realpath(3) is close but not quite right, because it also resolves
+// symbolic links. Symlinks at the debuginfod server have nothing to
+// do with the build-time symlinks, thus they must not be considered.
+//
+// see also curl Curl_dedotdotify() aka RFC3986, which we mostly follow here
+// see also libc __realpath()
+// see also llvm llvm::sys::path::remove_dots()
+static string
+canon_pathname (const string& input)
+{
+ string i = input; // 5.2.4 (1)
+ string o;
+
+ while (i.size() != 0)
+ {
+ // 5.2.4 (2) A
+ if (i.substr(0,3) == "../")
+ i = i.substr(3);
+ else if(i.substr(0,2) == "./")
+ i = i.substr(2);
+
+ // 5.2.4 (2) B
+ else if (i.substr(0,3) == "/./")
+ i = i.substr(2);
+ else if (i == "/.")
+ i = ""; // no need to handle "/." complete-path-segment case; we're dealing with file names
+
+ // 5.2.4 (2) C
+ else if (i.substr(0,4) == "/../") {
+ i = i.substr(3);
+ string::size_type sl = o.rfind("/");
+ if (sl != string::npos)
+ o = o.substr(0, sl);
+ else
+ o = "";
+ } else if (i == "/..")
+ i = ""; // no need to handle "/.." complete-path-segment case; we're dealing with file names
+
+ // 5.2.4 (2) D
+ // no need to handle these cases; we're dealing with file names
+ else if (i == ".")
+ i = "";
+ else if (i == "..")
+ i = "";
+
+ // POSIX special: map // to /
+ else if (i.substr(0,2) == "//")
+ i = i.substr(1);
+
+ // 5.2.4 (2) E
+ else {
+ string::size_type next_slash = i.find("/", (i[0]=='/' ? 1 : 0)); // skip first slash
+ o += i.substr(0, next_slash);
+ if (next_slash == string::npos)
+ i = "";
+ else
+ i = i.substr(next_slash);
+ }
+ }
+
+ return o;
+}
+
+
+
// A map-like class that owns a cache of file descriptors (indexed by
// file / content names).
//
}
else if (atype_code == "S")
{
+ // PR25548
+ // Incoming source queries may come in with either dwarf-level OR canonicalized paths.
+ // We let the query pass with either one.
+
pp = new sqlite_ps (db, "mhd-query-s",
- "select mtime, sourcetype, source0, source1 from " BUILDIDS "_query_s where buildid = ? and artifactsrc = ? "
+ "select mtime, sourcetype, source0, source1 from " BUILDIDS "_query_s where buildid = ? and artifactsrc in (?,?) "
"order by sharedprefix(source0,source0ref) desc, mtime desc");
pp->reset();
pp->bind(1, buildid);
pp->bind(2, suffix);
+ pp->bind(3, canon_pathname(suffix));
}
unique_ptr<sqlite_ps> ps_closer(pp); // release pp if exception or return
.bind(4, sfs.st_mtime)
.step_ok_done();
+ // PR25548: also store canonicalized source path
+ string dwarfsrc_canon = canon_pathname (dwarfsrc);
+ if (dwarfsrc_canon != dwarfsrc)
+ {
+ if (verbose > 3)
+ obatched(clog) << "canonicalized src=" << dwarfsrc << " alias=" << dwarfsrc_canon << endl;
+
+ ps_upsert_files
+ .reset()
+ .bind(1, dwarfsrc_canon)
+ .step_ok_done();
+
+ ps_upsert_s
+ .reset()
+ .bind(1, buildid)
+ .bind(2, dwarfsrc_canon)
+ .bind(3, srps)
+ .bind(4, sfs.st_mtime)
+ .step_ok_done();
+ }
+
inc_metric("found_sourcerefs_total","source","files");
}
}
.bind(2, s)
.step_ok_done();
+ // PR25548: also store canonicalized source path
+ const string& dwarfsrc = s;
+ string dwarfsrc_canon = canon_pathname (dwarfsrc);
+ if (dwarfsrc_canon != dwarfsrc)
+ {
+ if (verbose > 3)
+ obatched(clog) << "canonicalized src=" << dwarfsrc << " alias=" << dwarfsrc_canon << endl;
+
+ ps_upsert_files
+ .reset()
+ .bind(1, dwarfsrc_canon)
+ .step_ok_done();
+
+ ps_upsert_sref
+ .reset()
+ .bind(1, buildid)
+ .bind(2, dwarfsrc_canon)
+ .step_ok_done();
+ }
+
fts_sref ++;
}
}
+2020-03-24 Frank Ch. Eigler <fche@redhat.com>
+
+ * debuginfod-find.1, debuginfod_find_debuginfo.3: Document
+ source path canonicalization.
+
2020-03-22 Frank Ch. Eigler <fche@redhat.com>
* debuginfod_get_url.3: New function, documented ...
expects source queries to prefix relative path names with the CU
compilation-directory, followed by a mandatory "/".
-Note: the user should not elide \fB../\fP or \fB/./\fP or extraneous
-\fB///\fP sorts of path components in the directory names, because if
-this is how those names appear in the DWARF files, that is what
-debuginfod needs to see too.
+Note: the caller may or may not elide \fB../\fP or \fB/./\fP or extraneous
+\fB///\fP sorts of path components in the directory names. debuginfod
+accepts both forms. Specifically, debuginfod canonicalizes path names
+according to RFC3986 section 5.2.4 (Remove Dot Segments), plus reducing
+any \fB//\fP to \fB/\fP in the path.
For example:
.TS
expects source queries to prefix relative path names with the CU
compilation-directory, followed by a mandatory "/".
-Note: contrary to RFC 3986, the client should not elide \fB../\fP or
-\fB/./\fP or extraneous \fB///\fP sorts of path components in the
-directory names, because if this is how those names appear in the
-DWARF files, that is what debuginfod needs to see too.
+Note: the caller may or may not elide \fB../\fP or \fB/./\fP or extraneous
+\fB///\fP sorts of path components in the directory names. debuginfod
+accepts both forms. Specifically, debuginfod canonicalizes path names
+according to RFC3986 section 5.2.4 (Remove Dot Segments), plus reducing
+any \fB//\fP to \fB/\fP in the path.
For example:
.TS
disambiguate, debuginfod expects source queries to prefix relative path
names with the CU compilation-directory, followed by a mandatory "/".
-Note: the caller should not elide \fB../\fP or \fB/./\fP or extraneous
-\fB///\fP sorts of path components in the directory names, because if
-this is how those names appear in the DWARF files, that is what
-debuginfod needs to see too.
+Note: the caller may or may not elide \fB../\fP or \fB/./\fP or extraneous
+\fB///\fP sorts of path components in the directory names. debuginfod
+accepts both forms. Specifically, debuginfod canonicalizes path names
+according to RFC3986 section 5.2.4 (Remove Dot Segments), plus reducing
+any \fB//\fP to \fB/\fP in the path.
If \fIpath\fP is not NULL and the query is successful, \fIpath\fP is set
to the path of the file in the cache. The caller must \fBfree\fP() this value.
+2020-03-24 Frank Ch. Eigler <fche@redhat.com>
+
+ * debuginfod-rpms/hello3.spec., /fedora31/*: New files with
+ uncanonicalized source paths.
+ * run-debuginfod-find.sh: Test them.
+
2020-03-24 Frank Ch. Eigler <fche@redhat.com>
* run-debuginfod-find.sh: Test the more detailed debuginfod
--- /dev/null
+Summary: hello3 -- double hello, world rpm
+Name: hello3
+Version: 1.0
+Release: 2
+Group: Utilities
+License: GPL
+Distribution: RPM ^W Elfutils test suite.
+Vendor: Red Hat Software
+Packager: Red Hat Software <bugs@redhat.com>
+URL: http://www.redhat.com
+BuildRequires: gcc make
+Source0: hello-1.0.tar.gz
+
+%description
+Simple rpm demonstration with an eye to consumption by debuginfod.
+
+%package two
+Summary: hello3two
+License: GPL
+
+%description two
+Dittoish.
+
+%prep
+%setup -q -n hello-1.0
+
+%build
+mkdir foobar
+gcc -g -O1 foobar///./../hello.c -o hello
+gcc -g -O2 -D_FORTIFY_SOURCE=2 foobar///./../hello.c -o hello3
+
+%install
+rm -rf $RPM_BUILD_ROOT
+mkdir -p $RPM_BUILD_ROOT/usr/local/bin
+cp hello $RPM_BUILD_ROOT/usr/local/bin/
+cp hello3 $RPM_BUILD_ROOT/usr/local/bin/
+
+%clean
+rm -rf $RPM_BUILD_ROOT
+
+%files
+%defattr(-,root,root)
+%attr(0751,root,root) /usr/local/bin/hello
+
+%files two
+%defattr(-,root,root)
+%attr(0751,root,root) /usr/local/bin/hello3
+
+%changelog
+* Tue Mar 24 2020 Frank Ch. Eigler <fche@redhat.com>
+- New variant of hello2, with crazy source file paths
+
+* Thu Nov 14 2019 Frank Ch. Eigler <fche@redhat.com>
+- Dropped misc files not relevant to debuginfod testing.
+
+* Wed May 18 2016 Mark Wielaard <mjw@redhat.com>
+- Add hello2 for dwz testing support.
+
+* Tue Oct 20 1998 Jeff Johnson <jbj@redhat.com>
+- create.
type bzcat 2>/dev/null || (echo "need bzcat"; exit 77)
# for test case debugging, uncomment:
-# set -x
-# VERBOSE=-vvvv
+#set -x
+#VERBOSE=-vvvv
DB=${PWD}/.debuginfod_tmp.sqlite
tempfiles $DB
if [ $PID2 -ne 0 ]; then kill $PID2; wait $PID2; fi
if [ $PID3 -ne 0 ]; then kill $PID3; wait $PID3; fi
- rm -rf F R D L Z ${PWD}/mocktree ${PWD}/.client_cache* ${PWD}/tmp*
+ rm -rf F R D L Z ${PWD}/foobar ${PWD}/mocktree ${PWD}/.client_cache* ${PWD}/tmp*
exit_cleanup
}
# cannot find it without debuginfod.
echo "int main() { return 0; }" > ${PWD}/prog.c
tempfiles prog.c
-gcc -Wl,--build-id -g -o prog ${PWD}/prog.c
+# Create a subdirectory to confound source path names
+mkdir foobar
+gcc -Wl,--build-id -g -o prog ${PWD}/foobar///./../prog.c
testrun ${abs_top_builddir}/src/strip -g -f prog.debug ${PWD}/prog
BUILDID=`env LD_LIBRARY_PATH=$ldpath ${abs_builddir}/../src/readelf \
-a prog | grep 'Build ID' | cut -d ' ' -f 7`
filename=`testrun ${abs_top_builddir}/debuginfod/debuginfod-find executable $BUILDID`
cmp $filename F/prog
+# raw source filename
+filename=`testrun ${abs_top_builddir}/debuginfod/debuginfod-find source $BUILDID ${PWD}/foobar///./../prog.c`
+cmp $filename ${PWD}/prog.c
+
+# and also the canonicalized one
filename=`testrun ${abs_top_builddir}/debuginfod/debuginfod-find source $BUILDID ${PWD}/prog.c`
cmp $filename ${PWD}/prog.c
+
########################################################################
# Test whether the cache default locations are correct
# common source file sha1
SHA=f4a1a8062be998ae93b8f1cd744a398c6de6dbb1
+# fedora31
+archive_test 420e9e3308971f4b817cc5bf83928b41a6909d88 /usr/src/debug/hello3-1.0-2.x86_64/foobar////./../hello.c $SHA
+archive_test 87c08d12c78174f1082b7c888b3238219b0eb265 /usr/src/debug/hello3-1.0-2.x86_64///foobar/./..//hello.c $SHA
# fedora30
archive_test c36708a78618d597dee15d0dc989f093ca5f9120 /usr/src/debug/hello2-1.0-2.x86_64/hello.c $SHA
archive_test 41a236eb667c362a1c4196018cc4581e09722b1b /usr/src/debug/hello2-1.0-2.x86_64/hello.c $SHA