]> git.ipfire.org Git - thirdparty/elfutils.git/commitdiff
PR32218: debuginfod-client: support very long source file names
authorFrank Ch. Eigler <fche@redhat.com>
Thu, 10 Oct 2024 20:30:19 +0000 (16:30 -0400)
committerFrank Ch. Eigler <fche@redhat.com>
Wed, 16 Oct 2024 13:49:35 +0000 (09:49 -0400)
debuginfod clients & servers may sometimes encounter very long source
file names.  Previously, the client would synthesize a path name like
   $CACHEDIR/$BUILDID/source-$PATHNAME
where $PATHNAME was a funky ##-encoded version of the entire source
path name.  See https://sourceware.org/PR32218 for a horror case.
This can get too long to store as a single component of a file system
pathname (e.g. linux/limits.h NAME_MAX), resulting on client-side
errors even after a successful download.

New code switches encoding of the $PATHNAME part to use less escaping,
and a merciless truncation to the tail part of the filename.  (We keep
the tail rather than the head, so that the extension is preserved,
which makes some consumers happier.)  To limit collision damage from
truncation, we add also insert a goofy hash (4-byte DJBX33A) of the
name into the path name.  The result is a relatively short name:

   $CACHEDIR/$BUILDID/source-$HASH-$NAMETAIL

This is a transparent change to clients, who are not to make any
assumptions about cache file naming structure.  However, one existing
test did make such assumptions, so is fixed with some globness.  A new
test is also added, using a pre-baked tarball with a very long srcfile
name.

Signed-off-by: Frank Ch. Eigler <fche@redhat.com>
debuginfod/debuginfod-client.c
tests/Makefile.am
tests/debuginfod-tars/bighello-sources/bighello.c [new file with mode: 0644]
tests/debuginfod-tars/bighello-sources/bighello.h [new file with mode: 0644]
tests/debuginfod-tars/bighello-sources/moremoremoremoremoremoremoremore [new symlink]
tests/debuginfod-tars/bighello.tar [new file with mode: 0644]
tests/run-debuginfod-longsource.sh [new file with mode: 0755]
tests/run-debuginfod-section.sh

index 24ede19af3853d3a39b9d53f328240a937d8b8a1..deff19ff4568a9be2915b93b44a283fccfd50844 100644 (file)
@@ -1201,29 +1201,69 @@ perform_queries(CURLM *curlm, CURL **target_handle, struct handle_data *data, de
 /* Copy SRC to DEST, s,/,#,g */
 
 static void
-path_escape (const char *src, char *dest)
+path_escape (const char *src, char *dest, size_t dest_len)
 {
-  unsigned q = 0;
+  /* PR32218: Reversibly-escaping src character-by-character, for
+     large enough strings, risks ENAMETOOLONG errors.  For long names,
+     a simple hash based generated name instead, while still
+     attempting to preserve the as much of the content as possible.
+     It's possible that absurd choices of incoming names will collide
+     or still get truncated, but c'est la vie.
+  */
+  
+  /* Compute a three-way min() for the actual output string generated. */
+  assert (dest_len > 10); /* Space enough for degenerate case of
+                             "HASHHASH-\0". NB: dest_len is not
+                             user-controlled. */
+  /* Use only NAME_MAX/2 characters in the output file name.
+     ENAMETOOLONG has been observed even on 300-ish character names on
+     some filesystems. */
+  const size_t max_dest_len = NAME_MAX/2;
+  dest_len = dest_len > max_dest_len ? max_dest_len : dest_len;
+  /* Use only strlen(src)+10 bytes, if that's smaller.  Yes, we could
+     just fit an entire escaped name in there in theory, without the
+     hash+etc.  But then again the hashing protects against #-escape
+     aliasing collisions: "foo[bar" "foo]bar" both escape to
+     "foo#bar", thus aliasing, but have different "HASH-foo#bar".
+  */
+  const size_t hash_prefix_destlen = strlen(src)+10; /* DEADBEEF-src\0 */
+  dest_len = dest_len > hash_prefix_destlen ? hash_prefix_destlen : dest_len;
 
-  for (unsigned fi=0; q < PATH_MAX-2; fi++) /* -2, escape is 2 chars.  */
-    switch (src[fi])
-      {
-      case '\0':
-        dest[q] = '\0';
-       return;
-      case '/': /* escape / to prevent dir escape */
-        dest[q++]='#';
-        dest[q++]='#';
-        break;
-      case '#': /* escape # to prevent /# vs #/ collisions */
-        dest[q++]='#';
-        dest[q++]='_';
-        break;
-      default:
-        dest[q++]=src[fi];
-      }
+  char *dest_write = dest + dest_len - 1;
+  (*dest_write--) = '\0'; /* Ensure a \0 there. */
 
-  dest[q] = '\0';
+  /* Copy from back toward front, preferring to keep the .extension. */
+  for (int fi=strlen(src)-1; fi >= 0 && dest_write >= dest; fi--)
+    {
+      char src_char = src[fi];
+      switch (src_char)
+        {
+          /* Pass through ordinary identifier chars. */
+        case '.': case '-': case '_':
+        case 'a'...'z':
+        case 'A'...'Z':
+        case '0'...'9':
+          *dest_write-- = src_char;
+          break;
+          
+          /* Replace everything else, esp. security-sensitive /. */
+        default:
+          *dest_write-- = '#';
+          break;
+        }
+    }
+
+  /* djb2 hash algorithm: DJBX33A */
+  unsigned long hash = 5381;
+  const char *c = src;
+  while (*c)
+    hash = ((hash << 5) + hash) + *c++;
+  char name_hash_str [9];
+  /* Truncate to 4 bytes; along with the remaining hundredish bytes of text,
+     should be ample against accidental collisions. */
+  snprintf (name_hash_str, sizeof(name_hash_str), "%08x", (unsigned int) hash);
+  memcpy (&dest[0], name_hash_str, 8); /* Overwrite the first few characters */
+  dest[8] = '-'; /* Add a bit of punctuation to make hash stand out */
 }
 
 /* Attempt to update the atime */
@@ -1267,7 +1307,6 @@ extract_section (int fd, const char *section, char *fd_path, char **usr_path)
     }
 
   int sec_fd = -1;
-  char *escaped_name = NULL;
   char *sec_path_tmp = NULL;
   Elf_Scn *scn = NULL;
 
@@ -1332,16 +1371,10 @@ extract_section (int fd, const char *section, char *fd_path, char **usr_path)
              --i;
            }
 
-         escaped_name = malloc (strlen (section) * 2 + 1);
-         if (escaped_name == NULL)
-           {
-             rc = -ENOMEM;
-             goto out;
-           }
-         path_escape (section, escaped_name);
-
+          char suffix[NAME_MAX];
+         path_escape (section, suffix, sizeof(suffix));
          rc = asprintf (&sec_path_tmp, "%s/section-%s.XXXXXX",
-                        fd_path, escaped_name);
+                        fd_path, suffix);
          if (rc == -1)
            {
              rc = -ENOMEM;
@@ -1396,8 +1429,6 @@ out2:
   free (sec_path_tmp);
 
 out1:
-  free (escaped_name);
-
 out:
   elf_end (elf);
   return rc;
@@ -1747,7 +1778,7 @@ debuginfod_query_server_by_buildid (debuginfod_client *c,
   char *target_cache_dir = NULL;
   char *target_cache_path = NULL;
   char *target_cache_tmppath = NULL;
-  char suffix[PATH_MAX + 1]; /* +1 for zero terminator.  */
+  char suffix[NAME_MAX];
   char build_id_bytes[MAX_BUILD_ID_BYTES * 2 + 1];
   int vfd = c->verbose_fd;
   int rc, r;
@@ -1861,13 +1892,13 @@ debuginfod_query_server_by_buildid (debuginfod_client *c,
          goto out;
        }
 
-      path_escape (filename, suffix);
+      path_escape (filename, suffix, sizeof(suffix));
       /* If the DWARF filenames are super long, this could exceed
          PATH_MAX and truncate/collide.  Oh well, that'll teach
          them! */
     }
   else if (section != NULL)
-    path_escape (section, suffix);
+    path_escape (section, suffix, sizeof(suffix));
   else
     suffix[0] = '\0';
 
@@ -1879,7 +1910,8 @@ debuginfod_query_server_by_buildid (debuginfod_client *c,
      cache_path:        $HOME/.cache
      target_cache_dir:  $HOME/.cache/0123abcd
      target_cache_path: $HOME/.cache/0123abcd/debuginfo
-     target_cache_path: $HOME/.cache/0123abcd/source#PATH#TO#SOURCE ?
+     target_cache_path: $HOME/.cache/0123abcd/executable-.debug_info
+     target_cache_path: $HOME/.cache/0123abcd/source-HASH-#PATH#TO#SOURCE
   */
 
   cache_path = make_cache_path();
@@ -1891,10 +1923,10 @@ debuginfod_query_server_by_buildid (debuginfod_client *c,
   xalloc_str (target_cache_dir, "%s/%s", cache_path, build_id_bytes);
   (void) mkdir (target_cache_dir, 0700); // failures with this mkdir would be caught later too
 
-  if (section != NULL)
+  if (suffix[0] != '\0') /* section, source queries */
     xalloc_str (target_cache_path, "%s/%s-%s", target_cache_dir, type, suffix);
   else
-    xalloc_str (target_cache_path, "%s/%s%s", target_cache_dir, type, suffix);
+    xalloc_str (target_cache_path, "%s/%s", target_cache_dir, type);
   xalloc_str (target_cache_tmppath, "%s.XXXXXX", target_cache_path);
 
   /* XXX combine these */
index 424c184bc200a7a1a5832391dbfe624e2f465735..ffccb0cd0e1fd06838d11b8e2cca31ca7003072e 100644 (file)
@@ -273,7 +273,8 @@ TESTS += run-srcfiles-self.sh \
         run-debuginfod-section.sh \
         run-debuginfod-IXr.sh \
         run-debuginfod-client-profile.sh \
-        run-debuginfod-find-metadata.sh
+        run-debuginfod-find-metadata.sh \
+        run-debuginfod-longsource.sh
 endif
 if !OLD_LIBMICROHTTPD
 # Will crash on too old libmicrohttpd
@@ -607,6 +608,7 @@ EXTRA_DIST = run-arextract.sh run-arsymtest.sh run-ar.sh \
             run-debuginfod-IXr.sh \
             run-debuginfod-ima-verification.sh \
             run-debuginfod-find-metadata.sh \
+            run-debuginfod-longsource.sh \
             debuginfod-rpms/fedora30/hello2-1.0-2.src.rpm \
             debuginfod-rpms/fedora30/hello2-1.0-2.x86_64.rpm \
             debuginfod-rpms/fedora30/hello2-debuginfo-1.0-2.x86_64.rpm \
@@ -654,6 +656,9 @@ EXTRA_DIST = run-arextract.sh run-arsymtest.sh run-ar.sh \
             debuginfod-tars/pacman-sources/PKGBUILD \
             debuginfod-tars/pacman-sources/README.md \
             debuginfod-tars/pacman-sources/hello.c \
+            debuginfod-tars/bighello.tar \
+            debuginfod-tars/bighello-sources/bighello.c \
+            debuginfod-tars/bighello-sources/bighello.h \
             run-pt_gnu_prop-tests.sh \
             testfile_pt_gnu_prop.bz2 testfile_pt_gnu_prop32.bz2 \
             run-getphdrnum.sh testfile-phdrs.elf.bz2 \
diff --git a/tests/debuginfod-tars/bighello-sources/bighello.c b/tests/debuginfod-tars/bighello-sources/bighello.c
new file mode 100644 (file)
index 0000000..e114527
--- /dev/null
@@ -0,0 +1,7 @@
+#include <stdio.h>
+#include "moremoremoremoremoremoremoremore/moremoremoremoremoremoremoremore/moremoremoremoremoremoremoremore/moremoremoremoremoremoremoremore/moremoremoremoremoremoremoremore/moremoremoremoremoremoremoremore/moremoremoremoremoremoremoremore/moremoremoremoremoremoremoremore/moremoremoremoremoremoremoremore/moremoremoremoremoremoremoremore/moremoremoremoremoremoremoremore/moremoremoremoremoremoremoremore/moremoremoremoremoremoremoremore/moremoremoremoremoremoremoremore/moremoremoremoremoremoremoremore/bighello.h"
+
+int main() {
+   printf("%s\n", HELLO);
+   return 0;
+}
diff --git a/tests/debuginfod-tars/bighello-sources/bighello.h b/tests/debuginfod-tars/bighello-sources/bighello.h
new file mode 100644 (file)
index 0000000..e3de04b
--- /dev/null
@@ -0,0 +1 @@
+char *HELLO = "hello";
diff --git a/tests/debuginfod-tars/bighello-sources/moremoremoremoremoremoremoremore b/tests/debuginfod-tars/bighello-sources/moremoremoremoremoremoremoremore
new file mode 120000 (symlink)
index 0000000..945c9b4
--- /dev/null
@@ -0,0 +1 @@
+.
\ No newline at end of file
diff --git a/tests/debuginfod-tars/bighello.tar b/tests/debuginfod-tars/bighello.tar
new file mode 100644 (file)
index 0000000..6d5d1d6
Binary files /dev/null and b/tests/debuginfod-tars/bighello.tar differ
diff --git a/tests/run-debuginfod-longsource.sh b/tests/run-debuginfod-longsource.sh
new file mode 100755 (executable)
index 0000000..773af1f
--- /dev/null
@@ -0,0 +1,69 @@
+#!/usr/bin/env bash
+#
+# Copyright (C) 2024 Red Hat, Inc.
+# This file is part of elfutils.
+#
+# This file is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or
+# (at your option) any later version.
+#
+# elfutils is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+. $srcdir/debuginfod-subr.sh
+
+# for test case debugging
+set -x
+unset VALGRIND_CMD
+
+DB=${PWD}/.debuginfod_tmp.sqlite
+tempfiles $DB
+export DEBUGINFOD_CACHE_PATH=${PWD}/.client_cache
+
+# Set up directories for scanning
+mkdir Z
+cp -rvp ${abs_srcdir}/debuginfod-tars/bighello.tar Z
+
+# This variable is essential and ensures no time-race for claiming ports occurs
+# set base to a unique multiple of 100 not used in any other 'run-debuginfod-*' test
+base=14200
+get_ports
+
+# We use -t0 and -g0 here to turn off time-based scanning & grooming.
+# For testing purposes, we just sic SIGUSR1 at the process.
+
+env LD_LIBRARY_PATH=$ldpath DEBUGINFOD_URLS= ${abs_builddir}/../debuginfod/debuginfod $VERBOSE \
+    -Ztar -p $PORT1 -d $DB -t0 -g0 -v ./Z > vlog$PORT1 2>&1 &
+PID1=$!
+tempfiles vlog$PORT1
+errfiles vlog$PORT1
+# Server must become ready
+wait_ready $PORT1 'ready' 1
+# And initial scan should be done
+wait_ready $PORT1 'thread_work_total{role="traverse"}' 1
+
+kill -USR1 $PID1 # run another index pass to make sure the srcdef/srcref stuff is fully located
+
+# Wait till both files are in the index.
+wait_ready $PORT1 'thread_work_total{role="traverse"}' 2
+wait_ready $PORT1 'thread_work_pending{role="scan"}' 0
+wait_ready $PORT1 'thread_busy{role="scan"}' 0
+
+export DEBUGINFOD_URLS=http://127.0.0.1:$PORT1
+
+########################################################################
+
+# Build-id for a.out in said tarball
+BUILDID=7fc69cb0e8fb9d4b57e594271b9941b67410aaaa
+
+# Download short & long files
+testrun ${abs_top_builddir}/debuginfod/debuginfod-find -vvv source $BUILDID /tmp/bighello-sources/bighello.c
+testrun ${abs_top_builddir}/debuginfod/debuginfod-find -vvv source $BUILDID /tmp/bighello-sources/moremoremoremoremoremoremoremore/moremoremoremoremoremoremoremore/moremoremoremoremoremoremoremore/moremoremoremoremoremoremoremore/moremoremoremoremoremoremoremore/moremoremoremoremoremoremoremore/moremoremoremoremoremoremoremore/moremoremoremoremoremoremoremore/moremoremoremoremoremoremoremore/moremoremoremoremoremoremoremore/moremoremoremoremoremoremoremore/moremoremoremoremoremoremoremore/moremoremoremoremoremoremoremore/moremoremoremoremoremoremoremore/moremoremoremoremoremoremoremore/bighello.h
+
+exit 0
index 6ac5968872e33404ac4d317c96cd19b589de830d..1746d7ef7d437722ca2b8e078807ec500c104810 100755 (executable)
@@ -95,11 +95,11 @@ testrun ${abs_top_builddir}/debuginfod/debuginfod-find -vvv section $RPM_BUILDID
 # Verify that the downloaded files match the contents of the original sections
 tempfiles ${BUILDID}.debug_info
 objcopy F/prog.debug -O binary --only-section=.debug_info --set-section-flags .debug_info=alloc $BUILDID.debug_info
-cmp ${BUILDID}.debug_info ${DEBUGINFOD_CACHE_PATH}/${BUILDID}/section-.debug_info
+cmp ${BUILDID}.debug_info ${DEBUGINFOD_CACHE_PATH}/${BUILDID}/section-*.debug_info
 
 tempfiles ${BUILDID}.text
 objcopy F/prog -O binary --only-section=.text ${BUILDID}.text
-cmp ${BUILDID}.text ${DEBUGINFOD_CACHE_PATH}/${BUILDID}/section-.text
+cmp ${BUILDID}.text ${DEBUGINFOD_CACHE_PATH}/${BUILDID}/section-*.text
 
 # Download the original debuginfo/executable files.
 DEBUGFILE=`env LD_LIBRARY_PATH=$ldpath ${abs_top_builddir}/debuginfod/debuginfod-find debuginfo $RPM_BUILDID`
@@ -110,11 +110,11 @@ testrun ${abs_top_builddir}/debuginfod/debuginfod-find -vvv executable $BUILDID
 if test "$(arch)" == "x86_64"; then
   tempfiles DEBUGFILE.debug_info
   objcopy $DEBUGFILE -O binary --only-section=.debug_info --set-section-flags .debug_info=alloc DEBUGFILE.debug_info
-  testrun diff -u DEBUGFILE.debug_info ${DEBUGINFOD_CACHE_PATH}/${RPM_BUILDID}/section-.debug_info
+  testrun diff -u DEBUGFILE.debug_info ${DEBUGINFOD_CACHE_PATH}/${RPM_BUILDID}/section-*.debug_info
 
   tempfiles EXECFILE.text
   objcopy $EXECFILE -O binary --only-section=.text EXECFILE.text
-  testrun diff -u EXECFILE.text ${DEBUGINFOD_CACHE_PATH}/${RPM_BUILDID}/section-.text
+  testrun diff -u EXECFILE.text ${DEBUGINFOD_CACHE_PATH}/${RPM_BUILDID}/section-*.text
 fi
 
 # Kill the server.
@@ -123,10 +123,10 @@ wait $PID1
 PID1=0
 
 # Delete the section files from the cache.
-rm -f ${DEBUGINFOD_CACHE_PATH}/${RPM_BUILDID}/section-.text
-rm -f ${DEBUGINFOD_CACHE_PATH}/${RPM_BUILDID}/section-.debug_info
-rm -f ${DEBUGINFOD_CACHE_PATH}/${BUILDID}/section-.text
-rm -f ${DEBUGINFOD_CACHE_PATH}/${BUILDID}/section-.debug_info
+rm -f ${DEBUGINFOD_CACHE_PATH}/${RPM_BUILDID}/section-*.text
+rm -f ${DEBUGINFOD_CACHE_PATH}/${RPM_BUILDID}/section-*.debug_info
+rm -f ${DEBUGINFOD_CACHE_PATH}/${BUILDID}/section-*.text
+rm -f ${DEBUGINFOD_CACHE_PATH}/${BUILDID}/section-*.debug_info
 
 # Verify that the client can extract sections from the debuginfo or executable
 # if they're already in the cache.