]> git.ipfire.org Git - thirdparty/elfutils.git/commitdiff
DRAFT libdwfl: Elf* caching via dwfl_process_tracker
authorSerhei Makarov <serhei@serhei.io>
Tue, 28 Jan 2025 17:20:36 +0000 (12:20 -0500)
committerSerhei Makarov <serhei@serhei.io>
Tue, 28 Jan 2025 17:30:50 +0000 (12:30 -0500)
(Work in progress: based on my testing, it is still missing a couple
of pieces in open_elf.  I also need to look into using
lib/dynamicsizehash_concurrent.c rather than the boilerplate imitation
here.)

* libdwfl/libdwfl.h (dwfl_process_tracker_find_elf): New function,
  serves as a cached version of the dwfl_linux_proc_find_elf callback.
* libdwfl/libdwflP.h (DWFL_ELFTAB_ENT_USED): New macro.
  (dwfltracker_elftab_ent): New struct typedef.
  (struct Dwfl_Process_Tracker): Add fields for table of Elf*.
  (__libdwfl_process_tracker_elftab_find): New function.
  (__libdwfl_process_tracker_elftab_mark_used): New function.
* libdwfl/dwfl_process_tracker.c (dwfl_process_tracker_begin): Init elftab.
  (dwfl_process_tracker_end): Free elftab.
  (__libdwfl_process_tracker_elftab_resize): New function.
  (djb2_hash): New function.
  (__libdwfl_process_tracker_elftab_find): New function.
  (__libdwfl_process_tracker_elftab_mark_used): New function.
* libdwfl/dwfl_process_tracker_find_elf.c: New file.
* libdwfl/Makefile.am (libdwfl_a_SOURCES): Add dwfl_process_tracker_find_elf.c.
* libdw/libdw.map: Add dwfl_process_tracker_find_elf.

libdw/libdw.map
libdwfl/Makefile.am
libdwfl/dwfl_process_tracker.c
libdwfl/dwfl_process_tracker_find_elf.c [new file with mode: 0644]
libdwfl/libdwfl.h
libdwfl/libdwflP.h

index 47bad5f4e9874d119ecdd4bf5ee441e0c700cb27..5f43248ad96d68bd2b242af04e9131fc0b9e9927 100644 (file)
@@ -391,4 +391,5 @@ ELFUTILS_0.193 {
     dwfl_process_tracker_begin;
     dwfl_begin_with_tracker;
     dwfl_process_tracker_end;
+    dwfl_process_tracker_find_elf;
 } ELFUTILS_0.192;
index 7221a3d7214ea871a8f72c7219aab67d4995e9d4..78760e875a61e6ff97db1c3872c6a1917ac28bfb 100644 (file)
@@ -71,7 +71,7 @@ libdwfl_a_SOURCES = dwfl_begin.c dwfl_end.c dwfl_error.c dwfl_version.c \
                    link_map.c core-file.c open.c image-header.c \
                    dwfl_frame.c frame_unwind.c dwfl_frame_pc.c \
                    linux-pid-attach.c linux-core-attach.c dwfl_frame_regs.c \
-                   dwfl_process_tracker.c \
+                   dwfl_process_tracker.c dwfl_process_tracker_find_elf.c \
                    gzip.c debuginfod-client.c
 
 if BZLIB
index a66bcaced615aa5bd0b3827e2621a8888330fc27..c332779d1e9f09117cb979c5bdda6a93bb865570 100644 (file)
@@ -32,6 +32,9 @@
 
 #include "libdwflP.h"
 
+#define HTAB_DEFAULT_SIZE 1021
+extern size_t next_prime (size_t); /* XXX from libeu.a lib/next_prime.c */
+
 Dwfl_Process_Tracker *dwfl_process_tracker_begin (const Dwfl_Callbacks *callbacks)
 {
   Dwfl_Process_Tracker *tracker = calloc (1, sizeof *tracker);
@@ -41,6 +44,11 @@ Dwfl_Process_Tracker *dwfl_process_tracker_begin (const Dwfl_Callbacks *callback
       return tracker;
     }
 
+  /* XXX based on lib/dynamicsizehash.* *_init */
+  tracker->elftab_size = HTAB_DEFAULT_SIZE;
+  tracker->elftab_filled = 0;
+  tracker->elftab = calloc ((tracker->elftab_size + 1), sizeof(tracker->elftab[0]));
+
   tracker->callbacks = callbacks;
   return tracker;
 }
@@ -61,6 +69,114 @@ void dwfl_process_tracker_end (Dwfl_Process_Tracker *tracker)
   if (tracker == NULL)
     return;
 
+  /* Release every cached entry.  Slots are numbered 1..elftab_size
+     inclusive (the table is allocated with elftab_size + 1 elements),
+     so the final slot has to be visited as well.  The index uses the
+     same signed type as elftab_size to avoid a mixed-sign comparison,
+     and a table that failed to allocate is skipped entirely.  */
+  for (ssize_t idx = 1;
+       tracker->elftab != NULL && idx <= tracker->elftab_size;
+       idx++)
+    {
+      dwfltracker_elftab_ent *t = &tracker->elftab[idx];
+      if (!DWFL_ELFTAB_ENT_USED(t))
+        continue;
+      if (t->fd >= 0)
+        close(t->fd);
+      free(t->module_name);
+      elf_end(t->elf);
+    }
+
   /* TODO: Call dwfl_end for each Dwfl connected to this tracker. */
   free (tracker);
 }
+
+/* XXX based on lib/dynamicsizehash.* insert_entry_2 */
+/* Replace TRACKER's Elf table with a larger one (the next prime after
+   twice the current size) and move every used entry across.
+
+   Returns true on success.  If the new table cannot be allocated the
+   old table and size are left in place and false is returned.  */
+bool
+__libdwfl_process_tracker_elftab_resize (Dwfl_Process_Tracker *tracker)
+{
+  ssize_t old_size = tracker->elftab_size;
+  dwfltracker_elftab_ent *oldtab = tracker->elftab;
+  tracker->elftab_size = next_prime (tracker->elftab_size * 2);
+  tracker->elftab = calloc ((tracker->elftab_size + 1), sizeof(tracker->elftab[0]));
+  if (tracker->elftab == NULL)
+    {
+      tracker->elftab_size = old_size;
+      tracker->elftab = oldtab;
+      return false;
+    }
+  /* Keep the fill count at zero while re-inserting, so the lookups
+     below never see a table that looks too full; the real count is
+     stored once the transfer is complete.  */
+  tracker->elftab_filled = 0;
+  ssize_t moved = 0;
+  /* Transfer the old entries to the new table. */
+  for (ssize_t idx = 1; idx <= old_size; ++idx)
+    if (DWFL_ELFTAB_ENT_USED(&oldtab[idx]))
+      {
+        dwfltracker_elftab_ent *ent0 = &oldtab[idx];
+        dwfltracker_elftab_ent *ent1 = __libdwfl_process_tracker_elftab_find(tracker, ent0->module_name, false/* should_resize */);
+        assert (ent1 != NULL);
+        memcpy (ent1, ent0, sizeof(dwfltracker_elftab_ent));
+        ++moved;
+      }
+  /* The entries are still occupied after the move; without this the
+     load-factor check would undercount from here on.  */
+  tracker->elftab_filled = moved;
+  free(oldtab);
+  return true;
+}
+
+/* TODO: Hashing is tentative, consider direct use of lib/dynamicsizehash_concurrent.c for this. */
+/* Hash the NUL-terminated string STR with the XOR variant of djb2 and
+   return the result as a non-negative ssize_t.  */
+ssize_t
+djb2_hash (const char *str)
+{
+  unsigned long hash = 5381;
+  int c;
+
+  while ((c = *str++))
+    hash = ((hash << 5) + hash) ^ c; /* hash * 33 XOR c */
+
+  /* Fold values with the top bit set into the non-negative range using
+     unsigned arithmetic.  Negating the converted signed value instead
+     overflows (undefined behaviour) for the most negative value and
+     can leave the result negative.  Every other input yields the same
+     value as the signed negation did.
+     NOTE(review): assumes unsigned long and ssize_t have the same
+     width -- confirm for all supported targets.  */
+  const unsigned long smax = ~0UL >> 1;
+  if (hash > smax)
+    hash = -hash;
+  hash &= smax; /* only changes the single value 2^(N-1), which becomes 0 */
+  return (ssize_t) hash;
+}
+
+/* XXX based on lib/dynamicsizehash.* *_find */
+/* Locate the table slot for MODULE_NAME in TRACKER's Elf table.
+
+   Returns the matching used entry if MODULE_NAME is already present.
+   Otherwise returns an unused slot where the caller may store it; the
+   caller tells the two apart with DWFL_ELFTAB_ENT_USED.  When the name
+   is absent and the table is more than 90% full, the table is grown
+   first if SHOULD_RESIZE is set; NULL is returned if SHOULD_RESIZE is
+   not set or the table cannot be grown.  */
+dwfltracker_elftab_ent *
+__libdwfl_process_tracker_elftab_find (Dwfl_Process_Tracker *tracker,
+                                       const char *module_name,
+                                       bool should_resize)
+{
+  dwfltracker_elftab_ent *htab = tracker->elftab;
+  ssize_t hval = djb2_hash(module_name);
+  /* Slots are numbered 1..elftab_size.  */
+  ssize_t idx = 1 + (hval < tracker->elftab_size ? hval : hval % tracker->elftab_size);
+
+  if (!DWFL_ELFTAB_ENT_USED(&htab[idx]))
+    goto found;
+  if (strcmp(htab[idx].module_name, module_name) == 0)
+    goto found;
+
+  /* First slot taken by another name: probe with a second hash as the
+     step, wrapping around within 1..elftab_size.  */
+  int64_t hash = 1 + hval % (tracker->elftab_size - 2);
+  do
+    {
+      if (idx <= hash)
+        idx = tracker->elftab_size + idx - hash;
+      else
+        idx -= hash;
+
+      if (!DWFL_ELFTAB_ENT_USED(&htab[idx]))
+        goto found;
+      if (strcmp(htab[idx].module_name, module_name) == 0)
+        goto found;
+    }
+  while (true);
+
+ found:
+  if (!DWFL_ELFTAB_ENT_USED(&htab[idx]))
+    {
+      if (100 * tracker->elftab_filled > 90 * tracker->elftab_size)
+        {
+          if (!should_resize || !__libdwfl_process_tracker_elftab_resize (tracker))
+            return NULL;
+          /* The resize installed a new table and freed the one HTAB
+             points into, so HTAB and IDX are stale.  Look the name up
+             again in the new table rather than returning a pointer
+             into freed memory.  */
+          return __libdwfl_process_tracker_elftab_find (tracker, module_name,
+                                                        false/* should_resize */);
+        }
+      /* XXX Caller is responsible for setting module_name,
+         calling __libdwfl_process_tracker_elftab_mark_used;
+         not guaranteed that caller will want to do this. */
+    }
+  return &htab[idx];
+}
+
+/* Count ENT as an occupied slot of TRACKER's Elf table.  ENT must
+   already have its module_name set.  */
+void
+__libdwfl_process_tracker_elftab_mark_used (Dwfl_Process_Tracker *tracker,
+                                            const dwfltracker_elftab_ent *ent)
+{
+  assert(DWFL_ELFTAB_ENT_USED(ent));
+  tracker->elftab_filled += 1;
+}
diff --git a/libdwfl/dwfl_process_tracker_find_elf.c b/libdwfl/dwfl_process_tracker_find_elf.c
new file mode 100644 (file)
index 0000000..d187107
--- /dev/null
@@ -0,0 +1,98 @@
+/* Find Elf file from dwfl_linux_proc_report, cached via Dwfl_Process_Tracker.
+   Copyright (C) 2025, Red Hat, Inc.
+   This file is part of elfutils.
+
+   This file is free software; you can redistribute it and/or modify
+   it under the terms of either
+
+     * the GNU Lesser General Public License as published by the Free
+       Software Foundation; either version 3 of the License, or (at
+       your option) any later version
+
+   or
+
+     * the GNU General Public License as published by the Free
+       Software Foundation; either version 2 of the License, or (at
+       your option) any later version
+
+   or both in parallel, as here.
+
+   elfutils is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   You should have received copies of the GNU General Public License and
+   the GNU Lesser General Public License along with this program.  If
+   not, see <http://www.gnu.org/licenses/>.  */
+
+
+#ifdef HAVE_CONFIG_H
+# include <config.h>
+#endif
+
+#include <sys/stat.h>
+
+#include "libdwflP.h"
+
+/* find_elf callback that consults the Elf table of MOD's
+   Dwfl_Process_Tracker (if any) before falling back to
+   dwfl_linux_proc_find_elf.
+
+   On a cache hit (an entry for MODULE_NAME exists and fstat on its fd
+   still reports the recorded device, inode and mtime), *ELFP is set to
+   the cached Elf, *FILE_NAME to a fresh copy of the module name, and
+   the cached fd is returned.  Otherwise the result of
+   dwfl_linux_proc_find_elf is returned and, when it produced a
+   non-negative fd and a file name, recorded in the table.  */
+int
+dwfl_process_tracker_find_elf (Dwfl_Module *mod,
+                          void **userdata __attribute__ ((unused)),
+                          const char *module_name, Dwarf_Addr base,
+                          char **file_name, Elf **elfp)
+{
+  /* TODO(WIP): Do we need to handle if elfp is already set?? */
+  assert (*elfp == NULL);
+  Dwfl_Process_Tracker *tracker = mod->dwfl->tracker;
+  dwfltracker_elftab_ent *ent = NULL;
+  int rc;
+  struct stat sb;
+
+  if (tracker != NULL)
+    {
+      ent = __libdwfl_process_tracker_elftab_find(tracker, module_name,
+                                                  true/*should_resize*/);
+      /* TODO: Also reopen the file when module_name set but fd not set? */
+      if (ent == NULL)
+        {
+          /* The table is full and could not be grown; carry on
+             without caching instead of dereferencing NULL.  */
+        }
+      else if (DWFL_ELFTAB_ENT_USED(ent))
+        {
+          char *name = NULL;
+          rc = fstat(ent->fd, &sb);
+          if (rc == 0 && ent->dev == sb.st_dev && ent->ino == sb.st_ino
+              && ent->last_mtime == sb.st_mtime)
+            name = strdup(ent->module_name);
+
+          if (name != NULL)
+            {
+              *elfp = ent->elf;
+              *file_name = name;
+              return ent->fd;
+            }
+          /* File modified (or out of memory for the name copy):
+             fall back to uncached behaviour.  */
+          ent = NULL;
+        }
+      else
+        {
+          /* TODO: For multithreaded access, we mark used here rather
+             than after the dwfl_linux_proc_find_elf() call.  Need to
+             add appropriate locking.  */
+          ent->module_name = strdup(module_name);
+          if (ent->module_name == NULL)
+            ent = NULL; /* cannot claim the slot; stay uncached */
+          else
+            {
+              /* The slot comes from calloc, so fd would otherwise be 0
+                 and tracker teardown would close descriptor 0 if no
+                 real fd is ever stored below.  */
+              ent->fd = -1;
+              __libdwfl_process_tracker_elftab_mark_used(tracker, ent);
+            }
+        }
+    }
+
+  int fd = INTUSE(dwfl_linux_proc_find_elf) (mod, userdata, module_name,
+                                             base, file_name, elfp);
+
+  /* XXX fd < 0 implies elf_from_remote_memory, uses base, not cacheable */
+  if (tracker != NULL && ent != NULL && fd >= 0 && *file_name != NULL)
+    {
+      /* TODO(WIP): *elfp may be NULL here, need to be populated later. */
+      ent->elf = *elfp;
+      ent->fd = fd;
+      rc = fstat(fd, &sb);
+      if (rc == 0) /* TODO: report error otherwise */
+        {
+          ent->dev = sb.st_dev;
+          ent->ino = sb.st_ino;
+          ent->last_mtime = sb.st_mtime;
+        }
+    }
+
+  return fd;
+}
index 4ba68cc289d252852c5659a983f41be13aa64e04..c27da8876a71bab724a2ed05fab4f979d8b37806 100644 (file)
@@ -409,6 +409,13 @@ extern int dwfl_linux_proc_find_elf (Dwfl_Module *mod, void **userdata,
                                     const char *module_name, Dwarf_Addr base,
                                     char **file_name, Elf **);
 
+/* The same callback, except this first attempts to look up a cached
+   Elf* and fd from the Dwfl_Module's Dwfl_Process_Tracker (if any).
+   If a new Elf* has to be created, this saves it to the cache.  */
+extern int dwfl_process_tracker_find_elf (Dwfl_Module *mod, void **userdata,
+                                    const char *module_name, Dwarf_Addr base,
+                                    char **file_name, Elf **);
+
 /* Standard argument parsing for using a standard callback set.  */
 struct argp;
 extern const struct argp *dwfl_standard_argp (void) __const_attribute__;
index 9bcfea6c77c493fb03e908412dd261bbb142b2c2..f40f77203e0c29a896478e04e9938ded14a432cc 100644 (file)
@@ -101,12 +101,43 @@ typedef enum { DWFL_ERRORS DWFL_E_NUM } Dwfl_Error;
 extern int __libdwfl_canon_error (Dwfl_Error) internal_function;
 extern void __libdwfl_seterrno (Dwfl_Error) internal_function;
 
+#define DWFL_ELFTAB_ENT_USED(e) ((e)->module_name != NULL)
+typedef struct
+{
+  char *module_name; /* Lookup key; the entry is used iff this is non-NULL.  Freed when the tracker ends.  */
+  int fd; /* Descriptor returned on a cache hit; closed when the tracker ends if >= 0.  */
+  Elf *elf; /* Cached Elf handle; passed to elf_end when the tracker ends.  */
+  dev_t dev; /* st_dev recorded from fstat on fd when the entry was cached.  */
+  ino_t ino; /* st_ino recorded at the same time.  */
+  time_t last_mtime; /* st_mtime recorded at the same time; a mismatch in any of the three bypasses the cache.  */
+} dwfltracker_elftab_ent;
+
 struct Dwfl_Process_Tracker
 {
   const Dwfl_Callbacks *callbacks;
-  /* ... */
+
+  /* Table of cached Elf * including fd, path, fstat info.  */
+  ssize_t elftab_size; /* Number of usable slots; slots are indexed 1..elftab_size.  */
+  ssize_t elftab_filled; /* Incremented by __libdwfl_process_tracker_elftab_mark_used.  */
+  dwfltracker_elftab_ent *elftab; /* Allocated with elftab_size + 1 elements.  */
 };
 
+/* Find the slot for MODULE_NAME and return a dwfltracker_elftab_ent *
+   for it.  If MODULE_NAME is already present, its entry is returned;
+   otherwise an unused slot is returned for the caller to fill (check
+   with DWFL_ELFTAB_ENT_USED).  If MODULE_NAME is not found and the
+   table is too full, expand the table when SHOULD_RESIZE is set;
+   return NULL if SHOULD_RESIZE is not set or the expansion fails.  */
+dwfltracker_elftab_ent *
+__libdwfl_process_tracker_elftab_find (Dwfl_Process_Tracker *tracker,
+                                      const char *module_name,
+                                      bool should_resize);
+
+/* After populating a dwfltracker_elftab_ent with data, update the
+   elftab_filled stats to properly mark the entry as used.  */
+void
+__libdwfl_process_tracker_elftab_mark_used (Dwfl_Process_Tracker *tracker,
+                                           const dwfltracker_elftab_ent *ent);
+
 /* Resources we might keep for the user about the core file that the
    Dwfl might have been created from.  Can currently only be set
    through std-argp.  */