From e38ce78135d152a9b21c886b452dce9522b665b2 Mon Sep 17 00:00:00 2001
From: Serhei Makarov
Date: Tue, 28 Jan 2025 12:20:36 -0500
Subject: [PATCH] DRAFT libdwfl: Elf* caching via dwfl_process_tracker

(Work in progress; based on my testing, it's still missing a couple
of pieces in open_elf.  Also need to look into using
lib/dynamicsizehash_concurrent.c rather than the boilerplate
imitation here.)

* libdwfl/libdwfl.h (dwfl_process_tracker_find_elf): New function,
  serves as a cached version of the dwfl_linux_proc_find_elf callback.
* libdwfl/libdwflP.h (DWFL_ELFTAB_ENT_USED): New macro.
  (dwfltracker_elftab_ent): New struct typedef.
  (struct Dwfl_Process_Tracker): Add fields for table of Elf*.
  (__libdwfl_process_tracker_elftab_find): New function.
  (__libdwfl_process_tracker_elftab_mark_used): New function.
* libdwfl/dwfl_process_tracker.c (dwfl_process_tracker_begin): Init elftab.
  (dwfl_process_tracker_end): Free elftab.
  (__libdwfl_process_tracker_elftab_resize): New function.
  (djb2_hash): New function.
  (__libdwfl_process_tracker_elftab_find): New function.
  (__libdwfl_process_tracker_elftab_mark_used): New function.
* libdwfl/dwfl_process_tracker_find_elf.c: New file.
* libdwfl/Makefile.am (libdwfl_a_SOURCES): Add
  dwfl_process_tracker_find_elf.c.
* libdw/libdw.map: Add dwfl_process_tracker_find_elf.
---
 libdw/libdw.map                         |   1 +
 libdwfl/Makefile.am                     |   2 +-
 libdwfl/dwfl_process_tracker.c          | 146 ++++++++++++++++++++++++
 libdwfl/dwfl_process_tracker_find_elf.c | 123 ++++++++++++++++++++
 libdwfl/libdwfl.h                       |   7 ++
 libdwfl/libdwflP.h                      |  34 +++++-
 6 files changed, 311 insertions(+), 2 deletions(-)
 create mode 100644 libdwfl/dwfl_process_tracker_find_elf.c

diff --git a/libdw/libdw.map b/libdw/libdw.map
index 47bad5f4..5f43248a 100644
--- a/libdw/libdw.map
+++ b/libdw/libdw.map
@@ -391,4 +391,5 @@ ELFUTILS_0.193 {
     dwfl_process_tracker_begin;
     dwfl_begin_with_tracker;
     dwfl_process_tracker_end;
+    dwfl_process_tracker_find_elf;
 } ELFUTILS_0.192;
diff --git a/libdwfl/Makefile.am b/libdwfl/Makefile.am
index 7221a3d7..78760e87 100644
--- a/libdwfl/Makefile.am
+++ b/libdwfl/Makefile.am
@@ -71,7 +71,7 @@ libdwfl_a_SOURCES = dwfl_begin.c dwfl_end.c dwfl_error.c dwfl_version.c \
 		    link_map.c core-file.c open.c image-header.c \
 		    dwfl_frame.c frame_unwind.c dwfl_frame_pc.c \
 		    linux-pid-attach.c linux-core-attach.c dwfl_frame_regs.c \
-		    dwfl_process_tracker.c \
+		    dwfl_process_tracker.c dwfl_process_tracker_find_elf.c \
 		    gzip.c debuginfod-client.c
 
 if BZLIB
diff --git a/libdwfl/dwfl_process_tracker.c b/libdwfl/dwfl_process_tracker.c
index a66bcace..c332779d 100644
--- a/libdwfl/dwfl_process_tracker.c
+++ b/libdwfl/dwfl_process_tracker.c
@@ -32,6 +32,9 @@
 
 #include "libdwflP.h"
 
+#define HTAB_DEFAULT_SIZE 1021
+extern size_t next_prime (size_t); /* XXX from libeu.a lib/next_prime.c */
+
 Dwfl_Process_Tracker *dwfl_process_tracker_begin (const Dwfl_Callbacks *callbacks)
 {
   Dwfl_Process_Tracker *tracker = calloc (1, sizeof *tracker);
@@ -41,6 +44,18 @@ Dwfl_Process_Tracker *dwfl_process_tracker_begin (const Dwfl_Callbacks *callback
       return tracker;
     }
 
+  /* XXX based on lib/dynamicsizehash.* *_init */
+  tracker->elftab_size = HTAB_DEFAULT_SIZE;
+  tracker->elftab_filled = 0;
+  tracker->elftab = calloc ((tracker->elftab_size + 1), sizeof(tracker->elftab[0]));
+  if (tracker->elftab == NULL)
+    {
+      /* Without the table every later lookup would dereference NULL.  */
+      free (tracker);
+      __libdwfl_seterrno (DWFL_E_NOMEM);
+      return NULL;
+    }
+
   tracker->callbacks = callbacks;
   return tracker;
 }
@@ -61,6 +76,137 @@ void dwfl_process_tracker_end (Dwfl_Process_Tracker *tracker)
   if (tracker == NULL)
     return;
 
+  /* Valid slots are 1 .. elftab_size inclusive (slot 0 is never used),
+     so the upper bound must be inclusive or the last slot is leaked.  */
+  for (ssize_t idx = 1; idx <= tracker->elftab_size; idx++)
+    {
+      dwfltracker_elftab_ent *t = &tracker->elftab[idx];
+      if (!DWFL_ELFTAB_ENT_USED(t))
+	continue;
+      if (t->fd >= 0)
+	close(t->fd);
+      free(t->module_name);
+      elf_end(t->elf);
+    }
+  free(tracker->elftab);
+
   /* TODO: Call dwfl_end for each Dwfl connected to this tracker. */
   free (tracker);
 }
+
+/* Grow TRACKER's Elf table to next_prime (2 * current size) slots and
+   re-insert every used entry.  Returns false, leaving the old table and
+   its size in place, if the new table cannot be allocated.
+   XXX based on lib/dynamicsizehash.* insert_entry_2 */
+bool
+__libdwfl_process_tracker_elftab_resize (Dwfl_Process_Tracker *tracker)
+{
+  ssize_t old_size = tracker->elftab_size;
+  dwfltracker_elftab_ent *oldtab = tracker->elftab;
+  tracker->elftab_size = next_prime (tracker->elftab_size * 2);
+  tracker->elftab = calloc ((tracker->elftab_size + 1), sizeof(tracker->elftab[0]));
+  if (tracker->elftab == NULL)
+    {
+      tracker->elftab_size = old_size;
+      tracker->elftab = oldtab;
+      return false;
+    }
+  tracker->elftab_filled = 0;
+  /* Transfer the old entries to the new table. */
+  for (ssize_t idx = 1; idx <= old_size; ++idx)
+    if (DWFL_ELFTAB_ENT_USED(&oldtab[idx]))
+      {
+	dwfltracker_elftab_ent *ent0 = &oldtab[idx];
+	dwfltracker_elftab_ent *ent1 = __libdwfl_process_tracker_elftab_find(tracker, ent0->module_name, false/* should_resize */);
+	assert (ent1 != NULL);
+	memcpy (ent1, ent0, sizeof(dwfltracker_elftab_ent));
+	/* The copy makes ENT1 used; count it so the load factor stays
+	   accurate and the next resize still triggers.  */
+	tracker->elftab_filled++;
+      }
+  free(oldtab);
+  return true;
+}
+
+/* djb2 string hash (XOR variant), reduced to a non-negative ssize_t.
+   TODO: Hashing is tentative, consider direct use of
+   lib/dynamicsizehash_concurrent.c for this.  */
+static ssize_t
+djb2_hash (const char *str)
+{
+  unsigned long hash = 5381;
+  int c;
+
+  while ((c = *str++))
+    hash = ((hash << 5) + hash) ^ c; /* hash * 33 XOR c */
+
+  /* Clear the top bit rather than negating: negating the most negative
+     value overflows and would leave a negative table index.  */
+  hash &= ~0UL >> 1;
+  return (ssize_t) hash;
+}
+
+/* Look up MODULE_NAME in TRACKER's Elf table, probing with a second
+   hash on collision.  Returns the matching entry, or the unused slot
+   where it would be inserted; returns NULL only when an unused slot was
+   reached, the table is over 90% full, and it could not be grown
+   (SHOULD_RESIZE false, or allocation failure).
+   XXX based on lib/dynamicsizehash.* *_find */
+dwfltracker_elftab_ent *
+__libdwfl_process_tracker_elftab_find (Dwfl_Process_Tracker *tracker,
+				       const char *module_name,
+				       bool should_resize)
+{
+  dwfltracker_elftab_ent *htab = tracker->elftab;
+  ssize_t hval = djb2_hash(module_name);
+  ssize_t idx = 1 + (hval < tracker->elftab_size ? hval : hval % tracker->elftab_size);
+
+  if (!DWFL_ELFTAB_ENT_USED(&htab[idx]))
+    goto found;
+  if (strcmp(htab[idx].module_name, module_name) == 0)
+    goto found;
+
+  int64_t hash = 1 + hval % (tracker->elftab_size - 2);
+  do
+    {
+      if (idx <= hash)
+	idx = tracker->elftab_size + idx - hash;
+      else
+	idx -= hash;
+
+      if (!DWFL_ELFTAB_ENT_USED(&htab[idx]))
+	goto found;
+      if (strcmp(htab[idx].module_name, module_name) == 0)
+	goto found;
+    }
+  while (true);
+
+ found:
+  if (!DWFL_ELFTAB_ENT_USED(&htab[idx]))
+    {
+      if (100 * tracker->elftab_filled > 90 * tracker->elftab_size)
+	{
+	  if (!should_resize || !__libdwfl_process_tracker_elftab_resize (tracker))
+	    return NULL;
+	  /* HTAB and IDX refer to the old table, which the resize has
+	     freed; probe again in the new, larger table.  */
+	  return __libdwfl_process_tracker_elftab_find (tracker, module_name,
+							false);
+	}
+      /* XXX Caller is responsible for setting module_name,
+	 calling __libdwfl_process_tracker_elftab_mark_used;
+	 not guaranteed that caller will want to do this. */
+    }
+  return &htab[idx];
+}
+
+/* Account for ENT, which the caller has just populated (module_name
+   set), in TRACKER's count of used slots.  */
+void
+__libdwfl_process_tracker_elftab_mark_used (Dwfl_Process_Tracker *tracker,
+					    const dwfltracker_elftab_ent *ent)
+{
+  (void) ent; /* Only inspected by the assert; unused under NDEBUG.  */
+  assert(DWFL_ELFTAB_ENT_USED(ent));
+  tracker->elftab_filled ++;
+}
diff --git a/libdwfl/dwfl_process_tracker_find_elf.c b/libdwfl/dwfl_process_tracker_find_elf.c
new file mode 100644
index 00000000..d1871071
--- /dev/null
+++ b/libdwfl/dwfl_process_tracker_find_elf.c
@@ -0,0 +1,123 @@
+/* Find Elf file from dwfl_linux_proc_report, cached via Dwfl_Process_Tracker.
+   Copyright (C) 2025, Red Hat, Inc.
+   This file is part of elfutils.
+
+   This file is free software; you can redistribute it and/or modify
+   it under the terms of either
+
+     * the GNU Lesser General Public License as published by the Free
+       Software Foundation; either version 3 of the License, or (at
+       your option) any later version
+
+   or
+
+     * the GNU General Public License as published by the Free
+       Software Foundation; either version 2 of the License, or (at
+       your option) any later version
+
+   or both in parallel, as here.
+
+   elfutils is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   You should have received copies of the GNU General Public License and
+   the GNU Lesser General Public License along with this program.  If
+   not, see <http://www.gnu.org/licenses/>.  */
+
+
+#ifdef HAVE_CONFIG_H
+# include <config.h>
+#endif
+
+#include <sys/stat.h>
+
+#include "libdwflP.h"
+
+/* find_elf callback: behaves like dwfl_linux_proc_find_elf, but first
+   consults the Elf table of MOD's Dwfl_Process_Tracker (if any) and
+   records a newly opened Elf and fd there.  Returns the descriptor for
+   the module, or whatever the uncached callback returns.  */
+int
+dwfl_process_tracker_find_elf (Dwfl_Module *mod,
+			       void **userdata __attribute__ ((unused)),
+			       const char *module_name, Dwarf_Addr base,
+			       char **file_name, Elf **elfp)
+{
+  /* TODO(WIP): Do we need to handle if elfp is already set?? */
+  assert (*elfp == NULL);
+  Dwfl_Process_Tracker *tracker = mod->dwfl->tracker;
+  dwfltracker_elftab_ent *ent = NULL;
+  int rc;
+  struct stat sb;
+
+  if (tracker != NULL)
+    ent = __libdwfl_process_tracker_elftab_find(tracker, module_name,
+						true/*should_resize*/);
+
+  /* ENT is NULL when there is no tracker, or when the table is full and
+     could not be grown; either way behave like the uncached callback.  */
+  if (ent != NULL && DWFL_ELFTAB_ENT_USED(ent))
+    {
+      if (ent->fd >= 0)
+	{
+	  rc = fstat(ent->fd, &sb);
+	  if (rc < 0 || ent->dev != sb.st_dev || ent->ino != sb.st_ino
+	      || ent->last_mtime != sb.st_mtime)
+	    ent = NULL; /* file modified, fall back to uncached behaviour */
+	  else
+	    {
+	      char *name = strdup(ent->module_name);
+	      if (name != NULL)
+		{
+		  /* NOTE(review): the cached Elf and fd are shared with
+		     the caller without an extra reference -- confirm
+		     the caller never closes them.  */
+		  *elfp = ent->elf;
+		  *file_name = name;
+		  return ent->fd;
+		}
+	      ent = NULL; /* out of memory, fall back to uncached behaviour */
+	    }
+	}
+      /* Otherwise the entry was reserved by an earlier call that did
+	 not produce a cacheable fd; fall through and try again.  */
+    }
+  else if (ent != NULL)
+    {
+      /* TODO: For multithreaded access, we mark used here rather
+	 than after the dwfl_linux_proc_find_elf() call.  Need to
+	 add appropriate locking.  */
+      ent->module_name = strdup(module_name);
+      if (ent->module_name != NULL)
+	{
+	  /* calloc left fd == 0, a valid descriptor; use -1 so an
+	     unpopulated entry is never fstat'ed or closed.  */
+	  ent->fd = -1;
+	  __libdwfl_process_tracker_elftab_mark_used(tracker, ent);
+	}
+      else
+	ent = NULL; /* out of memory, fall back to uncached behaviour */
+    }
+
+  int fd = INTUSE(dwfl_linux_proc_find_elf) (mod, userdata, module_name,
+					     base, file_name, elfp);
+
+  /* XXX fd < 0 implies elf_from_remote_memory, uses base, not cacheable */
+  if (tracker != NULL && ent != NULL && fd >= 0 && *file_name != NULL)
+    {
+      /* TODO(WIP): *elfp may be NULL here, need to be populated later. */
+      ent->elf = *elfp;
+      ent->fd = fd;
+      rc = fstat(fd, &sb);
+      if (rc == 0) /* TODO: report error otherwise */
+	{
+	  ent->dev = sb.st_dev;
+	  ent->ino = sb.st_ino;
+	  ent->last_mtime = sb.st_mtime;
+	}
+    }
+
+  return fd;
+}
diff --git a/libdwfl/libdwfl.h b/libdwfl/libdwfl.h
index 4ba68cc2..c27da887 100644
--- a/libdwfl/libdwfl.h
+++ b/libdwfl/libdwfl.h
@@ -409,6 +409,13 @@ extern int dwfl_linux_proc_find_elf (Dwfl_Module *mod, void **userdata,
 				     const char *module_name, Dwarf_Addr base,
 				     char **file_name, Elf **);
 
+/* The same callback, except this first attempts to look up a cached
+   Elf* and fd from the Dwfl_Module's Dwfl_Process_Tracker (if any).
+   If a new Elf* has to be created, this saves it to the cache.  */
+extern int dwfl_process_tracker_find_elf (Dwfl_Module *mod, void **userdata,
+					  const char *module_name, Dwarf_Addr base,
+					  char **file_name, Elf **);
+
 /* Standard argument parsing for using a standard callback set.  */
 struct argp;
 extern const struct argp *dwfl_standard_argp (void) __const_attribute__;
diff --git a/libdwfl/libdwflP.h b/libdwfl/libdwflP.h
index 9bcfea6c..f40f7720 100644
--- a/libdwfl/libdwflP.h
+++ b/libdwfl/libdwflP.h
@@ -101,12 +101,44 @@ typedef enum { DWFL_ERRORS DWFL_E_NUM } Dwfl_Error;
 extern int __libdwfl_canon_error (Dwfl_Error) internal_function;
 extern void __libdwfl_seterrno (Dwfl_Error) internal_function;
 
+#define DWFL_ELFTAB_ENT_USED(e) ((e)->module_name != NULL)
+typedef struct
+{
+  char *module_name; /* dwfltracker_elftab_ent is used iff non-NULL.  */
+  int fd;            /* -1 until a cacheable fd has been recorded.  */
+  Elf *elf;
+  dev_t dev;
+  ino_t ino;
+  time_t last_mtime;
+} dwfltracker_elftab_ent;
+
 struct Dwfl_Process_Tracker
 {
   const Dwfl_Callbacks *callbacks;
-  /* ... */
+
+  /* Table of cached Elf * including fd, path, fstat info.  The table
+     has elftab_size + 1 slots; slots 1 .. elftab_size are used.  */
+  ssize_t elftab_size;
+  ssize_t elftab_filled;
+  dwfltracker_elftab_ent *elftab;
 };
 
+/* Find the location for an existing or new MODULE_NAME and return a
+   dwfltracker_elftab_ent * for it.  The returned entry is unused
+   (see DWFL_ELFTAB_ENT_USED) if MODULE_NAME is not yet in the table.
+   If the table is too full for a new entry it is expanded when
+   SHOULD_RESIZE is set; NULL is returned if it cannot be expanded.  */
+dwfltracker_elftab_ent *
+__libdwfl_process_tracker_elftab_find (Dwfl_Process_Tracker *tracker,
+				       const char *module_name,
+				       bool should_resize);
+
+/* After populating a dwfltracker_elftab_ent with data, update the
+   elftab_filled stats to properly mark the entry as used.  */
+void
+__libdwfl_process_tracker_elftab_mark_used (Dwfl_Process_Tracker *tracker,
+					    const dwfltracker_elftab_ent *ent);
+
 /* Resources we might keep for the user about the core file that the
    Dwfl might have been created from.  Can currently only be set
    through std-argp.  */
-- 
2.39.5