From 3ce0d5ed6f878be84ff6a9962885fbe8101b1acb Mon Sep 17 00:00:00 2001 From: Serhei Makarov Date: Fri, 24 Oct 2025 10:26:03 -0400 Subject: [PATCH] libdwfl_stacktrace + libebl: dwflst_sample_getframes non-perf api This patch adds a generic dwflst_sample_getframes() API that does not depend on perf_events concepts, in particular the linux-kernel-specific enum defining the perf_regs_mask register order. This involves reworking the register-handling backend to use regs_mapping arrays rather than perf_regs_mask, and provide a way to translate perf_regs_mask to regs_mapping. A regs_mapping array, for each item in a provided regs[] array, specifies its position in the full register file expected by the DWARF functionality. * libdwfl_stacktrace/Makefile.am: Rename dwflst_sample_frame.c from dwflst_perf_frame.c. * libdwfl_stacktrace/libdwfl_stacktrace.h (dwflst_sample_getframes): New function providing unwinding functionality with a regs_mapping array rather than a linux-kernel-dependent perf_regs_mask. * libdw/libdw.map (ELFUTILS_0.194_EXPERIMENTAL): Add dwflst_sample_getframes. * libdwfl_stacktrace/dwflst_sample_frame.c: Renamed from dwflst_perf_frame.c. Remove linux/perf_event.h dependency. (struct sample_info): Rename from perf_sample_info, include regs_mapping field, replace abi with elfclass field. (sample_next_thread): Renamed struct sample_info. (sample_getthread): Renamed struct sample_info. (copy_word): Use elfclass instead of perf abi field. (elf_memory_read): Renamed struct sample_info, use elfclass. (sample_memory_read): Renamed struct sample_info, use elfclass. (sample_set_initial_registers): Renamed struct sample_info, pass regs_mapping to ebl_set_initial_registers_sample. (dwflst_sample_getframes): New function. (dwflst_perf_sample_getframes): Reimplement in terms of dwflst_sample_getframes and ebl_sample_perf_regs_mapping. * libebl/ebl-hooks.h (set_initial_registers_sample): Now takes regs_mapping instead of regs_mask. (sample_base_addr): Removed. 
(sample_pc): Removed. (sample_sp_pc): New function combining the removed functions for efficiency. (sample_perf_regs_mapping): New function translating perf_regs_mask to regs_mapping array. * libebl/eblinitreg_sample.c (ebl_sample_base_addr): Removed. (ebl_sample_pc): Removed. (ebl_sample_sp_pc): New function. (ebl_set_initial_registers_sample): Take regs_mapping, provide a default implementation for contiguous dwarf_regs array. (ebl_sample_perf_regs_mapping): New function. * libebl/eblclosebackend.c (ebl_closebackend): Free cached_regs_mapping. * libebl/libebl.h (ebl_set_initial_registers_sample): Now takes regs_mapping instead of regs_mask. (ebl_sample_base_addr): Removed. (ebl_sample_pc): Removed. (ebl_sample_sp_pc): New function. (ebl_sample_perf_regs_mapping): New function. * libebl/libeblP.h (struct ebl): Add caching fields to remove the need to repeat a sample_perf_regs_mapping() computation for every frame when the perf_regs_mask is consistent. * backends/Makefile.am: Remove no-longer-needed linux-perf-regs.c. * backends/i386_init.c (i386_init): Renamed sample_* functions, added cached_regs_mapping and related fields/functions. * backends/i386_initreg_sample.c (i386_sample_base_addr): Removed. (i386_sample_pc): Removed. (i386_sample_sp_pc): New function combining the removed functions. (i386_set_initial_registers_sample): Removed. (i386_sample_perf_regs_mapping): New function translating perf_regs_mask to regs_mapping array. * backends/linux-perf-regs.c: Removed as perf_sample_find_reg is no longer needed. * backends/x86_64_init.c (x86_64_init): Renamed sample_* functions, added cached_regs_mapping and related fields/functions. * backends/x86_64_initreg_sample.c (x86_64_sample_base_addr): Removed. (x86_64_sample_pc): Removed. (x86_64_sample_sp_pc): New function combining the removed functions. (x86_64_set_initial_registers_sample): Removed. (x86_64_sample_perf_regs_mapping): New function translating perf_regs_mask to regs_mapping array. 
* backends/x86_initreg_sample.c (x86_set_initial_registers_sample): Removed. (x86_sample_sp_pc): New function. (x86_sample_perf_regs_mapping): New function translating perf_regs_mask to regs_mapping array. Signed-off-by: Serhei Makarov --- backends/Makefile.am | 2 +- backends/i386_init.c | 9 +- backends/i386_initreg_sample.c | 72 +++-------- backends/linux-perf-regs.c | 48 ------- backends/x86_64_init.c | 9 +- backends/x86_64_initreg_sample.c | 70 +++-------- backends/x86_initreg_sample.c | 108 ++++++++++++---- libdw/libdw.map | 6 +- libdwfl_stacktrace/Makefile.am | 2 +- ...lst_perf_frame.c => dwflst_sample_frame.c} | 119 +++++++++++------- libdwfl_stacktrace/libdwfl_stacktrace.h | 31 +++-- libebl/ebl-hooks.h | 35 ++++-- libebl/eblclosebackend.c | 4 + libebl/eblinitreg_sample.c | 63 +++++++--- libebl/libebl.h | 50 +++++--- libebl/libeblP.h | 7 ++ 16 files changed, 337 insertions(+), 298 deletions(-) delete mode 100644 backends/linux-perf-regs.c rename libdwfl_stacktrace/{dwflst_perf_frame.c => dwflst_sample_frame.c} (73%) diff --git a/backends/Makefile.am b/backends/Makefile.am index 8ccbdb505..7a820df04 100644 --- a/backends/Makefile.am +++ b/backends/Makefile.am @@ -121,7 +121,7 @@ am_libebl_backends_pic_a_OBJECTS = $(libebl_backends_a_SOURCES:.c=.os) noinst_HEADERS = libebl_CPU.h libebl_PERF_FLAGS.h common-reloc.c \ linux-core-note.c x86_corenote.c \ - linux-perf-regs.c x86_initreg_sample.c + x86_initreg_sample.c EXTRA_DIST = $(modules:=_reloc.def) diff --git a/backends/i386_init.c b/backends/i386_init.c index e64ef6edf..a980e71ac 100644 --- a/backends/i386_init.c +++ b/backends/i386_init.c @@ -60,10 +60,13 @@ i386_init (Elf *elf __attribute__ ((unused)), (Likely an artifact of reusing that header between i386/x86_64.) 
*/ eh->frame_nregs = 9; HOOK (eh, set_initial_registers_tid); - HOOK (eh, set_initial_registers_sample); - HOOK (eh, sample_base_addr); - HOOK (eh, sample_pc); + /* set_initial_registers_sample is default ver */ + HOOK (eh, sample_sp_pc); + HOOK (eh, sample_perf_regs_mapping); eh->perf_frame_regs_mask = PERF_FRAME_REGISTERS_I386; + eh->cached_perf_regs_mask = 0; + eh->cached_regs_mapping = NULL; + eh->cached_n_regs_mapping = -1; HOOK (eh, unwind); return eh; diff --git a/backends/i386_initreg_sample.c b/backends/i386_initreg_sample.c index 677393c91..94955191f 100644 --- a/backends/i386_initreg_sample.c +++ b/backends/i386_initreg_sample.c @@ -31,6 +31,7 @@ #endif #include +#include #if (defined __i386__ || defined __x86_64__) && defined(__linux__) # include # include @@ -40,69 +41,26 @@ #include "libebl_CPU.h" #include "libebl_PERF_FLAGS.h" #if (defined __i386__ || defined __x86_64__) && defined(__linux__) -# include "linux-perf-regs.c" # include "x86_initreg_sample.c" #endif -/* Register ordering cf. 
linux arch/x86/include/uapi/asm/perf_regs.h, - enum perf_event_x86_regs: */ -Dwarf_Word -i386_sample_base_addr (const Dwarf_Word *regs, uint32_t n_regs, - uint64_t regs_mask, - /* XXX hypothetically needed if abi varies - between samples in the same process; - not needed on x86 */ - uint32_t abi __attribute__((unused))) -{ -#if (!defined __i386__ && !defined __x86_64__) || !defined(__linux__) - (void)regs; - (void)n_regs; - (void)regs_mask; - return 0; -#else /* __i386__ || __x86_64__ */ - (void)regs; - (void)n_regs; - (void)regs_mask; - return perf_sample_find_reg (regs, n_regs, regs_mask, - 7 /* index into perf_event_x86_regs */); -#endif -} - -Dwarf_Word -i386_sample_pc (const Dwarf_Word *regs, uint32_t n_regs, - uint64_t regs_mask, - uint32_t abi __attribute__((unused))) +bool +i386_sample_sp_pc (const Dwarf_Word *regs, uint32_t n_regs, + const int *regs_mapping, uint32_t n_regs_mapping, + Dwarf_Word *sp, Dwarf_Word *pc) { -#if (!defined __i386__ && !defined __x86_64__) || !defined(__linux__) - (void)regs; - (void)n_regs; - (void)regs_mask; - return 0; -#else /* __i386__ || __x86_64__ */ - return perf_sample_find_reg (regs, n_regs, regs_mask, - 8 /* index into perf_event_x86_regs */); -#endif + /* XXX for dwarf_regs indices, compare i386_initreg.c */ + return x86_sample_sp_pc (regs, n_regs, regs_mapping, n_regs_mapping, + sp, 4 /* index of sp in dwarf_regs */, + pc, 8 /* index of pc in dwarf_regs */); } bool -i386_set_initial_registers_sample (const Dwarf_Word *regs, uint32_t n_regs, - uint64_t regs_mask, uint32_t abi, - ebl_tid_registers_t *setfunc, - void *arg) +i386_sample_perf_regs_mapping (Ebl *ebl, + uint64_t perf_regs_mask, uint32_t abi, + const int **regs_mapping, + size_t *n_regs_mapping) { -#if (!defined __i386__ && !defined __x86_64__) || !defined(__linux__) - (void)regs; - (void)n_regs; - (void)regs_mask; - (void)abi; - (void)setfunc; - (void)arg; - return false; -#else /* __i386__ || __x86_64__ */ - Dwarf_Word dwarf_regs[9]; - if 
(!x86_set_initial_registers_sample (regs, n_regs, regs_mask, - abi, dwarf_regs, 9)) - return false; - return setfunc (0, 9, dwarf_regs, arg); -#endif + return x86_sample_perf_regs_mapping (ebl, perf_regs_mask, abi, + regs_mapping, n_regs_mapping); } diff --git a/backends/linux-perf-regs.c b/backends/linux-perf-regs.c deleted file mode 100644 index 22ad67c6a..000000000 --- a/backends/linux-perf-regs.c +++ /dev/null @@ -1,48 +0,0 @@ -/* Common pieces for handling registers in a linux perf_events sample. - Copyright (C) 2025 Red Hat, Inc. - This file is part of elfutils. - - This file is free software; you can redistribute it and/or modify - it under the terms of either - - * the GNU Lesser General Public License as published by the Free - Software Foundation; either version 3 of the License, or (at - your option) any later version - - or - - * the GNU General Public License as published by the Free - Software Foundation; either version 2 of the License, or (at - your option) any later version - - or both in parallel, as here. - - elfutils is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - You should have received copies of the GNU General Public License and - the GNU Lesser General Public License along with this program. If - not, see . 
*/ - -static Dwarf_Word -perf_sample_find_reg (const Dwarf_Word *regs, uint32_t n_regs, - uint64_t regs_mask, - int target) -{ - int j, k; uint64_t bit; - for (j = 0, k = 0, bit = 1; k < PERF_REG_X86_64_MAX; k++, bit <<= 1) - { - if (bit & regs_mask) { - if (n_regs <= (uint32_t) j) - return 0; /* regs_mask count doesn't match n_regs */ - if (k == target) - return regs[j]; - if (k > target) - return 0; /* regs_mask doesn't include desired reg */ - j++; - } - } - return 0; -} diff --git a/backends/x86_64_init.c b/backends/x86_64_init.c index 6a1cbc4b9..5f929758e 100644 --- a/backends/x86_64_init.c +++ b/backends/x86_64_init.c @@ -63,10 +63,13 @@ x86_64_init (Elf *elf __attribute__ ((unused)), /* gcc/config/ #define DWARF_FRAME_REGISTERS. */ eh->frame_nregs = 17; HOOK (eh, set_initial_registers_tid); - HOOK (eh, set_initial_registers_sample); - HOOK (eh, sample_base_addr); - HOOK (eh, sample_pc); + /* set_initial_registers_sample is default ver */ + HOOK (eh, sample_sp_pc); + HOOK (eh, sample_perf_regs_mapping); eh->perf_frame_regs_mask = PERF_FRAME_REGISTERS_X86_64; + eh->cached_perf_regs_mask = 0; + eh->cached_regs_mapping = NULL; + eh->cached_n_regs_mapping = -1; HOOK (eh, unwind); HOOK (eh, check_reloc_target_type); diff --git a/backends/x86_64_initreg_sample.c b/backends/x86_64_initreg_sample.c index 48d14bc86..9dd708c96 100644 --- a/backends/x86_64_initreg_sample.c +++ b/backends/x86_64_initreg_sample.c @@ -31,6 +31,7 @@ #endif #include +#include #if defined(__x86_64__) && defined(__linux__) # include # include @@ -40,67 +41,26 @@ #include "libebl_CPU.h" #include "libebl_PERF_FLAGS.h" #if defined(__x86_64__) && defined(__linux__) -# include "linux-perf-regs.c" # include "x86_initreg_sample.c" #endif -/* Register ordering cf. 
linux arch/x86/include/uapi/asm/perf_regs.h, - enum perf_event_x86_regs: */ -Dwarf_Word -x86_64_sample_base_addr (const Dwarf_Word *regs, uint32_t n_regs, - uint64_t regs_mask, - /* XXX hypothetically needed if abi varies - between samples in the same process; - not needed on x86*/ - uint32_t abi __attribute__((unused))) -{ -#if !defined(__x86_64__) || !defined(__linux__) - (void)regs; - (void)n_regs; - (void)regs_mask; - return 0; -#else /* __x86_64__ */ - return perf_sample_find_reg (regs, n_regs, regs_mask, - 7 /* index into perf_event_x86_regs */); -#endif -} - -Dwarf_Word -x86_64_sample_pc (const Dwarf_Word *regs, uint32_t n_regs, - uint64_t regs_mask, - uint32_t abi __attribute__((unused))) +bool +x86_64_sample_sp_pc (const Dwarf_Word *regs, uint32_t n_regs, + const int *regs_mapping, uint32_t n_regs_mapping, + Dwarf_Word *sp, Dwarf_Word *pc) { -#if !defined(__x86_64__) || !defined(__linux__) - (void)regs; - (void)n_regs; - (void)regs_mask; - return 0; -#else /* __x86_64__ */ - return perf_sample_find_reg (regs, n_regs, regs_mask, - 8 /* index into perf_event_x86_regs */); -#endif + /* XXX for dwarf_regs indices, compare x86_64_initreg.c */ + return x86_sample_sp_pc (regs, n_regs, regs_mapping, n_regs_mapping, + sp, 7 /* index of sp in dwarf_regs */, + pc, 16 /* index of pc in dwarf_regs */); } bool -x86_64_set_initial_registers_sample (const Dwarf_Word *regs, uint32_t n_regs, - uint64_t regs_mask, uint32_t abi, - ebl_tid_registers_t *setfunc, - void *arg) +x86_64_sample_perf_regs_mapping (Ebl *ebl, + uint64_t perf_regs_mask, uint32_t abi, + const int **regs_mapping, + size_t *n_regs_mapping) { -#if !defined(__x86_64__) || !defined(__linux__) - (void)regs; - (void)n_regs; - (void)regs_mask; - (void)abi; - (void)setfunc; - (void)arg; - return false; -#else /* __x86_64__ */ - Dwarf_Word dwarf_regs[17]; - if (!x86_set_initial_registers_sample (regs, n_regs, regs_mask, - abi, dwarf_regs, 9)) - return false; - return setfunc (0, 17, dwarf_regs, arg); -#endif + 
return x86_sample_perf_regs_mapping (ebl, perf_regs_mask, abi, + regs_mapping, n_regs_mapping); } - diff --git a/backends/x86_initreg_sample.c b/backends/x86_initreg_sample.c index 8d6b471b0..47cd91c23 100644 --- a/backends/x86_initreg_sample.c +++ b/backends/x86_initreg_sample.c @@ -1,4 +1,4 @@ -/* x86 linux perf_events register handling, pieces common to x86-64 and i386. +/* x86 stack sample register handling, pieces common to x86-64 and i386. Copyright (C) 2025 Red Hat, Inc. This file is part of elfutils. @@ -27,13 +27,52 @@ not, see . */ static bool -x86_set_initial_registers_sample (const Dwarf_Word *regs, uint32_t n_regs, - uint64_t regs_mask, uint32_t abi, - Dwarf_Word *dwarf_regs, int expected_regs) +x86_sample_sp_pc (const Dwarf_Word *regs, uint32_t n_regs, + const int *regs_mapping, uint32_t n_regs_mapping, + Dwarf_Word *sp, uint sp_index /* into dwarf_regs */, + Dwarf_Word *pc, uint pc_index /* into dwarf_regs */) { -#if (!defined __i386__ && !defined __x86_64__) || !defined(__linux__) + if (sp != NULL) *sp = 0; + if (pc != NULL) *pc = 0; +#if !defined(__x86_64__) + (void)regs; + (void)n_regs; + (void)regs_mapping; + (void)n_regs_mapping; return false; -#else /* __i386__ || __x86_64__ */ +#else /* __x86_64__ */ + /* TODO: Register locations could be cached and rechecked on a + fastpath without needing to loop? 
*/ + int j, need_sp = (sp != NULL), need_pc = (pc != NULL); + for (j = 0; (need_sp || need_pc) && n_regs_mapping > (uint32_t)j; j++) + { + if (n_regs < (uint32_t)j) break; + if (need_sp && regs_mapping[j] == (int)sp_index) + { + *sp = regs[j]; need_sp = false; + } + if (need_pc && regs_mapping[j] == (int)pc_index) + { + *pc = regs[j]; need_pc = false; + } + } + return (!need_sp && !need_pc); +#endif +} + +static bool +x86_sample_perf_regs_mapping (Ebl *ebl, + uint64_t perf_regs_mask, uint32_t abi, + const int **regs_mapping, + size_t *n_regs_mapping) +{ + if (perf_regs_mask != 0 && ebl->cached_perf_regs_mask == perf_regs_mask) + { + *regs_mapping = ebl->cached_regs_mapping; + *n_regs_mapping = ebl->cached_n_regs_mapping; + return true; + } + /* The following facts are needed to translate x86 registers correctly: - perf register order seen in linux arch/x86/include/uapi/asm/perf_regs.h The registers array is built in the same order as the enum! @@ -52,39 +91,58 @@ x86_set_initial_registers_sample (const Dwarf_Word *regs, uint32_t n_regs, bool is_abi32 = (abi == PERF_SAMPLE_REGS_ABI_32); /* Locations of dwarf_regs in the perf_event_x86_regs enum order, - not the regs[i] array (which will include a subset of the regs): */ + not the regs[] array (which will include a subset of the regs): */ static const int regs_i386[] = {0, 2, 3, 1, 7/*sp*/, 6, 4, 5, 8/*ip*/}; static const int regs_x86_64[] = {0, 3, 2, 1, 4, 5, 6, 7/*sp*/, 16/*r8 after flags+segment*/, 17, 18, 19, 20, 21, 22, 23, 8/*ip*/}; const int *dwarf_to_perf = is_abi32 ? regs_i386 : regs_x86_64; - /* Locations of perf_regs in the regs[] array, according to regs_mask: */ - int perf_to_regs[PERF_REG_X86_64_MAX]; - uint64_t expected_mask = is_abi32 ? PERF_FRAME_REGISTERS_I386 : PERF_FRAME_REGISTERS_X86_64; - int j, k; uint64_t bit; - /* TODO: Is it worth caching this perf_to_regs computation as long - as regs_mask is kept the same across repeated calls? 
*/ - for (j = 0, k = 0, bit = 1; k < PERF_REG_X86_64_MAX; k++, bit <<= 1) + /* Count bits and allocate regs_mapping: */ + int j, k, kmax, count; uint64_t bit; + for (k = 0, kmax = -1, count = 0, bit = 1; + k < PERF_REG_X86_64_MAX; k++, bit <<= 1) { - if ((bit & expected_mask) && (bit & regs_mask)) { - if (n_regs <= (uint32_t)j) - return false; /* regs_mask count doesn't match n_regs */ - perf_to_regs[k] = j; - j++; - } else { - perf_to_regs[k] = -1; + if ((bit & perf_regs_mask)) { + count++; + kmax = k; } } + ebl->cached_perf_regs_mask = perf_regs_mask; + ebl->cached_regs_mapping = (int *)calloc (count, sizeof(int)); + ebl->cached_n_regs_mapping = count; - for (int i = 0; i < expected_regs; i++) + /* Locations of perf_regs in the regs[] array, according to + perf_regs_mask: */ + int perf_to_regs[PERF_REG_X86_64_MAX]; + uint64_t expected_mask = is_abi32 ? + PERF_FRAME_REGISTERS_I386 : PERF_FRAME_REGISTERS_X86_64; + for (j = 0, k = 0, bit = 1; k <= kmax; k++, bit <<= 1) + { + if ((bit & expected_mask) && (bit & perf_regs_mask)) + { + perf_to_regs[k] = j; + j++; + } + else + { + perf_to_regs[k] = -1; + } + } + if (j > (int)ebl->cached_n_regs_mapping) + return false; + + /* Locations of perf_regs in the dwarf_regs array, according to + perf_regs_mask and perf_to_regs[]: */ + for (size_t i = 0; i < ebl->frame_nregs; i++) { k = dwarf_to_perf[i]; j = perf_to_regs[k]; if (j < 0) continue; - if (n_regs <= (uint32_t)j) continue; - dwarf_regs[i] = regs[j]; + ebl->cached_regs_mapping[j] = i; } + + *regs_mapping = ebl->cached_regs_mapping; + *n_regs_mapping = ebl->cached_n_regs_mapping; return true; -#endif /* __i386__ || __x86_64__ */ } diff --git a/libdw/libdw.map b/libdw/libdw.map index 137b57383..b45647e6f 100644 --- a/libdw/libdw.map +++ b/libdw/libdw.map @@ -395,7 +395,6 @@ ELFUTILS_0.193 { /* XXX Experimental libdwfl_stacktrace API. 
*/ ELFUTILS_0.193_EXPERIMENTAL { global: - dwflst_perf_sample_preferred_regs_mask; dwflst_perf_sample_preferred_regs_mask; dwflst_tracker_begin; dwflst_tracker_dwfl_begin; @@ -407,3 +406,8 @@ ELFUTILS_0.193_EXPERIMENTAL { dwflst_tracker_find_pid; dwflst_perf_sample_getframes; }; + +ELFUTILS_0.194_EXPERIMENTAL { + global: + dwflst_sample_getframes; +} ELFUTILS_0.193_EXPERIMENTAL; diff --git a/libdwfl_stacktrace/Makefile.am b/libdwfl_stacktrace/Makefile.am index 99a80b5c0..b92421291 100644 --- a/libdwfl_stacktrace/Makefile.am +++ b/libdwfl_stacktrace/Makefile.am @@ -45,7 +45,7 @@ libdwfl_stacktrace_a_SOURCES = dwflst_process_tracker.c \ dwflst_tracker_elftab.c \ dwflst_tracker_dwfltab.c \ libdwfl_stacktrace_next_prime.c \ - dwflst_perf_frame.c + dwflst_sample_frame.c libdwfl_stacktrace = $(libdw) libdw = ../libdw/libdw.so diff --git a/libdwfl_stacktrace/dwflst_perf_frame.c b/libdwfl_stacktrace/dwflst_sample_frame.c similarity index 73% rename from libdwfl_stacktrace/dwflst_perf_frame.c rename to libdwfl_stacktrace/dwflst_sample_frame.c index 4fc60183d..090d32208 100644 --- a/libdwfl_stacktrace/dwflst_perf_frame.c +++ b/libdwfl_stacktrace/dwflst_sample_frame.c @@ -30,17 +30,6 @@ # include #endif -#if defined(__linux__) -# include -#else -/* XXX required by copy_word() below */ -enum perf_sample_regs_abi { - PERF_SAMPLE_REGS_ABI_NONE = 0, - PERF_SAMPLE_REGS_ABI_32 = 1, - PERF_SAMPLE_REGS_ABI_64 = 2, -}; -#endif - #include "libdwfl_stacktraceP.h" Ebl *default_ebl = NULL; @@ -67,7 +56,7 @@ uint64_t dwflst_perf_sample_preferred_regs_mask (GElf_Half machine) return 0; } -struct perf_sample_info { +struct sample_info { pid_t pid; pid_t tid; Dwarf_Addr base_addr; @@ -75,8 +64,9 @@ struct perf_sample_info { size_t stack_size; const Dwarf_Word *regs; uint n_regs; - uint64_t perf_regs_mask; - uint abi; + const int *regs_mapping; + size_t n_regs_mapping; + int elfclass; Dwarf_Addr pc; }; @@ -88,8 +78,8 @@ static pid_t sample_next_thread (Dwfl *dwfl __attribute__ ((unused)), void 
*dwfl_arg, void **thread_argp) { - struct perf_sample_info *sample_arg = - (struct perf_sample_info *)dwfl_arg; + struct sample_info *sample_arg = + (struct sample_info *)dwfl_arg; if (*thread_argp == NULL) { *thread_argp = (void *)0xea7b3375; @@ -104,8 +94,8 @@ static bool sample_getthread (Dwfl *dwfl __attribute__ ((unused)), pid_t tid, void *dwfl_arg, void **thread_argp) { - struct perf_sample_info *sample_arg = - (struct perf_sample_info *)dwfl_arg; + struct sample_info *sample_arg = + (struct sample_info *)dwfl_arg; *thread_argp = (void *)sample_arg; if (sample_arg->tid != tid) { @@ -127,10 +117,10 @@ sample_getthread (Dwfl *dwfl __attribute__ ((unused)), pid_t tid, else \ memcpy ((result), (d), sizeof (uint32_t)); -#define copy_word(result, d, abi) \ - if ((abi) == PERF_SAMPLE_REGS_ABI_64) \ +#define copy_word(result, d, elfclass) \ + if ((elfclass) == ELFCLASS64) \ { copy_word_64((result), (d)); } \ - else if ((abi) == PERF_SAMPLE_REGS_ABI_32) \ + else if ((elfclass) == ELFCLASS32) \ { copy_word_32((result), (d)); } \ else \ *(result) = 0; @@ -138,8 +128,8 @@ sample_getthread (Dwfl *dwfl __attribute__ ((unused)), pid_t tid, static bool elf_memory_read (Dwfl *dwfl, Dwarf_Addr addr, Dwarf_Word *result, void *arg) { - struct perf_sample_info *sample_arg = - (struct perf_sample_info *)arg; + struct sample_info *sample_arg = + (struct sample_info *)arg; Dwfl_Module *mod = INTUSE(dwfl_addrmodule) (dwfl, addr); Dwarf_Addr bias; Elf_Scn *section = INTUSE(dwfl_module_address_section) (mod, &addr, &bias); @@ -153,7 +143,7 @@ elf_memory_read (Dwfl *dwfl, Dwarf_Addr addr, Dwarf_Word *result, void *arg) Elf_Data *data = elf_getdata(section, NULL); if (data && data->d_buf && data->d_size > addr) { uint8_t *d = ((uint8_t *)data->d_buf) + addr; - copy_word(result, d, sample_arg->abi); + copy_word(result, d, sample_arg->elfclass); return true; } __libdwfl_seterrno(DWFL_E_ADDR_OUTOFRANGE); @@ -163,36 +153,37 @@ elf_memory_read (Dwfl *dwfl, Dwarf_Addr addr, Dwarf_Word *result, 
void *arg) static bool sample_memory_read (Dwfl *dwfl, Dwarf_Addr addr, Dwarf_Word *result, void *arg) { - struct perf_sample_info *sample_arg = - (struct perf_sample_info *)arg; + struct sample_info *sample_arg = + (struct sample_info *)arg; /* Imitate read_cached_memory() with the stack sample data as the cache. */ if (addr < sample_arg->base_addr || addr - sample_arg->base_addr >= sample_arg->stack_size) return elf_memory_read(dwfl, addr, result, arg); const uint8_t *d = &sample_arg->stack[addr - sample_arg->base_addr]; - copy_word(result, d, sample_arg->abi); + copy_word(result, d, sample_arg->elfclass); return true; } + static bool sample_set_initial_registers (Dwfl_Thread *thread, void *arg) { - struct perf_sample_info *sample_arg = - (struct perf_sample_info *)arg; + struct sample_info *sample_arg = + (struct sample_info *)arg; INTUSE(dwfl_thread_state_register_pc) (thread, sample_arg->pc); Dwfl_Process *process = thread->process; Ebl *ebl = process->ebl; return ebl_set_initial_registers_sample (ebl, sample_arg->regs, sample_arg->n_regs, - sample_arg->perf_regs_mask, sample_arg->abi, + sample_arg->regs_mapping, sample_arg->n_regs_mapping, __libdwfl_set_initial_registers_thread, thread); } static void sample_detach (Dwfl *dwfl __attribute__ ((unused)), void *dwfl_arg) { - struct perf_sample_info *sample_arg = - (struct perf_sample_info *)dwfl_arg; + struct sample_info *sample_arg = + (struct sample_info *)dwfl_arg; free (sample_arg); } @@ -207,18 +198,18 @@ static const Dwfl_Thread_Callbacks sample_thread_callbacks = }; int -dwflst_perf_sample_getframes (Dwfl *dwfl, Elf *elf, - pid_t pid, pid_t tid, - const void *stack, size_t stack_size, - const Dwarf_Word *regs, uint n_regs, - uint64_t perf_regs_mask, uint abi, - int (*callback) (Dwfl_Frame *state, void *arg), - void *arg) +dwflst_sample_getframes (Dwfl *dwfl, Elf *elf, + pid_t pid, pid_t tid, + const void *stack, size_t stack_size, + const Dwarf_Word *regs, uint n_regs, + const int *regs_mapping, size_t 
n_regs_mapping, + int (*callback) (Dwfl_Frame *state, void *arg), + void *arg) { /* TODO: Lock the dwfl to ensure attach_state does not interfere with other dwfl_perf_sample_getframes calls. */ - struct perf_sample_info *sample_arg; + struct sample_info *sample_arg; bool attached = false; if (dwfl->process != NULL) { @@ -241,21 +232,53 @@ dwflst_perf_sample_getframes (Dwfl *dwfl, Elf *elf, sample_arg->stack_size = stack_size; sample_arg->regs = regs; sample_arg->n_regs = n_regs; - sample_arg->perf_regs_mask = perf_regs_mask; - sample_arg->abi = abi; + sample_arg->regs_mapping = regs_mapping; + sample_arg->n_regs_mapping = n_regs_mapping; if (! attached && ! INTUSE(dwfl_attach_state) (dwfl, elf, pid, &sample_thread_callbacks, sample_arg)) - return -1; + return -1; - /* Now that Dwfl is attached, we can access its Ebl: */ Dwfl_Process *process = dwfl->process; Ebl *ebl = process->ebl; - sample_arg->base_addr = ebl_sample_base_addr(ebl, regs, n_regs, - perf_regs_mask, abi); - sample_arg->pc = ebl_sample_pc(ebl, regs, n_regs, - perf_regs_mask, abi); + sample_arg->elfclass = ebl_get_elfclass(ebl); + ebl_sample_sp_pc(ebl, regs, n_regs, + regs_mapping, n_regs_mapping, + &sample_arg->base_addr, &sample_arg->pc); return INTUSE(dwfl_getthread_frames) (dwfl, tid, callback, arg); } + +int +dwflst_perf_sample_getframes (Dwfl *dwfl, Elf *elf, + pid_t pid, pid_t tid, + const void *stack, size_t stack_size, + const Dwarf_Word *regs, uint32_t n_regs, + uint64_t perf_regs_mask, uint32_t abi, + int (*callback) (Dwfl_Frame *state, void *arg), + void *arg) +{ + /* Select the regs_mapping based on architecture. This will be + cached in ebl to avoid having to recompute the regs_mapping array + when perf_regs_mask is consistent for the entire session: */ + const int *regs_mapping; + size_t n_regs_mapping; + Dwfl_Process *process = dwfl->process; + Ebl *ebl = process->ebl; + /* XXX May want to check if abi matches ebl_get_elfclass(ebl). 
*/ + if (!ebl_sample_perf_regs_mapping(ebl, + perf_regs_mask, abi, + &regs_mapping, &n_regs_mapping)) + { + __libdwfl_seterrno(DWFL_E_LIBEBL_BAD); + return -1; + } + + /* Then we can call dwflst_sample_getframes: */ + return dwflst_sample_getframes (dwfl, elf, pid, tid, + stack, stack_size, + regs, n_regs, + regs_mapping, n_regs_mapping, + callback, arg); +} diff --git a/libdwfl_stacktrace/libdwfl_stacktrace.h b/libdwfl_stacktrace/libdwfl_stacktrace.h index b236ddc4f..84cb69a30 100644 --- a/libdwfl_stacktrace/libdwfl_stacktrace.h +++ b/libdwfl_stacktrace/libdwfl_stacktrace.h @@ -113,14 +113,31 @@ extern int dwflst_tracker_linux_proc_find_elf (Dwfl_Module *mod, void **userdata const char *module_name, Dwarf_Addr base, char **file_name, Elf **); - /* Like dwfl_thread_getframes, but iterates through the frames for a - linux perf_events stack sample rather than a live thread. Calls - dwfl_attach_state on DWFL, with architecture specified by ELF, ELF - must remain valid during Dwfl lifetime. Returns zero if all frames - have been processed by the callback, returns -1 on error, or the - value of the callback when not DWARF_CB_OK. -1 returned on error - will set dwfl_errno (). */ + stack sample rather than a live thread. Register file for the stack + sample is specified by REGS and N_REGS. For each item in REGS, the + REGS_MAPPING array specifies its position in the full register file + expected by the DWARF infrastructure. Calls dwfl_attach_state on + DWFL, with architecture specified by ELF, ELF must remain valid + during Dwfl lifetime. Returns zero if all frames have been + processed by the callback, returns -1 on error, or the value of the + callback when not DWARF_CB_OK. -1 returned on error will set + dwfl_errno (). 
*/ +int dwflst_sample_getframes (Dwfl *dwfl, Elf *elf, pid_t pid, pid_t tid, + const void *stack, size_t stack_size, + const Dwarf_Word *regs, uint32_t n_regs, + const int *regs_mapping, size_t n_regs_mapping, + int (*callback) (Dwfl_Frame *state, void *arg), + void *arg) + __nonnull_attribute__ (1, 5, 7, 9, 11); + +/* Adapts dwflst_sample_getframes to linux perf_events stack sample + and register file data format. Calls dwfl_attach_state on DWFL, + with architecture specified by ELF, ELF must remain valid during + Dwfl lifetime. Returns zero if all frames have been processed by + the callback, returns -1 on error, or the value of the callback + when not DWARF_CB_OK. -1 returned on error will set dwfl_errno + (). */ int dwflst_perf_sample_getframes (Dwfl *dwfl, Elf *elf, pid_t pid, pid_t tid, const void *stack, size_t stack_size, const Dwarf_Word *regs, uint32_t n_regs, diff --git a/libebl/ebl-hooks.h b/libebl/ebl-hooks.h index 05474fbc8..29ce96494 100644 --- a/libebl/ebl-hooks.h +++ b/libebl/ebl-hooks.h @@ -158,21 +158,32 @@ bool EBLHOOK(set_initial_registers_tid) (pid_t tid, ebl_tid_registers_t *setfunc, void *arg); -/* Set process data from a perf_events sample and call SETFUNC one or more times. - Method should be present only when EBL_PERF_FRAME_REGS_MASK > 0, otherwise the - backend doesn't support unwinding from perf_events data. */ -bool EBLHOOK(set_initial_registers_sample) (const Dwarf_Word *regs, uint32_t n_regs, - uint64_t regs_mask, uint32_t abi, +/* Set process data from a register sample and call SETFUNC one or more times. + Method should be present only when a 'default' strategy of populating an + array of DWARF regs and calling SETFUNC once would be inefficient, e.g. + on architectures with sparse/noncontiguous DWARF register files. 
*/ +bool EBLHOOK(set_initial_registers_sample) (const Dwarf_Word *regs, + uint32_t n_regs, + const int *regs_mapping, + size_t n_regs_mapping, ebl_tid_registers_t *setfunc, void *arg); -/* Extract the stack address from a perf_events register sample. */ -Dwarf_Word EBLHOOK(sample_base_addr) (const Dwarf_Word *regs, uint32_t n_regs, - uint64_t regs_mask, uint32_t abi); - -/* Extract the instruction pointer from a perf_events register sample. */ -Dwarf_Word EBLHOOK(sample_pc) (const Dwarf_Word *regs, uint32_t n_regs, - uint64_t regs_mask, uint32_t abi); +/* Extract the stack address and instruction pointer from a register sample. */ +bool EBLHOOK(sample_sp_pc) (const Dwarf_Word *regs, uint32_t n_regs, + const int *regs_mapping, + uint32_t n_regs_mapping, + Dwarf_Word *sp, Dwarf_Word *pc); + +/* Translate from linux perf_events PERF_REGS_MASK and ABI to a generic + REGS_MAPPING array for use with ebl_set_initial_registers_sample(). + Method should be present only when EBL_PERF_FRAME_REGS_MASK > 0, + otherwise the backend doesn't support unwinding from perf_events + data. */ +bool EBLHOOK(sample_perf_regs_mapping) (Ebl *ebl, + uint64_t perf_regs_mask, uint32_t abi, + const int **regs_mapping, + size_t *n_regs_mapping); /* Convert *REGNO as is in DWARF to a lower range suitable for Dwarf_Frame->REGS indexing. */ diff --git a/libebl/eblclosebackend.c b/libebl/eblclosebackend.c index 7fa068ec0..d80970784 100644 --- a/libebl/eblclosebackend.c +++ b/libebl/eblclosebackend.c @@ -43,6 +43,10 @@ ebl_closebackend (Ebl *ebl) /* Run the destructor. */ ebl->destr (ebl); + /* Free cached_regs_mapping. */ + if (ebl->cached_regs_mapping != NULL) + free (ebl->cached_regs_mapping); + /* Free the resources. 
*/ free (ebl); } diff --git a/libebl/eblinitreg_sample.c b/libebl/eblinitreg_sample.c index 53244d1e7..d5704dfa6 100644 --- a/libebl/eblinitreg_sample.c +++ b/libebl/eblinitreg_sample.c @@ -34,34 +34,61 @@ #include #include -Dwarf_Word -ebl_sample_base_addr (Ebl *ebl, - const Dwarf_Word *regs, uint32_t n_regs, - uint64_t regs_mask, uint32_t abi) -{ - assert (ebl->sample_base_addr != NULL); - return ebl->sample_base_addr (regs, n_regs, regs_mask, abi); -} - -Dwarf_Word -ebl_sample_pc (Ebl *ebl, - const Dwarf_Word *regs, uint32_t n_regs, - uint64_t regs_mask, uint32_t abi) +bool +ebl_sample_sp_pc (Ebl *ebl, + const Dwarf_Word *regs, uint32_t n_regs, + const int *regs_mapping, size_t n_regs_mapping, + Dwarf_Word *sp, Dwarf_Word *pc) { - assert (ebl->sample_pc != NULL); - return ebl->sample_pc (regs, n_regs, regs_mask, abi); + assert (ebl->sample_sp_pc != NULL); + return ebl->sample_sp_pc (regs, n_regs, + regs_mapping, n_regs_mapping, + sp, pc); } bool ebl_set_initial_registers_sample (Ebl *ebl, const Dwarf_Word *regs, uint32_t n_regs, - uint64_t regs_mask, uint32_t abi, + const int *regs_mapping, size_t n_regs_mapping, ebl_tid_registers_t *setfunc, void *arg) { - /* If set_initial_registers_sample is unsupported then PERF_FRAME_REGS_MASK is zero. */ - assert (ebl->set_initial_registers_sample != NULL); - return ebl->set_initial_registers_sample (regs, n_regs, regs_mask, abi, setfunc, arg); + /* If set_initial_registers_sample is defined for this arch, use it.
*/ + if (ebl->set_initial_registers_sample != NULL) + return ebl->set_initial_registers_sample (regs, n_regs, + regs_mapping, n_regs_mapping, + setfunc, arg); + + /* No backend-specific method: use the generic strategy of filling a + zero-initialized, contiguous dwarf_regs array from REGS via + REGS_MAPPING and passing it to SETFUNC in one call: */ + Dwarf_Word dwarf_regs[64]; + assert (ebl->frame_nregs < 64); + size_t i; + for (i = 0; i < ebl->frame_nregs; i++) + dwarf_regs[i] = 0x0; + for (i = 0; i < n_regs; i++) + { + /* REGS_MAPPING has only N_REGS_MAPPING entries; never index past them. */ + if (i >= n_regs_mapping) + break; + /* Skip entries whose mapping is negative or not below frame_nregs. */ + if (regs_mapping[i] < 0 || regs_mapping[i] >= (int)ebl->frame_nregs) + continue; + dwarf_regs[regs_mapping[i]] = regs[i]; + } + return setfunc (0, ebl->frame_nregs, dwarf_regs, arg); +} + +bool +ebl_sample_perf_regs_mapping (Ebl *ebl, + uint64_t perf_regs_mask, uint32_t abi, + const int **regs_mapping, size_t *n_regs_mapping) +{ + /* If sample_perf_regs_mapping is unsupported then PERF_FRAME_REGS_MASK is zero. */ + assert (ebl->sample_perf_regs_mapping != NULL); + return ebl->sample_perf_regs_mapping (ebl, perf_regs_mask, abi, + regs_mapping, n_regs_mapping); } uint64_t diff --git a/libebl/libebl.h b/libebl/libebl.h index a64d70e9c..5b0e70001 100644 --- a/libebl/libebl.h +++ b/libebl/libebl.h @@ -340,32 +340,46 @@ extern bool ebl_set_initial_registers_tid (Ebl *ebl, extern size_t ebl_frame_nregs (Ebl *ebl) __nonnull_attribute__ (1); -/* Callback to set process data from a linux perf_events sample. - EBL architecture has to have EBL_PERF_FRAME_REGS_MASK > 0, otherwise the - backend doesn't support unwinding from perf_events sample data. */ +/* Callback to set process data from a register sample. For each item + in REGS, the REGS_MAPPING array specifies its position in the full + register file expected by the DWARF infrastructure.
*/ extern bool ebl_set_initial_registers_sample (Ebl *ebl, - const Dwarf_Word *regs, uint32_t n_regs, - uint64_t regs_mask, uint32_t abi, + const Dwarf_Word *regs, + uint32_t n_regs, + const int *regs_mapping, + size_t n_regs_mapping, ebl_tid_registers_t *setfunc, void *arg) __nonnull_attribute__ (1, 2, 6); -/* Extract the stack address from a perf_events register sample. */ -Dwarf_Word ebl_sample_base_addr (Ebl *ebl, - const Dwarf_Word *regs, uint32_t n_regs, - uint64_t regs_mask, uint32_t abi) - __nonnull_attribute__ (1, 2); - -/* Extract the instruction pointer from a perf_events register sample. */ -Dwarf_Word ebl_sample_pc (Ebl *ebl, - const Dwarf_Word *regs, uint32_t n_regs, - uint64_t regs_mask, uint32_t abi) - __nonnull_attribute__ (1, 2); - +/* Extract stack address SP and instruction pointer PC from a register + sample. For each item in REGS, the REGS_MAPPING array specifies + its position in the full register file expected by the DWARF + infrastructure. The backend must provide the sample_sp_pc hook. */ +extern bool ebl_sample_sp_pc (Ebl *ebl, + const Dwarf_Word *regs, uint32_t n_regs, + const int *regs_mapping, size_t n_regs_mapping, + Dwarf_Word *sp, Dwarf_Word *pc) + __nonnull_attribute__ (1, 2, 4); + +/* Translate from linux perf_events PERF_REGS_MASK and ABI to a generic + REGS_MAPPING array for use with ebl_set_initial_registers_sample(). + EBL architecture has to have EBL_PERF_FRAME_REGS_MASK > 0, + otherwise the backend doesn't support unwinding from perf_events + sample data. The PERF_REGS_MASK and REGS_MAPPING are likely but + not guaranteed to stay constant throughout a profiling session, and + so the result is cached in the Ebl and only recomputed if an + unexpected PERF_REGS_MASK is passed to this function.
*/ +extern bool ebl_sample_perf_regs_mapping (Ebl *ebl, + uint64_t perf_regs_mask, + uint32_t abi, + const int **regs_mapping, + size_t *n_regs_mapping) + __nonnull_attribute__ (1, 4, 5); /* Preferred sample_regs_user mask to request from linux perf_events to allow unwinding on EBL architecture. Omitting some of these - registers may result in failed or inaccurate unwinding. */ + registers may result in failed or inaccurate unwinding. */ extern uint64_t ebl_perf_frame_regs_mask (Ebl *ebl) __nonnull_attribute__ (1); diff --git a/libebl/libeblP.h b/libebl/libeblP.h index be14cc20c..348da49e5 100644 --- a/libebl/libeblP.h +++ b/libebl/libeblP.h @@ -65,6 +65,13 @@ struct ebl perf_events sample data iff PERF_FRAME_REGS_MASK > 0. */ uint64_t perf_frame_regs_mask; + /* A cached mapping from a specified linux perf_events regs_mask to + the corresponding regs_mapping array (freed by ebl_closebackend), to reduce + ebl_sample_perf_regs_mapping() recomputations. */ + uint64_t cached_perf_regs_mask; + int *cached_regs_mapping; + size_t cached_n_regs_mapping; + /* Offset to apply to the value of the return_address_register, as fetched from a Dwarf CFI. This is used by some backends, where the return_address_register actually contains the call -- 2.47.3