git.ipfire.org Git - thirdparty/elfutils.git/commitdiff
libdwfl_stacktrace + libebl: dwflst_sample_getframes non-perf api
author Serhei Makarov <serhei@serhei.io>
Fri, 24 Oct 2025 14:26:03 +0000 (10:26 -0400)
committer Aaron Merey <amerey@redhat.com>
Fri, 24 Oct 2025 15:05:01 +0000 (11:05 -0400)
This patch adds a generic dwflst_sample_getframes() API that does not
depend on perf_events concepts, in particular the
linux-kernel-specific enum defining the perf_regs_mask register order.
This involves reworking the register-handling backend to use
regs_mapping arrays rather than perf_regs_mask, and providing a way to
translate perf_regs_mask to regs_mapping.

A regs_mapping array, for each item in a provided regs[] array,
specifies its position in the full register file expected by the DWARF
functionality.

* libdwfl_stacktrace/Makefile.am: Rename dwflst_sample_frame.c from
  dwflst_perf_frame.c.
* libdwfl_stacktrace/libdwfl_stacktrace.h (dwflst_sample_getframes):
  New function providing unwinding functionality with a regs_mapping
  array rather than a linux-kernel-dependent perf_regs_mask.
* libdw/libdw.map (ELFUTILS_0.194_EXPERIMENTAL): Add dwflst_sample_getframes.
* libdwfl_stacktrace/dwflst_sample_frame.c: Renamed from
  dwflst_perf_frame.c. Remove linux/perf_event.h dependency.
  (struct sample_info): Rename from perf_sample_info, include
  regs_mapping field, replace abi with elfclass field.
  (sample_next_thread): Renamed struct sample_info.
  (sample_getthread): Renamed struct sample_info.
  (copy_word): Use elfclass instead of perf abi field.
  (elf_memory_read): Renamed struct sample_info, use elfclass.
  (sample_memory_read): Renamed struct sample_info, use elfclass.
  (sample_set_initial_registers): Renamed struct sample_info,
  pass regs_mapping to ebl_set_initial_registers_sample.
  (dwflst_sample_getframes): New function.
  (dwflst_perf_sample_getframes): Reimplement in terms of
  dwflst_sample_getframes and ebl_sample_perf_regs_mapping.
* libebl/ebl-hooks.h (set_initial_registers_sample): Now
  takes regs_mapping instead of regs_mask.
  (sample_base_addr): Removed.
  (sample_pc): Removed.
  (sample_sp_pc): New function combining the removed functions for
  efficiency.
  (sample_perf_regs_mapping): New function translating
  perf_regs_mask to regs_mapping array.
* libebl/eblinitreg_sample.c (ebl_sample_base_addr): Removed.
  (ebl_sample_pc): Removed.
  (ebl_sample_sp_pc): New function.
  (ebl_set_initial_registers_sample): Take regs_mapping, provide
  a default implementation for contiguous dwarf_regs array.
  (ebl_sample_perf_regs_mapping): New function.
* libebl/eblclosebackend.c (ebl_closebackend):
  Free cached_regs_mapping.
* libebl/libebl.h (ebl_set_initial_registers_sample): Now takes
  regs_mapping instead of regs_mask.
  (ebl_sample_base_addr): Removed.
  (ebl_sample_pc): Removed.
  (ebl_sample_sp_pc): New function.
  (ebl_sample_perf_regs_mapping): New function.
* libebl/libeblP.h (struct ebl): Add caching fields to remove the
  need to repeat a sample_perf_regs_mapping() computation for
  every frame when the perf_regs_mask is consistent.
* backends/Makefile.am: Remove no-longer-needed linux-perf-regs.c.
* backends/i386_init.c (i386_init): Renamed sample_* functions,
  added cached_regs_mapping and related fields/functions.
* backends/i386_initreg_sample.c (i386_sample_base_addr): Removed.
  (i386_sample_pc): Removed.
  (i386_sample_sp_pc): New function combining the removed functions.
  (i386_set_initial_registers_sample): Removed.
  (i386_sample_perf_regs_mapping): New function translating
  perf_regs_mask to regs_mapping array.
* backends/linux-perf-regs.c: Removed as perf_sample_find_reg is no
  longer needed.
* backends/x86_64_init.c (x86_64_init): Renamed sample_* functions,
  added cached_regs_mapping and related fields/functions.
* backends/x86_64_initreg_sample.c (x86_64_sample_base_addr): Removed.
  (x86_64_sample_pc): Removed.
  (x86_64_sample_sp_pc): New function combining the removed functions.
  (x86_64_set_initial_registers_sample): Removed.
  (x86_64_sample_perf_regs_mapping): New function translating
  perf_regs_mask to regs_mapping array.
* backends/x86_initreg_sample.c (x86_set_initial_registers_sample):
  Removed.
  (x86_sample_sp_pc): New function.
  (x86_sample_perf_regs_mapping): New function translating
  perf_regs_mask to regs_mapping array.

Signed-off-by: Serhei Makarov <serhei@serhei.io>
16 files changed:
backends/Makefile.am
backends/i386_init.c
backends/i386_initreg_sample.c
backends/linux-perf-regs.c [deleted file]
backends/x86_64_init.c
backends/x86_64_initreg_sample.c
backends/x86_initreg_sample.c
libdw/libdw.map
libdwfl_stacktrace/Makefile.am
libdwfl_stacktrace/dwflst_sample_frame.c [moved from libdwfl_stacktrace/dwflst_perf_frame.c with 73% similarity]
libdwfl_stacktrace/libdwfl_stacktrace.h
libebl/ebl-hooks.h
libebl/eblclosebackend.c
libebl/eblinitreg_sample.c
libebl/libebl.h
libebl/libeblP.h

index 8ccbdb5053764f2bc11f0d6532d88cb089079473..7a820df0482766802f4dd7e851ea4d67d7a26869 100644 (file)
@@ -121,7 +121,7 @@ am_libebl_backends_pic_a_OBJECTS = $(libebl_backends_a_SOURCES:.c=.os)
 
 noinst_HEADERS = libebl_CPU.h libebl_PERF_FLAGS.h common-reloc.c \
            linux-core-note.c x86_corenote.c \
-           linux-perf-regs.c x86_initreg_sample.c
+           x86_initreg_sample.c
 
 EXTRA_DIST = $(modules:=_reloc.def)
 
index e64ef6edfc341d66659fd5c528aee319d0d28c5c..a980e71ac5949f2bf903c6631b43a3b7c2647e1f 100644 (file)
@@ -60,10 +60,13 @@ i386_init (Elf *elf __attribute__ ((unused)),
      (Likely an artifact of reusing that header between i386/x86_64.)  */
   eh->frame_nregs = 9;
   HOOK (eh, set_initial_registers_tid);
-  HOOK (eh, set_initial_registers_sample);
-  HOOK (eh, sample_base_addr);
-  HOOK (eh, sample_pc);
+  /* set_initial_registers_sample is default ver */
+  HOOK (eh, sample_sp_pc);
+  HOOK (eh, sample_perf_regs_mapping);
   eh->perf_frame_regs_mask = PERF_FRAME_REGISTERS_I386;
+  eh->cached_perf_regs_mask = 0;
+  eh->cached_regs_mapping = NULL;
+  eh->cached_n_regs_mapping = -1;
   HOOK (eh, unwind);
 
   return eh;
index 677393c915d6f37b1181d3be689d2c30d45d03c5..94955191fd042338677a8935ec7a6846e46cdc02 100644 (file)
@@ -31,6 +31,7 @@
 #endif
 
 #include <stdlib.h>
+#include <assert.h>
 #if (defined __i386__ || defined __x86_64__) && defined(__linux__)
 # include <linux/perf_event.h>
 # include <asm/perf_regs.h>
 #include "libebl_CPU.h"
 #include "libebl_PERF_FLAGS.h"
 #if (defined __i386__ || defined __x86_64__) && defined(__linux__)
-# include "linux-perf-regs.c"
 # include "x86_initreg_sample.c"
 #endif
 
-/* Register ordering cf. linux arch/x86/include/uapi/asm/perf_regs.h,
-   enum perf_event_x86_regs: */
-Dwarf_Word
-i386_sample_base_addr (const Dwarf_Word *regs, uint32_t n_regs,
-                      uint64_t regs_mask,
-                      /* XXX hypothetically needed if abi varies
-                         between samples in the same process;
-                         not needed on x86 */
-                      uint32_t abi __attribute__((unused)))
-{
-#if (!defined __i386__ && !defined __x86_64__) || !defined(__linux__)
-  (void)regs;
-  (void)n_regs;
-  (void)regs_mask;
-  return 0;
-#else /* __i386__ || __x86_64__ */
-  (void)regs;
-  (void)n_regs;
-  (void)regs_mask;
-  return perf_sample_find_reg (regs, n_regs, regs_mask,
-                              7 /* index into perf_event_x86_regs */);
-#endif
-}
-
-Dwarf_Word
-i386_sample_pc (const Dwarf_Word *regs, uint32_t n_regs,
-               uint64_t regs_mask,
-               uint32_t abi __attribute__((unused)))
+bool
+i386_sample_sp_pc (const Dwarf_Word *regs, uint32_t n_regs,
+                   const int *regs_mapping, uint32_t n_regs_mapping,
+                   Dwarf_Word *sp, Dwarf_Word *pc)
 {
-#if (!defined __i386__ && !defined __x86_64__) || !defined(__linux__)
-  (void)regs;
-  (void)n_regs;
-  (void)regs_mask;
-  return 0;
-#else /* __i386__ || __x86_64__ */
-  return perf_sample_find_reg (regs, n_regs, regs_mask,
-                              8 /* index into perf_event_x86_regs */);
-#endif
+  /* XXX for dwarf_regs indices, compare i386_initreg.c */
+  return x86_sample_sp_pc (regs, n_regs, regs_mapping, n_regs_mapping,
+                          sp, 4 /* index of sp in dwarf_regs */,
+                          pc, 8 /* index of pc in dwarf_regs */);
 }
 
 bool
-i386_set_initial_registers_sample (const Dwarf_Word *regs, uint32_t n_regs,
-                                  uint64_t regs_mask, uint32_t abi,
-                                  ebl_tid_registers_t *setfunc,
-                                  void *arg)
+i386_sample_perf_regs_mapping (Ebl *ebl,
+                              uint64_t perf_regs_mask, uint32_t abi,
+                              const int **regs_mapping,
+                              size_t *n_regs_mapping)
 {
-#if (!defined __i386__ && !defined __x86_64__) || !defined(__linux__)
-  (void)regs;
-  (void)n_regs;
-  (void)regs_mask;
-  (void)abi;
-  (void)setfunc;
-  (void)arg;
-  return false;
-#else /* __i386__ || __x86_64__ */
-  Dwarf_Word dwarf_regs[9];
-  if (!x86_set_initial_registers_sample (regs, n_regs, regs_mask,
-                                        abi, dwarf_regs, 9))
-    return false;
-  return setfunc (0, 9, dwarf_regs, arg);
-#endif
+  return x86_sample_perf_regs_mapping (ebl, perf_regs_mask, abi,
+                                      regs_mapping, n_regs_mapping);
 }
diff --git a/backends/linux-perf-regs.c b/backends/linux-perf-regs.c
deleted file mode 100644 (file)
index 22ad67c..0000000
+++ /dev/null
@@ -1,48 +0,0 @@
-/* Common pieces for handling registers in a linux perf_events sample.
-   Copyright (C) 2025 Red Hat, Inc.
-   This file is part of elfutils.
-
-   This file is free software; you can redistribute it and/or modify
-   it under the terms of either
-
-     * the GNU Lesser General Public License as published by the Free
-       Software Foundation; either version 3 of the License, or (at
-       your option) any later version
-
-   or
-
-     * the GNU General Public License as published by the Free
-       Software Foundation; either version 2 of the License, or (at
-       your option) any later version
-
-   or both in parallel, as here.
-
-   elfutils is distributed in the hope that it will be useful, but
-   WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   General Public License for more details.
-
-   You should have received copies of the GNU General Public License and
-   the GNU Lesser General Public License along with this program.  If
-   not, see <http://www.gnu.org/licenses/>.  */
-
-static Dwarf_Word
-perf_sample_find_reg (const Dwarf_Word *regs, uint32_t n_regs,
-                     uint64_t regs_mask,
-                     int target)
-{
-  int j, k; uint64_t bit;
-  for (j = 0, k = 0, bit = 1; k < PERF_REG_X86_64_MAX; k++, bit <<= 1)
-    {
-      if (bit & regs_mask) {
-       if (n_regs <= (uint32_t) j)
-         return 0; /* regs_mask count doesn't match n_regs */
-       if (k == target)
-         return regs[j];
-       if (k > target)
-         return 0; /* regs_mask doesn't include desired reg */
-       j++;
-      }
-    }
-  return 0;
-}
index 6a1cbc4b9b6467ed08b0ff6703c5ffb681320cc9..5f929758e653257d831bad6f708605be5db27ca4 100644 (file)
@@ -63,10 +63,13 @@ x86_64_init (Elf *elf __attribute__ ((unused)),
   /* gcc/config/ #define DWARF_FRAME_REGISTERS.  */
   eh->frame_nregs = 17;
   HOOK (eh, set_initial_registers_tid);
-  HOOK (eh, set_initial_registers_sample);
-  HOOK (eh, sample_base_addr);
-  HOOK (eh, sample_pc);
+  /* set_initial_registers_sample is default ver */
+  HOOK (eh, sample_sp_pc);
+  HOOK (eh, sample_perf_regs_mapping);
   eh->perf_frame_regs_mask = PERF_FRAME_REGISTERS_X86_64;
+  eh->cached_perf_regs_mask = 0;
+  eh->cached_regs_mapping = NULL;
+  eh->cached_n_regs_mapping = -1;
   HOOK (eh, unwind);
   HOOK (eh, check_reloc_target_type);
 
index 48d14bc86b33fefd2c597a3f1e4659a36e888612..9dd708c96cd97c999ba3bf77c29fb27a6e290522 100644 (file)
@@ -31,6 +31,7 @@
 #endif
 
 #include <stdlib.h>
+#include <assert.h>
 #if defined(__x86_64__) && defined(__linux__)
 # include <linux/perf_event.h>
 # include <asm/perf_regs.h>
 #include "libebl_CPU.h"
 #include "libebl_PERF_FLAGS.h"
 #if defined(__x86_64__) && defined(__linux__)
-# include "linux-perf-regs.c"
 # include "x86_initreg_sample.c"
 #endif
 
-/* Register ordering cf. linux arch/x86/include/uapi/asm/perf_regs.h,
-   enum perf_event_x86_regs: */
-Dwarf_Word
-x86_64_sample_base_addr (const Dwarf_Word *regs, uint32_t n_regs,
-                        uint64_t regs_mask,
-                        /* XXX hypothetically needed if abi varies
-                           between samples in the same process;
-                           not needed on x86*/
-                        uint32_t abi __attribute__((unused)))
-{
-#if !defined(__x86_64__) || !defined(__linux__)
-  (void)regs;
-  (void)n_regs;
-  (void)regs_mask;
-  return 0;
-#else /* __x86_64__ */
-  return perf_sample_find_reg (regs, n_regs, regs_mask,
-                              7 /* index into perf_event_x86_regs */);
-#endif
-}
-
-Dwarf_Word
-x86_64_sample_pc (const Dwarf_Word *regs, uint32_t n_regs,
-                 uint64_t regs_mask,
-                 uint32_t abi __attribute__((unused)))
+bool
+x86_64_sample_sp_pc (const Dwarf_Word *regs, uint32_t n_regs,
+                    const int *regs_mapping, uint32_t n_regs_mapping,
+                    Dwarf_Word *sp, Dwarf_Word *pc)
 {
-#if !defined(__x86_64__) || !defined(__linux__)
-  (void)regs;
-  (void)n_regs;
-  (void)regs_mask;
-  return 0;
-#else /* __x86_64__ */
-  return perf_sample_find_reg (regs, n_regs, regs_mask,
-                              8 /* index into perf_event_x86_regs */);
-#endif
+  /* XXX for dwarf_regs indices, compare x86_64_initreg.c */
+  return x86_sample_sp_pc (regs, n_regs, regs_mapping, n_regs_mapping,
+                          sp, 7 /* index of sp in dwarf_regs */,
+                          pc, 16 /* index of pc in dwarf_regs */);
 }
 
 bool
-x86_64_set_initial_registers_sample (const Dwarf_Word *regs, uint32_t n_regs,
-                                    uint64_t regs_mask, uint32_t abi,
-                                    ebl_tid_registers_t *setfunc,
-                                    void *arg)
+x86_64_sample_perf_regs_mapping (Ebl *ebl,
+                                uint64_t perf_regs_mask, uint32_t abi,
+                                const int **regs_mapping,
+                                size_t *n_regs_mapping)
 {
-#if !defined(__x86_64__) || !defined(__linux__)
-  (void)regs;
-  (void)n_regs;
-  (void)regs_mask;
-  (void)abi;
-  (void)setfunc;
-  (void)arg;
-  return false;
-#else /* __x86_64__ */
-  Dwarf_Word dwarf_regs[17];
-  if (!x86_set_initial_registers_sample (regs, n_regs, regs_mask,
-                                        abi, dwarf_regs, 9))
-    return false;
-  return setfunc (0, 17, dwarf_regs, arg);
-#endif
+  return x86_sample_perf_regs_mapping (ebl, perf_regs_mask, abi,
+                                      regs_mapping, n_regs_mapping);
 }
-
index 8d6b471b067216aa93f819ad3dc4bc7331f5667a..47cd91c238125152fdf1deba2444bf37107180e4 100644 (file)
@@ -1,4 +1,4 @@
-/* x86 linux perf_events register handling, pieces common to x86-64 and i386.
+/* x86 stack sample register handling, pieces common to x86-64 and i386.
    Copyright (C) 2025 Red Hat, Inc.
    This file is part of elfutils.
 
    not, see <http://www.gnu.org/licenses/>.  */
 
 static bool
-x86_set_initial_registers_sample (const Dwarf_Word *regs, uint32_t n_regs,
-                                 uint64_t regs_mask, uint32_t abi,
-                                 Dwarf_Word *dwarf_regs, int expected_regs)
+x86_sample_sp_pc (const Dwarf_Word *regs, uint32_t n_regs,
+                 const int *regs_mapping, uint32_t n_regs_mapping,
+                 Dwarf_Word *sp, uint sp_index /* into dwarf_regs */,
+                 Dwarf_Word *pc, uint pc_index /* into dwarf_regs */)
 {
-#if (!defined __i386__ && !defined __x86_64__) || !defined(__linux__)
+  if (sp != NULL) *sp = 0;
+  if (pc != NULL) *pc = 0;
+#if !defined(__x86_64__)
+  (void)regs;
+  (void)n_regs;
+  (void)regs_mapping;
+  (void)n_regs_mapping;
   return false;
-#else /* __i386__ || __x86_64__ */
+#else /* __x86_64__ */
+  /* TODO: Register locations could be cached and rechecked on a
+     fastpath without needing to loop? */
+  int j, need_sp = (sp != NULL), need_pc = (pc != NULL);
+  for (j = 0; (need_sp || need_pc) && n_regs_mapping > (uint32_t)j; j++)
+    {
+      if (n_regs < (uint32_t)j) break;
+      if (need_sp && regs_mapping[j] == (int)sp_index)
+       {
+         *sp = regs[j]; need_sp = false;
+       }
+      if (need_pc && regs_mapping[j] == (int)pc_index)
+       {
+         *pc = regs[j]; need_pc = false;
+       }
+    }
+  return (!need_sp && !need_pc);
+#endif
+}
+
+static bool
+x86_sample_perf_regs_mapping (Ebl *ebl,
+                             uint64_t perf_regs_mask, uint32_t abi,
+                             const int **regs_mapping,
+                             size_t *n_regs_mapping)
+{
+  if (perf_regs_mask != 0 && ebl->cached_perf_regs_mask == perf_regs_mask)
+    {
+      *regs_mapping = ebl->cached_regs_mapping;
+      *n_regs_mapping = ebl->cached_n_regs_mapping;
+      return true;
+    }
+
   /* The following facts are needed to translate x86 registers correctly:
      - perf register order seen in linux arch/x86/include/uapi/asm/perf_regs.h
        The registers array is built in the same order as the enum!
@@ -52,39 +91,58 @@ x86_set_initial_registers_sample (const Dwarf_Word *regs, uint32_t n_regs,
   bool is_abi32 = (abi == PERF_SAMPLE_REGS_ABI_32);
 
   /* Locations of dwarf_regs in the perf_event_x86_regs enum order,
-     not the regs[i] array (which will include a subset of the regs): */
+     not the regs[] array (which will include a subset of the regs):  */
   static const int regs_i386[] = {0, 2, 3, 1, 7/*sp*/, 6, 4, 5, 8/*ip*/};
   static const int regs_x86_64[] = {0, 3, 2, 1, 4, 5, 6, 7/*sp*/,
                                    16/*r8 after flags+segment*/, 17, 18, 19, 20, 21, 22, 23,
                                    8/*ip*/};
   const int *dwarf_to_perf = is_abi32 ? regs_i386 : regs_x86_64;
 
-  /* Locations of perf_regs in the regs[] array, according to regs_mask: */
-  int perf_to_regs[PERF_REG_X86_64_MAX];
-  uint64_t expected_mask = is_abi32 ? PERF_FRAME_REGISTERS_I386 : PERF_FRAME_REGISTERS_X86_64;
-  int j, k; uint64_t bit;
-  /* TODO: Is it worth caching this perf_to_regs computation as long
-     as regs_mask is kept the same across repeated calls? */
-  for (j = 0, k = 0, bit = 1; k < PERF_REG_X86_64_MAX; k++, bit <<= 1)
+  /* Count bits and allocate regs_mapping:  */
+  int j, k, kmax, count; uint64_t bit;
+  for (k = 0, kmax = -1, count = 0, bit = 1;
+       k < PERF_REG_X86_64_MAX; k++, bit <<= 1)
     {
-      if ((bit & expected_mask) && (bit & regs_mask)) {
-       if (n_regs <= (uint32_t)j)
-         return false; /* regs_mask count doesn't match n_regs */
-       perf_to_regs[k] = j;
-       j++;
-      } else {
-       perf_to_regs[k] = -1;
+      if ((bit & perf_regs_mask)) {
+       count++;
+       kmax = k;
       }
     }
+  ebl->cached_perf_regs_mask = perf_regs_mask;
+  ebl->cached_regs_mapping = (int *)calloc (count, sizeof(int));
+  ebl->cached_n_regs_mapping = count;
 
-  for (int i = 0; i < expected_regs; i++)
+  /* Locations of perf_regs in the regs[] array, according to
+     perf_regs_mask:  */
+  int perf_to_regs[PERF_REG_X86_64_MAX];
+  uint64_t expected_mask = is_abi32 ?
+    PERF_FRAME_REGISTERS_I386 : PERF_FRAME_REGISTERS_X86_64;
+  for (j = 0, k = 0, bit = 1; k <= kmax; k++, bit <<= 1)
+    {
+      if ((bit & expected_mask) && (bit & perf_regs_mask))
+       {
+         perf_to_regs[k] = j;
+         j++;
+       }
+      else
+       {
+         perf_to_regs[k] = -1;
+       }
+    }
+  if (j > (int)ebl->cached_n_regs_mapping)
+      return false;
+
+  /* Locations of perf_regs in the dwarf_regs array, according to
+     perf_regs_mask and perf_to_regs[]:  */
+  for (size_t i = 0; i < ebl->frame_nregs; i++)
     {
       k = dwarf_to_perf[i];
       j = perf_to_regs[k];
       if (j < 0) continue;
-      if (n_regs <= (uint32_t)j) continue;
-      dwarf_regs[i] = regs[j];
+      ebl->cached_regs_mapping[j] = i;
     }
+
+  *regs_mapping = ebl->cached_regs_mapping;
+  *n_regs_mapping = ebl->cached_n_regs_mapping;
   return true;
-#endif /* __i386__ || __x86_64__ */
 }
index 137b5738339f40a3c3f6f5fea6e2288b0f66e0a0..b45647e6fa4c499762c0180da8f9337ceeaf764f 100644 (file)
@@ -395,7 +395,6 @@ ELFUTILS_0.193 {
 /* XXX Experimental libdwfl_stacktrace API. */
 ELFUTILS_0.193_EXPERIMENTAL {
   global:
-    dwflst_perf_sample_preferred_regs_mask;
     dwflst_perf_sample_preferred_regs_mask;
     dwflst_tracker_begin;
     dwflst_tracker_dwfl_begin;
@@ -407,3 +406,8 @@ ELFUTILS_0.193_EXPERIMENTAL {
     dwflst_tracker_find_pid;
     dwflst_perf_sample_getframes;
 };
+
+ELFUTILS_0.194_EXPERIMENTAL {
+  global:
+    dwflst_sample_getframes;
+} ELFUTILS_0.193_EXPERIMENTAL;
index 99a80b5c0804ff59c118bb094d86ad81f44b5e7a..b92421291acba784c8935a9524e51a82b172571b 100644 (file)
@@ -45,7 +45,7 @@ libdwfl_stacktrace_a_SOURCES = dwflst_process_tracker.c \
                               dwflst_tracker_elftab.c \
                               dwflst_tracker_dwfltab.c \
                               libdwfl_stacktrace_next_prime.c \
-                              dwflst_perf_frame.c
+                              dwflst_sample_frame.c
 
 libdwfl_stacktrace = $(libdw)
 libdw = ../libdw/libdw.so
similarity index 73%
rename from libdwfl_stacktrace/dwflst_perf_frame.c
rename to libdwfl_stacktrace/dwflst_sample_frame.c
index 4fc60183deef2ceed0a328e897d6a61c48634e28..090d322084c623d68381956a769a7579dafea45b 100644 (file)
 # include <config.h>
 #endif
 
-#if defined(__linux__)
-# include <linux/perf_event.h>
-#else
-/* XXX required by copy_word() below */
-enum perf_sample_regs_abi {
-    PERF_SAMPLE_REGS_ABI_NONE       = 0,
-    PERF_SAMPLE_REGS_ABI_32         = 1,
-    PERF_SAMPLE_REGS_ABI_64         = 2,
-};
-#endif
-
 #include "libdwfl_stacktraceP.h"
 
 Ebl *default_ebl = NULL;
@@ -67,7 +56,7 @@ uint64_t dwflst_perf_sample_preferred_regs_mask (GElf_Half machine)
   return 0;
 }
 
-struct perf_sample_info {
+struct sample_info {
   pid_t pid;
   pid_t tid;
   Dwarf_Addr base_addr;
@@ -75,8 +64,9 @@ struct perf_sample_info {
   size_t stack_size;
   const Dwarf_Word *regs;
   uint n_regs;
-  uint64_t perf_regs_mask;
-  uint abi;
+  const int *regs_mapping;
+  size_t n_regs_mapping;
+  int elfclass;
   Dwarf_Addr pc;
 };
 
@@ -88,8 +78,8 @@ static pid_t
 sample_next_thread (Dwfl *dwfl __attribute__ ((unused)), void *dwfl_arg,
                    void **thread_argp)
 {
-  struct perf_sample_info *sample_arg =
-    (struct perf_sample_info *)dwfl_arg;
+  struct sample_info *sample_arg =
+    (struct sample_info *)dwfl_arg;
   if (*thread_argp == NULL)
     {
       *thread_argp = (void *)0xea7b3375;
@@ -104,8 +94,8 @@ static bool
 sample_getthread (Dwfl *dwfl __attribute__ ((unused)), pid_t tid,
                  void *dwfl_arg, void **thread_argp)
 {
-  struct perf_sample_info *sample_arg =
-    (struct perf_sample_info *)dwfl_arg;
+  struct sample_info *sample_arg =
+    (struct sample_info *)dwfl_arg;
   *thread_argp = (void *)sample_arg;
   if (sample_arg->tid != tid)
     {
@@ -127,10 +117,10 @@ sample_getthread (Dwfl *dwfl __attribute__ ((unused)), pid_t tid,
   else \
     memcpy ((result), (d), sizeof (uint32_t));
 
-#define copy_word(result, d, abi) \
-  if ((abi) == PERF_SAMPLE_REGS_ABI_64)        \
+#define copy_word(result, d, elfclass) \
+  if ((elfclass) == ELFCLASS64)        \
     { copy_word_64((result), (d)); } \
-  else if ((abi) == PERF_SAMPLE_REGS_ABI_32) \
+  else if ((elfclass) == ELFCLASS32) \
     { copy_word_32((result), (d)); } \
   else \
     *(result) = 0;
@@ -138,8 +128,8 @@ sample_getthread (Dwfl *dwfl __attribute__ ((unused)), pid_t tid,
 static bool
 elf_memory_read (Dwfl *dwfl, Dwarf_Addr addr, Dwarf_Word *result, void *arg)
 {
-  struct perf_sample_info *sample_arg =
-    (struct perf_sample_info *)arg;
+  struct sample_info *sample_arg =
+    (struct sample_info *)arg;
   Dwfl_Module *mod = INTUSE(dwfl_addrmodule) (dwfl, addr);
   Dwarf_Addr bias;
   Elf_Scn *section = INTUSE(dwfl_module_address_section) (mod, &addr, &bias);
@@ -153,7 +143,7 @@ elf_memory_read (Dwfl *dwfl, Dwarf_Addr addr, Dwarf_Word *result, void *arg)
   Elf_Data *data = elf_getdata(section, NULL);
   if (data && data->d_buf && data->d_size > addr) {
     uint8_t *d = ((uint8_t *)data->d_buf) + addr;
-    copy_word(result, d, sample_arg->abi);
+    copy_word(result, d, sample_arg->elfclass);
     return true;
   }
   __libdwfl_seterrno(DWFL_E_ADDR_OUTOFRANGE);
@@ -163,36 +153,37 @@ elf_memory_read (Dwfl *dwfl, Dwarf_Addr addr, Dwarf_Word *result, void *arg)
 static bool
 sample_memory_read (Dwfl *dwfl, Dwarf_Addr addr, Dwarf_Word *result, void *arg)
 {
-  struct perf_sample_info *sample_arg =
-    (struct perf_sample_info *)arg;
+  struct sample_info *sample_arg =
+    (struct sample_info *)arg;
   /* Imitate read_cached_memory() with the stack sample data as the cache. */
   if (addr < sample_arg->base_addr ||
       addr - sample_arg->base_addr >= sample_arg->stack_size)
     return elf_memory_read(dwfl, addr, result, arg);
   const uint8_t *d = &sample_arg->stack[addr - sample_arg->base_addr];
-  copy_word(result, d, sample_arg->abi);
+  copy_word(result, d, sample_arg->elfclass);
   return true;
 }
 
+
 static bool
 sample_set_initial_registers (Dwfl_Thread *thread, void *arg)
 {
-  struct perf_sample_info *sample_arg =
-    (struct perf_sample_info *)arg;
+  struct sample_info *sample_arg =
+    (struct sample_info *)arg;
   INTUSE(dwfl_thread_state_register_pc) (thread, sample_arg->pc);
   Dwfl_Process *process = thread->process;
   Ebl *ebl = process->ebl;
   return ebl_set_initial_registers_sample
     (ebl, sample_arg->regs, sample_arg->n_regs,
-     sample_arg->perf_regs_mask, sample_arg->abi,
+     sample_arg->regs_mapping, sample_arg->n_regs_mapping,
      __libdwfl_set_initial_registers_thread, thread);
 }
 
 static void
 sample_detach (Dwfl *dwfl __attribute__ ((unused)), void *dwfl_arg)
 {
-  struct perf_sample_info *sample_arg =
-    (struct perf_sample_info *)dwfl_arg;
+  struct sample_info *sample_arg =
+    (struct sample_info *)dwfl_arg;
   free (sample_arg);
 }
 
@@ -207,18 +198,18 @@ static const Dwfl_Thread_Callbacks sample_thread_callbacks =
   };
 
 int
-dwflst_perf_sample_getframes (Dwfl *dwfl, Elf *elf,
-                             pid_t pid, pid_t tid,
-                             const void *stack, size_t stack_size,
-                             const Dwarf_Word *regs, uint n_regs,
-                             uint64_t perf_regs_mask, uint abi,
-                             int (*callback) (Dwfl_Frame *state, void *arg),
-                             void *arg)
+dwflst_sample_getframes (Dwfl *dwfl, Elf *elf,
+                        pid_t pid, pid_t tid,
+                        const void *stack, size_t stack_size,
+                        const Dwarf_Word *regs, uint n_regs,
+                        const int *regs_mapping, size_t n_regs_mapping,
+                        int (*callback) (Dwfl_Frame *state, void *arg),
+                        void *arg)
 {
   /* TODO: Lock the dwfl to ensure attach_state does not interfere
      with other dwfl_perf_sample_getframes calls. */
 
-  struct perf_sample_info *sample_arg;
+  struct sample_info *sample_arg;
   bool attached = false;
   if (dwfl->process != NULL)
     {
@@ -241,21 +232,53 @@ dwflst_perf_sample_getframes (Dwfl *dwfl, Elf *elf,
   sample_arg->stack_size = stack_size;
   sample_arg->regs = regs;
   sample_arg->n_regs = n_regs;
-  sample_arg->perf_regs_mask = perf_regs_mask;
-  sample_arg->abi = abi;
+  sample_arg->regs_mapping = regs_mapping;
+  sample_arg->n_regs_mapping = n_regs_mapping;
 
   if (! attached
       && ! INTUSE(dwfl_attach_state) (dwfl, elf, pid,
                                      &sample_thread_callbacks, sample_arg))
-      return -1;
+    return -1;
 
-  /* Now that Dwfl is attached, we can access its Ebl: */
   Dwfl_Process *process = dwfl->process;
   Ebl *ebl = process->ebl;
-  sample_arg->base_addr = ebl_sample_base_addr(ebl, regs, n_regs,
-                                              perf_regs_mask, abi);
-  sample_arg->pc = ebl_sample_pc(ebl, regs, n_regs,
-                                perf_regs_mask, abi);
+  sample_arg->elfclass = ebl_get_elfclass(ebl);
+  ebl_sample_sp_pc(ebl, regs, n_regs,
+                   regs_mapping, n_regs_mapping,
+                   &sample_arg->base_addr, &sample_arg->pc);
 
   return INTUSE(dwfl_getthread_frames) (dwfl, tid, callback, arg);
 }
+
+int
+dwflst_perf_sample_getframes (Dwfl *dwfl, Elf *elf,
+                             pid_t pid, pid_t tid,
+                             const void *stack, size_t stack_size,
+                             const Dwarf_Word *regs, uint32_t n_regs,
+                             uint64_t perf_regs_mask, uint32_t abi,
+                             int (*callback) (Dwfl_Frame *state, void *arg),
+                             void *arg)
+{
+  /* Select the regs_mapping based on architecture.  This will be
+     cached in ebl to avoid having to recompute the regs_mapping array
+     when perf_regs_mask is consistent for the entire session: */
+  const int *regs_mapping;
+  size_t n_regs_mapping;
+  Dwfl_Process *process = dwfl->process;
+  Ebl *ebl = process->ebl;
+  /* XXX May want to check if abi matches ebl_get_elfclass(ebl). */
+  if (!ebl_sample_perf_regs_mapping(ebl,
+                                   perf_regs_mask, abi,
+                                   &regs_mapping, &n_regs_mapping))
+    {
+      __libdwfl_seterrno(DWFL_E_LIBEBL_BAD);
+      return -1;
+    }
+
+  /* Then we can call dwflst_sample_getframes: */
+  return dwflst_sample_getframes (dwfl, elf, pid, tid,
+                                 stack, stack_size,
+                                 regs, n_regs,
+                                 regs_mapping, n_regs_mapping,
+                                 callback, arg);
+}
index b236ddc4f66e4d33ae2f6bcc7b42620cc7e9330e..84cb69a30a9e67865884d6e7692355a47ab6c39e 100644 (file)
@@ -113,14 +113,31 @@ extern int dwflst_tracker_linux_proc_find_elf (Dwfl_Module *mod, void **userdata
                                               const char *module_name, Dwarf_Addr base,
                                               char **file_name, Elf **);
 
-
 /* Like dwfl_thread_getframes, but iterates through the frames for a
-   linux perf_events stack sample rather than a live thread.  Calls
-   dwfl_attach_state on DWFL, with architecture specified by ELF, ELF
-   must remain valid during Dwfl lifetime.  Returns zero if all frames
-   have been processed by the callback, returns -1 on error, or the
-   value of the callback when not DWARF_CB_OK.  -1 returned on error
-   will set dwfl_errno ().  */
+   stack sample rather than a live thread.  Register file for the stack
+   sample is specified by REGS and N_REGS.  For each item in REGS, the
+   REGS_MAPPING array specifies its position in the full register file
+   expected by the DWARF infrastructure.  Calls dwfl_attach_state on
+   DWFL, with architecture specified by ELF, ELF must remain valid
+   during Dwfl lifetime.  Returns zero if all frames have been
+   processed by the callback, returns -1 on error, or the value of the
+   callback when not DWARF_CB_OK. -1 returned on error will set
+   dwfl_errno (). */
+int dwflst_sample_getframes (Dwfl *dwfl, Elf *elf, pid_t pid, pid_t tid,
+                                 const void *stack, size_t stack_size,
+                                 const Dwarf_Word *regs, uint32_t n_regs,
+                                 const int *regs_mapping, size_t n_regs_mapping,
+                                 int (*callback) (Dwfl_Frame *state, void *arg),
+                                 void *arg)
+    __nonnull_attribute__ (1, 5, 7, 9, 11);
+
+/* Adapts dwflst_sample_getframes to linux perf_events stack sample
+   and register file data format.  Calls dwfl_attach_state on DWFL,
+   with architecture specified by ELF, ELF must remain valid during
+   Dwfl lifetime.  Returns zero if all frames have been processed by
+   the callback, returns -1 on error, or the value of the callback
+   when not DWARF_CB_OK. -1 returned on error will set dwfl_errno
+   (). */
 int dwflst_perf_sample_getframes (Dwfl *dwfl, Elf *elf, pid_t pid, pid_t tid,
                                  const void *stack, size_t stack_size,
                                  const Dwarf_Word *regs, uint32_t n_regs,
index 05474fbc87a935cd092f029d3affc68104693860..29ce96494ee8f7783fff183b71c724a9d2f51a1a 100644 (file)
@@ -158,21 +158,32 @@ bool EBLHOOK(set_initial_registers_tid) (pid_t tid,
                                         ebl_tid_registers_t *setfunc,
                                         void *arg);
 
-/* Set process data from a perf_events sample and call SETFUNC one or more times.
-   Method should be present only when EBL_PERF_FRAME_REGS_MASK > 0, otherwise the
-   backend doesn't support unwinding from perf_events data.  */
-bool EBLHOOK(set_initial_registers_sample) (const Dwarf_Word *regs, uint32_t n_regs,
-                                           uint64_t regs_mask, uint32_t abi,
+/* Set process data from a register sample and call SETFUNC one or more times.
+   Method should be present only when a 'default' strategy of populating an
+   array of DWARF regs and calling SETFUNC once would be inefficient, e.g.
+   on architectures with sparse/noncontiguous DWARF register files.  */
+bool EBLHOOK(set_initial_registers_sample) (const Dwarf_Word *regs,
+                                           uint32_t n_regs,
+                                           const int *regs_mapping,
+                                           size_t n_regs_mapping,
                                            ebl_tid_registers_t *setfunc,
                                            void *arg);
 
-/* Extract the stack address from a perf_events register sample.  */
-Dwarf_Word EBLHOOK(sample_base_addr) (const Dwarf_Word *regs, uint32_t n_regs,
-                                     uint64_t regs_mask, uint32_t abi);
-
-/* Extract the instruction pointer from a perf_events register sample.  */
-Dwarf_Word EBLHOOK(sample_pc) (const Dwarf_Word *regs, uint32_t n_regs,
-                              uint64_t regs_mask, uint32_t abi);
+/* Extract the stack address and instruction pointer from a register sample.  */
+bool EBLHOOK(sample_sp_pc) (const Dwarf_Word *regs, uint32_t n_regs,
+                           const int *regs_mapping,
+                           uint32_t n_regs_mapping,
+                           Dwarf_Word *sp, Dwarf_Word *pc);
+
+/* Translate from linux perf_events PERF_REGS_MASK and ABI to a generic
+   REGS_MAPPING array for use with ebl_set_initial_registers_sample().
+   Method should be present only when EBL_PERF_FRAME_REGS_MASK > 0,
+   otherwise the backend doesn't support unwinding from perf_events
+   data.  */
+bool EBLHOOK(sample_perf_regs_mapping) (Ebl *ebl,
+                                       uint64_t perf_regs_mask, uint32_t abi,
+                                       const int **regs_mapping,
+                                       size_t *n_regs_mapping);
 
 /* Convert *REGNO as is in DWARF to a lower range suitable for
    Dwarf_Frame->REGS indexing.  */
index 7fa068ec04012e7b132e78eb34499c744c6d1abf..d809707848198bfe9765d0af99849b04f22c746d 100644 (file)
@@ -43,6 +43,10 @@ ebl_closebackend (Ebl *ebl)
       /* Run the destructor.  */
       ebl->destr (ebl);
 
+      /* Free cached_regs_mapping. */
+      if (ebl->cached_regs_mapping != NULL)
+          free (ebl->cached_regs_mapping);
+
       /* Free the resources.  */
       free (ebl);
     }
index 53244d1e7da9a97ebaf8c213f5192555468959ec..d5704dfa60e26f00a97306752aad9b5409d83998 100644 (file)
 #include <libeblP.h>
 #include <assert.h>
 
-Dwarf_Word
-ebl_sample_base_addr (Ebl *ebl,
-                      const Dwarf_Word *regs, uint32_t n_regs,
-                     uint64_t regs_mask, uint32_t abi)
-{
-  assert (ebl->sample_base_addr != NULL);
-  return ebl->sample_base_addr (regs, n_regs, regs_mask, abi);
-}
-
-Dwarf_Word
-ebl_sample_pc (Ebl *ebl,
-              const Dwarf_Word *regs, uint32_t n_regs,
-              uint64_t regs_mask, uint32_t abi)
+bool
+ebl_sample_sp_pc (Ebl *ebl,
+                 const Dwarf_Word *regs, uint32_t n_regs,
+                 const int *regs_mapping, size_t n_regs_mapping,
+                 Dwarf_Word *sp, Dwarf_Word *pc)
 {
-  assert (ebl->sample_pc != NULL);
-  return ebl->sample_pc (regs, n_regs, regs_mask, abi);
+  assert (ebl->sample_sp_pc != NULL); /* No generic fallback here; the backend hook is required.  */
+  return ebl->sample_sp_pc (regs, n_regs,
+                           regs_mapping, n_regs_mapping,
+                           sp, pc);
 }
 
 bool
 ebl_set_initial_registers_sample (Ebl *ebl,
                                  const Dwarf_Word *regs, uint32_t n_regs,
-                                 uint64_t regs_mask, uint32_t abi,
+                                 const int *regs_mapping, size_t n_regs_mapping,
                                  ebl_tid_registers_t *setfunc,
                                  void *arg)
 {
-  /* If set_initial_registers_sample is unsupported then PERF_FRAME_REGS_MASK is zero.  */
-  assert (ebl->set_initial_registers_sample != NULL);
-  return ebl->set_initial_registers_sample (regs, n_regs, regs_mask, abi, setfunc, arg);
+  /* If set_initial_registers_sample is defined for this arch, use it.  */
+  if (ebl->set_initial_registers_sample != NULL)
+      return ebl->set_initial_registers_sample (regs, n_regs,
+                                               regs_mapping, n_regs_mapping,
+                                               setfunc, arg);
+
+  /* If set_initial_registers_sample is unspecified, then it is safe
+     to use the following generic code to populate a contiguous array
+     of dwarf_regs:  */
+  Dwarf_Word dwarf_regs[64];
+  assert (ebl->frame_nregs < 64);
+  size_t i;
+  for (i = 0; i < ebl->frame_nregs; i++)
+    dwarf_regs[i] = 0x0;
+  for (i = 0; i < n_regs; i++)
+    {
+      if (i > n_regs_mapping)
+       break;
+      if (regs_mapping[i] < 0 || regs_mapping[i] >= (int)ebl->frame_nregs)
+       continue;
+      dwarf_regs[regs_mapping[i]] = regs[i];
+    }
+  return setfunc (0, ebl->frame_nregs, dwarf_regs, arg);
+}
+
+bool
+ebl_sample_perf_regs_mapping (Ebl *ebl,
+                             uint64_t perf_regs_mask, uint32_t abi,
+                             const int **regs_mapping, size_t *n_regs_mapping)
+{
+  /* If sample_perf_regs_mapping is unsupported then PERF_FRAME_REGS_MASK
+     is zero.  The backend hook is required here (asserted, no generic
+     fallback); EBL itself is handed to the hook together with the
+     remaining arguments, and the hook's result is returned unchanged.  */
+  assert (ebl->sample_perf_regs_mapping != NULL);
+  return ebl->sample_perf_regs_mapping (ebl, perf_regs_mask, abi,
+                                       regs_mapping, n_regs_mapping);
 }
 
 uint64_t
index a64d70e9c3cafdfde552a8bbd9a5cd5a1ef8fc01..5b0e70001da5c68bf08b4110e97025ce73ba7afb 100644 (file)
@@ -340,32 +340,46 @@ extern bool ebl_set_initial_registers_tid (Ebl *ebl,
 extern size_t ebl_frame_nregs (Ebl *ebl)
   __nonnull_attribute__ (1);
 
-/* Callback to set process data from a linux perf_events sample.
-   EBL architecture has to have EBL_PERF_FRAME_REGS_MASK > 0, otherwise the
-   backend doesn't support unwinding from perf_events sample data.  */
+/* Callback to set process data from a register sample.  For each item
+   in REGS, the REGS_MAPPING array specifies its position in the full
+   register file expected by the DWARF infrastructure.  */
 extern bool ebl_set_initial_registers_sample (Ebl *ebl,
-                                             const Dwarf_Word *regs, uint32_t n_regs,
-                                             uint64_t regs_mask, uint32_t abi,
+                                             const Dwarf_Word *regs,
+                                             uint32_t n_regs,
+                                             const int *regs_mapping,
+                                             size_t n_regs_mapping,
                                              ebl_tid_registers_t *setfunc,
                                              void *arg)
   __nonnull_attribute__ (1, 2, 6);
 
-/* Extract the stack address from a perf_events register sample.  */
-Dwarf_Word ebl_sample_base_addr (Ebl *ebl,
-                                const Dwarf_Word *regs, uint32_t n_regs,
-                                uint64_t regs_mask, uint32_t abi)
-  __nonnull_attribute__ (1, 2);
-
-/* Extract the instruction pointer from a perf_events register sample.  */
-Dwarf_Word ebl_sample_pc (Ebl *ebl,
-                         const Dwarf_Word *regs, uint32_t n_regs,
-                         uint64_t regs_mask, uint32_t abi)
-  __nonnull_attribute__ (1, 2);
-
+/* Extract stack address SP and instruction pointer PC from a register
+   sample.  For each item in REGS, the REGS_MAPPING array specifies
+   its position in the full register file expected by the DWARF
+   infrastructure.  */
+extern bool ebl_sample_sp_pc (Ebl *ebl,
+                             const Dwarf_Word *regs, uint32_t n_regs,
+                             const int *regs_mapping, size_t n_regs_mapping,
+                             Dwarf_Word *sp, Dwarf_Word *pc)
+  __nonnull_attribute__ (1, 2, 4);
+
+/* Translate from linux perf_events PERF_REGS_MASK and ABI to a generic
+   REGS_MAPPING array for use with ebl_set_initial_registers_sample().
+   EBL architecture has to have EBL_PERF_FRAME_REGS_MASK > 0,
+   otherwise the backend doesn't support unwinding from perf_events
+   sample data.  The PERF_REGS_MASK and REGS_MAPPING are likely but
+   not guaranteed to stay constant throughout a profiling session, and
+   so the result is cached in the Ebl and only recomputed if an
+   unexpected PERF_REGS_MASK is passed to this function.  */
+extern bool ebl_sample_perf_regs_mapping (Ebl *ebl,
+                                         uint64_t perf_regs_mask,
+                                         uint32_t abi,
+                                         const int **regs_mapping,
+                                         size_t *n_regs_mapping)
+  __nonnull_attribute__ (1, 4, 5);
 
 /* Preferred sample_regs_user mask to request from linux perf_events
    to allow unwinding on EBL architecture.  Omitting some of these
-   registers may result in failed or inaccurate unwinding. */
+   registers may result in failed or inaccurate unwinding.  */
 extern uint64_t ebl_perf_frame_regs_mask (Ebl *ebl)
   __nonnull_attribute__ (1);
 
index be14cc20c04bae847a2954ca7f13b7a156b64bd7..348da49e58a7522bf7e53d22cc8802fdf9540729 100644 (file)
@@ -65,6 +65,13 @@ struct ebl
      perf_events sample data iff PERF_FRAME_REGS_MASK > 0.  */
   uint64_t perf_frame_regs_mask;
 
+  /* A cached mapping from a specified linux perf_events regs_mask to
+     the corresponding regs_mapping array, to reduce
+     ebl_sample_perf_regs_mapping() recomputations.  */
+  uint64_t cached_perf_regs_mask;
+  int *cached_regs_mapping;
+  size_t cached_n_regs_mapping;
+
   /* Offset to apply to the value of the return_address_register, as
      fetched from a Dwarf CFI.  This is used by some backends, where
      the return_address_register actually contains the call