From 95f71807733f9227b101bf8086f2e891a45a1e35 Mon Sep 17 00:00:00 2001 From: Luca Boccassi Date: Tue, 6 Apr 2021 18:21:48 +0100 Subject: [PATCH] coredump: parse .note.package ELF section Parse the .note.package ELF section for each ELF object contained in a core file, if present. --- src/coredump/stacktrace.c | 210 +++++++++++++++++++++++++++++++++++--- src/coredump/stacktrace.h | 2 + 2 files changed, 196 insertions(+), 16 deletions(-) diff --git a/src/coredump/stacktrace.c b/src/coredump/stacktrace.c index cf4e9baa13b..ac404451d63 100644 --- a/src/coredump/stacktrace.c +++ b/src/coredump/stacktrace.c @@ -3,6 +3,7 @@ #include #include #include +#include #include #include @@ -18,6 +19,7 @@ #define FRAMES_MAX 64 #define THREADS_MAX 64 +#define ELF_PACKAGE_METADATA_ID 0xcafe1a7e struct stack_context { FILE *f; @@ -25,6 +27,8 @@ struct stack_context { Elf *elf; unsigned n_thread; unsigned n_frame; + JsonVariant **package_metadata; + Set **modules; }; static int frame_callback(Dwfl_Frame *frame, void *userdata) { @@ -113,13 +117,123 @@ static int thread_callback(Dwfl_Thread *thread, void *userdata) { return DWARF_CB_OK; } -static int module_callback (Dwfl_Module *mod, void **userdata, const char *name, Dwarf_Addr start, void *arg) -{ - _cleanup_free_ char *id_hex = NULL; +static int parse_package_metadata(const char *name, JsonVariant *id_json, Elf *elf, struct stack_context *c) { + size_t n_program_headers; + int r; + + assert(name); + assert(elf); + assert(c); + + /* When iterating over PT_LOAD we will visit modules more than once */ + if (set_contains(*c->modules, name)) + return DWARF_CB_OK; + + r = elf_getphdrnum(elf, &n_program_headers); + if (r < 0) /* Not the handle we are looking for - that's ok, skip it */ + return DWARF_CB_OK; + + /* Iterate over all program headers in that ELF object. These will have been copied by + * the kernel verbatim when the core file is generated. 
*/ + for (size_t i = 0; i < n_program_headers; ++i) { + size_t note_offset = 0, name_offset, desc_offset; + GElf_Phdr mem, *program_header; + GElf_Nhdr note_header; + Elf_Data *data; + + /* Package metadata is in PT_NOTE headers. */ + program_header = gelf_getphdr(elf, i, &mem); + if (!program_header || program_header->p_type != PT_NOTE) + continue; + + /* Fortunately there is an iterator we can use to walk over the + * elements of a PT_NOTE program header. We are interested in the + * note whose type is ELF_PACKAGE_METADATA_ID. */ + data = elf_getdata_rawchunk(elf, + program_header->p_offset, + program_header->p_filesz, + ELF_T_NHDR); + + while (note_offset < data->d_size && + (note_offset = gelf_getnote(data, note_offset, &note_header, &name_offset, &desc_offset)) > 0) { + const char *note_name = (const char *)data->d_buf + name_offset; + const char *payload = (const char *)data->d_buf + desc_offset; + + if (note_header.n_namesz == 0 || note_header.n_descsz == 0) + continue; + + /* Package metadata might have different owners, but the + * magic ID is always the same. */ + if (note_header.n_type == ELF_PACKAGE_METADATA_ID) { + _cleanup_(json_variant_unrefp) JsonVariant *v = NULL, *w = NULL; + char *name_key = NULL; + + r = json_parse(payload, 0, &v, NULL, NULL); + if (r < 0) { + log_error_errno(r, "json_parse on %s failed: %m", payload); + return DWARF_CB_ABORT; + } + + /* First pretty-print to the buffer, so that the metadata goes as + * plaintext in the journal. */ + fprintf(c->f, "Metadata for module %s owned by %s found: ", + name, note_name); + json_variant_dump(v, JSON_FORMAT_NEWLINE|JSON_FORMAT_PRETTY, c->f, NULL); + fputc('\n', c->f); + + /* Secondly, if we have a build-id, merge it in the same JSON object + * so that it appears all nicely together in the logs/metadata. 
*/ + if (id_json) { + r = json_variant_merge(&v, id_json); + if (r < 0) { + log_error_errno(r, "json_variant_merge of package meta with buildid failed: %m"); + return DWARF_CB_ABORT; + } + } + + /* Then we build a new object using the module name as the key, and merge it + * with the previous parses, so that in the end it all fits together in a single + * JSON blob. */ + r = json_build(&w, JSON_BUILD_OBJECT(JSON_BUILD_PAIR(name, JSON_BUILD_VARIANT(v)))); + if (r < 0) { + log_error_errno(r, "Failed to build JSON object: %m"); + return DWARF_CB_ABORT; + } + r = json_variant_merge(c->package_metadata, w); + if (r < 0) { + log_error_errno(r, "json_variant_merge of package meta with buildid failed: %m"); + return DWARF_CB_ABORT; + } + + /* Finally stash the name, so we avoid double visits. */ + name_key = strdup(name); + if (!name_key) { + log_oom(); + return DWARF_CB_ABORT; + } + r = set_ensure_consume(c->modules, &string_hash_ops, name_key); + if (r < 0) { + log_error_errno(r, "set_ensure_consume failed: %m"); + return DWARF_CB_ABORT; + } + + return DWARF_CB_OK; + } + } + } + + /* Didn't find package metadata for this module - that's ok, just go to the next. */ + return DWARF_CB_OK; +} + +static int module_callback(Dwfl_Module *mod, void **userdata, const char *name, Dwarf_Addr start, void *arg) { + _cleanup_(json_variant_unrefp) JsonVariant *id_json = NULL; struct stack_context *c = arg; - GElf_Addr id_vaddr; + size_t n_program_headers; + GElf_Addr id_vaddr, bias; const unsigned char *id; - int id_len; + int id_len, r; + Elf *elf; assert(mod); assert(c); @@ -127,26 +241,85 @@ static int module_callback (Dwfl_Module *mod, void **userdata, const char *name, if (!name) name = "(unnamed)"; /* For logging purposes */ - fprintf(c->f, "Found module %s", name); - /* We are iterating on each "module", which is what dwfl calls ELF objects contained in the * core file, and extracting the build-id first and then the package metadata. 
* We proceed in a best-effort fashion - not all ELF objects might contain both or either. * The build-id is easy, as libdwfl parses it during the dwfl_core_file_report() call and * stores it separately in an internal library struct. */ id_len = dwfl_module_build_id(mod, &id, &id_vaddr); - if (id_len == 0) { - fprintf(c->f, " without build-id\n"); - return DWARF_CB_OK; + if (id_len <= 0) { + /* If we don't find a build-id, note it in the journal message, and try + * anyway to find the package metadata. It's unlikely to have the latter + * without the former, but there's no hard rule. */ + fprintf(c->f, "Found module %s without build-id\n", name); + } else { + _cleanup_free_ char *id_hex = NULL, *id_hex_prefixed = NULL; + + id_hex = hexmem(id, id_len); + if (!id_hex) { + log_oom(); + return DWARF_CB_ABORT; + } + + fprintf(c->f, "Found module %s with build-id: %s\n", name, id_hex); + + /* We will later parse package metadata json and pass it to our caller. Prepare the + * build-id in json format too, so that it can be appended and parsed cleanly. It + * will then be added as metadata to the journal message with the stack trace. */ + id_hex_prefixed = strjoin("{\"buildid\":\"", id_hex, "\"}"); + if (!id_hex_prefixed) { + log_oom(); + return DWARF_CB_ABORT; + } + r = json_parse(id_hex_prefixed, 0, &id_json, NULL, NULL); + if (r < 0) { + log_error_errno(r, "json_parse on %s failed: %m", id_hex_prefixed); + return DWARF_CB_ABORT; + } } - id_hex = hexmem(id, id_len); - if (!id_hex) { - fprintf(c->f, "\n"); - return DWARF_CB_ABORT; + /* The .note.package metadata is more difficult. From the module, we need to get a reference + * to the ELF object first. We might be lucky and just get it from elfutils. */ + elf = dwfl_module_getelf(mod, &bias); + if (elf) + return parse_package_metadata(name, id_json, elf, c); + + /* We did not get the ELF object. 
That is likely because we didn't get direct + * access to the executable, and the version of elfutils does not yet support + * parsing it out of the core file directly. + * So fall back to manual extraction - get the PT_LOAD segment from the core, + * and if it's the right one we can interpret it as an Elf object, and parse + * its notes manually. */ + + r = elf_getphdrnum(c->elf, &n_program_headers); + if (r < 0) { + log_warning("Could not parse number of program headers from core file: %s", + elf_errmsg(-1)); /* -1 retrieves the most recent error */ + return DWARF_CB_OK; } - fprintf(c->f, " with build-id: %s\n", id_hex); + for (size_t i = 0; i < n_program_headers; ++i) { + GElf_Phdr mem, *program_header; + Elf_Data *data; + + /* The core file stores the ELF files in the PT_LOAD segment. */ + program_header = gelf_getphdr(c->elf, i, &mem); + if (!program_header || program_header->p_type != PT_LOAD) + continue; + + /* Now get a usable Elf reference, and parse the notes from it. */ + data = elf_getdata_rawchunk(c->elf, + program_header->p_offset, + program_header->p_filesz, + ELF_T_NHDR); + + Elf *memelf = elf_memory(data->d_buf, data->d_size); + if (!memelf) + continue; + r = parse_package_metadata(name, id_json, memelf, c); + if (r != DWARF_CB_OK) + return r; + } return DWARF_CB_OK; } @@ -159,7 +332,12 @@ static int parse_core(int fd, const char *executable, char **ret) { .find_debuginfo = dwfl_standard_find_debuginfo, }; - struct stack_context c = {}; + _cleanup_(json_variant_unrefp) JsonVariant *package_metadata = NULL; + _cleanup_(set_freep) Set *modules = NULL; + struct stack_context c = { + .package_metadata = &package_metadata, + .modules = &modules, + }; char *buf = NULL; size_t sz = 0; int r; diff --git a/src/coredump/stacktrace.h b/src/coredump/stacktrace.h index 7f25bf1a6b1..daeb38bf38e 100644 --- a/src/coredump/stacktrace.h +++ b/src/coredump/stacktrace.h @@ -1,4 +1,6 @@ /* SPDX-License-Identifier: LGPL-2.1-or-later */ #pragma once +#include "json.h" + 
void coredump_parse_core(int fd, const char *executable, char **ret); -- 2.47.3