From: Petr Machata Date: Tue, 3 Feb 2009 13:58:18 +0000 (+0100) Subject: Do the .debug_loc and .debug_ranges validation after .debug_info X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=22d1c3748286db55c2396bfda57c2a61825f657c;p=thirdparty%2Felfutils.git Do the .debug_loc and .debug_ranges validation after .debug_info * ... as compared to /during/ .debug_info validation, which was the case before. --- diff --git a/src/dwarflint.c b/src/dwarflint.c index 5f87c7cf7..b5854b01e 100644 --- a/src/dwarflint.c +++ b/src/dwarflint.c @@ -403,7 +403,7 @@ static bool read_ctx_read_2ubyte (struct read_ctx *ctx, uint16_t *ret); static bool read_ctx_read_4ubyte (struct read_ctx *ctx, uint32_t *ret); static bool read_ctx_read_8ubyte (struct read_ctx *ctx, uint64_t *ret); static bool read_ctx_read_offset (struct read_ctx *ctx, bool dwarf64, - uint64_t *ret); + uint64_t *ret); static bool read_ctx_read_var (struct read_ctx *ctx, int width, uint64_t *ret); static bool read_ctx_skip (struct read_ctx *ctx, uint64_t len); static bool read_ctx_eof (struct read_ctx *ctx); @@ -501,6 +501,8 @@ struct cu uint64_t base; // DW_AT_low_pc value of CU DIE, 0 if not present. struct addr_record die_addrs; // Addresses where DIEs begin in this CU. struct ref_record die_refs; // DIE references into other CUs from this CU. + struct ref_record loc_refs; // references into .debug_loc from this CU. + struct ref_record range_refs; // references into .debug_ranges from this CU. struct where where; // Where was this section defined. bool has_arange; // Whether we saw arange section pointing to this CU. bool has_pubnames; // Likewise for pubnames. @@ -516,28 +518,31 @@ static struct cu *cu_find_cu (struct cu *cu_chain, uint64_t offset); static struct cu *check_debug_info_structural (struct read_ctx *ctx, struct abbrev_table *abbrev_chain, - Elf_Data *strings, - Elf_Data *loc, - Elf_Data *ranges); + Elf_Data *strings); + static bool check_cu_structural (struct read_ctx *ctx, struct cu *const cu, struct abbrev_table *abbrev_chain, Elf_Data *strings, - Elf_Data *loc, - Elf_Data *ranges, bool dwarf_64, struct ref_record *die_refs, - struct addr_record *loc_addrs, - struct addr_record *ranges_addrs, - struct coverage *strings_coverage, - struct coverage *loc_coverage, - struct coverage *ranges_coverage); + struct coverage *strings_coverage); + static bool check_aranges_structural (struct read_ctx *ctx, struct cu *cu_chain); + static bool check_pub_structural (struct read_ctx *ctx, struct cu *cu_chain, enum section_id sec); +static void check_location_expression (struct read_ctx *ctx, + struct where *wh, + bool addr_64); + +static bool check_loc_or_range_structural (struct read_ctx *ctx, + struct cu *cu_chain, + enum section_id sec); + const char * where_fmt (const struct where *wh, char *ptr) @@ -745,19 +750,32 @@ process_file (int fd __attribute__((unused)), { read_ctx_init (&ctx, dwarf, info_data); cu_chain = check_debug_info_structural (&ctx, abbrev_chain, - str_data, loc_data, - ranges_data); + str_data); } else if (!tolerate_nodebug) /* Hard error, not a message. We can't debug without this. */ wr_error (NULL, ".debug_info data not found.\n"); } + bool ranges_sound; + if (ranges_data != NULL && cu_chain != NULL) + { + read_ctx_init (&ctx, dwarf, ranges_data); + ranges_sound = check_loc_or_range_structural (&ctx, cu_chain, sec_ranges); + } + else + ranges_sound = false; + + if (loc_data != NULL && cu_chain != NULL) + { + read_ctx_init (&ctx, dwarf, loc_data); + check_loc_or_range_structural (&ctx, cu_chain, sec_loc); + } + if (aranges_data != NULL) { read_ctx_init (&ctx, dwarf, aranges_data); - if (check_aranges_structural (&ctx, cu_chain)) - /* XXX only do this if .debug_ranges are also OK. */ + if (check_aranges_structural (&ctx, cu_chain) && ranges_sound) check_matching_ranges (dwarf); } @@ -1740,9 +1758,7 @@ check_zero_padding (struct read_ctx *ctx, static struct cu * check_debug_info_structural (struct read_ctx *ctx, struct abbrev_table *abbrev_chain, - Elf_Data *strings, - Elf_Data *loc, - Elf_Data *ranges) + Elf_Data *strings) { struct ref_record die_refs; memset (&die_refs, 0, sizeof (die_refs)); @@ -1758,26 +1774,6 @@ check_debug_info_structural (struct read_ctx *ctx, strings_coverage = &strings_coverage_mem; } - struct coverage loc_coverage_mem, *loc_coverage = NULL; - struct addr_record loc_addrs_mem, *loc_addrs = NULL; - if (loc != NULL) - { - coverage_init (&loc_coverage_mem, loc->d_size); - loc_coverage = &loc_coverage_mem; - memset (&loc_addrs_mem, 0, sizeof (loc_addrs_mem)); - loc_addrs = &loc_addrs_mem; - } - - struct coverage ranges_coverage_mem, *ranges_coverage = NULL; - struct addr_record ranges_addrs_mem, *ranges_addrs = NULL; - if (ranges != NULL) - { - coverage_init (&ranges_coverage_mem, ranges->d_size); - ranges_coverage = &ranges_coverage_mem; - memset (&ranges_addrs_mem, 0, sizeof (ranges_addrs_mem)); - ranges_addrs = &ranges_addrs_mem; - } - while (!read_ctx_eof (ctx)) { const unsigned char *cu_begin = ctx->ptr; @@ -1853,11 +1849,9 @@ check_debug_info_structural (struct read_ctx *ctx, cu_ctx.ptr = ctx->ptr; if (!check_cu_structural (&cu_ctx, cur, abbrev_chain, - strings, loc, ranges, + strings, dwarf_64, &die_refs, - loc_addrs, ranges_addrs, - strings_coverage, - loc_coverage, ranges_coverage)) + strings_coverage)) { success = false; break; @@ -1909,480 +1903,171 @@ check_debug_info_structural (struct read_ctx *ctx, coverage_free (strings_coverage); } - if (loc_coverage != NULL) - { - if (success) - coverage_find_holes (loc_coverage, found_hole, - &((struct hole_info) - {sec_loc, mc_loc, address_size, - loc->d_buf})); - coverage_free (loc_coverage); - } - - if (ranges_coverage != NULL) - { - if (success) - coverage_find_holes (ranges_coverage, found_hole, - &((struct hole_info) - {sec_ranges, mc_ranges, address_size, - ranges->d_buf})); - coverage_free (ranges_coverage); - } - - if (loc_addrs != NULL) - addr_record_free (loc_addrs); - if (!success || !references_sound) { cu_free (cu_chain); cu_chain = NULL; } + /* Reverse the chain, so that it's organized "naturally". Has + significant impact on performance when handling loc_ref and + range_ref fields in loc/range validation. */ + struct cu *last = NULL; + for (struct cu *it = cu_chain; it != NULL; ) + { + struct cu *next = it->next; + it->next = last; + last = it; + it = next; + } + cu_chain = last; + return cu_chain; } -/* Operands are passed back as attribute forms. In particular, - DW_FORM_dataX for X-byte operands, DW_FORM_[us]data for - ULEB128/SLEB128 operands, and DW_FORM_addr for 32b/64b operands. - If the opcode takes no operands, 0 is passed. - - Return value is false if we couldn't determine (i.e. invalid - opcode). +/* + Returns: + -1 in case of error + +0 in case of no error, but the chain only consisted of a + terminating zero die. + +1 in case some dies were actually loaded */ -static bool -get_location_opcode_operands (uint8_t opcode, uint8_t *op1, uint8_t *op2) -{ - switch (opcode) - { -#define DEF_DW_OP(OPCODE, OP1, OP2) \ - case OPCODE: *op1 = OP1; *op2 = OP2; return true; -# include "expr_opcodes.h" -#undef DEF_DW_OP - default: - return false; - }; -} - -static void -check_location_expression (struct read_ctx *ctx, struct where *wh, bool addr_64) +static int +read_die_chain (struct read_ctx *ctx, + struct cu *cu, + struct abbrev_table *abbrevs, + Elf_Data *strings, + bool dwarf_64, bool addr_64, + struct ref_record *die_refs, + struct ref_record *die_loc_refs, + struct coverage *strings_coverage) { - struct ref_record oprefs; - memset (&oprefs, 0, sizeof (oprefs)); - - struct addr_record opaddrs; - memset (&opaddrs, 0, sizeof (opaddrs)); + bool got_die = false; + uint64_t sibling_addr = 0; + uint64_t die_off, prev_die_off = 0; + struct abbrev *abbrev, *prev_abbrev = NULL; + struct where where = WHERE (sec_info, NULL); while (!read_ctx_eof (ctx)) { - struct where where = WHERE (sec_locexpr, wh); - uint64_t opcode_off = read_ctx_get_offset (ctx); - where_reset_1 (&where, opcode_off); - addr_record_add (&opaddrs, opcode_off); + where = cu->where; + die_off = read_ctx_get_offset (ctx); + /* Shift reported DIE offset by CU offset, to match the way + readelf reports DIEs. */ + where_reset_2 (&where, die_off + cu->offset); - uint8_t opcode; - if (!read_ctx_read_ubyte (ctx, &opcode)) + uint64_t abbr_code; + + prev_die_off = die_off; + if (!checked_read_uleb128 (ctx, &abbr_code, &where, "abbrev code")) + return -1; + + /* Check sibling value advertised last time through the loop. */ + if (sibling_addr != 0) { - wr_error (&where, ": can't read opcode.\n"); - break; + if (abbr_code == 0) + wr_error (&where, + ": is the last sibling in chain, but has a DW_AT_sibling attribute.\n"); + else if (sibling_addr != die_off) + wr_error (&where, ": This DIE should have had its sibling at 0x%" + PRIx64 ", but it's at 0x%" PRIx64 " instead.\n", + sibling_addr, die_off); + sibling_addr = 0; } + else if (prev_abbrev != NULL && prev_abbrev->has_children) + /* Even if it has children, the DIE can't have a sibling + attribute if it's the last DIE in chain. That's the reason + we can't simply check this when loading abbrevs. */ + wr_message (mc_die_rel | mc_acc_suboptimal | mc_impact_4, &where, + ": This DIE had children, but no DW_AT_sibling attribute.\n"); - uint8_t op1, op2; - if (!get_location_opcode_operands (opcode, &op1, &op2)) + /* The section ended. */ + if (abbr_code == 0) + break; + if (read_ctx_eof (ctx)) { - wr_error (&where, ": can't decode opcode \"%s\".\n", - dwarf_locexpr_opcode_string (opcode)); + wr_error (&where, ": DIE chain not terminated with DIE with zero abbrev code.\n"); break; } -#define READ_FORM(OP, STR, PTR) \ - do { \ - if (OP != 0 \ - && !read_ctx_read_form (ctx, addr_64, (OP), \ - PTR, &where, STR " operand")) \ - { \ - wr_error (&where, ": opcode \"%s\"" \ - ": can't read " STR " operand (form \"%s\").\n", \ - dwarf_locexpr_opcode_string (opcode), \ - dwarf_form_string ((OP))); \ - goto out; \ - } \ - } while (0) + prev_die_off = die_off; + got_die = true; - uint64_t value1, value2; - READ_FORM (op1, "1st", &value1); - READ_FORM (op2, "2st", &value2); -#undef READ_FORM + /* Find the abbrev matching the code. */ + abbrev = abbrev_table_find_abbrev (abbrevs, abbr_code); + if (abbrev == NULL) + { + wr_error (&where, + ": abbrev section at 0x%" PRIx64 + " doesn't contain code %" PRIu64 ".\n", + abbrevs->offset, abbr_code); + return -1; + } + abbrev->used = true; - switch (opcode) + addr_record_add (&cu->die_addrs, cu->offset + die_off); + + /* Attribute values. */ + for (struct abbrev_attrib *it = abbrev->attribs; + it->name != 0; ++it) { - case DW_OP_bra: - case DW_OP_skip: + where.ref = &it->where; + + void record_ref (uint64_t addr, struct where *who, bool local) { - int16_t skip = (uint16_t)value1; + struct ref_record *record = &cu->die_refs; + if (local) + { + assert (ctx->end > ctx->begin); + if (addr > (uint64_t)(ctx->end - ctx->begin)) + { + wr_error (&where, + ": invalid reference outside the CU: 0x%" PRIx64 ".\n", + addr); + return; + } - if (skip == 0) - wr_message (mc_loc | mc_acc_bloat | mc_impact_3, &where, - ": %s with skip 0.\n", - dwarf_locexpr_opcode_string (opcode)); - else if (skip > 0 && !read_ctx_need_data (ctx, (size_t)skip)) - wr_error (&where, ": %s branches out of location expression.\n", - dwarf_locexpr_opcode_string (opcode)); - /* Compare with the offset after the two-byte skip value. */ - else if (skip < 0 && ((uint64_t)-skip) > read_ctx_get_offset (ctx)) - wr_error (&where, - ": %s branches before the beginning of location expression.\n", - dwarf_locexpr_opcode_string (opcode)); - else - ref_record_add (&oprefs, opcode_off + skip, &where); + /* Address holds a CU-local reference, so add CU + offset to turn it into section offset. */ + addr += cu->offset; + record = die_loc_refs; + } - break; + if (record != NULL) + ref_record_add (record, addr, who); } - case DW_OP_const8u: - case DW_OP_const8s: - if (!addr_64) - wr_error (&where, ": %s on 32-bit machine.\n", - dwarf_locexpr_opcode_string (opcode)); - break; + uint8_t form = it->form; + bool indirect = form == DW_FORM_indirect; + if (indirect) + { + uint64_t value; + if (!checked_read_uleb128 (ctx, &value, &where, + "indirect attribute form")) + return -1; - default: - if (!addr_64 - && (opcode == DW_OP_constu - || opcode == DW_OP_consts - || opcode == DW_OP_deref_size - || opcode == DW_OP_plus_uconst) - && (value1 > (uint64_t)(uint32_t)-1)) - wr_error (&where, ": %s with operand %#" PRIx64 " on 32-bit machine.\n", - dwarf_locexpr_opcode_string (opcode), value1); - }; - } + if (!attrib_form_valid (value)) + { + wr_error (&where, + ": invalid indirect form 0x%" PRIx64 ".\n", value); + return -1; + } + form = value; - out: - for (size_t i = 0; i < oprefs.size; ++i) - { - struct ref *ref = oprefs.refs + i; - if (!addr_record_has_addr (&opaddrs, ref->addr)) - wr_error (&ref->who, - ": unresolved reference to opcode at %#" PRIx64 ".\n", - ref->addr); - } + if (it->name == DW_AT_sibling) + switch (check_sibling_form (form)) + { + case -1: + wr_message (mc_die_rel | mc_impact_2, &where, + ": DW_AT_sibling attribute with (indirect) form DW_FORM_ref_addr.\n"); + break; - addr_record_free (&opaddrs); - ref_record_free (&oprefs); -} - -static bool -check_loc_or_range_ref (struct read_ctx *ctx, - struct cu *cu, - struct coverage *coverage, - struct addr_record *addrs, - uint64_t addr, - bool addr_64, - struct where *wh, - enum message_category cat, - enum section_id sec) -{ - assert (sec == sec_loc || sec == sec_ranges); - assert (cat == mc_loc || cat == mc_ranges); - - if (coverage == NULL) - return true; - - if (!read_ctx_skip (ctx, addr)) - { - wr_error (wh, ": invalid reference outside the section " - "%#" PRIx64 ", size only %#tx.\n", - addr, ctx->end - ctx->begin); - return false; - } - - bool retval = true; - bool contains_locations = sec == sec_loc; - - if (coverage_is_covered (coverage, addr)) - { - if (!addr_record_has_addr (addrs, addr)) - { - wr_error (wh, ": reference to 0x%" PRIx64 - " points at the middle of location or range list.\n", addr); - retval = false; - } - else - return true; - } - else - addr_record_add (addrs, addr); - - uint64_t escape = addr_64 ? (uint64_t)-1 : (uint64_t)(uint32_t)-1; - - bool overlap = false; - uint64_t base = cu->base; - while (!read_ctx_eof (ctx)) - { - struct where where = WHERE (sec, wh); - where_reset_1 (&where, read_ctx_get_offset (ctx)); - -#define HAVE_OVERLAP \ - do { \ - wr_error (&where, ": range definitions overlap.\n"); \ - retval = false; \ - overlap = true; \ - } while (0) - - /* begin address */ - uint64_t begin_addr; - if (!overlap - && !coverage_pristine (coverage, - read_ctx_get_offset (ctx), - addr_64 ? 8 : 4)) - HAVE_OVERLAP; - - if (!read_ctx_read_offset (ctx, addr_64, &begin_addr)) - { - wr_error (&where, ": can't read address range beginning.\n"); - return false; - } - - /* end address */ - uint64_t end_addr; - if (!overlap - && !coverage_pristine (coverage, - read_ctx_get_offset (ctx), - addr_64 ? 8 : 4)) - HAVE_OVERLAP; - - if (!read_ctx_read_offset (ctx, addr_64, &end_addr)) - { - wr_error (&where, ": can't read address range ending.\n"); - return false; - } - - bool done = begin_addr == 0 && end_addr == 0; - - if (!done && begin_addr != escape) - { - if (base == (uint64_t)-1) - { - wr_error (&where, ": address range with no base address set.\n"); - base = (uint64_t)-2; /* Only report once. */ - } - - if (end_addr < begin_addr) - wr_message (cat | mc_error, &where, - ": has negative range 0x%" PRIx64 "..0x%" PRIx64 ".\n", - begin_addr, end_addr); - else if (begin_addr == end_addr) - /* 2.6.6: A location list entry [...] whose beginning - and ending addresses are equal has no effect. */ - wr_message (cat | mc_acc_bloat | mc_impact_3, &where, - ": entry covers no range.\n"); - - if (contains_locations) - { - /* location expression length */ - uint16_t len; - if (!overlap - && !coverage_pristine (coverage, - read_ctx_get_offset (ctx), 2)) - HAVE_OVERLAP; - - if (!read_ctx_read_2ubyte (ctx, &len)) - { - wr_error (&where, ": can't read length of location expression.\n"); - return false; - } - - /* location expression itself */ - struct read_ctx expr_ctx; - if (!read_ctx_init_sub (&expr_ctx, ctx, ctx->ptr, ctx->ptr + len)) - { - not_enough: - wr_error (&where, PRI_NOT_ENOUGH, "location expression"); - return false; - } - - uint64_t expr_start = read_ctx_get_offset (ctx); - check_location_expression (&expr_ctx, &where, addr_64); - uint64_t expr_end = read_ctx_get_offset (ctx); - if (!overlap - && !coverage_pristine (coverage, - expr_start, expr_end - expr_start)) - HAVE_OVERLAP; - - if (!read_ctx_skip (ctx, len)) - /* "can't happen" */ - goto not_enough; - } - } - else if (!done) - { - if (end_addr == base) - wr_message (cat | mc_acc_bloat | mc_impact_3, &where, - ": base address selection doesn't change base address" - " (%#" PRIx64 ").\n", base); - else - base = end_addr; - } -#undef HAVE_OVERLAP - - coverage_add (coverage, where.addr1, read_ctx_get_offset (ctx) - 1); - if (done) - break; - } - - return retval; -} - - -/* - Returns: - -1 in case of error - +0 in case of no error, but the chain only consisted of a - terminating zero die. - +1 in case some dies were actually loaded - */ -static int -read_die_chain (struct read_ctx *ctx, - struct cu *cu, - struct abbrev_table *abbrevs, - Elf_Data *strings, - Elf_Data *loc, - Elf_Data *ranges, - bool dwarf_64, bool addr_64, - struct ref_record *die_refs, - struct ref_record *die_loc_refs, - struct addr_record *loc_addrs, - struct addr_record *ranges_addrs, - struct coverage *strings_coverage, - struct coverage *loc_coverage, - struct coverage *ranges_coverage) -{ - bool got_die = false; - uint64_t sibling_addr = 0; - uint64_t die_off, prev_die_off = 0; - struct abbrev *abbrev, *prev_abbrev = NULL; - struct where where = WHERE (sec_info, NULL); - - while (!read_ctx_eof (ctx)) - { - where = cu->where; - die_off = read_ctx_get_offset (ctx); - /* Shift reported DIE offset by CU offset, to match the way - readelf reports DIEs. */ - where_reset_2 (&where, die_off + cu->offset); - - uint64_t abbr_code; - - prev_die_off = die_off; - if (!checked_read_uleb128 (ctx, &abbr_code, &where, "abbrev code")) - return -1; - - /* Check sibling value advertised last time through the loop. */ - if (sibling_addr != 0) - { - if (abbr_code == 0) - wr_error (&where, - ": is the last sibling in chain, but has a DW_AT_sibling attribute.\n"); - else if (sibling_addr != die_off) - wr_error (&where, ": This DIE should have had its sibling at 0x%" - PRIx64 ", but it's at 0x%" PRIx64 " instead.\n", - sibling_addr, die_off); - sibling_addr = 0; - } - else if (prev_abbrev != NULL && prev_abbrev->has_children) - /* Even if it has children, the DIE can't have a sibling - attribute if it's the last DIE in chain. That's the reason - we can't simply check this when loading abbrevs. */ - wr_message (mc_die_rel | mc_acc_suboptimal | mc_impact_4, &where, - ": This DIE had children, but no DW_AT_sibling attribute.\n"); - - /* The section ended. */ - if (abbr_code == 0) - break; - if (read_ctx_eof (ctx)) - { - wr_error (&where, ": DIE chain not terminated with DIE with zero abbrev code.\n"); - break; - } - - prev_die_off = die_off; - got_die = true; - - /* Find the abbrev matching the code. */ - abbrev = abbrev_table_find_abbrev (abbrevs, abbr_code); - if (abbrev == NULL) - { - wr_error (&where, - ": abbrev section at 0x%" PRIx64 - " doesn't contain code %" PRIu64 ".\n", - abbrevs->offset, abbr_code); - return -1; - } - abbrev->used = true; - - addr_record_add (&cu->die_addrs, cu->offset + die_off); - - /* Attribute values. */ - for (struct abbrev_attrib *it = abbrev->attribs; - it->name != 0; ++it) - { - where.ref = &it->where; - - void record_ref (uint64_t addr, struct where *who, bool local) - { - struct ref_record *record = &cu->die_refs; - if (local) - { - assert (ctx->end > ctx->begin); - if (addr > (uint64_t)(ctx->end - ctx->begin)) - { - wr_error (&where, - ": invalid reference outside the CU: 0x%" PRIx64 ".\n", - addr); - return; - } - - /* Address holds a CU-local reference, so add CU - offset to turn it into section offset. */ - addr += cu->offset; - record = die_loc_refs; - } - - if (record != NULL) - ref_record_add (record, addr, who); - } - - uint8_t form = it->form; - bool indirect = form == DW_FORM_indirect; - if (indirect) - { - uint64_t value; - if (!checked_read_uleb128 (ctx, &value, &where, - "indirect attribute form")) - return -1; - - if (!attrib_form_valid (value)) - { - wr_error (&where, - ": invalid indirect form 0x%" PRIx64 ".\n", value); - return -1; - } - form = value; - - if (it->name == DW_AT_sibling) - switch (check_sibling_form (form)) - { - case -1: - wr_message (mc_die_rel | mc_impact_2, &where, - ": DW_AT_sibling attribute with (indirect) form DW_FORM_ref_addr.\n"); - break; - - case -2: - wr_error (&where, - ": DW_AT_sibling attribute with non-reference (indirect) form \"%s\".\n", - dwarf_form_string (value)); - }; - } + case -2: + wr_error (&where, + ": DW_AT_sibling attribute with non-reference (indirect) form \"%s\".\n", + dwarf_form_string (value)); + }; + } bool check_locptr = false; if (is_location_attrib (it->name)) @@ -2551,27 +2236,15 @@ read_die_chain (struct read_ctx *ctx, sibling_addr = value; else if (check_locptr || check_rangeptr) { - Elf_Data *d = check_locptr ? loc : ranges; - struct coverage *cov - = check_locptr ? loc_coverage : ranges_coverage; - struct addr_record *rec - = check_locptr ? loc_addrs : ranges_addrs; - enum message_category cat - = check_locptr ? mc_loc : mc_ranges; - enum section_id sec_id - = check_locptr ? sec_loc : sec_ranges; - if (check_rangeptr && (value % cu->address_size != 0)) wr_message (mc_ranges | mc_impact_4, &where, ": rangeptr value %#" PRIx32 " not aligned to CU address size.\n", value); - struct read_ctx sub_ctx; - read_ctx_init (&sub_ctx, ctx->dbg, d); - check_loc_or_range_ref (&sub_ctx, cu, cov, - rec, value, addr_64, &where, - cat, sec_id); + struct ref_record *ref + = check_locptr ? &cu->loc_refs : &cu->range_refs; + ref_record_add (ref, value, &where); } else if (it->form == DW_FORM_ref4) record_ref (value, &where, true); @@ -2589,27 +2262,15 @@ read_die_chain (struct read_ctx *ctx, sibling_addr = value; else if (check_locptr || check_rangeptr) { - Elf_Data *d = check_locptr ? loc : ranges; - struct coverage *cov - = check_locptr ? loc_coverage : ranges_coverage; - struct addr_record *rec - = check_locptr ? loc_addrs : ranges_addrs; - enum message_category cat - = check_locptr ? mc_loc : mc_ranges; - enum section_id sec_id - = check_locptr ? sec_loc : sec_ranges; - if (check_rangeptr && (value % cu->address_size != 0)) wr_message (mc_ranges | mc_impact_4, &where, ": rangeptr value %#" PRIx64 " not aligned to CU address size.\n", value); - struct read_ctx sub_ctx; - read_ctx_init (&sub_ctx, ctx->dbg, d); - check_loc_or_range_ref (&sub_ctx, cu, cov, - rec, value, addr_64, &where, - cat, sec_id); + struct ref_record *ref + = check_locptr ? &cu->loc_refs : &cu->range_refs; + ref_record_add (ref, value, &where); } else if (it->form == DW_FORM_ref8) record_ref (value, &where, true); @@ -2665,221 +2326,518 @@ read_die_chain (struct read_ctx *ctx, check_location_expression (&sub_ctx, &where, addr_64); } - if (!read_ctx_skip (ctx, length)) - goto cant_read; + if (!read_ctx_skip (ctx, length)) + goto cant_read; + + break; + } + + case DW_FORM_indirect: + wr_error (&where, ": indirect form is again indirect.\n"); + return -1; + + default: + wr_error (&where, + ": internal error: unhandled form 0x%x\n", it->form); + } + } + + if (abbrev->has_children) + { + int st = read_die_chain (ctx, cu, abbrevs, strings, + dwarf_64, addr_64, + die_refs, die_loc_refs, + strings_coverage); + if (st == -1) + return -1; + else if (st == 0) + wr_message (mc_impact_3 | mc_acc_suboptimal | mc_die_rel, + &where, + ": Abbrev has_children, but the chain was empty.\n"); + } + } + + if (sibling_addr != 0) + wr_error (&where, + ": This DIE should have had its sibling at 0x%" + PRIx64 ", but the DIE chain ended.\n", sibling_addr); + + return got_die ? 1 : 0; +} + +static bool +read_version (struct read_ctx *ctx, bool dwarf_64, + uint16_t *versionp, struct where *wh) +{ + bool retval = read_ctx_read_2ubyte (ctx, versionp); + + if (!retval) + { + wr_error (wh, ": can't read version.\n"); + return false; + } + + if (*versionp < 2 || *versionp > 3) + { + wr_error (wh, ": %s version %d.\n", + (*versionp < 2 ? "invalid" : "unsupported"), *versionp); + return false; + } + + if (*versionp == 2 && dwarf_64) + /* Keep going. It's a standard violation, but we may still be + able to read the unit under consideration and do high-level + checks. */ + wr_error (wh, ": invalid 64-bit unit in DWARF 2 format.\n"); + + return true; +} + +static bool +check_cu_structural (struct read_ctx *ctx, + struct cu *const cu, + struct abbrev_table *abbrev_chain, + Elf_Data *strings, + bool dwarf_64, + struct ref_record *die_refs, + struct coverage *strings_coverage) +{ + uint16_t version; + uint64_t abbrev_offset; + uint8_t address_size; + + /* Version. */ + if (!read_version (ctx, dwarf_64, &version, &cu->where)) + return false; + + /* Abbrev offset. */ + if (!read_ctx_read_offset (ctx, dwarf_64, &abbrev_offset)) + { + wr_error (&cu->where, ": can't read abbrev offset.\n"); + return false; + } + + /* Address size. */ + if (!read_ctx_read_ubyte (ctx, &address_size)) + { + wr_error (&cu->where, ": can't read address size.\n"); + return false; + } + if (address_size != 4 && address_size != 8) + { + wr_error (&cu->where, + ": invalid address size: %d (only 4 or 8 allowed).\n", + address_size); + return false; + } + cu->address_size = address_size; + + struct abbrev_table *abbrevs = abbrev_chain; + for (; abbrevs != NULL; abbrevs = abbrevs->next) + if (abbrevs->offset == abbrev_offset) + break; + + if (abbrevs == NULL) + { + wr_error (&cu->where, + ": couldn't find abbrev section with offset 0x%" PRIx64 ".\n", + abbrev_offset); + return false; + } + + struct ref_record die_loc_refs; + memset (&die_loc_refs, 0, sizeof (die_loc_refs)); + + bool retval = true; + if (read_die_chain (ctx, cu, abbrevs, strings, + dwarf_64, address_size == 8, + die_refs, &die_loc_refs, + strings_coverage) >= 0) + { + for (size_t i = 0; i < abbrevs->size; ++i) + if (!abbrevs->abbr[i].used) + wr_message (mc_impact_3 | mc_acc_bloat | mc_abbrevs, &cu->where, + ": abbreviation with code %" PRIu64 " is never used.\n", + abbrevs->abbr[i].code); + + if (!check_die_references (cu, &die_loc_refs)) + retval = false; + } + else + retval = false; + + ref_record_free (&die_loc_refs); + return retval; +} + + +static bool +check_aranges_structural (struct read_ctx *ctx, struct cu *cu_chain) +{ + struct where where = WHERE (sec_aranges, NULL); + bool retval = true; + bool cov_retval = true; + + Elf *elf = ctx->dbg->elf; + struct section_coverage + { + Elf_Scn *scn; + GElf_Shdr shdr; + struct coverage cov; + bool hit; /* true if COV is not pristine. */ + }; + struct coverage_map + { + size_t size; + size_t alloc; + struct section_coverage *scos; + }; + + struct coverage_map coverage_map; + memset (&coverage_map, 0, sizeof (coverage_map)); + GElf_Ehdr ehdr_mem, *ehdr = gelf_getehdr (elf, &ehdr_mem); + if (ehdr == NULL) + { + invalid_elf: + wr_error (&where, + ": couldn't read ELF, skipping coverage analysis.\n"); + retval = false; + } + else + for (size_t i = 0; i < ehdr->e_shnum; ++i) + { + Elf_Scn *scn = elf_getscn (elf, i); + if (scn == NULL) + goto invalid_elf; + + GElf_Shdr shdr_mem, *shdr = gelf_getshdr (scn, &shdr_mem); + if (shdr == NULL) + goto invalid_elf; + + if ((shdr->sh_flags & SHF_ALLOC) && (shdr->sh_flags & SHF_EXECINSTR)) + { + REALLOC (&coverage_map, scos); + struct section_coverage *sco + = coverage_map.scos + coverage_map.size++; + sco->scn = scn; + sco->shdr = *shdr; + coverage_init (&sco->cov, shdr->sh_size); + sco->hit = false; + } + } + + while (!read_ctx_eof (ctx)) + { + where_reset_1 (&where, read_ctx_get_offset (ctx)); + const unsigned char *atab_begin = ctx->ptr; + + /* Size. */ + uint32_t size32; + uint64_t size; + bool dwarf_64; + if (!read_ctx_read_4ubyte (ctx, &size32)) + { + wr_error (&where, ": can't read table length.\n"); + return false; + } + if (!read_size_extra (ctx, size32, &size, &dwarf_64, &where)) + return false; + + struct read_ctx sub_ctx; + const unsigned char *atab_end = ctx->ptr + size; + if (!read_ctx_init_sub (&sub_ctx, ctx, atab_begin, atab_end)) + { + not_enough: + wr_error (&where, PRI_NOT_ENOUGH, "next table"); + return false; + } + + sub_ctx.ptr = ctx->ptr; + + /* Version. */ + uint16_t version; + if (!read_version (&sub_ctx, dwarf_64, &version, &where)) + { + retval = false; + goto next; + } + + /* CU offset. */ + uint64_t cu_off; + if (!read_ctx_read_offset (&sub_ctx, dwarf_64, &cu_off)) + { + wr_error (&where, ": can't read debug info offset.\n"); + retval = false; + goto next; + } + struct cu *cu = NULL; + if (cu_chain != NULL && (cu = cu_find_cu (cu_chain, cu_off)) == NULL) + wr_error (&where, ": unresolved reference to " PRI_CU ".\n", cu_off); + if (cu != NULL) + { + where.ref = &cu->where; + if (cu->has_arange) + wr_message (mc_impact_2 | mc_aranges, &where, + ": there has already been arange section for this CU.\n"); + else + cu->has_arange = true; + } + + /* Address size. */ + uint8_t address_size; + if (!read_ctx_read_ubyte (&sub_ctx, &address_size)) + { + wr_error (&where, ": can't read address size.\n"); + retval = false; + goto next; + } + if (cu != NULL) + { + if (address_size != cu->address_size) + { + wr_error (&where, + ": address size %d doesn't match referred CU.\n", + address_size); + retval = false; + } + } + /* Try to parse it anyway, unless the address size is wacky. */ + else if (address_size != 4 && address_size != 8) + { + wr_error (&where, ": invalid address size: %d.\n", address_size); + retval = false; + goto next; + } - break; - } + /* Segment size. */ + uint8_t segment_size; + if (!read_ctx_read_ubyte (&sub_ctx, &segment_size)) + { + wr_error (&where, ": can't read unit segment size.\n"); + retval = false; + goto next; + } + if (segment_size != 0) + { + wr_warning (&where, ": dwarflint can't handle segment_size != 0.\n"); + retval = false; + goto next; + } - case DW_FORM_indirect: - wr_error (&where, ": indirect form is again indirect.\n"); - return -1; - default: - wr_error (&where, - ": internal error: unhandled form 0x%x\n", it->form); + /* 7.20: The first tuple following the header in each set begins + at an offset that is a multiple of the size of a single tuple + (that is, twice the size of an address). The header is + padded, if necessary, to the appropriate boundary. */ + const uint8_t tuple_size = 2 * address_size; + uint64_t off = read_ctx_get_offset (&sub_ctx); + if ((off % tuple_size) != 0) + { + uint64_t noff = ((off / tuple_size) + 1) * tuple_size; + for (uint64_t i = off; i < noff; ++i) + { + uint8_t c; + if (!read_ctx_read_ubyte (&sub_ctx, &c)) + { + wr_error (&where, + ": section ends after the header, but before the first entry.\n"); + retval = false; + goto next; + } + if (c != 0) + wr_message (mc_impact_2 | mc_aranges, &where, + ": non-zero byte at 0x%" PRIx64 + " in padding before the first entry.\n", + read_ctx_get_offset (&sub_ctx)); } } + assert ((read_ctx_get_offset (&sub_ctx) % tuple_size) == 0); - if (abbrev->has_children) + while (!read_ctx_eof (&sub_ctx)) { - int st = read_die_chain (ctx, cu, abbrevs, strings, - loc, ranges, - dwarf_64, addr_64, - die_refs, die_loc_refs, - loc_addrs, ranges_addrs, - strings_coverage, - loc_coverage, ranges_coverage); - if (st == -1) - return -1; - else if (st == 0) - wr_message (mc_impact_3 | mc_acc_suboptimal | mc_die_rel, - &where, - ": Abbrev has_children, but the chain was empty.\n"); - } - } + where_reset_2 (&where, read_ctx_get_offset (&sub_ctx)); + uint64_t address, length; + if (!read_ctx_read_var (&sub_ctx, address_size, &address)) + { + wr_error (&where, ": can't read address field.\n"); + retval = false; + goto next; + } + if (!read_ctx_read_var (&sub_ctx, address_size, &length)) + { + wr_error (&where, ": can't read length field.\n"); + retval = false; + goto next; + } - if (sibling_addr != 0) - wr_error (&where, - ": This DIE should have had its sibling at 0x%" - PRIx64 ", but the DIE chain ended.\n", sibling_addr); + if (address == 0 && length == 0) + break; - return got_die ? 1 : 0; -} + /* Coverage analysis. Skip if we have errors. */ + if (retval) + { + bool found = false; + bool crosses_boundary = false; + uint64_t end = address + length; -static bool -read_version (struct read_ctx *ctx, bool dwarf_64, - uint16_t *versionp, struct where *wh) -{ - bool retval = read_ctx_read_2ubyte (ctx, versionp); + /* This is for analyzing how much of the current range + falls into AX sections. Whatever is left uncovered + doesn't fall into sections that describe IP values. */ + struct coverage range_cov; + coverage_init (&range_cov, length); - if (!retval) - { - wr_error (wh, ": can't read version.\n"); - return false; - } + for (size_t i = 0; i < coverage_map.size; ++i) + { + struct section_coverage *sco = coverage_map.scos + i; + GElf_Shdr *shdr = &sco->shdr; + struct coverage *cov = &sco->cov; - if (*versionp < 2 || *versionp > 3) - { - wr_error (wh, ": %s version %d.\n", - (*versionp < 2 ? "invalid" : "unsupported"), *versionp); - return false; - } + Elf64_Addr s_end = shdr->sh_addr + shdr->sh_size; + if (end < shdr->sh_addr || address >= s_end) + /* no overlap */ + continue; - if (*versionp == 2 && dwarf_64) - /* Keep going. It's a standard violation, but we may still be - able to read the unit under consideration and do high-level - checks. */ - wr_error (wh, ": invalid 64-bit unit in DWARF 2 format.\n"); + if (found && !crosses_boundary) + { + /* While probably not an error, it's very suspicious. */ + wr_message (mc_aranges | mc_impact_2, &where, + ": arange crosses section boundaries.\n"); + cov_retval = false; + crosses_boundary = true; + } - return true; -} + found = true; -static bool -check_cu_structural (struct read_ctx *ctx, - struct cu *const cu, - struct abbrev_table *abbrev_chain, - Elf_Data *strings, - Elf_Data *loc, - Elf_Data *ranges, - bool dwarf_64, - struct ref_record *die_refs, - struct addr_record *loc_addrs, - struct addr_record *ranges_addrs, - struct coverage *strings_coverage, - struct coverage *loc_coverage, - struct coverage *ranges_coverage) -{ - uint16_t version; - uint64_t abbrev_offset; - uint8_t address_size; + uint64_t cov_begin + = address < shdr->sh_addr ? 0 : address - shdr->sh_addr; + uint64_t cov_end + = (end < s_end ? end - shdr->sh_addr + : shdr->sh_size) - 1; /* -1 because coverage + endpoint is inclusive. */ - /* Version. */ - if (!read_version (ctx, dwarf_64, &version, &cu->where)) - return false; + uint64_t r_cov_begin = cov_begin + shdr->sh_addr - address; + uint64_t r_cov_end = cov_end + shdr->sh_addr - address; - /* Abbrev offset. */ - if (!read_ctx_read_offset (ctx, dwarf_64, &abbrev_offset)) - { - wr_error (&cu->where, ": can't read abbrev offset.\n"); - return false; - } + if (!coverage_pristine (cov, cov_begin, cov_end - cov_begin)) + /* Not a show stopper, this shouldn't derain high-level. ou*/ + wr_message (mc_aranges | mc_impact_2 | mc_error, &where, + ": address range %#" PRIx64 "..%#" PRIx64 + " overlaps with another one defined earlier.\n", + address, end); - /* Address size. */ - if (!read_ctx_read_ubyte (ctx, &address_size)) - { - wr_error (&cu->where, ": can't read address size.\n"); - return false; - } - if (address_size != 4 && address_size != 8) - { - wr_error (&cu->where, - ": invalid address size: %d (only 4 or 8 allowed).\n", - address_size); - return false; + /* Section coverage... */ + coverage_add (cov, cov_begin, cov_end); + sco->hit = true; + + /* And range coverage... */ + coverage_add (&range_cov, r_cov_begin, r_cov_end); + } + + if (!found) + { + /* Not a show stopper. */ + wr_error (&where, + ": couldn't find a section that the range %#" + PRIx64 "..%#" PRIx64 " covers.\n", address, end); + continue; + } + else + { + void range_hole (uint64_t h_begin, uint64_t h_end, + void *user __attribute__ ((unused))) + { + wr_error (&where, + ": portion %#" PRIx64 "..%#" PRIx64 + ", of the range %#" PRIx64 "..%#" PRIx64 + " doesn't fall into any ALLOC & EXEC section.\n", + h_begin + address, h_end + address, + address, end); + } + coverage_find_holes (&range_cov, range_hole, NULL); + } + } + } + + if (sub_ctx.ptr != sub_ctx.end + && !check_zero_padding (&sub_ctx, mc_pubtables, + &WHERE (where.section, NULL))) + { + wr_message_padding_n0 (mc_pubtables | mc_error, + &WHERE (where.section, NULL), + read_ctx_get_offset (&sub_ctx), size); + retval = false; + } + + next: + if (!read_ctx_skip (ctx, size)) + /* A "can't happen" error. */ + goto not_enough; } - cu->address_size = address_size; - struct abbrev_table *abbrevs = abbrev_chain; - for (; abbrevs != NULL; abbrevs = abbrevs->next) - if (abbrevs->offset == abbrev_offset) - break; + if (retval) + for (size_t i = 0; i < coverage_map.size; ++i) + { + struct section_coverage *sco = coverage_map.scos + i; + Elf_Data *data = elf_getdata (sco->scn, NULL); + if (data == NULL) + wr_error (&WHERE (sec_aranges, NULL), + ": couldn't read section data, coverage analysis may be inaccurate.\n"); + else if (data->d_buf == NULL) + wr_error (&WHERE (sec_aranges, NULL), + ": data-less section data, coverage analysis may be inaccurate.\n"); - if (abbrevs == NULL) - { - wr_error (&cu->where, - ": couldn't find abbrev section with offset 0x%" PRIx64 ".\n", - abbrev_offset); - return false; - } + void section_hole (uint64_t h_begin, uint64_t h_end, + void *user __attribute__ ((unused))) + { + const char *scnname = elf_strptr (elf, ehdr->e_shstrndx, + sco->shdr.sh_name); - struct ref_record die_loc_refs; - memset (&die_loc_refs, 0, sizeof (die_loc_refs)); + /* We don't expect some sections to be covered. But if they + are at least partially covered, we expect the same + coverage criteria as for .text. */ + if (!sco->hit + && (strcmp (scnname, ".init") == 0 + || strcmp (scnname, ".fini") == 0 + || strcmp (scnname, ".plt") == 0)) + return; - bool retval = true; - if (read_die_chain (ctx, cu, abbrevs, strings, loc, ranges, - dwarf_64, address_size == 8, - die_refs, &die_loc_refs, - loc_addrs, ranges_addrs, - strings_coverage, - loc_coverage, ranges_coverage) >= 0) - { - for (size_t i = 0; i < abbrevs->size; ++i) - if (!abbrevs->abbr[i].used) - wr_message (mc_impact_3 | mc_acc_bloat | mc_abbrevs, &cu->where, - ": abbreviation with code %" PRIu64 " is never used.\n", - abbrevs->abbr[i].code); + uint64_t base = sco->shdr.sh_addr; + if (data != NULL && data->d_buf != NULL) + { + bool zeroes = true; + for (uint64_t j = h_begin; j < h_end; ++j) + if (((char *)data->d_buf)[j] != 0) + { + zeroes = false; + break; + } + if (!zeroes) + return; + } - if (!check_die_references (cu, &die_loc_refs)) - retval = false; - } - else - retval = false; + wr_error (&where, + ": addresses %#" PRIx64 "..%#" PRIx64 + " of section %s are not covered.\n", + h_begin + base, h_end + base, scnname); + } - ref_record_free (&die_loc_refs); - return retval; -} + coverage_find_holes (&sco->cov, section_hole, NULL); + } + return retval && cov_retval; +} static bool -check_aranges_structural (struct read_ctx *ctx, struct cu *cu_chain) +check_pub_structural (struct read_ctx *ctx, struct cu *cu_chain, + enum section_id sec) { - struct where where = WHERE (sec_aranges, NULL); bool retval = true; - bool cov_retval = true; - - Elf *elf = ctx->dbg->elf; - struct section_coverage - { - Elf_Scn *scn; - GElf_Shdr shdr; - struct coverage cov; - bool hit; /* true if COV is not pristine. */ - }; - struct coverage_map - { - size_t size; - size_t alloc; - struct section_coverage *scos; - }; - - struct coverage_map coverage_map; - memset (&coverage_map, 0, sizeof (coverage_map)); - GElf_Ehdr ehdr_mem, *ehdr = gelf_getehdr (elf, &ehdr_mem); - if (ehdr == NULL) - { - invalid_elf: - wr_error (&where, - ": couldn't read ELF, skipping coverage analysis.\n"); - retval = false; - } - else - for (size_t i = 0; i < ehdr->e_shnum; ++i) - { - Elf_Scn *scn = elf_getscn (elf, i); - if (scn == NULL) - goto invalid_elf; - - GElf_Shdr shdr_mem, *shdr = gelf_getshdr (scn, &shdr_mem); - if (shdr == NULL) - goto invalid_elf; - - if ((shdr->sh_flags & SHF_ALLOC) && (shdr->sh_flags & SHF_EXECINSTR)) - { - REALLOC (&coverage_map, scos); - struct section_coverage *sco - = coverage_map.scos + coverage_map.size++; - sco->scn = scn; - sco->shdr = *shdr; - coverage_init (&sco->cov, shdr->sh_size); - sco->hit = false; - } - } while (!read_ctx_eof (ctx)) { + struct where where = WHERE (sec, NULL); where_reset_1 (&where, read_ctx_get_offset (ctx)); - const unsigned char *atab_begin = ctx->ptr; + const unsigned char *set_begin = ctx->ptr; /* Size. */ uint32_t size32; @@ -2894,26 +2852,32 @@ check_aranges_structural (struct read_ctx *ctx, struct cu *cu_chain) return false; struct read_ctx sub_ctx; - const unsigned char *atab_end = ctx->ptr + size; - if (!read_ctx_init_sub (&sub_ctx, ctx, atab_begin, atab_end)) + const unsigned char *set_end = ctx->ptr + size; + if (!read_ctx_init_sub (&sub_ctx, ctx, set_begin, set_end)) { - not_enough: - wr_error (&where, PRI_NOT_ENOUGH, "next table"); + wr_error (&where, PRI_NOT_ENOUGH, "next set"); return false; } - sub_ctx.ptr = ctx->ptr; /* Version. */ uint16_t version; - if (!read_version (&sub_ctx, dwarf_64, &version, &where)) + if (!read_ctx_read_2ubyte (&sub_ctx, &version)) + { + wr_error (&where, ": can't read set version.\n"); + retval = false; + goto next; + } + if (version != 2) { + wr_error (&where, ": %s set version.\n", + version < 2 ? "invalid" : "unsupported"); retval = false; goto next; } /* CU offset. */ - uint64_t cu_off; + uint64_t cu_off = 0; if (!read_ctx_read_offset (&sub_ctx, dwarf_64, &cu_off)) { wr_error (&where, ": can't read debug info offset.\n"); @@ -2926,393 +2890,422 @@ check_aranges_structural (struct read_ctx *ctx, struct cu *cu_chain) if (cu != NULL) { where.ref = &cu->where; - if (cu->has_arange) + bool *has = sec == sec_pubnames + ? &cu->has_pubnames : &cu->has_pubtypes; + if (*has) wr_message (mc_impact_2 | mc_aranges, &where, - ": there has already been arange section for this CU.\n"); + ": there has already been section for this CU.\n"); else - cu->has_arange = true; - } - - /* Address size. */ - uint8_t address_size; - if (!read_ctx_read_ubyte (&sub_ctx, &address_size)) - { - wr_error (&where, ": can't read address size.\n"); - retval = false; - goto next; - } - if (cu != NULL) - { - if (address_size != cu->address_size) - { - wr_error (&where, - ": address size %d doesn't match referred CU.\n", - address_size); - retval = false; - } - } - /* Try to parse it anyway, unless the address size is wacky. */ - else if (address_size != 4 && address_size != 8) - { - wr_error (&where, ": invalid address size: %d.\n", address_size); - retval = false; - goto next; + *has = true; } - /* Segment size. */ - uint8_t segment_size; - if (!read_ctx_read_ubyte (&sub_ctx, &segment_size)) + /* Covered length. */ + uint64_t cu_len; + if (!read_ctx_read_offset (&sub_ctx, dwarf_64, &cu_len)) { - wr_error (&where, ": can't read unit segment size.\n"); + wr_error (&where, ": can't read covered length.\n"); retval = false; goto next; } - if (segment_size != 0) + if (cu != NULL && cu_len != cu->length) { - wr_warning (&where, ": dwarflint can't handle segment_size != 0.\n"); + wr_error (&where, + ": the table covers length %" PRId64 + " but CU has length %" PRId64 ".\n", cu_len, cu->length); retval = false; goto next; } - - /* 7.20: The first tuple following the header in each set begins - at an offset that is a multiple of the size of a single tuple - (that is, twice the size of an address). The header is - padded, if necessary, to the appropriate boundary. */ - const uint8_t tuple_size = 2 * address_size; - uint64_t off = read_ctx_get_offset (&sub_ctx); - if ((off % tuple_size) != 0) - { - uint64_t noff = ((off / tuple_size) + 1) * tuple_size; - for (uint64_t i = off; i < noff; ++i) - { - uint8_t c; - if (!read_ctx_read_ubyte (&sub_ctx, &c)) - { - wr_error (&where, - ": section ends after the header, but before the first entry.\n"); - retval = false; - goto next; - } - if (c != 0) - wr_message (mc_impact_2 | mc_aranges, &where, - ": non-zero byte at 0x%" PRIx64 - " in padding before the first entry.\n", - read_ctx_get_offset (&sub_ctx)); - } - } - assert ((read_ctx_get_offset (&sub_ctx) % tuple_size) == 0); - + /* Records... */ while (!read_ctx_eof (&sub_ctx)) { - where_reset_2 (&where, read_ctx_get_offset (&sub_ctx)); - uint64_t address, length; - if (!read_ctx_read_var (&sub_ctx, address_size, &address)) + uint64_t pair_off = read_ctx_get_offset (&sub_ctx); + where_reset_2 (&where, pair_off); + + uint64_t offset; + if (!read_ctx_read_offset (&sub_ctx, dwarf_64, &offset)) { - wr_error (&where, ": can't read address field.\n"); + wr_error (&where, ": can't read offset field.\n"); retval = false; goto next; } - if (!read_ctx_read_var (&sub_ctx, address_size, &length)) + if (offset == 0) + break; + + if (cu != NULL + && !addr_record_has_addr (&cu->die_addrs, offset + cu->offset)) { - wr_error (&where, ": can't read length field.\n"); + wr_error (&where, + ": unresolved reference to " PRI_DIE ".\n", offset); retval = false; goto next; } - if (address == 0 && length == 0) - break; - - /* Coverage analysis. Skip if we have errors. */ - if (retval) - { - bool found = false; - bool crosses_boundary = false; - uint64_t end = address + length; - - /* This is for analyzing how much of the current range - falls into AX sections. Whatever is left uncovered - doesn't fall into sections that describe IP values. */ - struct coverage range_cov; - coverage_init (&range_cov, length); + uint8_t c; + do + if (!read_ctx_read_ubyte (&sub_ctx, &c)) + { + wr_error (&where, ": can't read symbol name.\n"); + retval = false; + goto next; + } + while (c); + } - for (size_t i = 0; i < coverage_map.size; ++i) - { - struct section_coverage *sco = coverage_map.scos + i; - GElf_Shdr *shdr = &sco->shdr; - struct coverage *cov = &sco->cov; + if (sub_ctx.ptr != sub_ctx.end + && !check_zero_padding (&sub_ctx, mc_pubtables, &WHERE (sec, NULL))) + { + wr_message_padding_n0 (mc_pubtables | mc_error, &WHERE (sec, NULL), + read_ctx_get_offset (&sub_ctx), size); + retval = false; + } - Elf64_Addr s_end = shdr->sh_addr + shdr->sh_size; - if (end < shdr->sh_addr || address >= s_end) - /* no overlap */ - continue; + next: + ctx->ptr += size; + } - if (found && !crosses_boundary) - { - /* While probably not an error, it's very suspicious. */ - wr_message (mc_aranges | mc_impact_2, &where, - ": arange crosses section boundaries.\n"); - cov_retval = false; - crosses_boundary = true; - } + return retval; +} - found = true; - uint64_t cov_begin - = address < shdr->sh_addr ? 0 : address - shdr->sh_addr; - uint64_t cov_end - = (end < s_end ? end - shdr->sh_addr - : shdr->sh_size) - 1; /* -1 because coverage - endpoint is inclusive. */ +/* Operands are passed back as attribute forms. In particular, + DW_FORM_dataX for X-byte operands, DW_FORM_[us]data for + ULEB128/SLEB128 operands, and DW_FORM_addr for 32b/64b operands. + If the opcode takes no operands, 0 is passed. - uint64_t r_cov_begin = cov_begin + shdr->sh_addr - address; - uint64_t r_cov_end = cov_end + shdr->sh_addr - address; + Return value is false if we couldn't determine (i.e. invalid + opcode). + */ +static bool +get_location_opcode_operands (uint8_t opcode, uint8_t *op1, uint8_t *op2) +{ + switch (opcode) + { +#define DEF_DW_OP(OPCODE, OP1, OP2) \ + case OPCODE: *op1 = OP1; *op2 = OP2; return true; +# include "expr_opcodes.h" +#undef DEF_DW_OP + default: + return false; + }; +} - if (!coverage_pristine (cov, cov_begin, cov_end - cov_begin)) - /* Not a show stopper, this shouldn't derain high-level. ou*/ - wr_message (mc_aranges | mc_impact_2 | mc_error, &where, - ": address range %#" PRIx64 "..%#" PRIx64 - " overlaps with another one defined earlier.\n", - address, end); +static void +check_location_expression (struct read_ctx *ctx, struct where *wh, bool addr_64) +{ + struct ref_record oprefs; + memset (&oprefs, 0, sizeof (oprefs)); - /* Section coverage... */ - coverage_add (cov, cov_begin, cov_end); - sco->hit = true; + struct addr_record opaddrs; + memset (&opaddrs, 0, sizeof (opaddrs)); - /* And range coverage... */ - coverage_add (&range_cov, r_cov_begin, r_cov_end); - } + while (!read_ctx_eof (ctx)) + { + struct where where = WHERE (sec_locexpr, wh); + uint64_t opcode_off = read_ctx_get_offset (ctx); + where_reset_1 (&where, opcode_off); + addr_record_add (&opaddrs, opcode_off); - if (!found) - { - /* Not a show stopper. */ - wr_error (&where, - ": couldn't find a section that the range %#" - PRIx64 "..%#" PRIx64 " covers.\n", address, end); - continue; - } - else - { - void range_hole (uint64_t h_begin, uint64_t h_end, - void *user __attribute__ ((unused))) - { - wr_error (&where, - ": portion %#" PRIx64 "..%#" PRIx64 - ", of the range %#" PRIx64 "..%#" PRIx64 - " doesn't fall into any ALLOC & EXEC section.\n", - h_begin + address, h_end + address, - address, end); - } - coverage_find_holes (&range_cov, range_hole, NULL); - } - } + uint8_t opcode; + if (!read_ctx_read_ubyte (ctx, &opcode)) + { + wr_error (&where, ": can't read opcode.\n"); + break; } - if (sub_ctx.ptr != sub_ctx.end - && !check_zero_padding (&sub_ctx, mc_pubtables, - &WHERE (where.section, NULL))) + uint8_t op1, op2; + if (!get_location_opcode_operands (opcode, &op1, &op2)) { - wr_message_padding_n0 (mc_pubtables | mc_error, - &WHERE (where.section, NULL), - read_ctx_get_offset (&sub_ctx), size); - retval = false; + wr_error (&where, ": can't decode opcode \"%s\".\n", + dwarf_locexpr_opcode_string (opcode)); + break; } - next: - if (!read_ctx_skip (ctx, size)) - /* A "can't happen" error. */ - goto not_enough; - } +#define READ_FORM(OP, STR, PTR) \ + do { \ + if (OP != 0 \ + && !read_ctx_read_form (ctx, addr_64, (OP), \ + PTR, &where, STR " operand")) \ + { \ + wr_error (&where, ": opcode \"%s\"" \ + ": can't read " STR " operand (form \"%s\").\n", \ + dwarf_locexpr_opcode_string (opcode), \ + dwarf_form_string ((OP))); \ + goto out; \ + } \ + } while (0) - if (retval) - for (size_t i = 0; i < coverage_map.size; ++i) - { - struct section_coverage *sco = coverage_map.scos + i; - Elf_Data *data = elf_getdata (sco->scn, NULL); - if (data == NULL) - wr_error (&WHERE (sec_aranges, NULL), - ": couldn't read section data, coverage analysis may be inaccurate.\n"); - else if (data->d_buf == NULL) - wr_error (&WHERE (sec_aranges, NULL), - ": data-less section data, coverage analysis may be inaccurate.\n"); + uint64_t value1, value2; + READ_FORM (op1, "1st", &value1); + READ_FORM (op2, "2st", &value2); +#undef READ_FORM - void section_hole (uint64_t h_begin, uint64_t h_end, - void *user __attribute__ ((unused))) + switch (opcode) { - const char *scnname = elf_strptr (elf, ehdr->e_shstrndx, - sco->shdr.sh_name); + case DW_OP_bra: + case DW_OP_skip: + { + int16_t skip = (uint16_t)value1; - /* We don't expect some sections to be covered. But if they - are at least partially covered, we expect the same - coverage criteria as for .text. */ - if (!sco->hit - && (strcmp (scnname, ".init") == 0 - || strcmp (scnname, ".fini") == 0 - || strcmp (scnname, ".plt") == 0)) - return; + if (skip == 0) + wr_message (mc_loc | mc_acc_bloat | mc_impact_3, &where, + ": %s with skip 0.\n", + dwarf_locexpr_opcode_string (opcode)); + else if (skip > 0 && !read_ctx_need_data (ctx, (size_t)skip)) + wr_error (&where, ": %s branches out of location expression.\n", + dwarf_locexpr_opcode_string (opcode)); + /* Compare with the offset after the two-byte skip value. */ + else if (skip < 0 && ((uint64_t)-skip) > read_ctx_get_offset (ctx)) + wr_error (&where, + ": %s branches before the beginning of location expression.\n", + dwarf_locexpr_opcode_string (opcode)); + else + ref_record_add (&oprefs, opcode_off + skip, &where); - uint64_t base = sco->shdr.sh_addr; - if (data != NULL && data->d_buf != NULL) - { - bool zeroes = true; - for (uint64_t j = h_begin; j < h_end; ++j) - if (((char *)data->d_buf)[j] != 0) - { - zeroes = false; - break; - } - if (!zeroes) - return; - } + break; + } - wr_error (&where, - ": addresses %#" PRIx64 "..%#" PRIx64 - " of section %s are not covered.\n", - h_begin + base, h_end + base, scnname); - } + case DW_OP_const8u: + case DW_OP_const8s: + if (!addr_64) + wr_error (&where, ": %s on 32-bit machine.\n", + dwarf_locexpr_opcode_string (opcode)); + break; - coverage_find_holes (&sco->cov, section_hole, NULL); - } + default: + if (!addr_64 + && (opcode == DW_OP_constu + || opcode == DW_OP_consts + || opcode == DW_OP_deref_size + || opcode == DW_OP_plus_uconst) + && (value1 > (uint64_t)(uint32_t)-1)) + wr_error (&where, ": %s with operand %#" PRIx64 " on 32-bit machine.\n", + dwarf_locexpr_opcode_string (opcode), value1); + }; + } + + out: + for (size_t i = 0; i < oprefs.size; ++i) + { + struct ref *ref = oprefs.refs + i; + if (!addr_record_has_addr (&opaddrs, ref->addr)) + wr_error (&ref->who, + ": unresolved reference to opcode at %#" PRIx64 ".\n", + ref->addr); + } - return retval && cov_retval; + addr_record_free (&opaddrs); + ref_record_free (&oprefs); } static bool -check_pub_structural (struct read_ctx *ctx, struct cu *cu_chain, - enum section_id sec) +check_loc_or_range_ref (struct read_ctx *ctx, + struct cu *cu, + struct coverage *coverage, + struct addr_record *addrs, + uint64_t addr, + bool addr_64, + struct where *wh, + enum message_category cat, + enum section_id sec) { + assert (sec == sec_loc || sec == sec_ranges); + assert (cat == mc_loc || cat == mc_ranges); + assert (coverage != NULL); + + if (!read_ctx_skip (ctx, addr)) + { + wr_error (wh, ": invalid reference outside the section " + "%#" PRIx64 ", size only %#tx.\n", + addr, ctx->end - ctx->begin); + return false; + } + bool retval = true; + bool contains_locations = sec == sec_loc; + + if (coverage_is_covered (coverage, addr)) + { + if (!addr_record_has_addr (addrs, addr)) + { + wr_error (wh, ": reference to 0x%" PRIx64 + " points at the middle of location or range list.\n", addr); + retval = false; + } + else + return true; + } + else + addr_record_add (addrs, addr); + + uint64_t escape = addr_64 ? (uint64_t)-1 : (uint64_t)(uint32_t)-1; + bool overlap = false; + uint64_t base = cu->base; while (!read_ctx_eof (ctx)) { struct where where = WHERE (sec, NULL); + where.ref = wh; where_reset_1 (&where, read_ctx_get_offset (ctx)); - const unsigned char *set_begin = ctx->ptr; - /* Size. */ - uint32_t size32; - uint64_t size; - bool dwarf_64; - if (!read_ctx_read_4ubyte (ctx, &size32)) +#define HAVE_OVERLAP \ + do { \ + wr_error (&where, ": range definitions overlap.\n"); \ + retval = false; \ + overlap = true; \ + } while (0) + + /* begin address */ + uint64_t begin_addr; + if (!overlap + && !coverage_pristine (coverage, + read_ctx_get_offset (ctx), + addr_64 ? 8 : 4)) + HAVE_OVERLAP; + + if (!read_ctx_read_offset (ctx, addr_64, &begin_addr)) { - wr_error (&where, ": can't read table length.\n"); + wr_error (&where, ": can't read address range beginning.\n"); return false; } - if (!read_size_extra (ctx, size32, &size, &dwarf_64, &where)) - return false; - struct read_ctx sub_ctx; - const unsigned char *set_end = ctx->ptr + size; - if (!read_ctx_init_sub (&sub_ctx, ctx, set_begin, set_end)) + /* end address */ + uint64_t end_addr; + if (!overlap + && !coverage_pristine (coverage, + read_ctx_get_offset (ctx), + addr_64 ? 8 : 4)) + HAVE_OVERLAP; + + if (!read_ctx_read_offset (ctx, addr_64, &end_addr)) { - wr_error (&where, PRI_NOT_ENOUGH, "next set"); + wr_error (&where, ": can't read address range ending.\n"); return false; } - sub_ctx.ptr = ctx->ptr; - /* Version. */ - uint16_t version; - if (!read_ctx_read_2ubyte (&sub_ctx, &version)) - { - wr_error (&where, ": can't read set version.\n"); - retval = false; - goto next; - } - if (version != 2) - { - wr_error (&where, ": %s set version.\n", - version < 2 ? "invalid" : "unsupported"); - retval = false; - goto next; - } + bool done = begin_addr == 0 && end_addr == 0; - /* CU offset. */ - uint64_t cu_off = 0; - if (!read_ctx_read_offset (&sub_ctx, dwarf_64, &cu_off)) + if (!done && begin_addr != escape) { - wr_error (&where, ": can't read debug info offset.\n"); - retval = false; - goto next; + if (base == (uint64_t)-1) + { + wr_error (&where, ": address range with no base address set.\n"); + base = (uint64_t)-2; /* Only report once. */ + } + + if (end_addr < begin_addr) + wr_message (cat | mc_error, &where, + ": has negative range 0x%" PRIx64 "..0x%" PRIx64 ".\n", + begin_addr, end_addr); + else if (begin_addr == end_addr) + /* 2.6.6: A location list entry [...] whose beginning + and ending addresses are equal has no effect. */ + wr_message (cat | mc_acc_bloat | mc_impact_3, &where, + ": entry covers no range.\n"); + + if (contains_locations) + { + /* location expression length */ + uint16_t len; + if (!overlap + && !coverage_pristine (coverage, + read_ctx_get_offset (ctx), 2)) + HAVE_OVERLAP; + + if (!read_ctx_read_2ubyte (ctx, &len)) + { + wr_error (&where, ": can't read length of location expression.\n"); + return false; + } + + /* location expression itself */ + struct read_ctx expr_ctx; + if (!read_ctx_init_sub (&expr_ctx, ctx, ctx->ptr, ctx->ptr + len)) + { + not_enough: + wr_error (&where, PRI_NOT_ENOUGH, "location expression"); + return false; + } + + uint64_t expr_start = read_ctx_get_offset (ctx); + check_location_expression (&expr_ctx, &where, addr_64); + uint64_t expr_end = read_ctx_get_offset (ctx); + if (!overlap + && !coverage_pristine (coverage, + expr_start, expr_end - expr_start)) + HAVE_OVERLAP; + + if (!read_ctx_skip (ctx, len)) + /* "can't happen" */ + goto not_enough; + } } - struct cu *cu = NULL; - if (cu_chain != NULL && (cu = cu_find_cu (cu_chain, cu_off)) == NULL) - wr_error (&where, ": unresolved reference to " PRI_CU ".\n", cu_off); - if (cu != NULL) + else if (!done) { - where.ref = &cu->where; - bool *has = sec == sec_pubnames - ? &cu->has_pubnames : &cu->has_pubtypes; - if (*has) - wr_message (mc_impact_2 | mc_aranges, &where, - ": there has already been section for this CU.\n"); + if (end_addr == base) + wr_message (cat | mc_acc_bloat | mc_impact_3, &where, + ": base address selection doesn't change base address" + " (%#" PRIx64 ").\n", base); else - *has = true; + base = end_addr; } +#undef HAVE_OVERLAP - /* Covered length. */ - uint64_t cu_len; - if (!read_ctx_read_offset (&sub_ctx, dwarf_64, &cu_len)) - { - wr_error (&where, ": can't read covered length.\n"); - retval = false; - goto next; - } - if (cu != NULL && cu_len != cu->length) - { - wr_error (&where, - ": the table covers length %" PRId64 - " but CU has length %" PRId64 ".\n", cu_len, cu->length); - retval = false; - goto next; - } + coverage_add (coverage, where.addr1, read_ctx_get_offset (ctx) - 1); + if (done) + break; + } - /* Records... */ - while (!read_ctx_eof (&sub_ctx)) - { - uint64_t pair_off = read_ctx_get_offset (&sub_ctx); - where_reset_2 (&where, pair_off); + return retval; +} - uint64_t offset; - if (!read_ctx_read_offset (&sub_ctx, dwarf_64, &offset)) - { - wr_error (&where, ": can't read offset field.\n"); - retval = false; - goto next; - } - if (offset == 0) - break; +static bool +check_loc_or_range_structural (struct read_ctx *ctx, + struct cu *cu_chain, + enum section_id sec) +{ + assert (sec == sec_loc || sec == sec_ranges); + assert (cu_chain != NULL); + assert (ctx != NULL); - if (cu != NULL - && !addr_record_has_addr (&cu->die_addrs, offset + cu->offset)) - { - wr_error (&where, - ": unresolved reference to " PRI_DIE ".\n", offset); - retval = false; - goto next; - } + struct coverage coverage; + coverage_init (&coverage, ctx->data->d_size); - uint8_t c; - do - if (!read_ctx_read_ubyte (&sub_ctx, &c)) - { - wr_error (&where, ": can't read symbol name.\n"); - retval = false; - goto next; - } - while (c); - } + struct addr_record addrs; + memset (&addrs, 0, sizeof (addrs)); - if (sub_ctx.ptr != sub_ctx.end - && !check_zero_padding (&sub_ctx, mc_pubtables, &WHERE (sec, NULL))) + enum message_category cat = sec == sec_loc ? mc_loc : mc_ranges; + bool retval = true; + + for (struct cu *cu = cu_chain; cu != NULL; cu = cu->next) + { + struct ref_record *rec + = sec == sec_loc ? &cu->loc_refs : &cu->range_refs; + for (size_t i = 0; i < rec->size; ++i) { - wr_message_padding_n0 (mc_pubtables | mc_error, &WHERE (sec, NULL), - read_ctx_get_offset (&sub_ctx), size); - retval = false; - } + struct ref *ref = rec->refs + i; - next: - ctx->ptr += size; + struct read_ctx sub_ctx; + read_ctx_init (&sub_ctx, ctx->dbg, ctx->data); + + if (!check_loc_or_range_ref (&sub_ctx, cu, &coverage, &addrs, + ref->addr, cu->address_size == 8, + &ref->who, cat, sec)) + retval = false; + } } + if (retval) + /* We check that all CUs have the same address size when building + the CU chain. So just take the address size of the first CU in + chain. */ + coverage_find_holes (&coverage, found_hole, + &((struct hole_info) + {sec, cat, cu_chain->address_size, + ctx->data->d_buf})); + + coverage_free (&coverage); + addr_record_free (&addrs); + return retval; }