From a043f0a060ceaf466eaef621c68d6239c1dc4495 Mon Sep 17 00:00:00 2001 From: Julian Seward Date: Mon, 15 Aug 2011 09:42:34 +0000 Subject: [PATCH] Remove the assumption, in m_debuginfo, that each address is associated with only one symbol. Instead, allow an address to have arbitrarily many names. This reflects reality better, particularly for systemy libraries such as glibc and ld.so, and is background work needed for fixing #275284. This is not in itself a fix for #275284. A followup commit to un-break compilation on OSX will follow shortly. git-svn-id: svn://svn.valgrind.org/valgrind/trunk@11981 --- coregrind/m_debuginfo/debuginfo.c | 60 +++-- coregrind/m_debuginfo/priv_storage.h | 46 +++- coregrind/m_debuginfo/readelf.c | 58 ++--- coregrind/m_debuginfo/readpdb.c | 104 ++++---- coregrind/m_debuginfo/storage.c | 364 ++++++++++++++++++++------- coregrind/m_redir.c | 32 +-- include/pub_tool_debuginfo.h | 22 +- 7 files changed, 467 insertions(+), 219 deletions(-) diff --git a/coregrind/m_debuginfo/debuginfo.c b/coregrind/m_debuginfo/debuginfo.c index dcc2719c66..66ac344931 100644 --- a/coregrind/m_debuginfo/debuginfo.c +++ b/coregrind/m_debuginfo/debuginfo.c @@ -210,12 +210,24 @@ static void free_DebugInfo ( DebugInfo* di ) vg_assert(di != NULL); if (di->filename) ML_(dinfo_free)(di->filename); - if (di->symtab) ML_(dinfo_free)(di->symtab); if (di->loctab) ML_(dinfo_free)(di->loctab); if (di->cfsi) ML_(dinfo_free)(di->cfsi); if (di->cfsi_exprs) VG_(deleteXA)(di->cfsi_exprs); if (di->fpo) ML_(dinfo_free)(di->fpo); + if (di->symtab) { + /* We have to visit all the entries so as to free up any + sec_names arrays that might exist. */ + n = di->symtab_used; + for (i = 0; i < n; i++) { + DiSym* sym = &di->symtab[i]; + if (sym->sec_names) + ML_(dinfo_free)(sym->sec_names); + } + /* and finally .. */ + ML_(dinfo_free)(di->symtab); + } + for (chunk = di->strchunks; chunk != NULL; chunk = next) { next = chunk->next; ML_(dinfo_free)(chunk); @@ -1263,8 +1275,9 @@ Bool get_sym_name ( Bool do_cxx_demangling, Bool do_z_demangling, if (di == NULL) return False; + vg_assert(di->symtab[sno].pri_name); VG_(demangle) ( do_cxx_demangling, do_z_demangling, - di->symtab[sno].name, buf, nbuf ); + di->symtab[sno].pri_name, buf, nbuf ); /* Do the below-main hack */ // To reduce the endless nuisance of multiple different names @@ -1612,12 +1625,27 @@ Bool VG_(lookup_symbol_SLOW)(UChar* sopatt, UChar* name, continue; } for (i = 0; i < si->symtab_used; i++) { - if (0==VG_(strcmp)(name, si->symtab[i].name) + UChar* pri_name = si->symtab[i].pri_name; + tl_assert(pri_name); + if (0==VG_(strcmp)(name, pri_name) && (require_pToc ? si->symtab[i].tocptr : True)) { *pEnt = si->symtab[i].addr; *pToc = si->symtab[i].tocptr; return True; } + UChar** sec_names = si->symtab[i].sec_names; + if (sec_names) { + tl_assert(sec_names[0]); + while (*sec_names) { + if (0==VG_(strcmp)(name, *sec_names) + && (require_pToc ? si->symtab[i].tocptr : True)) { + *pEnt = si->symtab[i].addr; + *pToc = si->symtab[i].tocptr; + return True; + } + sec_names++; + } + } } } return False; @@ -3611,20 +3639,22 @@ Int VG_(DebugInfo_syms_howmany) ( const DebugInfo *si ) void VG_(DebugInfo_syms_getidx) ( const DebugInfo *si, Int idx, - /*OUT*/Addr* avma, - /*OUT*/Addr* tocptr, - /*OUT*/UInt* size, - /*OUT*/HChar** name, - /*OUT*/Bool* isText, - /*OUT*/Bool* isIFunc ) + /*OUT*/Addr* avma, + /*OUT*/Addr* tocptr, + /*OUT*/UInt* size, + /*OUT*/UChar** pri_name, + /*OUT*/UChar*** sec_names, + /*OUT*/Bool* isText, + /*OUT*/Bool* isIFunc ) { vg_assert(idx >= 0 && idx < si->symtab_used); - if (avma) *avma = si->symtab[idx].addr; - if (tocptr) *tocptr = si->symtab[idx].tocptr; - if (size) *size = si->symtab[idx].size; - if (name) *name = (HChar*)si->symtab[idx].name; - if (isText) *isText = si->symtab[idx].isText; - if (isIFunc) *isIFunc = si->symtab[idx].isIFunc; + if (avma) *avma = si->symtab[idx].addr; + if (tocptr) *tocptr = si->symtab[idx].tocptr; + if (size) *size = si->symtab[idx].size; + if (pri_name) *pri_name = si->symtab[idx].pri_name; + if (sec_names) *sec_names = si->symtab[idx].sec_names; + if (isText) *isText = si->symtab[idx].isText; + if (isIFunc) *isIFunc = si->symtab[idx].isIFunc; } diff --git a/coregrind/m_debuginfo/priv_storage.h b/coregrind/m_debuginfo/priv_storage.h index 7921a3f9a0..77fa773fbb 100644 --- a/coregrind/m_debuginfo/priv_storage.h +++ b/coregrind/m_debuginfo/priv_storage.h @@ -45,19 +45,37 @@ /* --------------------- SYMBOLS --------------------- */ -/* A structure to hold an ELF/MachO symbol (very crudely). */ +/* A structure to hold an ELF/MachO symbol (very crudely). Usually + the symbol only has one name, which is stored in ::pri_name, and + ::sec_names is NULL. If there are other names, these are stored in + ::sec_names, which is a NULL terminated vector holding the names. + The vector is allocated in VG_AR_DINFO, the names themselves live + in DebugInfo::strchunks. + + From the point of view of ELF, the primary vs secondary distinction + is artificial: they are all just names associated with the address, + none of which has higher precedence than any other. However, from + the point of view of mapping an address to a name to display to the + user, we need to choose one "preferred" name, and so that might as + well be installed as the pri_name, whilst all others can live in + sec_names[]. This has the convenient side effect that, in the + common case where there is only one name for the address, + sec_names[] does not need to be allocated. +*/ typedef struct { - Addr addr; /* lowest address of entity */ - Addr tocptr; /* ppc64-linux only: value that R2 should have */ - UChar *name; /* name */ - // XXX: this could be shrunk (on 32-bit platforms) by using 31 bits for - // the size and 1 bit for the isText. If you do this, make sure that - // all assignments to isText use 0 or 1 (or True or False), and that a - // positive number larger than 1 is never used to represent True. - UInt size; /* size in bytes */ - Bool isText; - Bool isIFunc; /* symbol is an indirect function? */ + Addr addr; /* lowest address of entity */ + Addr tocptr; /* ppc64-linux only: value that R2 should have */ + UChar* pri_name; /* primary name, never NULL */ + UChar** sec_names; /* NULL, or a NULL term'd array of other names */ + // XXX: this could be shrunk (on 32-bit platforms) by using 30 + // bits for the size and 1 bit each for isText and isIFunc. If you + // do this, make sure that all assignments to the latter two use + // 0 or 1 (or True or False), and that a positive number larger + // than 1 is never used to represent True. + UInt size; /* size in bytes */ + Bool isText; + Bool isIFunc; /* symbol is an indirect function? */ } DiSym; @@ -726,7 +744,11 @@ struct _DebugInfo { /* ------ Adding ------ */ -/* Add a symbol to si's symbol table. */ +/* Add a symbol to si's symbol table. The contents of 'sym' are + copied. It is assumed (and checked) that 'sym' only contains one + name, so there is no auxiliary ::sec_names vector to duplicate. + IOW, the copy is a shallow copy, and there are assertions in place + to ensure that's OK. */ extern void ML_(addSym) ( struct _DebugInfo* di, DiSym* sym ); /* Add a line-number record to a DebugInfo. */ diff --git a/coregrind/m_debuginfo/readelf.c b/coregrind/m_debuginfo/readelf.c index ce876b89dc..141b2555f3 100644 --- a/coregrind/m_debuginfo/readelf.c +++ b/coregrind/m_debuginfo/readelf.c @@ -588,7 +588,7 @@ void read_elf_symtab__normal( Int sym_size; Addr sym_tocptr; Bool from_opd, is_text, is_ifunc; - DiSym risym; + DiSym disym; ElfXX_Sym *sym; if (strtab_img == NULL || symtab_img == NULL) { @@ -621,24 +621,25 @@ void read_elf_symtab__normal( &sym_tocptr, &from_opd, &is_text, &is_ifunc)) { - risym.addr = sym_avma_really; - risym.size = sym_size; - risym.name = ML_(addStr) ( di, sym_name_really, -1 ); - risym.tocptr = sym_tocptr; - risym.isText = is_text; - risym.isIFunc = is_ifunc; - vg_assert(risym.name != NULL); - vg_assert(risym.tocptr == 0); /* has no role except on ppc64-linux */ - ML_(addSym) ( di, &risym ); + disym.addr = sym_avma_really; + disym.tocptr = sym_tocptr; + disym.pri_name = ML_(addStr) ( di, sym_name_really, -1 ); + disym.sec_names = NULL; + disym.size = sym_size; + disym.isText = is_text; + disym.isIFunc = is_ifunc; + vg_assert(disym.pri_name); + vg_assert(disym.tocptr == 0); /* has no role except on ppc64-linux */ + ML_(addSym) ( di, &disym ); if (di->trace_symtab) { VG_(printf)(" rec(%c) [%4ld]: " " val %#010lx, sz %4d %s\n", is_text ? 't' : 'd', i, - risym.addr, - (Int)risym.size, - (HChar*)risym.name + disym.addr, + (Int)disym.size, + (HChar*)disym.pri_name ); } @@ -691,7 +692,7 @@ void read_elf_symtab__ppc64_linux( Int sym_size; Addr sym_tocptr; Bool from_opd, modify_size, modify_tocptr, is_text, is_ifunc; - DiSym risym; + DiSym disym; ElfXX_Sym *sym; OSet *oset; TempSymKey key; @@ -828,24 +829,25 @@ void read_elf_symtab__ppc64_linux( VG_(OSetGen_ResetIter)( oset ); while ( (elem = VG_(OSetGen_Next)(oset)) ) { - risym.addr = elem->key.addr; - risym.size = elem->size; - risym.name = ML_(addStr) ( di, elem->key.name, -1 ); - risym.tocptr = elem->tocptr; - risym.isText = elem->is_text; - risym.isIFunc = elem->is_ifunc; - vg_assert(risym.name != NULL); - - ML_(addSym) ( di, &risym ); + disym.addr = elem->key.addr; + disym.tocptr = elem->tocptr; + disym.pri_name = ML_(addStr) ( di, elem->key.name, -1 ); + disym.sec_names = NULL; + disym.size = elem->size; + disym.isText = elem->is_text; + disym.isIFunc = elem->is_ifunc; + vg_assert(disym.pri_name != NULL); + + ML_(addSym) ( di, &disym ); if (di->trace_symtab) { VG_(printf)(" rec(%c) [%4ld]: " " val %#010lx, toc %#010lx, sz %4d %s\n", - risym.isText ? 't' : 'd', + disym.isText ? 't' : 'd', i, - risym.addr, - risym.tocptr, - (Int) risym.size, - (HChar*)risym.name + disym.addr, + disym.tocptr, + (Int) disym.size, + (HChar*)disym.pri_name ); } i++; diff --git a/coregrind/m_debuginfo/readpdb.c b/coregrind/m_debuginfo/readpdb.c index 8a8fc7f3b7..71ad4b4e65 100644 --- a/coregrind/m_debuginfo/readpdb.c +++ b/coregrind/m_debuginfo/readpdb.c @@ -1285,14 +1285,15 @@ static ULong DEBUG_SnarfCodeView( if (0 /*VG_(needs).data_syms*/) { nmstr = ML_(addStr)(di, symname, sym->data_v1.p_name.namelen); - - vsym.addr = bias + sectp[sym->data_v1.segment-1].VirtualAddress - + sym->data_v1.offset; - vsym.name = nmstr; - vsym.size = sym->data_v1.p_name.namelen; - // FIXME: .namelen is sizeof(.data) including .name[] - vsym.isText = (sym->generic.id == S_PUB_V1); - vsym.isIFunc = False; + vsym.addr = bias + sectp[sym->data_v1.segment-1].VirtualAddress + + sym->data_v1.offset; + vsym.tocptr = 0; + vsym.pri_name = nmstr; + vsym.sec_names = NULL; + vsym.size = sym->data_v1.p_name.namelen; + // FIXME: .namelen is sizeof(.data) including .name[] + vsym.isText = (sym->generic.id == S_PUB_V1); + vsym.isIFunc = False; ML_(addSym)( di, &vsym ); n_syms_read++; } @@ -1310,16 +1311,17 @@ static ULong DEBUG_SnarfCodeView( if (sym->generic.id==S_PUB_V2 /*VG_(needs).data_syms*/) { nmstr = ML_(addStr)(di, symname, k); - - vsym.addr = bias + sectp[sym->data_v2.segment-1].VirtualAddress - + sym->data_v2.offset; - vsym.name = nmstr; - vsym.size = 4000; - // FIXME: data_v2.len is sizeof(.data), - // not size of function! - vsym.isText = !!(IMAGE_SCN_CNT_CODE - & sectp[sym->data_v2.segment-1].Characteristics); - vsym.isIFunc = False; + vsym.addr = bias + sectp[sym->data_v2.segment-1].VirtualAddress + + sym->data_v2.offset; + vsym.tocptr = 0; + vsym.pri_name = nmstr; + vsym.sec_names = NULL; + vsym.size = 4000; + // FIXME: data_v2.len is sizeof(.data), + // not size of function! + vsym.isText = !!(IMAGE_SCN_CNT_CODE + & sectp[sym->data_v2.segment-1].Characteristics); + vsym.isIFunc = False; ML_(addSym)( di, &vsym ); n_syms_read++; } @@ -1343,16 +1345,17 @@ static ULong DEBUG_SnarfCodeView( if (1 /*sym->generic.id==S_PUB_FUNC1_V3 || sym->generic.id==S_PUB_FUNC2_V3*/) { nmstr = ML_(addStr)(di, symname, k); - - vsym.addr = bias + sectp[sym->public_v3.segment-1].VirtualAddress - + sym->public_v3.offset; - vsym.name = nmstr; - vsym.size = 4000; - // FIXME: public_v3.len is not length of the - // .text of the function - vsym.isText = !!(IMAGE_SCN_CNT_CODE - & sectp[sym->data_v2.segment-1].Characteristics); - vsym.isIFunc = False; + vsym.addr = bias + sectp[sym->public_v3.segment-1].VirtualAddress + + sym->public_v3.offset; + vsym.tocptr = 0; + vsym.pri_name = nmstr; + vsym.sec_names = NULL; + vsym.size = 4000; + // FIXME: public_v3.len is not length of the + // .text of the function + vsym.isText = !!(IMAGE_SCN_CNT_CODE + & sectp[sym->data_v2.segment-1].Characteristics); + vsym.isIFunc = False; ML_(addSym)( di, &vsym ); n_syms_read++; } @@ -1378,13 +1381,14 @@ static ULong DEBUG_SnarfCodeView( sym->proc_v1.p_name.namelen); symname[sym->proc_v1.p_name.namelen] = '\0'; nmstr = ML_(addStr)(di, symname, sym->proc_v1.p_name.namelen); - - vsym.addr = bias + sectp[sym->proc_v1.segment-1].VirtualAddress - + sym->proc_v1.offset; - vsym.name = nmstr; - vsym.size = sym->proc_v1.proc_len; - vsym.isText = True; - vsym.isIFunc = False; + vsym.addr = bias + sectp[sym->proc_v1.segment-1].VirtualAddress + + sym->proc_v1.offset; + vsym.tocptr = 0; + vsym.pri_name = nmstr; + vsym.sec_names = NULL; + vsym.size = sym->proc_v1.proc_len; + vsym.isText = True; + vsym.isIFunc = False; if (debug) VG_(message)(Vg_UserMsg, " Adding function %s addr=%#lx length=%d\n", @@ -1399,13 +1403,14 @@ static ULong DEBUG_SnarfCodeView( sym->proc_v2.p_name.namelen); symname[sym->proc_v2.p_name.namelen] = '\0'; nmstr = ML_(addStr)(di, symname, sym->proc_v2.p_name.namelen); - - vsym.addr = bias + sectp[sym->proc_v2.segment-1].VirtualAddress - + sym->proc_v2.offset; - vsym.name = nmstr; - vsym.size = sym->proc_v2.proc_len; - vsym.isText = True; - vsym.isIFunc = False; + vsym.addr = bias + sectp[sym->proc_v2.segment-1].VirtualAddress + + sym->proc_v2.offset; + vsym.tocptr = 0; + vsym.pri_name = nmstr; + vsym.sec_names = NULL; + vsym.size = sym->proc_v2.proc_len; + vsym.isText = True; + vsym.isIFunc = False; if (debug) VG_(message)(Vg_UserMsg, " Adding function %s addr=%#lx length=%d\n", @@ -1422,13 +1427,14 @@ static ULong DEBUG_SnarfCodeView( if (1) { nmstr = ML_(addStr)(di, sym->proc_v3.name, VG_(strlen)(sym->proc_v3.name)); - - vsym.addr = bias + sectp[sym->proc_v3.segment-1].VirtualAddress - + sym->proc_v3.offset; - vsym.name = nmstr; - vsym.size = sym->proc_v3.proc_len; - vsym.isText = 1; - vsym.isIFunc = False; + vsym.addr = bias + sectp[sym->proc_v3.segment-1].VirtualAddress + + sym->proc_v3.offset; + vsym.tocptr = 0; + vsym.pri_name = nmstr; + vsym.sec_names = NULL; + vsym.size = sym->proc_v3.proc_len; + vsym.isText = 1; + vsym.isIFunc = False; ML_(addSym)( di, &vsym ); n_syms_read++; } diff --git a/coregrind/m_debuginfo/storage.c b/coregrind/m_debuginfo/storage.c index 8b4060a8d6..4dbd3bb5fa 100644 --- a/coregrind/m_debuginfo/storage.c +++ b/coregrind/m_debuginfo/storage.c @@ -90,11 +90,22 @@ void ML_(symerr) ( struct _DebugInfo* di, Bool serious, HChar* msg ) /* Print a symbol. */ void ML_(ppSym) ( Int idx, DiSym* sym ) { - VG_(printf)( "%5d: %#8lx .. %#8lx (%d) %s\n", + UChar** sec_names = sym->sec_names; + vg_assert(sym->pri_name); + if (sec_names) + vg_assert(sec_names); + VG_(printf)( "%5d: %#8lx .. %#8lx (%d) %s%s", idx, sym->addr, sym->addr + sym->size - 1, sym->size, - sym->name ); + sym->pri_name, sec_names ? " " : "" ); + if (sec_names) { + while (*sec_names) { + VG_(printf)("%s%s", *sec_names, *(sec_names+1) ? " " : ""); + sec_names++; + } + } + VG_(printf)("\n"); } /* Print a call-frame-info summary. */ @@ -235,13 +246,18 @@ UChar* ML_(addStr) ( struct _DebugInfo* di, UChar* str, Int len ) } -/* Add a symbol to the symbol table. +/* Add a symbol to the symbol table, by copying *sym. 'sym' may only + have one name, so there's no complexities to do with deep vs + shallow copying of the sec_name array. This is checked. */ void ML_(addSym) ( struct _DebugInfo* di, DiSym* sym ) { UInt new_sz, i; DiSym* new_tab; + vg_assert(sym->pri_name != NULL); + vg_assert(sym->sec_names == NULL); + /* Ignore zero-sized syms. */ if (sym->size == 0) return; @@ -259,8 +275,7 @@ void ML_(addSym) ( struct _DebugInfo* di, DiSym* sym ) di->symtab_size = new_sz; } - di->symtab[di->symtab_used] = *sym; - di->symtab_used++; + di->symtab[di->symtab_used++] = *sym; vg_assert(di->symtab_used <= di->symtab_size); } @@ -1102,11 +1117,13 @@ static Int compare_DiSym ( void* va, void* vb ) } -/* Two symbols have the same address. Which name do we prefer? In order: +/* An address is associated with more than one name. Which do we + prefer as the "display" name (that we show the user in stack + traces)? In order: - Prefer "PMPI_" over "MPI_". - - Else, prefer a non-NULL name over a NULL one. + - Else, prefer a non-empty name over an empty one. - Else, prefer a non-whitespace name over an all-whitespace name. @@ -1121,14 +1138,20 @@ static Int compare_DiSym ( void* va, void* vb ) - Else, use alphabetical ordering. - - Otherwise, they must be the same; use the symbol with the lower address. + - Otherwise, they must be the same; use the name with the lower address. Very occasionally this goes wrong (eg. 'memcmp' and 'bcmp' are aliases in glibc, we choose the 'bcmp' symbol because it's shorter, so we can misdescribe memcmp() as bcmp()). This is hard to avoid. It's mentioned in the FAQ file. + + Returned value is True if a_name is preferred, False if b_name is + preferred. */ -static DiSym* prefersym ( struct _DebugInfo* di, DiSym* a, DiSym* b ) +static +Bool preferName ( struct _DebugInfo* di, + UChar* a_name, UChar* b_name, + Addr sym_avma/*exposition only*/ ) { Word cmp; Word vlena, vlenb; /* length without version */ @@ -1137,36 +1160,40 @@ static DiSym* prefersym ( struct _DebugInfo* di, DiSym* a, DiSym* b ) Bool preferA = False; Bool preferB = False; - vg_assert(a->addr == b->addr); + vg_assert(a_name); + vg_assert(b_name); + vg_assert(a_name != b_name); - vlena = VG_(strlen)(a->name); - vlenb = VG_(strlen)(b->name); + vlena = VG_(strlen)(a_name); + vlenb = VG_(strlen)(b_name); -#if defined(VGO_linux) -# define VERSION_CHAR '@' -#elif defined(VGO_darwin) -# define VERSION_CHAR '$' -#else -# error Unknown OS -#endif +# if defined(VGO_linux) +# define VERSION_CHAR '@' +# elif defined(VGO_darwin) +# define VERSION_CHAR '$' +# else +# error Unknown OS +# endif + + vpa = VG_(strchr)(a_name, VERSION_CHAR); + vpb = VG_(strchr)(b_name, VERSION_CHAR); - vpa = VG_(strchr)(a->name, VERSION_CHAR); - vpb = VG_(strchr)(b->name, VERSION_CHAR); +# undef VERSION_CHAR if (vpa) - vlena = vpa - a->name; + vlena = vpa - a_name; if (vpb) - vlenb = vpb - b->name; + vlenb = vpb - b_name; /* MPI hack: prefer PMPI_Foo over MPI_Foo */ - if (0==VG_(strncmp)(a->name, "MPI_", 4) - && 0==VG_(strncmp)(b->name, "PMPI_", 5) - && 0==VG_(strcmp)(a->name, 1+b->name)) { + if (0==VG_(strncmp)(a_name, "MPI_", 4) + && 0==VG_(strncmp)(b_name, "PMPI_", 5) + && 0==VG_(strcmp)(a_name, 1+b_name)) { preferB = True; goto out; } - if (0==VG_(strncmp)(b->name, "MPI_", 4) - && 0==VG_(strncmp)(a->name, "PMPI_", 5) - && 0==VG_(strcmp)(b->name, 1+a->name)) { + if (0==VG_(strncmp)(b_name, "MPI_", 4) + && 0==VG_(strncmp)(a_name, "PMPI_", 5) + && 0==VG_(strcmp)(b_name, 1+a_name)) { preferA = True; goto out; } @@ -1183,14 +1210,14 @@ static DiSym* prefersym ( struct _DebugInfo* di, DiSym* a, DiSym* b ) Bool blankA = True; Bool blankB = True; Char *s; - s = a->name; + s = a_name; while (*s) { if (!VG_(isspace)(*s++)) { blankA = False; break; } } - s = b->name; + s = b_name; while (*s) { if (!VG_(isspace)(*s++)) { blankB = False; @@ -1224,7 +1251,7 @@ static DiSym* prefersym ( struct _DebugInfo* di, DiSym* a, DiSym* b ) /* Either both versioned or neither is versioned; select them alphabetically */ - cmp = VG_(strcmp)(a->name, b->name); + cmp = VG_(strcmp)(a_name, b_name); if (cmp < 0) { preferA = True; goto out; } @@ -1238,31 +1265,87 @@ static DiSym* prefersym ( struct _DebugInfo* di, DiSym* a, DiSym* b ) well choose the one with the lowest DiSym* address, so as to try and make the comparison mechanism more stable (a la sorting parlance). Also, skip the diagnostic printing in this case. */ - return a <= b ? a : b; + return a_name <= b_name ? True : False; /*NOTREACHED*/ vg_assert(0); out: if (preferA && !preferB) { TRACE_SYMTAB("sym at %#lx: prefer '%s' to '%s'\n", - a->addr, a->name, b->name ); - return a; + sym_avma, a_name, b_name ); + return True; } if (preferB && !preferA) { TRACE_SYMTAB("sym at %#lx: prefer '%s' to '%s'\n", - b->addr, b->name, a->name ); - return b; + sym_avma, b_name, a_name ); + return False; } /*NOTREACHED*/ vg_assert(0); } + +/* Add the names in FROM to the names in TO. */ +static +void add_DiSym_names_to_from ( DebugInfo* di, DiSym* to, DiSym* from ) +{ + vg_assert(to->pri_name); + vg_assert(from->pri_name); + /* Figure out how many names there will be in the new combined + secondary vector. */ + UChar** to_sec = to->sec_names; + UChar** from_sec = from->sec_names; + Word n_new_sec = 1; + if (from_sec) { + while (*from_sec) { + n_new_sec++; + from_sec++; + } + } + if (to_sec) { + while (*to_sec) { + n_new_sec++; + to_sec++; + } + } + if (0) + TRACE_SYMTAB("merge: -> %ld\n", n_new_sec); + /* Create the new sec and copy stuff into it, putting the new + entries at the end. */ + UChar** new_sec = ML_(dinfo_zalloc)( "di.storage.aDntf.1", + (n_new_sec+1) * sizeof(UChar*) ); + from_sec = from->sec_names; + to_sec = to->sec_names; + Word i = 0; + if (to_sec) { + while (*to_sec) { + new_sec[i++] = *to_sec; + to_sec++; + } + } + new_sec[i++] = from->pri_name; + if (from_sec) { + while (*from_sec) { + new_sec[i++] = *from_sec; + from_sec++; + } + } + vg_assert(i == n_new_sec); + vg_assert(new_sec[i] == NULL); + /* If we're replacing an existing secondary vector, free it. */ + if (to->sec_names) { + ML_(dinfo_free)(to->sec_names); + } + to->sec_names = new_sec; +} + + static void canonicaliseSymtab ( struct _DebugInfo* di ) { - Word i, j, n_merged, n_truncated; - Addr s1, s2, e1, e2, p1, p2; - UChar *n1, *n2; - Bool t1, t2, f1, f2; + Word i, j, n_truncated; + Addr sta1, sta2, end1, end2, toc1, toc2; + UChar *pri1, *pri2, **sec1, **sec2; + Bool ist1, ist2, isf1, isf2; # define SWAP(ty,aa,bb) \ do { ty tt = (aa); (aa) = (bb); (bb) = tt; } while (0) @@ -1270,34 +1353,68 @@ static void canonicaliseSymtab ( struct _DebugInfo* di ) if (di->symtab_used == 0) return; + /* Check initial invariants */ + for (i = 0; i < di->symtab_used; i++) { + DiSym* sym = &di->symtab[i]; + vg_assert(sym->pri_name); + vg_assert(!sym->sec_names); + } + + /* Sort by address. */ VG_(ssort)(di->symtab, di->symtab_used, sizeof(*di->symtab), compare_DiSym); cleanup_more: - /* If two symbols have identical address ranges, we pick one - using prefersym() (see it for details). */ - do { + /* If two symbols have identical address ranges, and agree on + .isText and .isIFunc, merge them into a single entry, but + preserve both names, so we end up knowing all the names for that + particular address range. */ + while (1) { + Word r, w, n_merged; n_merged = 0; - j = di->symtab_used; - di->symtab_used = 0; - for (i = 0; i < j; i++) { - if (i < j-1 - && di->symtab[i].addr == di->symtab[i+1].addr - && di->symtab[i].size == di->symtab[i+1].size - ) { - n_merged++; + w = 0; + /* A pass merging entries together */ + for (r = 1; r < di->symtab_used; r++) { + vg_assert(w < r); + if ( di->symtab[w].addr == di->symtab[r].addr + && di->symtab[w].size == di->symtab[r].size + && !!di->symtab[w].isText == !!di->symtab[r].isText + && !!di->symtab[w].isIFunc == !!di->symtab[r].isIFunc) { /* merge the two into one */ - di->symtab[di->symtab_used++] - = *prefersym(di, &di->symtab[i], &di->symtab[i+1]); - i++; + n_merged++; + add_DiSym_names_to_from(di, &di->symtab[w], &di->symtab[r]); + /* and use ::pri_names to indicate this slot is no longer in use */ + di->symtab[r].pri_name = NULL; + if (di->symtab[r].sec_names) { + ML_(dinfo_free)(di->symtab[r].sec_names); + di->symtab[r].sec_names = NULL; + } + /* Completely zap the entry -- paranoia to make it more + likely we'll notice if we inadvertantly use it + again. */ + VG_(memset)(&di->symtab[r], 0, sizeof(DiSym)); } else { - di->symtab[di->symtab_used++] = di->symtab[i]; + w = r; } } TRACE_SYMTAB( "canonicaliseSymtab: %ld symbols merged\n", n_merged); + if (n_merged == 0) + break; + /* Now a pass to squeeze out any unused ones */ + w = 0; + for (r = 0; r < di->symtab_used; r++) { + vg_assert(w <= r); + if (di->symtab[r].pri_name == NULL) + continue; + if (w < r) { + di->symtab[w] = di->symtab[r]; + } + w++; + } + vg_assert(w + n_merged == di->symtab_used); + di->symtab_used = w; } - while (n_merged > 0); /* Detect and "fix" overlapping address ranges. */ n_truncated = 0; @@ -1321,46 +1438,56 @@ static void canonicaliseSymtab ( struct _DebugInfo* di ) } /* Truncate one or the other. */ - s1 = di->symtab[i].addr; - e1 = s1 + di->symtab[i].size - 1; - p1 = di->symtab[i].tocptr; - n1 = di->symtab[i].name; - t1 = di->symtab[i].isText; - f1 = di->symtab[i].isIFunc; - s2 = di->symtab[i+1].addr; - e2 = s2 + di->symtab[i+1].size - 1; - p2 = di->symtab[i+1].tocptr; - n2 = di->symtab[i+1].name; - t2 = di->symtab[i+1].isText; - f2 = di->symtab[i+1].isIFunc; - if (s1 < s2) { - e1 = s2-1; + sta1 = di->symtab[i].addr; + end1 = sta1 + di->symtab[i].size - 1; + toc1 = di->symtab[i].tocptr; + pri1 = di->symtab[i].pri_name; + sec1 = di->symtab[i].sec_names; + ist1 = di->symtab[i].isText; + isf1 = di->symtab[i].isIFunc; + + sta2 = di->symtab[i+1].addr; + end2 = sta2 + di->symtab[i+1].size - 1; + toc2 = di->symtab[i+1].tocptr; + pri2 = di->symtab[i+1].pri_name; + sec2 = di->symtab[i+1].sec_names; + ist2 = di->symtab[i+1].isText; + isf2 = di->symtab[i+1].isIFunc; + + if (sta1 < sta2) { + end1 = sta2 - 1; } else { - vg_assert(s1 == s2); - if (e1 > e2) { - s1 = e2+1; SWAP(Addr,s1,s2); SWAP(Addr,e1,e2); SWAP(Addr,p1,p2); - SWAP(UChar *,n1,n2); SWAP(Bool,t1,t2); + vg_assert(sta1 == sta2); + if (end1 > end2) { + sta1 = end2 + 1; + SWAP(Addr,sta1,sta2); SWAP(Addr,end1,end2); SWAP(Addr,toc1,toc2); + SWAP(UChar*,pri1,pri2); SWAP(UChar**,sec1,sec2); + SWAP(Bool,ist1,ist2); SWAP(Bool,isf1,isf2); } else - if (e1 < e2) { - s2 = e1+1; + if (end1 < end2) { + sta2 = end1 + 1; } else { - /* e1 == e2. Identical addr ranges. We'll eventually wind + /* end1 == end2. Identical addr ranges. We'll eventually wind up back at cleanup_more, which will take care of it. */ } } - di->symtab[i].addr = s1; - di->symtab[i].size = e1 - s1 + 1; - di->symtab[i].tocptr = p1; - di->symtab[i].name = n1; - di->symtab[i].isText = t1; - di->symtab[i].isIFunc = f1; - di->symtab[i+1].addr = s2; - di->symtab[i+1].size = e2 - s2 + 1; - di->symtab[i+1].tocptr = p2; - di->symtab[i+1].name = n2; - di->symtab[i+1].isText = t2; - di->symtab[i+1].isIFunc = f2; - vg_assert(s1 <= s2); + di->symtab[i].addr = sta1; + di->symtab[i].size = end1 - sta1 + 1; + di->symtab[i].tocptr = toc1; + di->symtab[i].pri_name = pri1; + di->symtab[i].sec_names = sec1; + di->symtab[i].isText = ist1; + di->symtab[i].isIFunc = isf1; + + di->symtab[i+1].addr = sta2; + di->symtab[i+1].size = end2 - sta2 + 1; + di->symtab[i+1].tocptr = toc2; + di->symtab[i+1].pri_name = pri2; + di->symtab[i+1].sec_names = sec2; + di->symtab[i+1].isText = ist2; + di->symtab[i+1].isIFunc = isf2; + + vg_assert(sta1 <= sta2); vg_assert(di->symtab[i].size > 0); vg_assert(di->symtab[i+1].size > 0); /* It may be that the i+1 entry now needs to be moved further @@ -1385,7 +1512,62 @@ static void canonicaliseSymtab ( struct _DebugInfo* di ) /* No overlaps. */ vg_assert(di->symtab[i].addr + di->symtab[i].size - 1 < di->symtab[i+1].addr); + /* Names are sane(ish) */ + vg_assert(di->symtab[i].pri_name); + if (di->symtab[i].sec_names) { + vg_assert(di->symtab[i].sec_names[0]); + } + } + + /* For each symbol that has more than one name, use preferName to + select the primary name. This is a complete kludge in that + doing it properly requires making a total ordering on the + candidate names, whilst what we have to work with is an ad-hoc + binary relation (preferName) that certainly doesn't have the + relevant transitivity etc properties that are needed to induce a + legitimate total order. Doesn't matter though if it doesn't + always work right since this is only used to generate names to + show the user. */ + for (i = 0; i < ((Word)di->symtab_used)-1; i++) { + DiSym* sym = &di->symtab[i]; + UChar** sec = sym->sec_names; + if (!sec) + continue; + /* Slow but simple. Copy all the cands into a temp array, + choose the primary name, and copy them all back again. */ + Word n_tmp = 1; + while (*sec) { n_tmp++; sec++; } + j = 0; + UChar** tmp = ML_(dinfo_zalloc)( "di.storage.cS.1", + (n_tmp+1) * sizeof(UChar*) ); + tmp[j++] = sym->pri_name; + sec = sym->sec_names; + while (*sec) { tmp[j++] = *sec; sec++; } + vg_assert(j == n_tmp); + vg_assert(tmp[n_tmp] == NULL); /* because of zalloc */ + /* Choose the most favoured. */ + Word best = 0; + for (j = 1; j < n_tmp; j++) { + if (preferName(di, tmp[best], tmp[j], di->symtab[i].addr)) { + /* best is unchanged */ + } else { + best = j; + } + } + vg_assert(best >= 0 && best < n_tmp); + /* Copy back */ + sym->pri_name = tmp[best]; + UChar** cursor = sym->sec_names; + for (j = 0; j < n_tmp; j++) { + if (j == best) + continue; + *cursor = tmp[j]; + cursor++; + } + vg_assert(*cursor == NULL); + ML_(dinfo_free)( tmp ); } + # undef SWAP } diff --git a/coregrind/m_redir.c b/coregrind/m_redir.c index ef3897789b..e5f72d2651 100644 --- a/coregrind/m_redir.c +++ b/coregrind/m_redir.c @@ -329,7 +329,7 @@ void VG_(redir_notify_new_DebugInfo)( DebugInfo* newsi ) Spec* spec; TopSpec* ts; TopSpec* newts; - HChar* sym_name; + UChar* sym_name_pri; Addr sym_addr, sym_toc; HChar demangled_sopatt[N_DEMANGLED]; HChar demangled_fnpatt[N_DEMANGLED]; @@ -357,8 +357,8 @@ void VG_(redir_notify_new_DebugInfo)( DebugInfo* newsi ) nsyms = VG_(DebugInfo_syms_howmany)( newsi ); for (i = 0; i < nsyms; i++) { VG_(DebugInfo_syms_getidx)( newsi, i, &sym_addr, &sym_toc, - NULL, &sym_name, &isText, NULL ); - ok = VG_(maybe_Z_demangle)( sym_name, demangled_sopatt, N_DEMANGLED, + NULL, &sym_name_pri, NULL, &isText, NULL ); + ok = VG_(maybe_Z_demangle)( sym_name_pri, demangled_sopatt, N_DEMANGLED, demangled_fnpatt, N_DEMANGLED, &isWrap ); /* ignore data symbols */ if (!isText) @@ -366,7 +366,7 @@ void VG_(redir_notify_new_DebugInfo)( DebugInfo* newsi ) if (!ok) { /* It's not a full-scale redirect, but perhaps it is a load-notify fn? Let the load-notify department see it. */ - handle_maybe_load_notifier( newsi_soname, sym_name, sym_addr ); + handle_maybe_load_notifier( newsi_soname, sym_name_pri, sym_addr ); continue; } if (check_ppcTOCs && sym_toc == 0) { @@ -395,10 +395,11 @@ void VG_(redir_notify_new_DebugInfo)( DebugInfo* newsi ) if (check_ppcTOCs) { for (i = 0; i < nsyms; i++) { VG_(DebugInfo_syms_getidx)( newsi, i, &sym_addr, &sym_toc, - NULL, &sym_name, &isText, NULL ); + NULL, &sym_name_pri, NULL, + &isText, NULL ); ok = isText && VG_(maybe_Z_demangle)( - sym_name, demangled_sopatt, N_DEMANGLED, + sym_name_pri, demangled_sopatt, N_DEMANGLED, demangled_fnpatt, N_DEMANGLED, &isWrap ); if (!ok) /* not a redirect. Ignore. */ @@ -526,7 +527,7 @@ void generate_and_add_actives ( Active act; Int nsyms, i; Addr sym_addr; - HChar* sym_name; + UChar* sym_name_pri; /* First figure out which of the specs match the seginfo's soname. Also clear the 'done' bits, so that after the main loop below @@ -548,7 +549,7 @@ void generate_and_add_actives ( nsyms = VG_(DebugInfo_syms_howmany)( di ); for (i = 0; i < nsyms; i++) { VG_(DebugInfo_syms_getidx)( di, i, &sym_addr, NULL, NULL, - &sym_name, &isText, &isIFunc ); + &sym_name_pri, NULL, &isText, &isIFunc ); /* ignore data symbols */ if (!isText) @@ -557,7 +558,7 @@ void generate_and_add_actives ( for (sp = specs; sp; sp = sp->next) { if (!sp->mark) continue; /* soname doesn't match */ - if (VG_(string_match)( sp->from_fnpatt, sym_name )) { + if (VG_(string_match)( sp->from_fnpatt, sym_name_pri )) { /* got a new binding. Add to collection. */ act.from_addr = sym_addr; act.to_addr = sp->to_addr; @@ -1214,16 +1215,17 @@ static void handle_require_text_symbols ( DebugInfo* di ) HChar* fnpatt = fnpatts[i]; Int nsyms = VG_(DebugInfo_syms_howmany)(di); for (j = 0; j < nsyms; j++) { - Bool isText = False; - HChar* sym_name = NULL; + Bool isText = False; + UChar* sym_name_pri = NULL; VG_(DebugInfo_syms_getidx)( di, j, NULL, NULL, - NULL, &sym_name, &isText, NULL ); + NULL, &sym_name_pri, NULL, + &isText, NULL ); /* ignore data symbols */ - if (0) VG_(printf)("QQQ %s\n", sym_name); - vg_assert(sym_name); + if (0) VG_(printf)("QQQ %s\n", sym_name_pri); + vg_assert(sym_name_pri); if (!isText) continue; - if (VG_(string_match)(fnpatt, sym_name)) { + if (VG_(string_match)(fnpatt, sym_name_pri)) { found = True; break; } diff --git a/include/pub_tool_debuginfo.h b/include/pub_tool_debuginfo.h index 22590479b2..53695ade94 100644 --- a/include/pub_tool_debuginfo.h +++ b/include/pub_tool_debuginfo.h @@ -202,18 +202,22 @@ PtrdiffT VG_(DebugInfo_get_text_bias) ( const DebugInfo *di ); const DebugInfo* VG_(next_DebugInfo) ( const DebugInfo *di ); /* Functions for traversing all the symbols in a DebugInfo. _howmany - tells how many there are. _getidx retrieves the n'th, for n in 0 - .. _howmany-1. You may not modify the function name thereby - acquired; if you want to do so, first strdup it. */ + tells how many symbol table entries there are. _getidx retrieves + the n'th entry, for n in 0 .. _howmany-1. You may not modify the + function names thereby acquired; if you want to do so, first strdup + them. The primary name is returned in *pri_name, and *sec_names is + set either to NULL or to a NULL terminated vector containing + pointers to the secondary names. */ Int VG_(DebugInfo_syms_howmany) ( const DebugInfo *di ); void VG_(DebugInfo_syms_getidx) ( const DebugInfo *di, Int idx, - /*OUT*/Addr* avma, - /*OUT*/Addr* tocptr, - /*OUT*/UInt* size, - /*OUT*/HChar** name, - /*OUT*/Bool* isText, - /*OUT*/Bool* isIFunc ); + /*OUT*/Addr* avma, + /*OUT*/Addr* tocptr, + /*OUT*/UInt* size, + /*OUT*/UChar** pri_name, + /*OUT*/UChar*** sec_names, + /*OUT*/Bool* isText, + /*OUT*/Bool* isIFunc ); /* A simple enumeration to describe the 'kind' of various kinds of segments that arise from the mapping of object files. */ -- 2.47.2